author		Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>	2013-05-15 10:26:50 -0400
committer	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>	2013-05-15 10:26:50 -0400
commit		12e04ffcd93b25dfd726d46338c2ee7d23de556e (patch)
tree		f91479a62805619168994fd3ee55e3ffa23fc24e /net
parent		9eff37a8713939f218ab8bf0dc93f1d67af7b8b4 (diff)
parent		f722406faae2d073cc1d01063d1123c35425939e (diff)
Merge tag 'v3.10-rc1' into stable/for-linus-3.10
Linux 3.10-rc1

* tag 'v3.10-rc1': (12273 commits)
  Linux 3.10-rc1
  [SCSI] qla2xxx: Update firmware link in Kconfig file.
  [SCSI] iscsi class, qla4xxx: fix sess/conn refcounting when find fns are used
  [SCSI] sas: unify the pointlessly separated enums sas_dev_type and sas_device_type
  [SCSI] pm80xx: thermal, sas controller config and error handling update
  [SCSI] pm80xx: NCQ error handling changes
  [SCSI] pm80xx: WWN Modification for PM8081/88/89 controllers
  [SCSI] pm80xx: Changed module name and debug messages update
  [SCSI] pm80xx: Firmware flash memory free fix, with addition of new memory region for it
  [SCSI] pm80xx: SPC new firmware changes for device id 0x8081 alone
  [SCSI] pm80xx: Added SPCv/ve specific hardware functionalities and relevant changes in common files
  [SCSI] pm80xx: MSI-X implementation for using 64 interrupts
  [SCSI] pm80xx: Updated common functions common for SPC and SPCv/ve
  [SCSI] pm80xx: Multiple inbound/outbound queue configuration
  [SCSI] pm80xx: Added SPCv/ve specific ids, variables and modify for SPC
  [SCSI] lpfc: fix up Kconfig dependencies
  [SCSI] Handle MLQUEUE busy response in scsi_send_eh_cmnd
  dm cache: set config value
  dm cache: move config fns
  dm thin: generate event when metadata threshold passed
  ...
Diffstat (limited to 'net')
-rw-r--r--  net/802/garp.c  4
-rw-r--r--  net/802/mrp.c  4
-rw-r--r--  net/8021q/Kconfig  2
-rw-r--r--  net/8021q/vlan.c  97
-rw-r--r--  net/8021q/vlan.h  58
-rw-r--r--  net/8021q/vlan_core.c  80
-rw-r--r--  net/8021q/vlan_dev.c  13
-rw-r--r--  net/8021q/vlan_gvrp.c  4
-rw-r--r--  net/8021q/vlan_mvrp.c  4
-rw-r--r--  net/8021q/vlan_netlink.c  32
-rw-r--r--  net/8021q/vlanproc.c  11
-rw-r--r--  net/9p/trans_virtio.c  48
-rw-r--r--  net/Kconfig  1
-rw-r--r--  net/appletalk/ddp.c  2
-rw-r--r--  net/atm/common.c  2
-rw-r--r--  net/atm/lec.h  2
-rw-r--r--  net/atm/proc.c  2
-rw-r--r--  net/ax25/af_ax25.c  1
-rw-r--r--  net/batman-adv/Kconfig  14
-rw-r--r--  net/batman-adv/Makefile  3
-rw-r--r--  net/batman-adv/bat_iv_ogm.c  5
-rw-r--r--  net/batman-adv/bridge_loop_avoidance.c  2
-rw-r--r--  net/batman-adv/debugfs.c  18
-rw-r--r--  net/batman-adv/distributed-arp-table.c  22
-rw-r--r--  net/batman-adv/gateway_client.c  2
-rw-r--r--  net/batman-adv/hard-interface.c  66
-rw-r--r--  net/batman-adv/hard-interface.h  13
-rw-r--r--  net/batman-adv/main.c  21
-rw-r--r--  net/batman-adv/main.h  17
-rw-r--r--  net/batman-adv/network-coding.c  1822
-rw-r--r--  net/batman-adv/network-coding.h  123
-rw-r--r--  net/batman-adv/originator.c  10
-rw-r--r--  net/batman-adv/packet.h  33
-rw-r--r--  net/batman-adv/routing.c  97
-rw-r--r--  net/batman-adv/send.c  5
-rw-r--r--  net/batman-adv/soft-interface.c  288
-rw-r--r--  net/batman-adv/soft-interface.h  3
-rw-r--r--  net/batman-adv/sysfs.c  22
-rw-r--r--  net/batman-adv/translation-table.c  31
-rw-r--r--  net/batman-adv/types.h  136
-rw-r--r--  net/batman-adv/unicast.c  6
-rw-r--r--  net/batman-adv/vis.c  8
-rw-r--r--  net/bluetooth/a2mp.c  6
-rw-r--r--  net/bluetooth/af_bluetooth.c  49
-rw-r--r--  net/bluetooth/bnep/netdev.c  2
-rw-r--r--  net/bluetooth/bnep/sock.c  6
-rw-r--r--  net/bluetooth/cmtp/capi.c  2
-rw-r--r--  net/bluetooth/cmtp/sock.c  6
-rw-r--r--  net/bluetooth/hci_conn.c  42
-rw-r--r--  net/bluetooth/hci_core.c  901
-rw-r--r--  net/bluetooth/hci_event.c  781
-rw-r--r--  net/bluetooth/hci_sock.c  11
-rw-r--r--  net/bluetooth/hci_sysfs.c  21
-rw-r--r--  net/bluetooth/hidp/core.c  994
-rw-r--r--  net/bluetooth/hidp/hidp.h  67
-rw-r--r--  net/bluetooth/hidp/sock.c  28
-rw-r--r--  net/bluetooth/l2cap_core.c  134
-rw-r--r--  net/bluetooth/l2cap_sock.c  12
-rw-r--r--  net/bluetooth/mgmt.c  703
-rw-r--r--  net/bluetooth/rfcomm/core.c  167
-rw-r--r--  net/bluetooth/rfcomm/sock.c  6
-rw-r--r--  net/bluetooth/sco.c  55
-rw-r--r--  net/bluetooth/smp.c  2
-rw-r--r--  net/bridge/br_device.c  4
-rw-r--r--  net/bridge/br_fdb.c  30
-rw-r--r--  net/bridge/br_if.c  4
-rw-r--r--  net/bridge/br_mdb.c  4
-rw-r--r--  net/bridge/br_multicast.c  2
-rw-r--r--  net/bridge/br_netfilter.c  3
-rw-r--r--  net/bridge/br_netlink.c  21
-rw-r--r--  net/bridge/br_private.h  1
-rw-r--r--  net/bridge/br_stp.c  9
-rw-r--r--  net/bridge/br_stp_if.c  1
-rw-r--r--  net/bridge/br_stp_timer.c  2
-rw-r--r--  net/bridge/br_vlan.c  20
-rw-r--r--  net/bridge/netfilter/ebt_log.c  44
-rw-r--r--  net/bridge/netfilter/ebt_nflog.c  5
-rw-r--r--  net/bridge/netfilter/ebt_ulog.c  132
-rw-r--r--  net/bridge/netfilter/ebtable_broute.c  4
-rw-r--r--  net/bridge/netfilter/ebtables.c  2
-rw-r--r--  net/caif/caif_dev.c  11
-rw-r--r--  net/caif/caif_socket.c  26
-rw-r--r--  net/caif/caif_usb.c  6
-rw-r--r--  net/caif/cfcnfg.c  21
-rw-r--r--  net/caif/cfctrl.c  16
-rw-r--r--  net/caif/cfdbgl.c  2
-rw-r--r--  net/caif/cfdgml.c  2
-rw-r--r--  net/caif/cffrml.c  6
-rw-r--r--  net/caif/cfmuxl.c  6
-rw-r--r--  net/caif/cfpkt_skbuff.c  10
-rw-r--r--  net/caif/cfrfml.c  6
-rw-r--r--  net/caif/cfserl.c  6
-rw-r--r--  net/caif/cfsrvl.c  15
-rw-r--r--  net/caif/cfutill.c  2
-rw-r--r--  net/caif/cfveil.c  2
-rw-r--r--  net/caif/cfvidl.c  2
-rw-r--r--  net/caif/chnl_net.c  10
-rw-r--r--  net/can/af_can.c  30
-rw-r--r--  net/can/bcm.c  2
-rw-r--r--  net/can/gw.c  11
-rw-r--r--  net/can/proc.c  2
-rw-r--r--  net/can/raw.c  5
-rw-r--r--  net/ceph/Makefile  2
-rw-r--r--  net/ceph/auth.c  117
-rw-r--r--  net/ceph/auth_x.c  24
-rw-r--r--  net/ceph/auth_x.h  1
-rw-r--r--  net/ceph/ceph_common.c  7
-rw-r--r--  net/ceph/debugfs.c  4
-rw-r--r--  net/ceph/messenger.c  1019
-rw-r--r--  net/ceph/mon_client.c  7
-rw-r--r--  net/ceph/osd_client.c  1087
-rw-r--r--  net/ceph/osdmap.c  45
-rw-r--r--  net/ceph/snapshot.c  78
-rw-r--r--  net/core/datagram.c  26
-rw-r--r--  net/core/dev.c  136
-rw-r--r--  net/core/dev_addr_lists.c  210
-rw-r--r--  net/core/dst.c  9
-rw-r--r--  net/core/ethtool.c  31
-rw-r--r--  net/core/fib_rules.c  4
-rw-r--r--  net/core/filter.c  5
-rw-r--r--  net/core/flow.c  42
-rw-r--r--  net/core/flow_dissector.c  68
-rw-r--r--  net/core/neighbour.c  57
-rw-r--r--  net/core/net-procfs.c  2
-rw-r--r--  net/core/net-sysfs.c  12
-rw-r--r--  net/core/net_namespace.c  7
-rw-r--r--  net/core/netpoll.c  22
-rw-r--r--  net/core/pktgen.c  54
-rw-r--r--  net/core/rtnetlink.c  182
-rw-r--r--  net/core/scm.c  20
-rw-r--r--  net/core/secure_seq.c  4
-rw-r--r--  net/core/skbuff.c  93
-rw-r--r--  net/core/sock.c  22
-rw-r--r--  net/core/sock_diag.c  33
-rw-r--r--  net/core/utils.c  5
-rw-r--r--  net/dcb/dcbevent.c  1
-rw-r--r--  net/dcb/dcbnl.c  2
-rw-r--r--  net/dccp/ipv4.c  5
-rw-r--r--  net/dccp/ipv6.c  5
-rw-r--r--  net/decnet/dn_dev.c  4
-rw-r--r--  net/decnet/dn_fib.c  203
-rw-r--r--  net/decnet/dn_route.c  43
-rw-r--r--  net/decnet/dn_table.c  45
-rw-r--r--  net/decnet/netfilter/dn_rtmsg.c  12
-rw-r--r--  net/dsa/dsa.c  233
-rw-r--r--  net/ethernet/eth.c  2
-rw-r--r--  net/ieee802154/6lowpan.c  142
-rw-r--r--  net/ieee802154/6lowpan.h  7
-rw-r--r--  net/ieee802154/dgram.c  10
-rw-r--r--  net/ieee802154/netlink.c  8
-rw-r--r--  net/ieee802154/nl-mac.c  25
-rw-r--r--  net/ipv4/Kconfig  7
-rw-r--r--  net/ipv4/Makefile  1
-rw-r--r--  net/ipv4/af_inet.c  16
-rw-r--r--  net/ipv4/arp.c  27
-rw-r--r--  net/ipv4/devinet.c  143
-rw-r--r--  net/ipv4/esp4.c  6
-rw-r--r--  net/ipv4/fib_frontend.c  10
-rw-r--r--  net/ipv4/fib_trie.c  13
-rw-r--r--  net/ipv4/gre.c  15
-rw-r--r--  net/ipv4/icmp.c  4
-rw-r--r--  net/ipv4/inet_connection_sock.c  2
-rw-r--r--  net/ipv4/inet_diag.c  10
-rw-r--r--  net/ipv4/inet_fragment.c  85
-rw-r--r--  net/ipv4/inet_lro.c  5
-rw-r--r--  net/ipv4/ip_fragment.c  45
-rw-r--r--  net/ipv4/ip_gre.c  1517
-rw-r--r--  net/ipv4/ip_input.c  4
-rw-r--r--  net/ipv4/ip_output.c  3
-rw-r--r--  net/ipv4/ip_tunnel.c  1035
-rw-r--r--  net/ipv4/ip_vti.c  42
-rw-r--r--  net/ipv4/ipcomp.c  1
-rw-r--r--  net/ipv4/ipconfig.c  13
-rw-r--r--  net/ipv4/ipip.c  748
-rw-r--r--  net/ipv4/ipmr.c  12
-rw-r--r--  net/ipv4/netfilter.c  15
-rw-r--r--  net/ipv4/netfilter/Kconfig  2
-rw-r--r--  net/ipv4/netfilter/arp_tables.c  1
-rw-r--r--  net/ipv4/netfilter/arptable_filter.c  4
-rw-r--r--  net/ipv4/netfilter/ip_tables.c  10
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c  10
-rw-r--r--  net/ipv4/netfilter/ipt_ULOG.c  134
-rw-r--r--  net/ipv4/netfilter/ipt_rpfilter.c  8
-rw-r--r--  net/ipv4/netfilter/iptable_mangle.c  9
-rw-r--r--  net/ipv4/netfilter/iptable_nat.c  23
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c  1
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c  1
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_proto_icmp.c  9
-rw-r--r--  net/ipv4/netfilter/nf_nat_h323.c  1
-rw-r--r--  net/ipv4/netfilter/nf_nat_pptp.c  2
-rw-r--r--  net/ipv4/netfilter/nf_nat_proto_gre.c  2
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c  2
-rw-r--r--  net/ipv4/ping.c  5
-rw-r--r--  net/ipv4/proc.c  13
-rw-r--r--  net/ipv4/route.c  2
-rw-r--r--  net/ipv4/syncookies.c  7
-rw-r--r--  net/ipv4/sysctl_net_ipv4.c  18
-rw-r--r--  net/ipv4/tcp.c  287
-rw-r--r--  net/ipv4/tcp_input.c  669
-rw-r--r--  net/ipv4/tcp_ipv4.c  125
-rw-r--r--  net/ipv4/tcp_memcontrol.c  3
-rw-r--r--  net/ipv4/tcp_metrics.c  15
-rw-r--r--  net/ipv4/tcp_minisocks.c  51
-rw-r--r--  net/ipv4/tcp_output.c  386
-rw-r--r--  net/ipv4/tcp_timer.c  21
-rw-r--r--  net/ipv4/tcp_westwood.c  2
-rw-r--r--  net/ipv4/udp.c  137
-rw-r--r--  net/ipv4/udp_diag.c  10
-rw-r--r--  net/ipv4/xfrm4_mode_tunnel.c  8
-rw-r--r--  net/ipv6/Kconfig  2
-rw-r--r--  net/ipv6/Makefile  2
-rw-r--r--  net/ipv6/addrconf.c  232
-rw-r--r--  net/ipv6/addrconf_core.c  19
-rw-r--r--  net/ipv6/addrlabel.c  12
-rw-r--r--  net/ipv6/af_inet6.c  7
-rw-r--r--  net/ipv6/datagram.c  20
-rw-r--r--  net/ipv6/icmp.c  41
-rw-r--r--  net/ipv6/inet6_connection_sock.c  10
-rw-r--r--  net/ipv6/ip6_flowlabel.c  11
-rw-r--r--  net/ipv6/ip6_gre.c  62
-rw-r--r--  net/ipv6/ip6_icmp.c  47
-rw-r--r--  net/ipv6/ip6_offload.c  4
-rw-r--r--  net/ipv6/ip6_output.c  7
-rw-r--r--  net/ipv6/ip6_tunnel.c  16
-rw-r--r--  net/ipv6/ip6mr.c  10
-rw-r--r--  net/ipv6/ndisc.c  2
-rw-r--r--  net/ipv6/netfilter.c  12
-rw-r--r--  net/ipv6/netfilter/Kconfig  2
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c  4
-rw-r--r--  net/ipv6/netfilter/ip6t_NPT.c  11
-rw-r--r--  net/ipv6/netfilter/ip6t_REJECT.c  2
-rw-r--r--  net/ipv6/netfilter/ip6t_rpfilter.c  8
-rw-r--r--  net/ipv6/netfilter/ip6table_mangle.c  9
-rw-r--r--  net/ipv6/netfilter/ip6table_nat.c  23
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c  8
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c  7
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c  22
-rw-r--r--  net/ipv6/proc.c  9
-rw-r--r--  net/ipv6/raw.c  9
-rw-r--r--  net/ipv6/reassembly.c  35
-rw-r--r--  net/ipv6/route.c  6
-rw-r--r--  net/ipv6/sit.c  41
-rw-r--r--  net/ipv6/syncookies.c  3
-rw-r--r--  net/ipv6/tcp_ipv6.c  76
-rw-r--r--  net/ipv6/udp.c  34
-rw-r--r--  net/ipv6/udp_offload.c  8
-rw-r--r--  net/ipv6/xfrm6_mode_tunnel.c  7
-rw-r--r--  net/irda/af_irda.c  7
-rw-r--r--  net/irda/ircomm/ircomm_core.c  2
-rw-r--r--  net/irda/ircomm/ircomm_tty.c  2
-rw-r--r--  net/irda/ircomm/ircomm_tty_attach.c  6
-rw-r--r--  net/irda/iriap.c  3
-rw-r--r--  net/irda/irlmp.c  12
-rw-r--r--  net/iucv/af_iucv.c  39
-rw-r--r--  net/l2tp/l2tp_core.c  2
-rw-r--r--  net/l2tp/l2tp_ip6.c  1
-rw-r--r--  net/llc/af_llc.c  2
-rw-r--r--  net/mac80211/cfg.c  235
-rw-r--r--  net/mac80211/chan.c  54
-rw-r--r--  net/mac80211/debugfs_key.c  10
-rw-r--r--  net/mac80211/debugfs_netdev.c  33
-rw-r--r--  net/mac80211/debugfs_sta.c  33
-rw-r--r--  net/mac80211/driver-ops.h  67
-rw-r--r--  net/mac80211/ht.c  52
-rw-r--r--  net/mac80211/ibss.c  175
-rw-r--r--  net/mac80211/ieee80211_i.h  86
-rw-r--r--  net/mac80211/iface.c  177
-rw-r--r--  net/mac80211/key.c  208
-rw-r--r--  net/mac80211/key.h  18
-rw-r--r--  net/mac80211/main.c  159
-rw-r--r--  net/mac80211/mesh.c  70
-rw-r--r--  net/mac80211/mesh.h  15
-rw-r--r--  net/mac80211/mesh_hwmp.c  28
-rw-r--r--  net/mac80211/mesh_pathtbl.c  56
-rw-r--r--  net/mac80211/mesh_plink.c  43
-rw-r--r--  net/mac80211/mlme.c  578
-rw-r--r--  net/mac80211/offchannel.c  16
-rw-r--r--  net/mac80211/pm.c  132
-rw-r--r--  net/mac80211/rate.c  324
-rw-r--r--  net/mac80211/rc80211_minstrel.c  342
-rw-r--r--  net/mac80211/rc80211_minstrel.h  34
-rw-r--r--  net/mac80211/rc80211_minstrel_debugfs.c  16
-rw-r--r--  net/mac80211/rc80211_minstrel_ht.c  273
-rw-r--r--  net/mac80211/rc80211_minstrel_ht.h  8
-rw-r--r--  net/mac80211/rx.c  85
-rw-r--r--  net/mac80211/scan.c  20
-rw-r--r--  net/mac80211/sta_info.c  56
-rw-r--r--  net/mac80211/sta_info.h  11
-rw-r--r--  net/mac80211/trace.h  97
-rw-r--r--  net/mac80211/tx.c  163
-rw-r--r--  net/mac80211/util.c  224
-rw-r--r--  net/mac80211/vht.c  212
-rw-r--r--  net/mac802154/mac802154.h  5
-rw-r--r--  net/mac802154/mac_cmd.c  1
-rw-r--r--  net/mac802154/mib.c  21
-rw-r--r--  net/mac802154/tx.c  29
-rw-r--r--  net/mac802154/wpan.c  4
-rw-r--r--  net/netfilter/core.c  30
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_gen.h  277
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_ip.c  411
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_ipmac.c  620
-rw-r--r--  net/netfilter/ipset/ip_set_bitmap_port.c  414
-rw-r--r--  net/netfilter/ipset/ip_set_core.c  41
-rw-r--r--  net/netfilter/ipset/ip_set_hash_gen.h  1100
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ip.c  344
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipport.c  362
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipportip.c  368
-rw-r--r--  net/netfilter/ipset/ip_set_hash_ipportnet.c  451
-rw-r--r--  net/netfilter/ipset/ip_set_hash_net.c  384
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netiface.c  460
-rw-r--r--  net/netfilter/ipset/ip_set_hash_netport.c  438
-rw-r--r--  net/netfilter/ipset/ip_set_list_set.c  622
-rw-r--r--  net/netfilter/ipvs/ip_vs_app.c  31
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c  312
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c  88
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c  702
-rw-r--r--  net/netfilter/ipvs/ip_vs_dh.c  86
-rw-r--r--  net/netfilter/ipvs/ip_vs_est.c  2
-rw-r--r--  net/netfilter/ipvs/ip_vs_ftp.c  4
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblc.c  115
-rw-r--r--  net/netfilter/ipvs/ip_vs_lblcr.c  190
-rw-r--r--  net/netfilter/ipvs/ip_vs_lc.c  3
-rw-r--r--  net/netfilter/ipvs/ip_vs_nq.c  3
-rw-r--r--  net/netfilter/ipvs/ip_vs_pe.c  55
-rw-r--r--  net/netfilter/ipvs/ip_vs_pe_sip.c  10
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_sctp.c  38
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_tcp.c  40
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_udp.c  33
-rw-r--r--  net/netfilter/ipvs/ip_vs_rr.c  64
-rw-r--r--  net/netfilter/ipvs/ip_vs_sched.c  63
-rw-r--r--  net/netfilter/ipvs/ip_vs_sed.c  5
-rw-r--r--  net/netfilter/ipvs/ip_vs_sh.c  86
-rw-r--r--  net/netfilter/ipvs/ip_vs_sync.c  56
-rw-r--r--  net/netfilter/ipvs/ip_vs_wlc.c  5
-rw-r--r--  net/netfilter/ipvs/ip_vs_wrr.c  176
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c  1050
-rw-r--r--  net/netfilter/nf_conntrack_amanda.c  1
-rw-r--r--  net/netfilter/nf_conntrack_core.c  60
-rw-r--r--  net/netfilter/nf_conntrack_ecache.c  8
-rw-r--r--  net/netfilter/nf_conntrack_expect.c  9
-rw-r--r--  net/netfilter/nf_conntrack_ftp.c  1
-rw-r--r--  net/netfilter/nf_conntrack_h323_main.c  1
-rw-r--r--  net/netfilter/nf_conntrack_helper.c  3
-rw-r--r--  net/netfilter/nf_conntrack_irc.c  1
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c  100
-rw-r--r--  net/netfilter/nf_conntrack_pptp.c  2
-rw-r--r--  net/netfilter/nf_conntrack_proto.c  1
-rw-r--r--  net/netfilter/nf_conntrack_proto_dccp.c  9
-rw-r--r--  net/netfilter/nf_conntrack_proto_gre.c  1
-rw-r--r--  net/netfilter/nf_conntrack_proto_sctp.c  3
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c  20
-rw-r--r--  net/netfilter/nf_conntrack_proto_udp.c  7
-rw-r--r--  net/netfilter/nf_conntrack_proto_udplite.c  8
-rw-r--r--  net/netfilter/nf_conntrack_sip.c  6
-rw-r--r--  net/netfilter/nf_conntrack_standalone.c  17
-rw-r--r--  net/netfilter/nf_conntrack_tftp.c  2
-rw-r--r--  net/netfilter/nf_log.c  206
-rw-r--r--  net/netfilter/nf_nat_amanda.c  1
-rw-r--r--  net/netfilter/nf_nat_core.c  50
-rw-r--r--  net/netfilter/nf_nat_helper.c  1
-rw-r--r--  net/netfilter/nf_nat_proto_sctp.c  5
-rw-r--r--  net/netfilter/nf_queue.c  148
-rw-r--r--  net/netfilter/nfnetlink.c  27
-rw-r--r--  net/netfilter/nfnetlink_log.c  199
-rw-r--r--  net/netfilter/nfnetlink_queue_core.c  439
-rw-r--r--  net/netfilter/x_tables.c  7
-rw-r--r--  net/netfilter/xt_LOG.c  52
-rw-r--r--  net/netfilter/xt_NFQUEUE.c  63
-rw-r--r--  net/netfilter/xt_TCPMSS.c  1
-rw-r--r--  net/netfilter/xt_conntrack.c  1
-rw-r--r--  net/netfilter/xt_hashlimit.c  17
-rw-r--r--  net/netfilter/xt_limit.c  1
-rw-r--r--  net/netfilter/xt_osf.c  6
-rw-r--r--  net/netfilter/xt_recent.c  9
-rw-r--r--  net/netfilter/xt_set.c  94
-rw-r--r--  net/netlink/Kconfig  19
-rw-r--r--  net/netlink/Makefile  3
-rw-r--r--  net/netlink/af_netlink.c  900
-rw-r--r--  net/netlink/af_netlink.h  82
-rw-r--r--  net/netlink/diag.c  227
-rw-r--r--  net/netlink/genetlink.c  119
-rw-r--r--  net/netrom/af_netrom.c  3
-rw-r--r--  net/nfc/Kconfig  2
-rw-r--r--  net/nfc/Makefile  6
-rw-r--r--  net/nfc/core.c  43
-rw-r--r--  net/nfc/llcp.h (renamed from net/nfc/llcp/llcp.h)  39
-rw-r--r--  net/nfc/llcp/Kconfig  7
-rw-r--r--  net/nfc/llcp_commands.c (renamed from net/nfc/llcp/commands.c)  229
-rw-r--r--  net/nfc/llcp_core.c (renamed from net/nfc/llcp/llcp.c)  241
-rw-r--r--  net/nfc/llcp_sock.c (renamed from net/nfc/llcp/sock.c)  174
-rw-r--r--  net/nfc/netlink.c  175
-rw-r--r--  net/nfc/nfc.h  60
-rw-r--r--  net/openvswitch/actions.c  6
-rw-r--r--  net/openvswitch/datapath.c  451
-rw-r--r--  net/openvswitch/datapath.h  72
-rw-r--r--  net/openvswitch/dp_notify.c  82
-rw-r--r--  net/openvswitch/flow.c  10
-rw-r--r--  net/openvswitch/flow.h  21
-rw-r--r--  net/openvswitch/vport-internal_dev.c  22
-rw-r--r--  net/openvswitch/vport-netdev.c  15
-rw-r--r--  net/openvswitch/vport-netdev.h  1
-rw-r--r--  net/openvswitch/vport.c  58
-rw-r--r--  net/openvswitch/vport.h  21
-rw-r--r--  net/packet/af_packet.c  355
-rw-r--r--  net/packet/diag.c  27
-rw-r--r--  net/packet/internal.h  10
-rw-r--r--  net/phonet/pn_netlink.c  4
-rw-r--r--  net/rfkill/core.c  4
-rw-r--r--  net/rfkill/rfkill-gpio.c  7
-rw-r--r--  net/rfkill/rfkill-regulator.c  2
-rw-r--r--  net/rose/af_rose.c  1
-rw-r--r--  net/sched/act_api.c  2
-rw-r--r--  net/sched/act_csum.c  39
-rw-r--r--  net/sched/act_ipt.c  33
-rw-r--r--  net/sched/cls_api.c  14
-rw-r--r--  net/sched/cls_flow.c  2
-rw-r--r--  net/sched/cls_fw.c  2
-rw-r--r--  net/sched/em_ipset.c  2
-rw-r--r--  net/sched/sch_api.c  44
-rw-r--r--  net/sched/sch_choke.c  2
-rw-r--r--  net/sched/sch_htb.c  31
-rw-r--r--  net/sctp/associola.c  28
-rw-r--r--  net/sctp/bind_addr.c  7
-rw-r--r--  net/sctp/endpointola.c  14
-rw-r--r--  net/sctp/inqueue.c  7
-rw-r--r--  net/sctp/output.c  5
-rw-r--r--  net/sctp/outqueue.c  11
-rw-r--r--  net/sctp/probe.c  2
-rw-r--r--  net/sctp/proc.c  12
-rw-r--r--  net/sctp/protocol.c  2
-rw-r--r--  net/sctp/socket.c  8
-rw-r--r--  net/sctp/ssnmap.c  23
-rw-r--r--  net/sctp/transport.c  1
-rw-r--r--  net/sctp/ulpqueue.c  3
-rw-r--r--  net/socket.c  38
-rw-r--r--  net/sunrpc/Kconfig  2
-rw-r--r--  net/sunrpc/auth.c  75
-rw-r--r--  net/sunrpc/auth_gss/Makefile  3
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c  8
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_mech.c  13
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_wrap.c  4
-rw-r--r--  net/sunrpc/auth_gss/gss_mech_switch.c  124
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_upcall.c  358
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_upcall.h  48
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_xdr.c  840
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_xdr.h  264
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c  367
-rw-r--r--  net/sunrpc/cache.c  29
-rw-r--r--  net/sunrpc/clnt.c  59
-rw-r--r--  net/sunrpc/netns.h  6
-rw-r--r--  net/sunrpc/stats.c  2
-rw-r--r--  net/sunrpc/xprt.c  63
-rw-r--r--  net/sunrpc/xprtsock.c  17
-rw-r--r--  net/tipc/Kconfig  7
-rw-r--r--  net/tipc/Makefile  2
-rw-r--r--  net/tipc/bcast.c  40
-rw-r--r--  net/tipc/bearer.c  7
-rw-r--r--  net/tipc/bearer.h  16
-rw-r--r--  net/tipc/core.c  12
-rw-r--r--  net/tipc/discover.c  2
-rw-r--r--  net/tipc/eth_media.c  39
-rw-r--r--  net/tipc/ib_media.c  387
-rw-r--r--  net/tipc/link.c  11
-rw-r--r--  net/tipc/netlink.c  6
-rw-r--r--  net/tipc/socket.c  7
-rw-r--r--  net/unix/af_unix.c  45
-rw-r--r--  net/unix/garbage.c  12
-rw-r--r--  net/vmw_vsock/af_vsock.c  6
-rw-r--r--  net/vmw_vsock/vmci_transport.c  19
-rw-r--r--  net/vmw_vsock/vmci_transport.h  3
-rw-r--r--  net/wireless/ap.c  62
-rw-r--r--  net/wireless/core.c  77
-rw-r--r--  net/wireless/core.h  25
-rw-r--r--  net/wireless/lib80211_crypt_ccmp.c  29
-rw-r--r--  net/wireless/lib80211_crypt_tkip.c  44
-rw-r--r--  net/wireless/lib80211_crypt_wep.c  5
-rw-r--r--  net/wireless/mesh.c  15
-rw-r--r--  net/wireless/mlme.c  235
-rw-r--r--  net/wireless/nl80211.c  2142
-rw-r--r--  net/wireless/nl80211.h  68
-rw-r--r--  net/wireless/rdev-ops.h  44
-rw-r--r--  net/wireless/reg.c  8
-rw-r--r--  net/wireless/sme.c  30
-rw-r--r--  net/wireless/sysfs.c  25
-rw-r--r--  net/wireless/trace.h  81
-rw-r--r--  net/wireless/util.c  30
-rw-r--r--  net/x25/x25_proc.c  47
-rw-r--r--  net/xfrm/xfrm_algo.c  13
-rw-r--r--  net/xfrm/xfrm_policy.c  23
-rw-r--r--  net/xfrm/xfrm_state.c  1
-rw-r--r--  net/xfrm/xfrm_user.c  17
491 files changed, 27030 insertions, 16491 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 8456f5d98b85..5d9630a0eb93 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -609,8 +609,12 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
 	/* Delete timer and generate a final TRANSMIT_PDU event to flush out
 	 * all pending messages before the applicant is gone. */
 	del_timer_sync(&app->join_timer);
+
+	spin_lock_bh(&app->lock);
 	garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
 	garp_pdu_queue(app);
+	spin_unlock_bh(&app->lock);
+
 	garp_queue_xmit(app);
 
 	dev_mc_del(dev, appl->proto.group_address);
diff --git a/net/802/mrp.c b/net/802/mrp.c
index a4cc3229952a..e085bcc754f6 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -870,8 +870,12 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
 	 * all pending messages before the applicant is gone.
 	 */
 	del_timer_sync(&app->join_timer);
+
+	spin_lock(&app->lock);
 	mrp_mad_event(app, MRP_EVENT_TX);
 	mrp_pdu_queue(app);
+	spin_unlock(&app->lock);
+
 	mrp_queue_xmit(app);
 
 	dev_mc_del(dev, appl->group_address);
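Both hunks make the same fix: the final TRANSMIT_PDU/TX event and the PDU queueing now run under the applicant lock, so teardown can no longer interleave with timer or receive processing that takes the same lock. A minimal sketch of that ordering follows, with a hypothetical toy_applicant standing in for the real struct garp_applicant / struct mrp_applicant (note garp uses the _bh lock variant, mrp the plain one):

	#include <linux/spinlock.h>
	#include <linux/timer.h>

	/* Hypothetical applicant; only the fields the ordering needs. */
	struct toy_applicant {
		spinlock_t lock;
		struct timer_list join_timer;
		unsigned int pending;	/* stand-in for queued GID/MAD state */
	};

	static void toy_uninit_applicant(struct toy_applicant *app)
	{
		/* 1. Stop the periodic join timer (del_timer_sync above). */
		del_timer_sync(&app->join_timer);

		/* 2. Emit the final event and queue the last PDU atomically
		 *    with respect to other lock holders, as the fix does. */
		spin_lock_bh(&app->lock);
		app->pending++;
		spin_unlock_bh(&app->lock);

		/* 3. The actual transmit (garp/mrp_queue_xmit) stays outside
		 *    the lock. */
	}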
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index 8f7517df41a5..b85a91fa61f1 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -3,7 +3,7 @@
 #
 
 config VLAN_8021Q
-	tristate "802.1Q VLAN Support"
+	tristate "802.1Q/802.1ad VLAN Support"
 	---help---
 	  Select this and you will be able to create 802.1Q VLAN interfaces
 	  on your ethernet interfaces. 802.1Q VLAN supports almost
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 85addcd9372b..9424f3718ea7 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -51,14 +51,18 @@ const char vlan_version[] = DRV_VERSION;
 
 /* End of global variables definitions. */
 
-static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
+static int vlan_group_prealloc_vid(struct vlan_group *vg,
+				   __be16 vlan_proto, u16 vlan_id)
 {
 	struct net_device **array;
+	unsigned int pidx, vidx;
 	unsigned int size;
 
 	ASSERT_RTNL();
 
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	pidx = vlan_proto_idx(vlan_proto);
+	vidx = vlan_id / VLAN_GROUP_ARRAY_PART_LEN;
+	array = vg->vlan_devices_arrays[pidx][vidx];
 	if (array != NULL)
 		return 0;
 
@@ -67,7 +71,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 	if (array == NULL)
 		return -ENOBUFS;
 
-	vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
+	vg->vlan_devices_arrays[pidx][vidx] = array;
 	return 0;
 }
 
@@ -93,7 +97,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_leave(dev);
 
-	vlan_group_set_device(grp, vlan_id, NULL);
+	vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL);
 	/* Because unregister_netdevice_queue() makes sure at least one rcu
 	 * grace period is respected before device freeing,
 	 * we dont need to call synchronize_net() here.
@@ -112,13 +116,14 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	 * VLAN is not 0 (leave it there for 802.1p).
 	 */
 	if (vlan_id)
-		vlan_vid_del(real_dev, vlan_id);
+		vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
 
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
 }
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
+int vlan_check_real_dev(struct net_device *real_dev,
+			__be16 protocol, u16 vlan_id)
 {
 	const char *name = real_dev->name;
 
@@ -127,7 +132,7 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 		return -EOPNOTSUPP;
 	}
 
-	if (vlan_find_dev(real_dev, vlan_id) != NULL)
+	if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL)
 		return -EEXIST;
 
 	return 0;
@@ -142,7 +147,7 @@ int register_vlan_dev(struct net_device *dev)
 	struct vlan_group *grp;
 	int err;
 
-	err = vlan_vid_add(real_dev, vlan_id);
+	err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id);
 	if (err)
 		return err;
 
@@ -160,7 +165,7 @@ int register_vlan_dev(struct net_device *dev)
 		goto out_uninit_gvrp;
 	}
 
-	err = vlan_group_prealloc_vid(grp, vlan_id);
+	err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id);
 	if (err < 0)
 		goto out_uninit_mvrp;
 
@@ -181,7 +186,7 @@ int register_vlan_dev(struct net_device *dev)
 	/* So, got the sucker initialized, now lets place
 	 * it into our local structure.
 	 */
-	vlan_group_set_device(grp, vlan_id, dev);
+	vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev);
 	grp->nr_vlan_devs++;
 
 	return 0;
@@ -195,7 +200,7 @@ out_uninit_gvrp:
 	if (grp->nr_vlan_devs == 0)
 		vlan_gvrp_uninit_applicant(real_dev);
 out_vid_del:
-	vlan_vid_del(real_dev, vlan_id);
+	vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
 	return err;
 }
 
@@ -213,7 +218,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	if (vlan_id >= VLAN_VID_MASK)
 		return -ERANGE;
 
-	err = vlan_check_real_dev(real_dev, vlan_id);
+	err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id);
 	if (err < 0)
 		return err;
 
@@ -255,6 +260,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	new_dev->mtu = real_dev->mtu;
 	new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
 
+	vlan_dev_priv(new_dev)->vlan_proto = htons(ETH_P_8021Q);
 	vlan_dev_priv(new_dev)->vlan_id = vlan_id;
 	vlan_dev_priv(new_dev)->real_dev = real_dev;
 	vlan_dev_priv(new_dev)->dent = NULL;
@@ -301,7 +307,7 @@ static void vlan_transfer_features(struct net_device *dev,
 {
 	vlandev->gso_max_size = dev->gso_max_size;
 
-	if (dev->features & NETIF_F_HW_VLAN_TX)
+	if (dev->features & NETIF_F_HW_VLAN_CTAG_TX)
 		vlandev->hard_header_len = dev->hard_header_len;
 	else
 		vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
@@ -341,16 +347,17 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	int i, flgs;
 	struct net_device *vlandev;
 	struct vlan_dev_priv *vlan;
+	bool last = false;
 	LIST_HEAD(list);
 
 	if (is_vlan_dev(dev))
 		__vlan_device_event(dev, event);
 
 	if ((event == NETDEV_UP) &&
-	    (dev->features & NETIF_F_HW_VLAN_FILTER)) {
+	    (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
 		pr_info("adding VLAN 0 to HW filter on device %s\n",
 			dev->name);
-		vlan_vid_add(dev, 0);
+		vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
 	}
 
 	vlan_info = rtnl_dereference(dev->vlan_info);
@@ -365,22 +372,13 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			netif_stacked_transfer_operstate(dev, vlandev);
-		}
 		break;
 
 	case NETDEV_CHANGEADDR:
 		/* Adjust unicast filters on underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -390,11 +388,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_CHANGEMTU:
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
			if (vlandev->mtu <= dev->mtu)
 				continue;
 
@@ -404,26 +398,16 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_FEAT_CHANGE:
 		/* Propagate device features to underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			vlan_transfer_features(dev, vlandev);
-		}
-
 		break;
 
 	case NETDEV_DOWN:
-		if (dev->features & NETIF_F_HW_VLAN_FILTER)
-			vlan_vid_del(dev, 0);
+		if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+			vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
 
 		/* Put all VLANs for this dev in the down state too. */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -437,11 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_UP:
 		/* Put all VLANs for this dev in the up state too. */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (flgs & IFF_UP)
 				continue;
@@ -458,17 +438,15 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		if (dev->reg_state != NETREG_UNREGISTERING)
 			break;
 
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			/* removal of last vid destroys vlan_info, abort
 			 * afterwards */
 			if (vlan_info->nr_vids == 1)
-				i = VLAN_N_VID;
+				last = true;
 
 			unregister_vlan_dev(vlandev, &list);
+			if (last)
+				break;
 		}
 		unregister_netdevice_many(&list);
 		break;
@@ -482,13 +460,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_BONDING_FAILOVER:
 		/* Propagate to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			call_netdevice_notifiers(event, vlandev);
-		}
 		break;
 	}
 
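One subtlety in the NETDEV_UNREGISTER hunk above: the old loop stopped early by forcing i = VLAN_N_VID, but vlan_group_for_each_dev() now walks VLAN_PROTO_NUM * VLAN_N_VID slots, so that trick would keep iterating over a vlan_info freed by the last unregister_vlan_dev(). The new code therefore latches a last flag and breaks right after acting on the final device. A standalone toy of the same early-exit pattern (an assumed simplification, not kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	int main(void)
	{
		int remaining = 3;	/* devices left in the toy container */
		bool last = false;

		while (remaining > 0) {
			if (remaining == 1)
				last = true;	/* next removal frees the container */

			remaining--;		/* unregister_vlan_dev() stand-in */
			printf("removed one, %d left\n", remaining);

			if (last)
				break;		/* container gone: stop walking it */
		}
		return 0;
	}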
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 670f1e8cfc0f..ba5983f34c42 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -49,6 +49,7 @@ struct netpoll;
  * @ingress_priority_map: ingress priority mappings
  * @nr_egress_mappings: number of egress priority mappings
  * @egress_priority_map: hash of egress priority mappings
+ * @vlan_proto: VLAN encapsulation protocol
  * @vlan_id: VLAN identifier
  * @flags: device flags
  * @real_dev: underlying netdevice
@@ -62,6 +63,7 @@ struct vlan_dev_priv {
 	unsigned int nr_egress_mappings;
 	struct vlan_priority_tci_mapping *egress_priority_map[16];
 
+	__be16 vlan_proto;
 	u16 vlan_id;
 	u16 flags;
 
@@ -87,10 +89,17 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
 #define VLAN_GROUP_ARRAY_SPLIT_PARTS 8
 #define VLAN_GROUP_ARRAY_PART_LEN (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
 
+enum vlan_protos {
+	VLAN_PROTO_8021Q = 0,
+	VLAN_PROTO_8021AD,
+	VLAN_PROTO_NUM,
+};
+
 struct vlan_group {
 	unsigned int nr_vlan_devs;
 	struct hlist_node hlist;	/* linked list */
-	struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
+	struct net_device **vlan_devices_arrays[VLAN_PROTO_NUM]
+						[VLAN_GROUP_ARRAY_SPLIT_PARTS];
 };
 
 struct vlan_info {
@@ -103,37 +112,67 @@ struct vlan_info {
 	struct rcu_head rcu;
 };
 
-static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
-						       u16 vlan_id)
+static inline unsigned int vlan_proto_idx(__be16 proto)
+{
+	switch (proto) {
+	case __constant_htons(ETH_P_8021Q):
+		return VLAN_PROTO_8021Q;
+	case __constant_htons(ETH_P_8021AD):
+		return VLAN_PROTO_8021AD;
+	default:
+		BUG();
+		return 0;
+	}
+}
+
+static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg,
+							 unsigned int pidx,
+							 u16 vlan_id)
 {
 	struct net_device **array;
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+
+	array = vg->vlan_devices_arrays[pidx]
+				       [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
 }
 
+static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
+						       __be16 vlan_proto,
+						       u16 vlan_id)
+{
+	return __vlan_group_get_device(vg, vlan_proto_idx(vlan_proto), vlan_id);
+}
+
 static inline void vlan_group_set_device(struct vlan_group *vg,
-					 u16 vlan_id,
+					 __be16 vlan_proto, u16 vlan_id,
 					 struct net_device *dev)
 {
 	struct net_device **array;
 	if (!vg)
 		return;
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	array = vg->vlan_devices_arrays[vlan_proto_idx(vlan_proto)]
+				       [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
 /* Must be invoked with rcu_read_lock or with RTNL. */
 static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
-					       u16 vlan_id)
+					       __be16 vlan_proto, u16 vlan_id)
 {
 	struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
 
 	if (vlan_info)
-		return vlan_group_get_device(&vlan_info->grp, vlan_id);
+		return vlan_group_get_device(&vlan_info->grp,
+					     vlan_proto, vlan_id);
 
 	return NULL;
 }
 
+#define vlan_group_for_each_dev(grp, i, dev) \
+	for ((i) = 0; i < VLAN_PROTO_NUM * VLAN_N_VID; i++) \
+		if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
+							    (i) % VLAN_N_VID)))
+
 /* found in vlan_dev.c */
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, u16 vlan_prio);
@@ -142,7 +181,8 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id);
+int vlan_check_real_dev(struct net_device *real_dev,
+			__be16 protocol, u16 vlan_id);
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
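The vlan_group_for_each_dev() macro added above flattens the (protocol, VLAN id) pair into one index: i / VLAN_N_VID selects the protocol table and i % VLAN_N_VID the VLAN id. A standalone sketch of that arithmetic, with toy constants in place of VLAN_PROTO_NUM and VLAN_N_VID:

	#include <stdio.h>

	#define TOY_PROTO_NUM 2		/* VLAN_PROTO_NUM: 802.1Q + 802.1ad */
	#define TOY_N_VID     4096	/* VLAN_N_VID */

	int main(void)
	{
		unsigned int i;

		for (i = 0; i < TOY_PROTO_NUM * TOY_N_VID; i++) {
			unsigned int pidx = i / TOY_N_VID;	/* 0 = CTAG, 1 = STAG */
			unsigned int vid  = i % TOY_N_VID;

			/* Print the two flat indices where vid 100 comes around. */
			if (vid == 100)
				printf("flat index %u -> proto %u, vid %u\n",
				       i, pidx, vid);
		}
		return 0;
	}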
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f3b6f515eba6..8a15eaadc4bd 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -8,11 +8,12 @@
 bool vlan_do_receive(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
+	__be16 vlan_proto = skb->vlan_proto;
 	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
 	struct vlan_pcpu_stats *rx_stats;
 
-	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
+	vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id);
 	if (!vlan_dev)
 		return false;
 
@@ -38,7 +39,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		 * original position later
 		 */
 		skb_push(skb, offset);
-		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+		skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto,
+					      skb->vlan_tci);
 		if (!skb)
 			return false;
 		skb_pull(skb, offset + VLAN_HLEN);
@@ -62,12 +64,13 @@
 
 /* Must be invoked with rcu_read_lock. */
 struct net_device *__vlan_find_dev_deep(struct net_device *dev,
-					u16 vlan_id)
+					__be16 vlan_proto, u16 vlan_id)
 {
 	struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info);
 
 	if (vlan_info) {
-		return vlan_group_get_device(&vlan_info->grp, vlan_id);
+		return vlan_group_get_device(&vlan_info->grp,
+					     vlan_proto, vlan_id);
 	} else {
 		/*
 		 * Lower devices of master uppers (bonding, team) do not have
@@ -78,7 +81,8 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev,
 
 		upper_dev = netdev_master_upper_dev_get_rcu(dev);
 		if (upper_dev)
-			return __vlan_find_dev_deep(upper_dev, vlan_id);
+			return __vlan_find_dev_deep(upper_dev,
						    vlan_proto, vlan_id);
 	}
 
 	return NULL;
@@ -125,7 +129,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 
 	vhdr = (struct vlan_hdr *) skb->data;
 	vlan_tci = ntohs(vhdr->h_vlan_TCI);
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
+	__vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
 
 	skb_pull_rcsum(skb, VLAN_HLEN);
 	vlan_set_encap_proto(skb, vhdr);
@@ -153,10 +157,11 @@ EXPORT_SYMBOL(vlan_untag);
 
 static void vlan_group_free(struct vlan_group *grp)
 {
-	int i;
+	int i, j;
 
-	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
-		kfree(grp->vlan_devices_arrays[i]);
+	for (i = 0; i < VLAN_PROTO_NUM; i++)
+		for (j = 0; j < VLAN_GROUP_ARRAY_SPLIT_PARTS; j++)
+			kfree(grp->vlan_devices_arrays[i][j]);
 }
 
 static void vlan_info_free(struct vlan_info *vlan_info)
@@ -185,35 +190,49 @@ static struct vlan_info *vlan_info_alloc(struct net_device *dev)
 
 struct vlan_vid_info {
 	struct list_head list;
-	unsigned short vid;
+	__be16 proto;
+	u16 vid;
 	int refcount;
 };
 
+static bool vlan_hw_filter_capable(const struct net_device *dev,
+				   const struct vlan_vid_info *vid_info)
+{
+	if (vid_info->proto == htons(ETH_P_8021Q) &&
+	    dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+		return true;
+	if (vid_info->proto == htons(ETH_P_8021AD) &&
+	    dev->features & NETIF_F_HW_VLAN_STAG_FILTER)
+		return true;
+	return false;
+}
+
 static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info,
-					       unsigned short vid)
+					       __be16 proto, u16 vid)
 {
 	struct vlan_vid_info *vid_info;
 
 	list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
-		if (vid_info->vid == vid)
+		if (vid_info->proto == proto && vid_info->vid == vid)
 			return vid_info;
 	}
 	return NULL;
 }
 
-static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid)
+static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid)
 {
 	struct vlan_vid_info *vid_info;
 
 	vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL);
 	if (!vid_info)
 		return NULL;
+	vid_info->proto = proto;
 	vid_info->vid = vid;
 
 	return vid_info;
 }
 
-static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
+static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
 			  struct vlan_vid_info **pvid_info)
 {
 	struct net_device *dev = vlan_info->real_dev;
@@ -221,12 +240,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
 	struct vlan_vid_info *vid_info;
 	int err;
 
-	vid_info = vlan_vid_info_alloc(vid);
+	vid_info = vlan_vid_info_alloc(proto, vid);
 	if (!vid_info)
 		return -ENOMEM;
 
-	if (dev->features & NETIF_F_HW_VLAN_FILTER) {
-		err = ops->ndo_vlan_rx_add_vid(dev, vid);
+	if (vlan_hw_filter_capable(dev, vid_info)) {
+		err = ops->ndo_vlan_rx_add_vid(dev, proto, vid);
 		if (err) {
 			kfree(vid_info);
 			return err;
@@ -238,7 +257,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
 	return 0;
 }
 
-int vlan_vid_add(struct net_device *dev, unsigned short vid)
+int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct vlan_info *vlan_info;
 	struct vlan_vid_info *vid_info;
@@ -254,9 +273,9 @@ int vlan_vid_add(struct net_device *dev, unsigned short vid)
 			return -ENOMEM;
 		vlan_info_created = true;
 	}
-	vid_info = vlan_vid_info_get(vlan_info, vid);
+	vid_info = vlan_vid_info_get(vlan_info, proto, vid);
 	if (!vid_info) {
-		err = __vlan_vid_add(vlan_info, vid, &vid_info);
+		err = __vlan_vid_add(vlan_info, proto, vid, &vid_info);
 		if (err)
 			goto out_free_vlan_info;
 	}
@@ -279,14 +298,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
 {
 	struct net_device *dev = vlan_info->real_dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
-	unsigned short vid = vid_info->vid;
+	__be16 proto = vid_info->proto;
+	u16 vid = vid_info->vid;
 	int err;
 
-	if (dev->features & NETIF_F_HW_VLAN_FILTER) {
-		err = ops->ndo_vlan_rx_kill_vid(dev, vid);
+	if (vlan_hw_filter_capable(dev, vid_info)) {
+		err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
 		if (err) {
-			pr_warn("failed to kill vid %d for device %s\n",
-				vid, dev->name);
+			pr_warn("failed to kill vid %04x/%d for device %s\n",
+				proto, vid, dev->name);
 		}
 	}
 	list_del(&vid_info->list);
@@ -294,7 +314,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
 	vlan_info->nr_vids--;
 }
 
-void vlan_vid_del(struct net_device *dev, unsigned short vid)
+void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct vlan_info *vlan_info;
 	struct vlan_vid_info *vid_info;
@@ -305,7 +325,7 @@ void vlan_vid_del(struct net_device *dev, unsigned short vid)
 	if (!vlan_info)
 		return;
 
-	vid_info = vlan_vid_info_get(vlan_info, vid);
+	vid_info = vlan_vid_info_get(vlan_info, proto, vid);
 	if (!vid_info)
 		return;
 	vid_info->refcount--;
@@ -333,7 +353,7 @@ int vlan_vids_add_by_dev(struct net_device *dev,
 		return 0;
 
 	list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
-		err = vlan_vid_add(dev, vid_info->vid);
+		err = vlan_vid_add(dev, vid_info->proto, vid_info->vid);
 		if (err)
 			goto unwind;
 	}
@@ -343,7 +363,7 @@ unwind:
 	list_for_each_entry_continue_reverse(vid_info,
 					     &vlan_info->vid_list,
 					     list) {
-		vlan_vid_del(dev, vid_info->vid);
+		vlan_vid_del(dev, vid_info->proto, vid_info->vid);
 	}
 
 	return err;
@@ -363,7 +383,7 @@ void vlan_vids_del_by_dev(struct net_device *dev,
 		return;
 
 	list_for_each_entry(vid_info, &vlan_info->vid_list, list)
-		vlan_vid_del(dev, vid_info->vid);
+		vlan_vid_del(dev, vid_info->proto, vid_info->vid);
 }
 EXPORT_SYMBOL(vlan_vids_del_by_dev);
 
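With vlan_hw_filter_capable() deciding per protocol, drivers now receive the EtherType in their ndo_vlan_rx_add_vid()/ndo_vlan_rx_kill_vid() callbacks. A hedged sketch of a driver-side callback under the new signature; foo_priv and foo_hw_set_vid_filter() are invented stand-ins, not a real driver API:

	#include <linux/errno.h>
	#include <linux/etherdevice.h>
	#include <linux/if_vlan.h>
	#include <linux/netdevice.h>

	struct foo_priv;	/* hypothetical driver state */
	/* Invented helper: program one entry of a hardware VID filter table. */
	int foo_hw_set_vid_filter(struct foo_priv *priv, u16 vid, bool on);

	static int foo_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
	{
		struct foo_priv *priv = netdev_priv(dev);

		/* vlan_hw_filter_capable() only forwards protocols the device
		 * advertises (CTAG and/or STAG), but checking is cheap. */
		if (proto != htons(ETH_P_8021Q))
			return -EPROTONOSUPPORT;

		return foo_hw_set_vid_filter(priv, vid, true);
	}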
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 19cf81bf9f69..3a8c8fd63c88 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -99,6 +99,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				const void *daddr, const void *saddr,
 				unsigned int len)
 {
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_hdr *vhdr;
 	unsigned int vhdrlen = 0;
 	u16 vlan_tci = 0;
@@ -120,8 +121,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		else
 			vhdr->h_vlan_encapsulated_proto = htons(len);
 
-		skb->protocol = htons(ETH_P_8021Q);
-		type = ETH_P_8021Q;
+		skb->protocol = vlan->vlan_proto;
+		type = ntohs(vlan->vlan_proto);
 		vhdrlen = VLAN_HLEN;
 	}
 
@@ -161,12 +162,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
-	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
+	if (veth->h_vlan_proto != vlan->vlan_proto ||
 	    vlan->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
 		vlan_tci = vlan->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
+		skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci);
 	}
 
 	skb->dev = vlan->real_dev;
@@ -583,7 +584,7 @@ static int vlan_dev_init(struct net_device *dev)
 #endif
 
 	dev->needed_headroom = real_dev->needed_headroom;
-	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+	if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) {
 		dev->header_ops = real_dev->header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
 	} else {
@@ -627,7 +628,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
 					       netdev_features_t features)
 {
 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
-	u32 old_features = features;
+	netdev_features_t old_features = features;
 
 	features &= real_dev->vlan_features;
 	features |= NETIF_F_RXCSUM;
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 6f9755352760..66a80320b032 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -32,6 +32,8 @@ int vlan_gvrp_request_join(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return 0;
 	return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
 				 &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
@@ -41,6 +43,8 @@ void vlan_gvrp_request_leave(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return;
 	garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
 			   &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c
index d9ec1d5964aa..e0fe091801b0 100644
--- a/net/8021q/vlan_mvrp.c
+++ b/net/8021q/vlan_mvrp.c
@@ -38,6 +38,8 @@ int vlan_mvrp_request_join(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return 0;
 	return mrp_request_join(vlan->real_dev, &vlan_mrp_app,
 				&vlan_id, sizeof(vlan_id), MVRP_ATTR_VID);
 }
@@ -47,6 +49,8 @@ void vlan_mvrp_request_leave(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return;
 	mrp_request_leave(vlan->real_dev, &vlan_mrp_app,
 			  &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID);
 }
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 1789658b7cd7..309129732285 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -23,6 +23,7 @@ static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
 	[IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) },
 	[IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED },
 	[IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
+	[IFLA_VLAN_PROTOCOL] = { .type = NLA_U16 },
 };
 
 static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = {
@@ -53,6 +54,16 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	if (!data)
 		return -EINVAL;
 
+	if (data[IFLA_VLAN_PROTOCOL]) {
+		switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) {
+		case __constant_htons(ETH_P_8021Q):
+		case __constant_htons(ETH_P_8021AD):
+			break;
+		default:
+			return -EPROTONOSUPPORT;
+		}
+	}
+
 	if (data[IFLA_VLAN_ID]) {
 		id = nla_get_u16(data[IFLA_VLAN_ID]);
 		if (id >= VLAN_VID_MASK)
@@ -107,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev;
+	__be16 proto;
 	int err;
 
 	if (!data[IFLA_VLAN_ID])
@@ -118,11 +130,17 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	if (!real_dev)
 		return -ENODEV;
 
-	vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
-	vlan->real_dev = real_dev;
-	vlan->flags = VLAN_FLAG_REORDER_HDR;
+	if (data[IFLA_VLAN_PROTOCOL])
+		proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]);
+	else
+		proto = htons(ETH_P_8021Q);
+
+	vlan->vlan_proto = proto;
+	vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]);
+	vlan->real_dev = real_dev;
+	vlan->flags = VLAN_FLAG_REORDER_HDR;
 
-	err = vlan_check_real_dev(real_dev, vlan->vlan_id);
+	err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
 	if (err < 0)
 		return err;
 
@@ -151,7 +169,8 @@ static size_t vlan_get_size(const struct net_device *dev)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
-	return nla_total_size(2) +	/* IFLA_VLAN_ID */
+	return nla_total_size(2) +	/* IFLA_VLAN_PROTOCOL */
+	       nla_total_size(2) +	/* IFLA_VLAN_ID */
 	       sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
 	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
 	       vlan_qos_map_size(vlan->nr_egress_mappings);
@@ -166,7 +185,8 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct nlattr *nest;
 	unsigned int i;
 
-	if (nla_put_u16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id))
+	if (nla_put_be16(skb, IFLA_VLAN_PROTOCOL, vlan->vlan_proto) ||
+	    nla_put_u16(skb, IFLA_VLAN_ID, vlan->vlan_id))
 		goto nla_put_failure;
 	if (vlan->flags) {
 		f.flags = vlan->flags;
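
The vlan_netlink.c hunks above teach the rtnl_link interface a new IFLA_VLAN_PROTOCOL attribute and reject every EtherType other than 802.1Q and 802.1ad with -EPROTONOSUPPORT. The switch uses __constant_htons() because case labels must be compile-time constant expressions. Below is a minimal standalone sketch of the same check, assuming only the well-known EtherType values 0x8100 and 0x88A8; check_vlan_proto() is an invented name, not a kernel symbol:

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    #define MY_ETH_P_8021Q  0x8100 /* 802.1Q customer VLAN EtherType */
    #define MY_ETH_P_8021AD 0x88A8 /* 802.1ad service VLAN EtherType */

    /* Mirror of the kernel's validation: accept only the two VLAN
     * protocols, everything else is rejected (the kernel returns
     * -EPROTONOSUPPORT there). */
    static int check_vlan_proto(uint16_t proto_be)
    {
        if (proto_be == htons(MY_ETH_P_8021Q) ||
            proto_be == htons(MY_ETH_P_8021AD))
            return 0;
        return -1;
    }

    int main(void)
    {
        printf("0x8100 -> %d\n", check_vlan_proto(htons(MY_ETH_P_8021Q)));
        printf("0x1234 -> %d\n", check_vlan_proto(htons(0x1234)));
        return 0;
    }

In sufficiently recent iproute2 this attribute is what backs "ip link add ... type vlan proto 802.1ad id N" for creating service-tagged VLANs.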
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index dc526ec965e4..1d0e89213a28 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -93,7 +93,7 @@ static const struct file_operations vlan_fops = {
 
 static int vlandev_seq_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, vlandev_seq_show, PDE(inode)->data);
+	return single_open(file, vlandev_seq_show, PDE_DATA(inode));
 }
 
 static const struct file_operations vlandev_fops = {
@@ -184,14 +184,9 @@ int vlan_proc_add_dev(struct net_device *vlandev)
  */
 int vlan_proc_rem_dev(struct net_device *vlandev)
 {
-	struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
-
 	/** NOTE: This will consume the memory pointed to by dent, it seems. */
-	if (vlan_dev_priv(vlandev)->dent) {
-		remove_proc_entry(vlan_dev_priv(vlandev)->dent->name,
-				  vn->proc_vlan_dir);
-		vlan_dev_priv(vlandev)->dent = NULL;
-	}
+	proc_remove(vlan_dev_priv(vlandev)->dent);
+	vlan_dev_priv(vlandev)->dent = NULL;
 	return 0;
 }
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index de2e950a0a7a..e1c26b101830 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -194,11 +194,14 @@ static int pack_sg_list(struct scatterlist *sg, int start,
 		if (s > count)
 			s = count;
 		BUG_ON(index > limit);
+		/* Make sure we don't terminate early. */
+		sg_unmark_end(&sg[index]);
 		sg_set_buf(&sg[index++], data, s);
 		count -= s;
 		data += s;
 	}
-
+	if (index-start)
+		sg_mark_end(&sg[index - 1]);
 	return index-start;
 }
 
@@ -236,12 +239,17 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit,
 		s = rest_of_page(data);
 		if (s > count)
 			s = count;
+		/* Make sure we don't terminate early. */
+		sg_unmark_end(&sg[index]);
 		sg_set_page(&sg[index++], pdata[i++], s, data_off);
 		data_off = 0;
 		data += s;
 		count -= s;
 		nr_pages--;
 	}
+
+	if (index-start)
+		sg_mark_end(&sg[index - 1]);
 	return index - start;
 }
 
@@ -256,9 +264,10 @@ static int
 p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 {
 	int err;
-	int in, out;
+	int in, out, out_sgs, in_sgs;
 	unsigned long flags;
 	struct virtio_chan *chan = client->trans;
+	struct scatterlist *sgs[2];
 
 	p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
@@ -266,14 +275,19 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 req_retry:
 	spin_lock_irqsave(&chan->lock, flags);
 
+	out_sgs = in_sgs = 0;
 	/* Handle out VirtIO ring buffers */
 	out = pack_sg_list(chan->sg, 0,
 			   VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+	if (out)
+		sgs[out_sgs++] = chan->sg;
 
 	in = pack_sg_list(chan->sg, out,
 			  VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+	if (in)
+		sgs[out_sgs + in_sgs++] = chan->sg + out;
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+	err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
 				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
@@ -289,7 +303,7 @@ req_retry:
 	} else {
 		spin_unlock_irqrestore(&chan->lock, flags);
 		p9_debug(P9_DEBUG_TRANS,
-			 "virtio rpc add_buf returned failure\n");
+			 "virtio rpc add_sgs returned failure\n");
 		return -EIO;
 	}
 }
@@ -351,11 +365,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 		     char *uidata, char *uodata, int inlen,
 		     int outlen, int in_hdr_len, int kern_buf)
 {
-	int in, out, err;
+	int in, out, err, out_sgs, in_sgs;
 	unsigned long flags;
 	int in_nr_pages = 0, out_nr_pages = 0;
 	struct page **in_pages = NULL, **out_pages = NULL;
 	struct virtio_chan *chan = client->trans;
+	struct scatterlist *sgs[4];
 
 	p9_debug(P9_DEBUG_TRANS, "virtio request\n");
 
@@ -396,13 +411,22 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	req->status = REQ_STATUS_SENT;
 req_retry_pinned:
 	spin_lock_irqsave(&chan->lock, flags);
+
+	out_sgs = in_sgs = 0;
+
 	/* out data */
 	out = pack_sg_list(chan->sg, 0,
 			   VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
 
-	if (out_pages)
+	if (out)
+		sgs[out_sgs++] = chan->sg;
+
+	if (out_pages) {
+		sgs[out_sgs++] = chan->sg + out;
 		out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
 				      out_pages, out_nr_pages, uodata, outlen);
+	}
+
 	/*
 	 * Take care of in data
 	 * For example TREAD have 11.
@@ -412,11 +436,17 @@ req_retry_pinned:
 	 */
 	in = pack_sg_list(chan->sg, out,
 			  VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
-	if (in_pages)
+	if (in)
+		sgs[out_sgs + in_sgs++] = chan->sg + out;
+
+	if (in_pages) {
+		sgs[out_sgs + in_sgs++] = chan->sg + out + in;
 		in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
 				     in_pages, in_nr_pages, uidata, inlen);
+	}
 
-	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+	BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
+	err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
 				GFP_ATOMIC);
 	if (err < 0) {
 		if (err == -ENOSPC) {
@@ -432,7 +462,7 @@ req_retry_pinned:
 	} else {
 		spin_unlock_irqrestore(&chan->lock, flags);
 		p9_debug(P9_DEBUG_TRANS,
-			 "virtio rpc add_buf returned failure\n");
+			 "virtio rpc add_sgs returned failure\n");
 		err = -EIO;
 		goto err_out;
 	}
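
The trans_virtio.c conversion above replaces virtqueue_add_buf(), which took one flat scatterlist plus out/in counts, with virtqueue_add_sgs(), which takes an array of scatterlist pointers split into driver-readable (out) and device-writable (in) groups; each sgs[] entry is walked up to its end marker, which is why pack_sg_list() now calls sg_unmark_end()/sg_mark_end(). A condensed sketch of the calling convention follows, assuming an already initialised struct virtqueue *vq; this is kernel-context code, not a standalone program, and queue_rpc() is an illustrative name:

    #include <linux/scatterlist.h>
    #include <linux/virtio.h>

    /* Illustrative only: queue one request buffer the device reads and
     * one reply buffer the device writes. vq, token and the buffers are
     * assumed to exist in the caller. */
    static int queue_rpc(struct virtqueue *vq, void *token,
                         void *out_buf, size_t out_len,
                         void *in_buf, size_t in_len)
    {
        struct scatterlist out_sg, in_sg;
        struct scatterlist *sgs[2];

        sg_init_one(&out_sg, out_buf, out_len); /* device reads this */
        sg_init_one(&in_sg, in_buf, in_len);    /* device writes this */
        sgs[0] = &out_sg;
        sgs[1] = &in_sg;

        /* one out group, one in group; token is returned later by
         * virtqueue_get_buf() when the device is done */
        return virtqueue_add_sgs(vq, sgs, 1, 1, token, GFP_ATOMIC);
    }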
diff --git a/net/Kconfig b/net/Kconfig
index 6f676ab885be..2ddc9046868e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -217,6 +217,7 @@ source "net/dns_resolver/Kconfig"
 source "net/batman-adv/Kconfig"
 source "net/openvswitch/Kconfig"
 source "net/vmw_vsock/Kconfig"
+source "net/netlink/Kconfig"
 
 config RPS
 	boolean
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 4a141e3cf076..ef12839a7cfe 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1253,7 +1253,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
 		goto out;
 
 	*uaddr_len = sizeof(struct sockaddr_at);
-	memset(&sat.sat_zero, 0, sizeof(sat.sat_zero));
+	memset(&sat, 0, sizeof(sat));
 
 	if (peer) {
 		err = -ENOTCONN;
diff --git a/net/atm/common.c b/net/atm/common.c
index 7b491006eaf4..737bef59ce89 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -531,6 +531,8 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	struct sk_buff *skb;
 	int copied, error = -EINVAL;
 
+	msg->msg_namelen = 0;
+
 	if (sock->state != SS_CONNECTED)
 		return -ENOTCONN;
 
diff --git a/net/atm/lec.h b/net/atm/lec.h
index a86aff9a3c04..4149db1b7885 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -58,7 +58,7 @@ struct lane2_ops {
  *    field in h_type field. Data follows immediately after header.
  * 2. LLC Data frames whose total length, including LLC field and data,
  *    but not padding required to meet the minimum data frame length,
- *    is less than 1536(0x0600) MUST be encoded by placing that length
+ *    is less than ETH_P_802_3_MIN MUST be encoded by placing that length
  *    in the h_type field. The LLC field follows header immediately.
  * 3. LLC data frames longer than this maximum MUST be encoded by placing
  *    the value 0 in the h_type field.
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 6ac35ff0d6b9..bbb6461a4b7f 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -385,7 +385,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf,
 	page = get_zeroed_page(GFP_KERNEL);
 	if (!page)
 		return -ENOMEM;
-	dev = PDE(file_inode(file))->data;
+	dev = PDE_DATA(file_inode(file));
 	if (!dev->ops->proc_read)
 		length = -EINVAL;
 	else {
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 7b11f8bc5071..e277e38f736b 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1642,6 +1642,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
 		ax25_address src;
 		const unsigned char *mac = skb_mac_header(skb);
 
+		memset(sax, 0, sizeof(struct full_sockaddr_ax25));
 		ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
 				&digi, NULL, NULL);
 		sax->sax25_family = AF_AX25;
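
The appletalk, atm and ax25 hunks in this stretch all plug the same class of bug: a sockaddr-style structure built on the kernel stack is copied to userspace with padding or unused members never written, leaking stale stack bytes. Zeroing the whole structure before filling the valid fields is the standard cure. A standalone model of the pattern, with struct demo_addr and fill_addr() invented for the example:

    #include <stdio.h>
    #include <string.h>

    /* Stand-in for a sockaddr-style struct with unused/padding bytes. */
    struct demo_addr {
        unsigned short family;
        char           pad_and_future_use[14];
    };

    static void fill_addr(struct demo_addr *a)
    {
        /* Zero first: everything not explicitly set below stays 0
         * instead of holding leftover stack contents. */
        memset(a, 0, sizeof(*a));
        a->family = 3; /* AF_AX25 is 3 on Linux */
    }

    int main(void)
    {
        struct demo_addr a;

        fill_addr(&a);
        printf("family=%u first pad byte=%d\n",
               a.family, a.pad_and_future_use[0]);
        return 0;
    }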
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 8d8afb134b3a..fa780b76630e 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -36,6 +36,20 @@ config BATMAN_ADV_DAT
 	  mesh networks. If you think that your network does not need
 	  this option you can safely remove it and save some space.
 
+config BATMAN_ADV_NC
+	bool "Network Coding"
+	depends on BATMAN_ADV
+	default n
+	help
+	  This option enables network coding, a mechanism that aims to
+	  increase the overall network throughput by fusing multiple
+	  packets in one transmission.
+	  Note that interfaces controlled by batman-adv must be manually
+	  configured to have promiscuous mode enabled in order to make
+	  network coding work.
+	  If you think that your network does not need this feature you
+	  can safely disable it and save some space.
+
 config BATMAN_ADV_DEBUG
 	bool "B.A.T.M.A.N. debugging"
 	depends on BATMAN_ADV
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index e45e3b4e32e3..acbac2a9c62f 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2007-2012  B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2013  B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
@@ -30,6 +30,7 @@ batman-adv-y += hard-interface.o
 batman-adv-y += hash.o
 batman-adv-y += icmp_socket.o
 batman-adv-y += main.o
+batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
 batman-adv-y += originator.o
 batman-adv-y += ring_buffer.o
 batman-adv-y += routing.o
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a5bb0a769eb9..071f288b77a8 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -27,6 +27,7 @@
 #include "hard-interface.h"
 #include "send.h"
 #include "bat_algo.h"
+#include "network-coding.h"
 
 static struct batadv_neigh_node *
 batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
@@ -1185,6 +1186,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 	if (!orig_neigh_node)
 		goto out;
 
+	/* Update nc_nodes of the originator */
+	batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node,
+				 batadv_ogm_packet, is_single_hop_neigh);
+
 	orig_neigh_router = batadv_orig_node_get_router(orig_neigh_node);
 
 	/* drop packet if sender is not a direct neighbor and if we
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 6a4f728680ae..379061c72549 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -341,7 +341,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
 	}
 
 	if (vid != -1)
-		skb = vlan_insert_tag(skb, vid);
+		skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid);
 
 	skb_reset_mac_header(skb);
 	skb->protocol = eth_type_trans(skb, soft_iface);
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 6ae86516db4d..f186a55b23c3 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -32,6 +32,7 @@
 #include "icmp_socket.h"
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
+#include "network-coding.h"
 
 static struct dentry *batadv_debugfs;
 
@@ -310,6 +311,14 @@ struct batadv_debuginfo {
 	const struct file_operations fops;
 };
 
+#ifdef CONFIG_BATMAN_ADV_NC
+static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
+{
+	struct net_device *net_dev = (struct net_device *)inode->i_private;
+	return single_open(file, batadv_nc_nodes_seq_print_text, net_dev);
+}
+#endif
+
 #define BATADV_DEBUGINFO(_name, _mode, _open)		\
 struct batadv_debuginfo batadv_debuginfo_##_name = {	\
 	.attr = { .name = __stringify(_name),		\
@@ -348,6 +357,9 @@ static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open);
 static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
 			batadv_transtable_local_open);
 static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open);
+#ifdef CONFIG_BATMAN_ADV_NC
+static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open);
+#endif
 
 static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
 	&batadv_debuginfo_originators,
@@ -362,6 +374,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
 #endif
 	&batadv_debuginfo_transtable_local,
 	&batadv_debuginfo_vis_data,
+#ifdef CONFIG_BATMAN_ADV_NC
+	&batadv_debuginfo_nc_nodes,
+#endif
 	NULL,
 };
 
@@ -431,6 +446,9 @@ int batadv_debugfs_add_meshif(struct net_device *dev)
 		}
 	}
 
+	if (batadv_nc_init_debugfs(bat_priv) < 0)
+		goto rem_attr;
+
 	return 0;
 rem_attr:
 	debugfs_remove_recursive(bat_priv->debug_dir);
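
The debugfs.c hunks above hang the new nc_nodes file off the existing BATADV_DEBUGINFO table: the open callback forwards inode->i_private (stashed when the file was created) to single_open(), and the seq_file show routine gets it back as seq->private. A minimal sketch of that idiom under the same assumptions follows; it is kernel-context code and the my_* names are invented:

    #include <linux/debugfs.h>
    #include <linux/fs.h>
    #include <linux/module.h>
    #include <linux/seq_file.h>

    static int my_show(struct seq_file *seq, void *offset)
    {
        /* seq->private is whatever was handed to single_open() */
        seq_printf(seq, "private=%p\n", seq->private);
        return 0;
    }

    static int my_open(struct inode *inode, struct file *file)
    {
        /* i_private was supplied at debugfs_create_file() time */
        return single_open(file, my_show, inode->i_private);
    }

    static const struct file_operations my_fops = {
        .owner   = THIS_MODULE,
        .open    = my_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
    };

    /* usage, e.g.:
     * debugfs_create_file("nc_nodes", S_IRUGO, parent, net_dev, &my_fops);
     */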
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index d54188a112ea..8e15d966d9b0 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -816,7 +816,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 	bool ret = false;
 	struct batadv_dat_entry *dat_entry = NULL;
 	struct sk_buff *skb_new;
-	struct batadv_hard_iface *primary_if = NULL;
 
 	if (!atomic_read(&bat_priv->distributed_arp_table))
 		goto out;
@@ -838,22 +837,18 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 
 	dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
 	if (dat_entry) {
-		primary_if = batadv_primary_if_get_selected(bat_priv);
-		if (!primary_if)
-			goto out;
-
 		skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
-				     primary_if->soft_iface, ip_dst, hw_src,
+				     bat_priv->soft_iface, ip_dst, hw_src,
 				     dat_entry->mac_addr, hw_src);
 		if (!skb_new)
 			goto out;
 
 		skb_reset_mac_header(skb_new);
 		skb_new->protocol = eth_type_trans(skb_new,
-						   primary_if->soft_iface);
+						   bat_priv->soft_iface);
 		bat_priv->stats.rx_packets++;
 		bat_priv->stats.rx_bytes += skb->len + ETH_HLEN;
-		primary_if->soft_iface->last_rx = jiffies;
+		bat_priv->soft_iface->last_rx = jiffies;
 
 		netif_rx(skb_new);
 		batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
@@ -866,8 +861,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 out:
 	if (dat_entry)
 		batadv_dat_entry_free_ref(dat_entry);
-	if (primary_if)
-		batadv_hardif_free_ref(primary_if);
 	return ret;
 }
 
@@ -887,7 +880,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
 	__be32 ip_src, ip_dst;
 	uint8_t *hw_src;
 	struct sk_buff *skb_new;
-	struct batadv_hard_iface *primary_if = NULL;
 	struct batadv_dat_entry *dat_entry = NULL;
 	bool ret = false;
 	int err;
@@ -912,12 +904,8 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
 	if (!dat_entry)
 		goto out;
 
-	primary_if = batadv_primary_if_get_selected(bat_priv);
-	if (!primary_if)
-		goto out;
-
 	skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
-			     primary_if->soft_iface, ip_dst, hw_src,
+			     bat_priv->soft_iface, ip_dst, hw_src,
 			     dat_entry->mac_addr, hw_src);
 
 	if (!skb_new)
@@ -941,8 +929,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
 out:
 	if (dat_entry)
 		batadv_dat_entry_free_ref(dat_entry);
-	if (primary_if)
-		batadv_hardif_free_ref(primary_if);
 	if (ret)
 		kfree_skb(skb);
 	return ret;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 34f99a46ec1d..f105219f4a4b 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -500,7 +500,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 	rcu_read_unlock();
 
 	if (gw_count == 0)
-		seq_printf(seq, "No gateways in range ...\n");
+		seq_puts(seq, "No gateways in range ...\n");
 
 out:
 	if (primary_if)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 368219e026a9..522243aff2f3 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -307,11 +307,35 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
 	batadv_update_min_mtu(hard_iface->soft_iface);
 }
 
+/**
+ * batadv_master_del_slave - remove hard_iface from the current master interface
+ * @slave: the interface enslaved in another master
+ * @master: the master from which slave has to be removed
+ *
+ * Invoke ndo_del_slave on master passing slave as argument. In this way slave
+ * is free'd and master can correctly change its internal state.
+ * Return 0 on success, a negative value representing the error otherwise
+ */
+static int batadv_master_del_slave(struct batadv_hard_iface *slave,
+				   struct net_device *master)
+{
+	int ret;
+
+	if (!master)
+		return 0;
+
+	ret = -EBUSY;
+	if (master->netdev_ops->ndo_del_slave)
+		ret = master->netdev_ops->ndo_del_slave(master, slave->net_dev);
+
+	return ret;
+}
+
 int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 				   const char *iface_name)
 {
 	struct batadv_priv *bat_priv;
-	struct net_device *soft_iface;
+	struct net_device *soft_iface, *master;
 	__be16 ethertype = __constant_htons(ETH_P_BATMAN);
 	int ret;
 
@@ -321,11 +345,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 	if (!atomic_inc_not_zero(&hard_iface->refcount))
 		goto out;
 
-	/* hard-interface is part of a bridge */
-	if (hard_iface->net_dev->priv_flags & IFF_BRIDGE_PORT)
-		pr_err("You are about to enable batman-adv on '%s' which already is part of a bridge. Unless you know exactly what you are doing this is probably wrong and won't work the way you think it would.\n",
-		       hard_iface->net_dev->name);
-
 	soft_iface = dev_get_by_name(&init_net, iface_name);
 
 	if (!soft_iface) {
@@ -347,12 +366,24 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 		goto err_dev;
 	}
 
+	/* check if the interface is enslaved in another virtual one and
+	 * in that case unlink it first
+	 */
+	master = netdev_master_upper_dev_get(hard_iface->net_dev);
+	ret = batadv_master_del_slave(hard_iface, master);
+	if (ret)
+		goto err_dev;
+
 	hard_iface->soft_iface = soft_iface;
 	bat_priv = netdev_priv(hard_iface->soft_iface);
 
+	ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface);
+	if (ret)
+		goto err_dev;
+
 	ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface);
 	if (ret < 0)
-		goto err_dev;
+		goto err_upper;
 
 	hard_iface->if_num = bat_priv->num_ifaces;
 	bat_priv->num_ifaces++;
@@ -362,7 +393,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 		bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
 		bat_priv->num_ifaces--;
 		hard_iface->if_status = BATADV_IF_NOT_IN_USE;
-		goto err_dev;
+		goto err_upper;
 	}
 
 	hard_iface->batman_adv_ptype.type = ethertype;
@@ -401,14 +432,18 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 out:
 	return 0;
 
+err_upper:
+	netdev_upper_dev_unlink(hard_iface->net_dev, soft_iface);
 err_dev:
+	hard_iface->soft_iface = NULL;
 	dev_put(soft_iface);
 err:
 	batadv_hardif_free_ref(hard_iface);
 	return ret;
 }
 
-void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
+void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
+				     enum batadv_hard_if_cleanup autodel)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	struct batadv_hard_iface *primary_if = NULL;
@@ -446,9 +481,10 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
 	dev_put(hard_iface->soft_iface);
 
 	/* nobody uses this interface anymore */
-	if (!bat_priv->num_ifaces)
-		batadv_softif_destroy(hard_iface->soft_iface);
+	if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO)
+		batadv_softif_destroy_sysfs(hard_iface->soft_iface);
 
+	netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface);
 	hard_iface->soft_iface = NULL;
 	batadv_hardif_free_ref(hard_iface);
 
@@ -533,7 +569,8 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
 
 	/* first deactivate interface */
 	if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
-		batadv_hardif_disable_interface(hard_iface);
+		batadv_hardif_disable_interface(hard_iface,
+						BATADV_IF_CLEANUP_AUTO);
 
 	if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
 		return;
@@ -563,6 +600,11 @@ static int batadv_hard_if_event(struct notifier_block *this,
 	struct batadv_hard_iface *primary_if = NULL;
 	struct batadv_priv *bat_priv;
 
+	if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) {
+		batadv_sysfs_add_meshif(net_dev);
+		return NOTIFY_DONE;
+	}
+
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);
 	if (!hard_iface && event == NETDEV_REGISTER)
 		hard_iface = batadv_hardif_add_interface(net_dev);
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 308437d52e22..49892881a7c5 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -29,13 +29,24 @@ enum batadv_hard_if_state {
 	BATADV_IF_I_WANT_YOU,
 };
 
+/**
+ * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal
+ * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
+ * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed
+ */
+enum batadv_hard_if_cleanup {
+	BATADV_IF_CLEANUP_KEEP,
+	BATADV_IF_CLEANUP_AUTO,
+};
+
 extern struct notifier_block batadv_hard_if_notifier;
 
 struct batadv_hard_iface*
 batadv_hardif_get_by_netdev(const struct net_device *net_dev);
 int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 				   const char *iface_name);
-void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface);
+void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
+				     enum batadv_hard_if_cleanup autodel);
 void batadv_hardif_remove_interfaces(void);
 int batadv_hardif_min_mtu(struct net_device *soft_iface);
 void batadv_update_min_mtu(struct net_device *soft_iface);
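
The new enum batadv_hard_if_cleanup shown above turns an implicit behaviour into an explicit choice: callers of batadv_hardif_disable_interface() now state whether removing the last slave should also destroy the soft interface. A standalone toy version of the pattern, with all names invented, showing why a named enum reads better at the call site than a bare boolean:

    #include <stdio.h>

    enum cleanup_mode { CLEANUP_KEEP, CLEANUP_AUTO };

    static int num_slaves = 1;

    static void disable_iface(enum cleanup_mode autodel)
    {
        num_slaves--;
        /* The caller, not the callee, decides the policy. */
        if (num_slaves == 0 && autodel == CLEANUP_AUTO)
            printf("last slave gone: destroying soft interface\n");
        else
            printf("keeping soft interface alive\n");
    }

    int main(void)
    {
        disable_iface(CLEANUP_KEEP); /* self-documenting call site */
        num_slaves = 1;
        disable_iface(CLEANUP_AUTO);
        return 0;
    }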
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 0488d70c8c35..3e30a0f1b908 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -35,6 +35,7 @@
 #include "vis.h"
 #include "hash.h"
 #include "bat_algo.h"
+#include "network-coding.h"
 
 
 /* List manipulations on hardif_list have to be rtnl_lock()'ed,
@@ -70,6 +71,7 @@ static int __init batadv_init(void)
 	batadv_debugfs_init();
 
 	register_netdevice_notifier(&batadv_hard_if_notifier);
+	rtnl_link_register(&batadv_link_ops);
 
 	pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n",
 		BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
@@ -80,6 +82,7 @@ static int __init batadv_init(void)
 static void __exit batadv_exit(void)
 {
 	batadv_debugfs_destroy();
+	rtnl_link_unregister(&batadv_link_ops);
 	unregister_netdevice_notifier(&batadv_hard_if_notifier);
 	batadv_hardif_remove_interfaces();
 
@@ -135,6 +138,10 @@ int batadv_mesh_init(struct net_device *soft_iface)
 	if (ret < 0)
 		goto err;
 
+	ret = batadv_nc_init(bat_priv);
+	if (ret < 0)
+		goto err;
+
 	atomic_set(&bat_priv->gw.reselect, 0);
 	atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE);
 
@@ -157,6 +164,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
 
 	batadv_gw_node_purge(bat_priv);
 	batadv_originator_free(bat_priv);
+	batadv_nc_free(bat_priv);
 
 	batadv_tt_free(bat_priv);
 
@@ -169,7 +177,13 @@ void batadv_mesh_free(struct net_device *soft_iface)
 	atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
 }
 
-int batadv_is_my_mac(const uint8_t *addr)
+/**
+ * batadv_is_my_mac - check if the given mac address belongs to any of the real
+ * interfaces in the current mesh
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the address to check
+ */
+int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
 {
 	const struct batadv_hard_iface *hard_iface;
 
@@ -178,6 +192,9 @@ int batadv_is_my_mac(const uint8_t *addr)
 		if (hard_iface->if_status != BATADV_IF_ACTIVE)
 			continue;
 
+		if (hard_iface->soft_iface != bat_priv->soft_iface)
+			continue;
+
 		if (batadv_compare_eth(hard_iface->net_dev->dev_addr, addr)) {
 			rcu_read_unlock();
 			return 1;
@@ -411,7 +428,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct batadv_algo_ops *bat_algo_ops;
 
-	seq_printf(seq, "Available routing algorithms:\n");
+	seq_puts(seq, "Available routing algorithms:\n");
 
 	hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
 		seq_printf(seq, "%s\n", bat_algo_ops->name);
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index ced08b936a96..59a0d6af15c8 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2013.1.0"
+#define BATADV_SOURCE_VERSION "2013.2.0"
 #endif
 
 /* B.A.T.M.A.N. parameters */
@@ -105,6 +105,8 @@
 #define BATADV_RESET_PROTECTION_MS 30000
 #define BATADV_EXPECTED_SEQNO_RANGE 65536
 
+#define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */
+
 enum batadv_mesh_state {
 	BATADV_MESH_INACTIVE,
 	BATADV_MESH_ACTIVE,
@@ -150,6 +152,7 @@ enum batadv_uev_type {
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <net/sock.h>		/* struct sock */
+#include <net/rtnetlink.h>
 #include <linux/jiffies.h>
 #include <linux/seq_file.h>
 #include "types.h"
@@ -162,7 +165,7 @@ extern struct workqueue_struct *batadv_event_workqueue;
 
 int batadv_mesh_init(struct net_device *soft_iface);
 void batadv_mesh_free(struct net_device *soft_iface);
-int batadv_is_my_mac(const uint8_t *addr);
+int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
 struct batadv_hard_iface *
 batadv_seq_print_text_primary_if_get(struct seq_file *seq);
 int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
@@ -185,6 +188,7 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr);
  * @BATADV_DBG_TT: translation table messages
 * @BATADV_DBG_BLA: bridge loop avoidance messages
 * @BATADV_DBG_DAT: ARP snooping and DAT related messages
+ * @BATADV_DBG_NC: network coding related messages
 * @BATADV_DBG_ALL: the union of all the above log levels
 */
 enum batadv_dbg_level {
@@ -193,7 +197,8 @@ enum batadv_dbg_level {
 	BATADV_DBG_TT	= BIT(2),
 	BATADV_DBG_BLA	= BIT(3),
 	BATADV_DBG_DAT	= BIT(4),
-	BATADV_DBG_ALL	= 31,
+	BATADV_DBG_NC	= BIT(5),
+	BATADV_DBG_ALL	= 63,
 };
 
 #ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -298,4 +303,10 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv,
 	return sum;
 }
 
+/* Define a macro to reach the control buffer of the skb. The members of the
+ * control buffer are defined in struct batadv_skb_cb in types.h.
+ * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
+ */
+#define BATADV_SKB_CB(__skb)	((struct batadv_skb_cb *)&((__skb)->cb[0]))
+
 #endif /* _NET_BATMAN_ADV_MAIN_H_ */
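
BATADV_SKB_CB() follows the TCP_SKB_CB() idiom its comment mentions: the per-layer skb->cb[] scratch area (48 bytes in struct sk_buff) is overlaid with a protocol-private struct via a cast. A standalone model of the idiom, with fake_skb and demo_skb_cb invented for the example (the real code guards the overlay size with a build-time check; assert() stands in here):

    #include <assert.h>
    #include <stdio.h>

    /* Model of struct sk_buff's 48-byte control buffer */
    struct fake_skb {
        char cb[48];
    };

    /* Protocol-private view of the control buffer */
    struct demo_skb_cb {
        int decoded;
    };

    #define DEMO_SKB_CB(skb) ((struct demo_skb_cb *)&((skb)->cb[0]))

    int main(void)
    {
        struct fake_skb skb = { { 0 } };

        /* The overlay must fit inside cb[] */
        assert(sizeof(struct demo_skb_cb) <= sizeof(skb.cb));

        DEMO_SKB_CB(&skb)->decoded = 1;
        printf("decoded=%d\n", DEMO_SKB_CB(&skb)->decoded);
        return 0;
    }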
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
new file mode 100644
index 000000000000..f7c54305a918
--- /dev/null
+++ b/net/batman-adv/network-coding.c
@@ -0,0 +1,1822 @@
1/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors:
2 *
3 * Martin Hundebøll, Jeppe Ledet-Pedersen
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#include <linux/debugfs.h>
21
22#include "main.h"
23#include "hash.h"
24#include "network-coding.h"
25#include "send.h"
26#include "originator.h"
27#include "hard-interface.h"
28#include "routing.h"
29
30static struct lock_class_key batadv_nc_coding_hash_lock_class_key;
31static struct lock_class_key batadv_nc_decoding_hash_lock_class_key;
32
33static void batadv_nc_worker(struct work_struct *work);
34static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
35 struct batadv_hard_iface *recv_if);
36
37/**
38 * batadv_nc_start_timer - initialise the nc periodic worker
39 * @bat_priv: the bat priv with all the soft interface information
40 */
41static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
42{
43 queue_delayed_work(batadv_event_workqueue, &bat_priv->nc.work,
44 msecs_to_jiffies(10));
45}
46
47/**
48 * batadv_nc_init - initialise coding hash table and start house keeping
49 * @bat_priv: the bat priv with all the soft interface information
50 */
51int batadv_nc_init(struct batadv_priv *bat_priv)
52{
53 bat_priv->nc.timestamp_fwd_flush = jiffies;
54 bat_priv->nc.timestamp_sniffed_purge = jiffies;
55
56 if (bat_priv->nc.coding_hash || bat_priv->nc.decoding_hash)
57 return 0;
58
59 bat_priv->nc.coding_hash = batadv_hash_new(128);
60 if (!bat_priv->nc.coding_hash)
61 goto err;
62
63 batadv_hash_set_lock_class(bat_priv->nc.coding_hash,
64 &batadv_nc_coding_hash_lock_class_key);
65
66 bat_priv->nc.decoding_hash = batadv_hash_new(128);
67 if (!bat_priv->nc.decoding_hash)
68 goto err;
69
70 batadv_hash_set_lock_class(bat_priv->nc.coding_hash,
71 &batadv_nc_decoding_hash_lock_class_key);
72
73 /* Register our packet type */
74 if (batadv_recv_handler_register(BATADV_CODED,
75 batadv_nc_recv_coded_packet) < 0)
76 goto err;
77
78 INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker);
79 batadv_nc_start_timer(bat_priv);
80
81 return 0;
82
83err:
84 return -ENOMEM;
85}
86
87/**
88 * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables
89 * @bat_priv: the bat priv with all the soft interface information
90 */
91void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
92{
93 atomic_set(&bat_priv->network_coding, 1);
94 bat_priv->nc.min_tq = 200;
95 bat_priv->nc.max_fwd_delay = 10;
96 bat_priv->nc.max_buffer_time = 200;
97}
98
99/**
100 * batadv_nc_init_orig - initialise the nc fields of an orig_node
101 * @orig_node: the orig_node which is going to be initialised
102 */
103void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
104{
105 INIT_LIST_HEAD(&orig_node->in_coding_list);
106 INIT_LIST_HEAD(&orig_node->out_coding_list);
107 spin_lock_init(&orig_node->in_coding_list_lock);
108 spin_lock_init(&orig_node->out_coding_list_lock);
109}
110
111/**
112 * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove
113 * its refcount on the orig_node
114 * @rcu: rcu pointer of the nc node
115 */
116static void batadv_nc_node_free_rcu(struct rcu_head *rcu)
117{
118 struct batadv_nc_node *nc_node;
119
120 nc_node = container_of(rcu, struct batadv_nc_node, rcu);
121 batadv_orig_node_free_ref(nc_node->orig_node);
122 kfree(nc_node);
123}
124
125/**
126 * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly
127 * frees it
128 * @nc_node: the nc node to free
129 */
130static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node)
131{
132 if (atomic_dec_and_test(&nc_node->refcount))
133 call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu);
134}
135
136/**
137 * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly
138 * frees it
139 * @nc_path: the nc node to free
140 */
141static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path)
142{
143 if (atomic_dec_and_test(&nc_path->refcount))
144 kfree_rcu(nc_path, rcu);
145}
146
147/**
148 * batadv_nc_packet_free - frees nc packet
149 * @nc_packet: the nc packet to free
150 */
151static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
152{
153 if (nc_packet->skb)
154 kfree_skb(nc_packet->skb);
155
156 batadv_nc_path_free_ref(nc_packet->nc_path);
157 kfree(nc_packet);
158}
159
160/**
161 * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged
162 * @bat_priv: the bat priv with all the soft interface information
163 * @nc_node: the nc node to check
164 *
165 * Returns true if the entry has to be purged now, false otherwise
166 */
167static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
168 struct batadv_nc_node *nc_node)
169{
170 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
171 return true;
172
173 return batadv_has_timed_out(nc_node->last_seen, BATADV_NC_NODE_TIMEOUT);
174}
175
176/**
177 * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out
178 * @bat_priv: the bat priv with all the soft interface information
179 * @nc_path: the nc path to check
180 *
181 * Returns true if the entry has to be purged now, false otherwise
182 */
183static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
184 struct batadv_nc_path *nc_path)
185{
186 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
187 return true;
188
189 /* purge the path when no packets has been added for 10 times the
190 * max_fwd_delay time
191 */
192 return batadv_has_timed_out(nc_path->last_valid,
193 bat_priv->nc.max_fwd_delay * 10);
194}
195
196/**
197 * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out
198 * @bat_priv: the bat priv with all the soft interface information
199 * @nc_path: the nc path to check
200 *
201 * Returns true if the entry has to be purged now, false otherwise
202 */
203static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
204 struct batadv_nc_path *nc_path)
205{
206 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
207 return true;
208
209 /* purge the path when no packets has been added for 10 times the
210 * max_buffer time
211 */
212 return batadv_has_timed_out(nc_path->last_valid,
213 bat_priv->nc.max_buffer_time*10);
214}
215
216/**
217 * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale
218 * entries
219 * @bat_priv: the bat priv with all the soft interface information
220 * @list: list of nc nodes
221 * @lock: nc node list lock
222 * @to_purge: function in charge to decide whether an entry has to be purged or
223 * not. This function takes the nc node as argument and has to return
224 * a boolean value: true if the entry has to be deleted, false
225 * otherwise
226 */
227static void
228batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv,
229 struct list_head *list,
230 spinlock_t *lock,
231 bool (*to_purge)(struct batadv_priv *,
232 struct batadv_nc_node *))
233{
234 struct batadv_nc_node *nc_node, *nc_node_tmp;
235
236 /* For each nc_node in list */
237 spin_lock_bh(lock);
238 list_for_each_entry_safe(nc_node, nc_node_tmp, list, list) {
239 /* if an helper function has been passed as parameter,
240 * ask it if the entry has to be purged or not
241 */
242 if (to_purge && !to_purge(bat_priv, nc_node))
243 continue;
244
245 batadv_dbg(BATADV_DBG_NC, bat_priv,
246 "Removing nc_node %pM -> %pM\n",
247 nc_node->addr, nc_node->orig_node->orig);
248 list_del_rcu(&nc_node->list);
249 batadv_nc_node_free_ref(nc_node);
250 }
251 spin_unlock_bh(lock);
252}
253
254/**
255 * batadv_nc_purge_orig - purges all nc node data attached of the given
256 * originator
257 * @bat_priv: the bat priv with all the soft interface information
258 * @orig_node: orig_node with the nc node entries to be purged
259 * @to_purge: function in charge to decide whether an entry has to be purged or
260 * not. This function takes the nc node as argument and has to return
261 * a boolean value: true is the entry has to be deleted, false
262 * otherwise
263 */
264void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
265 struct batadv_orig_node *orig_node,
266 bool (*to_purge)(struct batadv_priv *,
267 struct batadv_nc_node *))
268{
269 /* Check ingoing nc_node's of this orig_node */
270 batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->in_coding_list,
271 &orig_node->in_coding_list_lock,
272 to_purge);
273
274 /* Check outgoing nc_node's of this orig_node */
275 batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->out_coding_list,
276 &orig_node->out_coding_list_lock,
277 to_purge);
278}
279
280/**
281 * batadv_nc_purge_orig_hash - traverse entire originator hash to check if they
282 * have timed out nc nodes
283 * @bat_priv: the bat priv with all the soft interface information
284 */
285static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv)
286{
287 struct batadv_hashtable *hash = bat_priv->orig_hash;
288 struct hlist_head *head;
289 struct batadv_orig_node *orig_node;
290 uint32_t i;
291
292 if (!hash)
293 return;
294
295 /* For each orig_node */
296 for (i = 0; i < hash->size; i++) {
297 head = &hash->table[i];
298
299 rcu_read_lock();
300 hlist_for_each_entry_rcu(orig_node, head, hash_entry)
301 batadv_nc_purge_orig(bat_priv, orig_node,
302 batadv_nc_to_purge_nc_node);
303 rcu_read_unlock();
304 }
305}
306
307/**
308 * batadv_nc_purge_paths - traverse all nc paths part of the hash and remove
309 * unused ones
310 * @bat_priv: the bat priv with all the soft interface information
311 * @hash: hash table containing the nc paths to check
312 * @to_purge: function in charge to decide whether an entry has to be purged or
313 * not. This function takes the nc node as argument and has to return
314 * a boolean value: true is the entry has to be deleted, false
315 * otherwise
316 */
317static void batadv_nc_purge_paths(struct batadv_priv *bat_priv,
318 struct batadv_hashtable *hash,
319 bool (*to_purge)(struct batadv_priv *,
320 struct batadv_nc_path *))
321{
322 struct hlist_head *head;
323 struct hlist_node *node_tmp;
324 struct batadv_nc_path *nc_path;
325 spinlock_t *lock; /* Protects lists in hash */
326 uint32_t i;
327
328 for (i = 0; i < hash->size; i++) {
329 head = &hash->table[i];
330 lock = &hash->list_locks[i];
331
332 /* For each nc_path in this bin */
333 spin_lock_bh(lock);
334 hlist_for_each_entry_safe(nc_path, node_tmp, head, hash_entry) {
335 /* if an helper function has been passed as parameter,
336 * ask it if the entry has to be purged or not
337 */
338 if (to_purge && !to_purge(bat_priv, nc_path))
339 continue;
340
341 /* purging an non-empty nc_path should never happen, but
342 * is observed under high CPU load. Delay the purging
343 * until next iteration to allow the packet_list to be
344 * emptied first.
345 */
346 if (!unlikely(list_empty(&nc_path->packet_list))) {
347 net_ratelimited_function(printk,
348 KERN_WARNING
349 "Skipping free of non-empty nc_path (%pM -> %pM)!\n",
350 nc_path->prev_hop,
351 nc_path->next_hop);
352 continue;
353 }
354
355 /* nc_path is unused, so remove it */
356 batadv_dbg(BATADV_DBG_NC, bat_priv,
357 "Remove nc_path %pM -> %pM\n",
358 nc_path->prev_hop, nc_path->next_hop);
359 hlist_del_rcu(&nc_path->hash_entry);
360 batadv_nc_path_free_ref(nc_path);
361 }
362 spin_unlock_bh(lock);
363 }
364}
365
366/**
367 * batadv_nc_hash_key_gen - computes the nc_path hash key
368 * @key: buffer to hold the final hash key
369 * @src: source ethernet mac address going into the hash key
370 * @dst: destination ethernet mac address going into the hash key
371 */
372static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src,
373 const char *dst)
374{
375 memcpy(key->prev_hop, src, sizeof(key->prev_hop));
376 memcpy(key->next_hop, dst, sizeof(key->next_hop));
377}
378
379/**
380 * batadv_nc_hash_choose - compute the hash value for an nc path
381 * @data: data to hash
382 * @size: size of the hash table
383 *
384 * Returns the selected index in the hash table for the given data.
385 */
386static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size)
387{
388 const struct batadv_nc_path *nc_path = data;
389 uint32_t hash = 0;
390
391 hash = batadv_hash_bytes(hash, &nc_path->prev_hop,
392 sizeof(nc_path->prev_hop));
393 hash = batadv_hash_bytes(hash, &nc_path->next_hop,
394 sizeof(nc_path->next_hop));
395
396 hash += (hash << 3);
397 hash ^= (hash >> 11);
398 hash += (hash << 15);
399
400 return hash % size;
401}
402
403/**
404 * batadv_nc_hash_compare - comparing function used in the network coding hash
405 * tables
406 * @node: node in the local table
407 * @data2: second object to compare the node to
408 *
409 * Returns 1 if the two entry are the same, 0 otherwise
410 */
411static int batadv_nc_hash_compare(const struct hlist_node *node,
412 const void *data2)
413{
414 const struct batadv_nc_path *nc_path1, *nc_path2;
415
416 nc_path1 = container_of(node, struct batadv_nc_path, hash_entry);
417 nc_path2 = data2;
418
419 /* Return 1 if the two keys are identical */
420 if (memcmp(nc_path1->prev_hop, nc_path2->prev_hop,
421 sizeof(nc_path1->prev_hop)) != 0)
422 return 0;
423
424 if (memcmp(nc_path1->next_hop, nc_path2->next_hop,
425 sizeof(nc_path1->next_hop)) != 0)
426 return 0;
427
428 return 1;
429}
430
431/**
432 * batadv_nc_hash_find - search for an existing nc path and return it
433 * @hash: hash table containing the nc path
434 * @data: search key
435 *
436 * Returns the nc_path if found, NULL otherwise.
437 */
438static struct batadv_nc_path *
439batadv_nc_hash_find(struct batadv_hashtable *hash,
440 void *data)
441{
442 struct hlist_head *head;
443 struct batadv_nc_path *nc_path, *nc_path_tmp = NULL;
444 int index;
445
446 if (!hash)
447 return NULL;
448
449 index = batadv_nc_hash_choose(data, hash->size);
450 head = &hash->table[index];
451
452 rcu_read_lock();
453 hlist_for_each_entry_rcu(nc_path, head, hash_entry) {
454 if (!batadv_nc_hash_compare(&nc_path->hash_entry, data))
455 continue;
456
457 if (!atomic_inc_not_zero(&nc_path->refcount))
458 continue;
459
460 nc_path_tmp = nc_path;
461 break;
462 }
463 rcu_read_unlock();
464
465 return nc_path_tmp;
466}
467
468/**
469 * batadv_nc_send_packet - send non-coded packet and free nc_packet struct
470 * @nc_packet: the nc packet to send
471 */
472static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
473{
474 batadv_send_skb_packet(nc_packet->skb,
475 nc_packet->neigh_node->if_incoming,
476 nc_packet->nc_path->next_hop);
477 nc_packet->skb = NULL;
478 batadv_nc_packet_free(nc_packet);
479}
480
481/**
482 * batadv_nc_sniffed_purge - Checks timestamp of given sniffed nc_packet.
483 * @bat_priv: the bat priv with all the soft interface information
484 * @nc_path: the nc path the packet belongs to
485 * @nc_packet: the nc packet to be checked
486 *
487 * Checks whether the given sniffed (overheard) nc_packet has hit its buffering
488 * timeout. If so, the packet is no longer kept and its entry is deleted from
489 * the queue. Has to be called with the appropriate locks.
490 *
491 * Returns false if the entry in the FIFO queue has not yet timed out,
492 * true otherwise.
493 */
494static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
495 struct batadv_nc_path *nc_path,
496 struct batadv_nc_packet *nc_packet)
497{
498 unsigned long timeout = bat_priv->nc.max_buffer_time;
499 bool res = false;
500
501 /* Packets are added to tail, so the remaining packets did not time
502 * out and we can stop processing the current queue
503 */
504 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE &&
505 !batadv_has_timed_out(nc_packet->timestamp, timeout))
506 goto out;
507
508 /* purge nc packet */
509 list_del(&nc_packet->list);
510 batadv_nc_packet_free(nc_packet);
511
512 res = true;
513
514out:
515 return res;
516}
517
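Both purge helpers delegate the age check to batadv_has_timed_out(), whose definition is not part of this excerpt. A sketch of its assumed shape from main.h, with the timeout given in milliseconds and compared against jiffies:

#include <linux/jiffies.h>
#include <linux/types.h>

/* Assumed shape of the helper from main.h (not shown in this diff):
 * true once more than `timeout` ms have passed since `timestamp`.
 */
static inline bool batadv_has_timed_out(unsigned long timestamp,
					unsigned int timeout)
{
	return time_is_before_jiffies(timestamp + msecs_to_jiffies(timeout));
}
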
518/**
519 * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet.
520 * @bat_priv: the bat priv with all the soft interface information
521 * @nc_path: the nc path the packet belongs to
522 * @nc_packet: the nc packet to be checked
523 *
524 * Checks whether the given nc packet has hit its forward timeout. If so, the
525 * packet is no longer delayed but sent immediately, and its entry is deleted
526 * from the queue. Has to be called with the appropriate locks.
527 *
528 * Returns false if the entry in the FIFO queue has not yet timed out,
529 * true otherwise.
530 */
531static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
532 struct batadv_nc_path *nc_path,
533 struct batadv_nc_packet *nc_packet)
534{
535 unsigned long timeout = bat_priv->nc.max_fwd_delay;
536
537 /* Packets are added to tail, so the remaining packets did not time
538 * out and we can stop processing the current queue
539 */
540 if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE &&
541 !batadv_has_timed_out(nc_packet->timestamp, timeout))
542 return false;
543
544 /* Send packet */
545 batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
546 batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
547 nc_packet->skb->len + ETH_HLEN);
548 list_del(&nc_packet->list);
549 batadv_nc_send_packet(nc_packet);
550
551 return true;
552}
553
554/**
555 * batadv_nc_process_nc_paths - traverse the given nc packet pool and free
556 * timed-out nc packets
557 * @bat_priv: the bat priv with all the soft interface information
558 * @hash: hash table to be processed
559 * @process_fn: function called to process the given nc packet. Should return
560 * true to let this function proceed with the next packet.
561 * Otherwise the rest of the current queue is skipped.
562 */
563static void
564batadv_nc_process_nc_paths(struct batadv_priv *bat_priv,
565 struct batadv_hashtable *hash,
566 bool (*process_fn)(struct batadv_priv *,
567 struct batadv_nc_path *,
568 struct batadv_nc_packet *))
569{
570 struct hlist_head *head;
571 struct batadv_nc_packet *nc_packet, *nc_packet_tmp;
572 struct batadv_nc_path *nc_path;
573 bool ret;
574 int i;
575
576 if (!hash)
577 return;
578
579 /* Loop hash table bins */
580 for (i = 0; i < hash->size; i++) {
581 head = &hash->table[i];
582
583 /* Loop coding paths */
584 rcu_read_lock();
585 hlist_for_each_entry_rcu(nc_path, head, hash_entry) {
586 /* Loop packets */
587 spin_lock_bh(&nc_path->packet_list_lock);
588 list_for_each_entry_safe(nc_packet, nc_packet_tmp,
589 &nc_path->packet_list, list) {
590 ret = process_fn(bat_priv, nc_path, nc_packet);
591 if (!ret)
592 break;
593 }
594 spin_unlock_bh(&nc_path->packet_list_lock);
595 }
596 rcu_read_unlock();
597 }
598}
599
600/**
601 * batadv_nc_worker - periodic task for housekeeping related to network coding
602 * @work: kernel work struct
603 */
604static void batadv_nc_worker(struct work_struct *work)
605{
606 struct delayed_work *delayed_work;
607 struct batadv_priv_nc *priv_nc;
608 struct batadv_priv *bat_priv;
609 unsigned long timeout;
610
611 delayed_work = container_of(work, struct delayed_work, work);
612 priv_nc = container_of(delayed_work, struct batadv_priv_nc, work);
613 bat_priv = container_of(priv_nc, struct batadv_priv, nc);
614
615 batadv_nc_purge_orig_hash(bat_priv);
616 batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash,
617 batadv_nc_to_purge_nc_path_coding);
618 batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash,
619 batadv_nc_to_purge_nc_path_decoding);
620
621 timeout = bat_priv->nc.max_fwd_delay;
622
623 if (batadv_has_timed_out(bat_priv->nc.timestamp_fwd_flush, timeout)) {
624 batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.coding_hash,
625 batadv_nc_fwd_flush);
626 bat_priv->nc.timestamp_fwd_flush = jiffies;
627 }
628
629 if (batadv_has_timed_out(bat_priv->nc.timestamp_sniffed_purge,
630 bat_priv->nc.max_buffer_time)) {
631 batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.decoding_hash,
632 batadv_nc_sniffed_purge);
633 bat_priv->nc.timestamp_sniffed_purge = jiffies;
634 }
635
636 /* Schedule a new check */
637 batadv_nc_start_timer(bat_priv);
638}
639
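batadv_nc_start_timer() is defined earlier in the file and is not part of this excerpt; the expected re-arming pattern is the standard delayed-work idiom. A sketch under that assumption (the 10 ms period shown here is an assumption, not taken from this hunk):

/* Assumed shape of the re-arming helper defined earlier in this file */
static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
{
	queue_delayed_work(batadv_event_workqueue, &bat_priv->nc.work,
			   msecs_to_jiffies(10));
}
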
640/**
641 * batadv_can_nc_with_orig - checks whether the given orig node is suitable for
642 * coding or not
643 * @bat_priv: the bat priv with all the soft interface information
644 * @orig_node: neighboring orig node which may be used as nc candidate
645 * @ogm_packet: incoming ogm packet also used for the checks
646 *
647 * Returns true only if all of the following hold:
648 * 1) the OGM has the most recent sequence number,
649 * 2) the TTL has been decremented by exactly one,
650 * 3) the OGM was received from the first hop from orig_node,
651 * 4) the TQ value of the OGM is above bat_priv->nc.min_tq.
652 */
653static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv,
654 struct batadv_orig_node *orig_node,
655 struct batadv_ogm_packet *ogm_packet)
656{
657 if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno))
658 return false;
659 if (orig_node->last_ttl != ogm_packet->header.ttl + 1)
660 return false;
661 if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender))
662 return false;
663 if (ogm_packet->tq < bat_priv->nc.min_tq)
664 return false;
665
666 return true;
667}
668
669/**
670 * batadv_nc_find_nc_node - search for an existing nc node and return it
671 * @orig_node: orig node originating the ogm packet
672 * @orig_neigh_node: neighboring orig node from which we received the ogm packet
673 * (can be equal to orig_node)
674 * @in_coding: traverse incoming or outgoing network coding list
675 *
676 * Returns the nc_node if found, NULL otherwise.
677 */
678static struct batadv_nc_node
679*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node,
680 struct batadv_orig_node *orig_neigh_node,
681 bool in_coding)
682{
683 struct batadv_nc_node *nc_node, *nc_node_out = NULL;
684 struct list_head *list;
685
686 if (in_coding)
687 list = &orig_neigh_node->in_coding_list;
688 else
689 list = &orig_neigh_node->out_coding_list;
690
691 /* Traverse list of nc_nodes to orig_node */
692 rcu_read_lock();
693 list_for_each_entry_rcu(nc_node, list, list) {
694 if (!batadv_compare_eth(nc_node->addr, orig_node->orig))
695 continue;
696
697 if (!atomic_inc_not_zero(&nc_node->refcount))
698 continue;
699
700 /* Found a match */
701 nc_node_out = nc_node;
702 break;
703 }
704 rcu_read_unlock();
705
706 return nc_node_out;
707}
708
709/**
710 * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was
711 * not found
712 * @bat_priv: the bat priv with all the soft interface information
713 * @orig_node: orig node originating the ogm packet
714 * @orig_neigh_node: neighboring orig node from which we received the ogm packet
715 * (can be equal to orig_node)
716 * @in_coding: traverse incoming or outgoing network coding list
717 *
718 * Returns the nc_node if found or created, NULL in case of an error.
719 */
720static struct batadv_nc_node
721*batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
722 struct batadv_orig_node *orig_node,
723 struct batadv_orig_node *orig_neigh_node,
724 bool in_coding)
725{
726 struct batadv_nc_node *nc_node;
727	spinlock_t *lock; /* Used to lock the list selected by "bool in_coding" */
728 struct list_head *list;
729
730 /* Check if nc_node is already added */
731 nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding);
732
733 /* Node found */
734 if (nc_node)
735 return nc_node;
736
737 nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC);
738 if (!nc_node)
739 return NULL;
740
741 if (!atomic_inc_not_zero(&orig_neigh_node->refcount))
742 goto free;
743
744 /* Initialize nc_node */
745 INIT_LIST_HEAD(&nc_node->list);
746 memcpy(nc_node->addr, orig_node->orig, ETH_ALEN);
747 nc_node->orig_node = orig_neigh_node;
748 atomic_set(&nc_node->refcount, 2);
749
750 /* Select ingoing or outgoing coding node */
751 if (in_coding) {
752 lock = &orig_neigh_node->in_coding_list_lock;
753 list = &orig_neigh_node->in_coding_list;
754 } else {
755 lock = &orig_neigh_node->out_coding_list_lock;
756 list = &orig_neigh_node->out_coding_list;
757 }
758
759 batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n",
760 nc_node->addr, nc_node->orig_node->orig);
761
762 /* Add nc_node to orig_node */
763 spin_lock_bh(lock);
764 list_add_tail_rcu(&nc_node->list, list);
765 spin_unlock_bh(lock);
766
767 return nc_node;
768
769free:
770 kfree(nc_node);
771 return NULL;
772}
773
774/**
775 * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node structs
776 * (best called on incoming OGMs)
777 * @bat_priv: the bat priv with all the soft interface information
778 * @orig_node: orig node originating the ogm packet
779 * @orig_neigh_node: neighboring orig node from which we received the ogm packet
780 * (can be equal to orig_node)
781 * @ogm_packet: incoming ogm packet
782 * @is_single_hop_neigh: orig_node is a single hop neighbor
783 */
784void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
785 struct batadv_orig_node *orig_node,
786 struct batadv_orig_node *orig_neigh_node,
787 struct batadv_ogm_packet *ogm_packet,
788 int is_single_hop_neigh)
789{
790 struct batadv_nc_node *in_nc_node = NULL, *out_nc_node = NULL;
791
792 /* Check if network coding is enabled */
793 if (!atomic_read(&bat_priv->network_coding))
794 goto out;
795
796 /* accept ogms from 'good' neighbors and single hop neighbors */
797 if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) &&
798 !is_single_hop_neigh)
799 goto out;
800
801 /* Add orig_node as in_nc_node on hop */
802 in_nc_node = batadv_nc_get_nc_node(bat_priv, orig_node,
803 orig_neigh_node, true);
804 if (!in_nc_node)
805 goto out;
806
807 in_nc_node->last_seen = jiffies;
808
809 /* Add hop as out_nc_node on orig_node */
810 out_nc_node = batadv_nc_get_nc_node(bat_priv, orig_neigh_node,
811 orig_node, false);
812 if (!out_nc_node)
813 goto out;
814
815 out_nc_node->last_seen = jiffies;
816
817out:
818 if (in_nc_node)
819 batadv_nc_node_free_ref(in_nc_node);
820 if (out_nc_node)
821 batadv_nc_node_free_ref(out_nc_node);
822}
823
824/**
825 * batadv_nc_get_path - get existing nc_path or allocate a new one
826 * @bat_priv: the bat priv with all the soft interface information
827 * @hash: hash table containing the nc path
828 * @src: ethernet source address - first half of the nc path search key
829 * @dst: ethernet destination address - second half of the nc path search key
830 *
831 * Returns pointer to nc_path if the path was found or created, returns NULL
832 * on error.
833 */
834static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
835 struct batadv_hashtable *hash,
836 uint8_t *src,
837 uint8_t *dst)
838{
839 int hash_added;
840 struct batadv_nc_path *nc_path, nc_path_key;
841
842 batadv_nc_hash_key_gen(&nc_path_key, src, dst);
843
844 /* Search for existing nc_path */
845 nc_path = batadv_nc_hash_find(hash, (void *)&nc_path_key);
846
847 if (nc_path) {
848 /* Set timestamp to delay removal of nc_path */
849 nc_path->last_valid = jiffies;
850 return nc_path;
851 }
852
853	/* No existing nc_path was found; create a new one */
854 nc_path = kzalloc(sizeof(*nc_path), GFP_ATOMIC);
855
856 if (!nc_path)
857 return NULL;
858
859 /* Initialize nc_path */
860 INIT_LIST_HEAD(&nc_path->packet_list);
861 spin_lock_init(&nc_path->packet_list_lock);
862 atomic_set(&nc_path->refcount, 2);
863 nc_path->last_valid = jiffies;
864 memcpy(nc_path->next_hop, dst, ETH_ALEN);
865 memcpy(nc_path->prev_hop, src, ETH_ALEN);
866
867 batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n",
868 nc_path->prev_hop,
869 nc_path->next_hop);
870
871 /* Add nc_path to hash table */
872 hash_added = batadv_hash_add(hash, batadv_nc_hash_compare,
873 batadv_nc_hash_choose, &nc_path_key,
874 &nc_path->hash_entry);
875
876 if (hash_added < 0) {
877 kfree(nc_path);
878 return NULL;
879 }
880
881 return nc_path;
882}
883
884/**
885 * batadv_nc_random_weight_tq - scale the receiver's TQ value to avoid unfair
886 * selection of a receiver with a slightly lower TQ than the other
887 * @tq: to be weighted tq value
888 */
889static uint8_t batadv_nc_random_weight_tq(uint8_t tq)
890{
891	uint8_t rand_val; uint16_t rand_tq; /* product can exceed 8 bits */
892
893 get_random_bytes(&rand_val, sizeof(rand_val));
894
895 /* randomize the estimated packet loss (max TQ - estimated TQ) */
896 rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq);
897
898 /* normalize the randomized packet loss */
899 rand_tq /= BATADV_TQ_MAX_VALUE;
900
901 /* convert to (randomized) estimated tq again */
902 return BATADV_TQ_MAX_VALUE - rand_tq;
903}
904
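With BATADV_TQ_MAX_VALUE = 255, a receiver with tq = 200 has an estimated packet loss of 55; the random byte scales that loss down, so the weighted result always lands in [tq, 255]. A standalone sketch of the arithmetic - note that the intermediate product can reach 255 * 255 and therefore needs more than 8 bits:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define TQ_MAX_VALUE 255

static uint8_t random_weight_tq(uint8_t tq)
{
	uint8_t rand_val = rand() & 0xff;
	/* keep the product (up to 255 * 255) wide before scaling */
	uint32_t rand_tq = rand_val * (TQ_MAX_VALUE - tq);

	rand_tq /= TQ_MAX_VALUE;		/* back to the 0..255 range */
	return TQ_MAX_VALUE - (uint8_t)rand_tq;
}

int main(void)
{
	/* e.g. tq = 200: loss = 55, weighted tq is in [200, 255] */
	printf("%u\n", random_weight_tq(200));
	return 0;
}
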
905/**
906 * batadv_nc_memxor - XOR destination with source
907 * @dst: byte array to XOR into
908 * @src: byte array to XOR from
909 * @len: length of destination array
910 */
911static void batadv_nc_memxor(char *dst, const char *src, unsigned int len)
912{
913 unsigned int i;
914
915 for (i = 0; i < len; ++i)
916 dst[i] ^= src[i];
917}
918
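XOR is its own inverse, which is the whole trick behind the coding scheme: a relay transmits a ^ b once, and each receiver recovers the packet it is missing by XORing with the packet it already holds, since (a ^ b) ^ b == a. A tiny self-check:

#include <assert.h>
#include <string.h>

static void memxor(char *dst, const char *src, unsigned int len)
{
	unsigned int i;

	for (i = 0; i < len; ++i)
		dst[i] ^= src[i];
}

int main(void)
{
	char a[] = "first payload ";
	char b[] = "second payload";
	char coded[sizeof(a)];

	memcpy(coded, a, sizeof(a));
	memxor(coded, b, sizeof(a));	/* code:   coded = a ^ b */
	memxor(coded, b, sizeof(a));	/* decode: (a ^ b) ^ b == a */
	assert(memcmp(coded, a, sizeof(a)) == 0);
	return 0;
}
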
919/**
920 * batadv_nc_code_packets - code a received unicast_packet with an nc packet
921 * into a coded_packet and send it
922 * @bat_priv: the bat priv with all the soft interface information
923 * @skb: data skb to forward
924 * @ethhdr: pointer to the ethernet header inside the skb
925 * @nc_packet: structure containing the packet the skb can be coded with
926 * @neigh_node: next hop to forward packet to
927 *
928 * Returns true if both packets are consumed, false otherwise.
929 */
930static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
931 struct sk_buff *skb,
932 struct ethhdr *ethhdr,
933 struct batadv_nc_packet *nc_packet,
934 struct batadv_neigh_node *neigh_node)
935{
936 uint8_t tq_weighted_neigh, tq_weighted_coding;
937 struct sk_buff *skb_dest, *skb_src;
938 struct batadv_unicast_packet *packet1;
939 struct batadv_unicast_packet *packet2;
940 struct batadv_coded_packet *coded_packet;
941 struct batadv_neigh_node *neigh_tmp, *router_neigh;
942 struct batadv_neigh_node *router_coding = NULL;
943 uint8_t *first_source, *first_dest, *second_source, *second_dest;
944 __be32 packet_id1, packet_id2;
945 size_t count;
946 bool res = false;
947 int coding_len;
948 int unicast_size = sizeof(*packet1);
949 int coded_size = sizeof(*coded_packet);
950 int header_add = coded_size - unicast_size;
951
952 router_neigh = batadv_orig_node_get_router(neigh_node->orig_node);
953 if (!router_neigh)
954 goto out;
955
956 neigh_tmp = nc_packet->neigh_node;
957 router_coding = batadv_orig_node_get_router(neigh_tmp->orig_node);
958 if (!router_coding)
959 goto out;
960
961 tq_weighted_neigh = batadv_nc_random_weight_tq(router_neigh->tq_avg);
962 tq_weighted_coding = batadv_nc_random_weight_tq(router_coding->tq_avg);
963
964 /* Select one destination for the MAC-header dst-field based on
965 * weighted TQ-values.
966 */
967 if (tq_weighted_neigh >= tq_weighted_coding) {
968 /* Destination from nc_packet is selected for MAC-header */
969 first_dest = nc_packet->nc_path->next_hop;
970 first_source = nc_packet->nc_path->prev_hop;
971 second_dest = neigh_node->addr;
972 second_source = ethhdr->h_source;
973 packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data;
974 packet2 = (struct batadv_unicast_packet *)skb->data;
975 packet_id1 = nc_packet->packet_id;
976 packet_id2 = batadv_skb_crc32(skb,
977 skb->data + sizeof(*packet2));
978 } else {
979 /* Destination for skb is selected for MAC-header */
980 first_dest = neigh_node->addr;
981 first_source = ethhdr->h_source;
982 second_dest = nc_packet->nc_path->next_hop;
983 second_source = nc_packet->nc_path->prev_hop;
984 packet1 = (struct batadv_unicast_packet *)skb->data;
985 packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data;
986 packet_id1 = batadv_skb_crc32(skb,
987 skb->data + sizeof(*packet1));
988 packet_id2 = nc_packet->packet_id;
989 }
990
991 /* Instead of zero padding the smallest data buffer, we
992 * code into the largest.
993 */
994 if (skb->len <= nc_packet->skb->len) {
995 skb_dest = nc_packet->skb;
996 skb_src = skb;
997 } else {
998 skb_dest = skb;
999 skb_src = nc_packet->skb;
1000 }
1001
1002	/* coding_len is used when decoding the shorter packet */
1003 coding_len = skb_src->len - unicast_size;
1004
1005 if (skb_linearize(skb_dest) < 0 || skb_linearize(skb_src) < 0)
1006 goto out;
1007
1008 skb_push(skb_dest, header_add);
1009
1010 coded_packet = (struct batadv_coded_packet *)skb_dest->data;
1011 skb_reset_mac_header(skb_dest);
1012
1013 coded_packet->header.packet_type = BATADV_CODED;
1014 coded_packet->header.version = BATADV_COMPAT_VERSION;
1015 coded_packet->header.ttl = packet1->header.ttl;
1016
1017 /* Info about first unicast packet */
1018 memcpy(coded_packet->first_source, first_source, ETH_ALEN);
1019 memcpy(coded_packet->first_orig_dest, packet1->dest, ETH_ALEN);
1020 coded_packet->first_crc = packet_id1;
1021 coded_packet->first_ttvn = packet1->ttvn;
1022
1023 /* Info about second unicast packet */
1024 memcpy(coded_packet->second_dest, second_dest, ETH_ALEN);
1025 memcpy(coded_packet->second_source, second_source, ETH_ALEN);
1026 memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN);
1027 coded_packet->second_crc = packet_id2;
1028 coded_packet->second_ttl = packet2->header.ttl;
1029 coded_packet->second_ttvn = packet2->ttvn;
1030 coded_packet->coded_len = htons(coding_len);
1031
1032 /* This is where the magic happens: Code skb_src into skb_dest */
1033 batadv_nc_memxor(skb_dest->data + coded_size,
1034 skb_src->data + unicast_size, coding_len);
1035
1036 /* Update counters accordingly */
1037 if (BATADV_SKB_CB(skb_src)->decoded &&
1038 BATADV_SKB_CB(skb_dest)->decoded) {
1039 /* Both packets are recoded */
1040 count = skb_src->len + ETH_HLEN;
1041 count += skb_dest->len + ETH_HLEN;
1042 batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE, 2);
1043 batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, count);
1044 } else if (!BATADV_SKB_CB(skb_src)->decoded &&
1045 !BATADV_SKB_CB(skb_dest)->decoded) {
1046 /* Both packets are newly coded */
1047 count = skb_src->len + ETH_HLEN;
1048 count += skb_dest->len + ETH_HLEN;
1049 batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE, 2);
1050 batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, count);
1051 } else if (BATADV_SKB_CB(skb_src)->decoded &&
1052 !BATADV_SKB_CB(skb_dest)->decoded) {
1053 /* skb_src recoded and skb_dest is newly coded */
1054 batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);
1055 batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES,
1056 skb_src->len + ETH_HLEN);
1057 batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE);
1058 batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES,
1059 skb_dest->len + ETH_HLEN);
1060 } else if (!BATADV_SKB_CB(skb_src)->decoded &&
1061 BATADV_SKB_CB(skb_dest)->decoded) {
1062 /* skb_src is newly coded and skb_dest is recoded */
1063 batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE);
1064 batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES,
1065 skb_src->len + ETH_HLEN);
1066 batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);
1067 batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES,
1068 skb_dest->len + ETH_HLEN);
1069 }
1070
1071 /* skb_src is now coded into skb_dest, so free it */
1072 kfree_skb(skb_src);
1073
1074 /* avoid duplicate free of skb from nc_packet */
1075 nc_packet->skb = NULL;
1076 batadv_nc_packet_free(nc_packet);
1077
1078 /* Send the coded packet and return true */
1079 batadv_send_skb_packet(skb_dest, neigh_node->if_incoming, first_dest);
1080 res = true;
1081out:
1082 if (router_neigh)
1083 batadv_neigh_node_free_ref(router_neigh);
1084 if (router_coding)
1085 batadv_neigh_node_free_ref(router_coding);
1086 return res;
1087}
1088
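Note how the sizes work out: the shorter payload is XORed into the longer skb, so nothing is padded on the wire; the receiver only needs coded_len to know where the shorter packet ends, and the frame grows by the difference between the coded and unicast headers. A worked example with hypothetical header sizes (the real values come from the struct definitions in packet.h):

#include <stdio.h>

int main(void)
{
	/* hypothetical sizes standing in for sizeof(*packet1) and
	 * sizeof(*coded_packet); the real values come from packet.h
	 */
	int unicast_size = 10;
	int coded_size = 46;
	int header_add = coded_size - unicast_size;

	int skb_len = 110;	/* packet about to be forwarded */
	int nc_len = 310;	/* buffered coding partner */

	/* the shorter payload is XORed into the longer buffer */
	int src_len = (skb_len <= nc_len) ? skb_len : nc_len;
	int dest_len = (skb_len <= nc_len) ? nc_len : skb_len;
	int coding_len = src_len - unicast_size;

	/* the wire frame replaces the unicast header with the coded one */
	printf("coding_len = %d, frame length = %d\n",
	       coding_len, dest_len + header_add);
	return 0;
}
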
1089/**
1090 * batadv_nc_skb_coding_possible - check if the skb may be coded towards dst
1091 * @skb: data skb to forward
1092 * @dst: destination mac address of the other skb to code with
1093 * @src: source mac address of skb
1094 *
1095 * Whenever we network code a packet we have to check whether we received it in
1096 * a network coded form. If so, we may not be able to use it for coding because
1097 * some neighbors may also have received (overheard) the packet in the network
1098 * coded form without being able to decode it. It is hard to know which of the
1099 * neighboring nodes was able to decode the packet, therefore we can only
1100 * re-code the packet if the source of the previous encoded packet is involved.
1101 * Since the source encoded the packet we can be certain it has all necessary
1102 * decode information.
1103 *
1104 * Returns true if coding of a decoded packet is allowed.
1105 */
1106static bool batadv_nc_skb_coding_possible(struct sk_buff *skb,
1107 uint8_t *dst, uint8_t *src)
1108{
1109 if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src))
1110 return false;
1111 else
1112 return true;
1113}
1114
1115/**
1116 * batadv_nc_path_search - Find the coding path matching in_nc_node and
1117 * out_nc_node to retrieve a buffered packet that can be used for coding.
1118 * @bat_priv: the bat priv with all the soft interface information
1119 * @in_nc_node: pointer to skb next hop's neighbor nc node
1120 * @out_nc_node: pointer to skb source's neighbor nc node
1121 * @skb: data skb to forward
1122 * @eth_dst: next hop mac address of skb
1123 *
1124 * Returns a buffered nc_packet that can be coded with the skb, NULL otherwise.
1125 */
1126static struct batadv_nc_packet *
1127batadv_nc_path_search(struct batadv_priv *bat_priv,
1128 struct batadv_nc_node *in_nc_node,
1129 struct batadv_nc_node *out_nc_node,
1130 struct sk_buff *skb,
1131 uint8_t *eth_dst)
1132{
1133 struct batadv_nc_path *nc_path, nc_path_key;
1134 struct batadv_nc_packet *nc_packet_out = NULL;
1135 struct batadv_nc_packet *nc_packet, *nc_packet_tmp;
1136 struct batadv_hashtable *hash = bat_priv->nc.coding_hash;
1137 int idx;
1138
1139 if (!hash)
1140 return NULL;
1141
1142	/* Create the nc_path search key */
1143 batadv_nc_hash_key_gen(&nc_path_key, in_nc_node->addr,
1144 out_nc_node->addr);
1145 idx = batadv_nc_hash_choose(&nc_path_key, hash->size);
1146
1147 /* Check for coding opportunities in this nc_path */
1148 rcu_read_lock();
1149 hlist_for_each_entry_rcu(nc_path, &hash->table[idx], hash_entry) {
1150 if (!batadv_compare_eth(nc_path->prev_hop, in_nc_node->addr))
1151 continue;
1152
1153 if (!batadv_compare_eth(nc_path->next_hop, out_nc_node->addr))
1154 continue;
1155
1156 spin_lock_bh(&nc_path->packet_list_lock);
1157 if (list_empty(&nc_path->packet_list)) {
1158 spin_unlock_bh(&nc_path->packet_list_lock);
1159 continue;
1160 }
1161
1162 list_for_each_entry_safe(nc_packet, nc_packet_tmp,
1163 &nc_path->packet_list, list) {
1164 if (!batadv_nc_skb_coding_possible(nc_packet->skb,
1165 eth_dst,
1166 in_nc_node->addr))
1167 continue;
1168
1169 /* Coding opportunity is found! */
1170 list_del(&nc_packet->list);
1171 nc_packet_out = nc_packet;
1172 break;
1173 }
1174
1175 spin_unlock_bh(&nc_path->packet_list_lock);
1176 break;
1177 }
1178 rcu_read_unlock();
1179
1180 return nc_packet_out;
1181}
1182
1183/**
1184 * batadv_nc_skb_src_search - Loops through the list of neighboring nodes of the
1185 * skb's sender (may be equal to the originator).
1186 * @bat_priv: the bat priv with all the soft interface information
1187 * @skb: data skb to forward
1188 * @eth_dst: next hop mac address of skb
1189 * @eth_src: source mac address of skb
1190 * @in_nc_node: pointer to skb next hop's neighbor nc node
1191 *
1192 * Returns an nc packet if a suitable coding packet was found, NULL otherwise.
1193 */
1194static struct batadv_nc_packet *
1195batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
1196 struct sk_buff *skb,
1197 uint8_t *eth_dst,
1198 uint8_t *eth_src,
1199 struct batadv_nc_node *in_nc_node)
1200{
1201 struct batadv_orig_node *orig_node;
1202 struct batadv_nc_node *out_nc_node;
1203 struct batadv_nc_packet *nc_packet = NULL;
1204
1205 orig_node = batadv_orig_hash_find(bat_priv, eth_src);
1206 if (!orig_node)
1207 return NULL;
1208
1209 rcu_read_lock();
1210 list_for_each_entry_rcu(out_nc_node,
1211 &orig_node->out_coding_list, list) {
1212 /* Check if the skb is decoded and if recoding is possible */
1213 if (!batadv_nc_skb_coding_possible(skb,
1214 out_nc_node->addr, eth_src))
1215 continue;
1216
1217 /* Search for an opportunity in this nc_path */
1218 nc_packet = batadv_nc_path_search(bat_priv, in_nc_node,
1219 out_nc_node, skb, eth_dst);
1220 if (nc_packet)
1221 break;
1222 }
1223 rcu_read_unlock();
1224
1225 batadv_orig_node_free_ref(orig_node);
1226 return nc_packet;
1227}
1228
1229/**
1230 * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the
1231 * unicast skb before it is stored for use in later decoding
1232 * @bat_priv: the bat priv with all the soft interface information
1233 * @skb: data skb to store
1234 * @eth_dst_new: new destination mac address of skb
1235 */
1236static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
1237 struct sk_buff *skb,
1238 uint8_t *eth_dst_new)
1239{
1240 struct ethhdr *ethhdr;
1241
1242 /* Copy skb header to change the mac header */
1243 skb = pskb_copy(skb, GFP_ATOMIC);
1244 if (!skb)
1245 return;
1246
1247 /* Set the mac header as if we actually sent the packet uncoded */
1248 ethhdr = (struct ethhdr *)skb_mac_header(skb);
1249 memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN);
1250 memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN);
1251
1252 /* Set data pointer to MAC header to mimic packets from our tx path */
1253 skb_push(skb, ETH_HLEN);
1254
1255 /* Add the packet to the decoding packet pool */
1256 batadv_nc_skb_store_for_decoding(bat_priv, skb);
1257
1258 /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free
1259 * our ref
1260 */
1261 kfree_skb(skb);
1262}
1263
1264/**
1265 * batadv_nc_skb_dst_search - Loops through list of neighboring nodes to dst.
1266 * @skb: data skb to forward
1267 * @neigh_node: next hop to forward packet to
1268 * @ethhdr: pointer to the ethernet header inside the skb
1269 *
1270 * Loops through the list of neighboring nodes the next hop has a good
1271 * connection to (receives OGMs with a sufficient quality). We need to find
1272 * a neighbor of our next hop that potentially sent a packet which our next
1273 * hop also received (overheard) and has stored for later decoding.
1274 *
1275 * Returns true if the skb was consumed (encoded packet sent) or false otherwise
1276 */
1277static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
1278 struct batadv_neigh_node *neigh_node,
1279 struct ethhdr *ethhdr)
1280{
1281 struct net_device *netdev = neigh_node->if_incoming->soft_iface;
1282 struct batadv_priv *bat_priv = netdev_priv(netdev);
1283 struct batadv_orig_node *orig_node = neigh_node->orig_node;
1284 struct batadv_nc_node *nc_node;
1285 struct batadv_nc_packet *nc_packet = NULL;
1286
1287 rcu_read_lock();
1288 list_for_each_entry_rcu(nc_node, &orig_node->in_coding_list, list) {
1289 /* Search for coding opportunity with this in_nc_node */
1290 nc_packet = batadv_nc_skb_src_search(bat_priv, skb,
1291 neigh_node->addr,
1292 ethhdr->h_source, nc_node);
1293
1294 /* Opportunity was found, so stop searching */
1295 if (nc_packet)
1296 break;
1297 }
1298 rcu_read_unlock();
1299
1300 if (!nc_packet)
1301 return false;
1302
1303 /* Save packets for later decoding */
1304 batadv_nc_skb_store_before_coding(bat_priv, skb,
1305 neigh_node->addr);
1306 batadv_nc_skb_store_before_coding(bat_priv, nc_packet->skb,
1307 nc_packet->neigh_node->addr);
1308
1309 /* Code and send packets */
1310 if (batadv_nc_code_packets(bat_priv, skb, ethhdr, nc_packet,
1311 neigh_node))
1312 return true;
1313
1314	/* Out of memory? Coding failed - we have to free the buffered packet
1315	 * to avoid memory leaks. The skb passed as argument will be dealt with
1316 * by the calling function.
1317 */
1318 batadv_nc_send_packet(nc_packet);
1319 return false;
1320}
1321
1322/**
1323 * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding
1324 * @skb: skb to add to path
1325 * @nc_path: path to add skb to
1326 * @neigh_node: next hop to forward packet to
1327 * @packet_id: checksum to identify packet
1328 *
1329 * Returns true if the packet was buffered or false in case of an error.
1330 */
1331static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
1332 struct batadv_nc_path *nc_path,
1333 struct batadv_neigh_node *neigh_node,
1334 __be32 packet_id)
1335{
1336 struct batadv_nc_packet *nc_packet;
1337
1338 nc_packet = kzalloc(sizeof(*nc_packet), GFP_ATOMIC);
1339 if (!nc_packet)
1340 return false;
1341
1342 /* Initialize nc_packet */
1343 nc_packet->timestamp = jiffies;
1344 nc_packet->packet_id = packet_id;
1345 nc_packet->skb = skb;
1346 nc_packet->neigh_node = neigh_node;
1347 nc_packet->nc_path = nc_path;
1348
1349 /* Add coding packet to list */
1350 spin_lock_bh(&nc_path->packet_list_lock);
1351 list_add_tail(&nc_packet->list, &nc_path->packet_list);
1352 spin_unlock_bh(&nc_path->packet_list_lock);
1353
1354 return true;
1355}
1356
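Entries in both buffers are keyed by packet_id, a CRC over everything behind the unicast header, stored big-endian as __be32. A userspace sketch of such a checksum, using zlib's crc32() as a rough stand-in (the actual kernel helper batadv_skb_crc32() may use a different CRC variant and walks skb fragments rather than a flat buffer):

#include <stdint.h>
#include <stddef.h>
#include <zlib.h>
#include <arpa/inet.h>

/* Stand-in for batadv_skb_crc32(): checksum everything after the
 * unicast header and store it big-endian, like the __be32 packet_id.
 */
static uint32_t packet_id_of(const uint8_t *frame, size_t len, size_t hdr)
{
	uint32_t crc = crc32(0L, Z_NULL, 0);

	crc = crc32(crc, frame + hdr, len - hdr);
	return htonl(crc);
}
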
1357/**
1358 * batadv_nc_skb_forward - try to code a packet or add it to the coding packet
1359 * buffer
1360 * @skb: data skb to forward
1361 * @neigh_node: next hop to forward packet to
1362 * @ethhdr: pointer to the ethernet header inside the skb
1363 *
1364 * Returns true if the skb was consumed (encoded packet sent) or false otherwise
1365 */
1366bool batadv_nc_skb_forward(struct sk_buff *skb,
1367 struct batadv_neigh_node *neigh_node,
1368 struct ethhdr *ethhdr)
1369{
1370 const struct net_device *netdev = neigh_node->if_incoming->soft_iface;
1371 struct batadv_priv *bat_priv = netdev_priv(netdev);
1372 struct batadv_unicast_packet *packet;
1373 struct batadv_nc_path *nc_path;
1374 __be32 packet_id;
1375 u8 *payload;
1376
1377 /* Check if network coding is enabled */
1378 if (!atomic_read(&bat_priv->network_coding))
1379 goto out;
1380
1381 /* We only handle unicast packets */
1382 payload = skb_network_header(skb);
1383 packet = (struct batadv_unicast_packet *)payload;
1384 if (packet->header.packet_type != BATADV_UNICAST)
1385 goto out;
1386
1387 /* Try to find a coding opportunity and send the skb if one is found */
1388 if (batadv_nc_skb_dst_search(skb, neigh_node, ethhdr))
1389 return true;
1390
1391 /* Find or create a nc_path for this src-dst pair */
1392 nc_path = batadv_nc_get_path(bat_priv,
1393 bat_priv->nc.coding_hash,
1394 ethhdr->h_source,
1395 neigh_node->addr);
1396
1397 if (!nc_path)
1398 goto out;
1399
1400 /* Add skb to nc_path */
1401 packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet));
1402 if (!batadv_nc_skb_add_to_path(skb, nc_path, neigh_node, packet_id))
1403 goto free_nc_path;
1404
1405 /* Packet is consumed */
1406 return true;
1407
1408free_nc_path:
1409 batadv_nc_path_free_ref(nc_path);
1410out:
1411 /* Packet is not consumed */
1412 return false;
1413}
1414
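The boolean return is what lets this hook drop into the forwarding path without disturbing it: the caller only falls back to plain transmission when the skb was neither coded nor buffered. Condensed from the routing.c hunk at the end of this patch:

	/* network code packet if possible, otherwise route it normally */
	if (batadv_nc_skb_forward(skb, neigh_node, ethhdr))
		ret = NET_RX_SUCCESS;
	else if (batadv_send_skb_to_orig(skb, orig_node, recv_if))
		ret = NET_RX_SUCCESS;
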
1415/**
1416 * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used
1417 * when decoding coded packets
1418 * @bat_priv: the bat priv with all the soft interface information
1419 * @skb: data skb to store
1420 */
1421void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
1422 struct sk_buff *skb)
1423{
1424 struct batadv_unicast_packet *packet;
1425 struct batadv_nc_path *nc_path;
1426 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
1427 __be32 packet_id;
1428 u8 *payload;
1429
1430 /* Check if network coding is enabled */
1431 if (!atomic_read(&bat_priv->network_coding))
1432 goto out;
1433
1434 /* Check for supported packet type */
1435 payload = skb_network_header(skb);
1436 packet = (struct batadv_unicast_packet *)payload;
1437 if (packet->header.packet_type != BATADV_UNICAST)
1438 goto out;
1439
1440	/* Find an existing nc_path or create a new one */
1441 nc_path = batadv_nc_get_path(bat_priv,
1442 bat_priv->nc.decoding_hash,
1443 ethhdr->h_source,
1444 ethhdr->h_dest);
1445
1446 if (!nc_path)
1447 goto out;
1448
1449 /* Clone skb and adjust skb->data to point at batman header */
1450 skb = skb_clone(skb, GFP_ATOMIC);
1451 if (unlikely(!skb))
1452 goto free_nc_path;
1453
1454 if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
1455 goto free_skb;
1456
1457 if (unlikely(!skb_pull_rcsum(skb, ETH_HLEN)))
1458 goto free_skb;
1459
1460 /* Add skb to nc_path */
1461 packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet));
1462 if (!batadv_nc_skb_add_to_path(skb, nc_path, NULL, packet_id))
1463 goto free_skb;
1464
1465 batadv_inc_counter(bat_priv, BATADV_CNT_NC_BUFFER);
1466 return;
1467
1468free_skb:
1469 kfree_skb(skb);
1470free_nc_path:
1471 batadv_nc_path_free_ref(nc_path);
1472out:
1473 return;
1474}
1475
1476/**
1477 * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet
1478 * should be saved in the decoding buffer and, if so, store it there
1479 * @bat_priv: the bat priv with all the soft interface information
1480 * @skb: unicast skb to store
1481 */
1482void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
1483 struct sk_buff *skb)
1484{
1485 struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
1486
1487 if (batadv_is_my_mac(bat_priv, ethhdr->h_dest))
1488 return;
1489
1490 /* Set data pointer to MAC header to mimic packets from our tx path */
1491 skb_push(skb, ETH_HLEN);
1492
1493 batadv_nc_skb_store_for_decoding(bat_priv, skb);
1494}
1495
1496/**
1497 * batadv_nc_skb_decode_packet - decode given skb using the decode data stored
1498 * in nc_packet
1499 * @bat_priv: the bat priv with all the soft interface information
1500 * @skb: unicast skb to decode
1501 * @nc_packet: decode data needed to decode the skb
1502 *
1503 * Returns pointer to decoded unicast packet if the packet was decoded or NULL
1504 * in case of an error.
1505 */
1506static struct batadv_unicast_packet *
1507batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
1508 struct batadv_nc_packet *nc_packet)
1509{
1510 const int h_size = sizeof(struct batadv_unicast_packet);
1511 const int h_diff = sizeof(struct batadv_coded_packet) - h_size;
1512 struct batadv_unicast_packet *unicast_packet;
1513 struct batadv_coded_packet coded_packet_tmp;
1514 struct ethhdr *ethhdr, ethhdr_tmp;
1515 uint8_t *orig_dest, ttl, ttvn;
1516 unsigned int coding_len;
1517
1518 /* Save headers temporarily */
1519 memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp));
1520 memcpy(&ethhdr_tmp, skb_mac_header(skb), sizeof(ethhdr_tmp));
1521
1522 if (skb_cow(skb, 0) < 0)
1523 return NULL;
1524
1525 if (unlikely(!skb_pull_rcsum(skb, h_diff)))
1526 return NULL;
1527
1528	/* skb->data points to the batman header, so set the mac header
1529	 * ETH_HLEN (14) bytes before it and the network header to skb->data
1530 */
1531 skb_set_mac_header(skb, -ETH_HLEN);
1532 skb_reset_network_header(skb);
1533
1534 /* Reconstruct original mac header */
1535 ethhdr = (struct ethhdr *)skb_mac_header(skb);
1536 memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr));
1537
1538 /* Select the correct unicast header information based on the location
1539 * of our mac address in the coded_packet header
1540 */
1541 if (batadv_is_my_mac(bat_priv, coded_packet_tmp.second_dest)) {
1542 /* If we are the second destination the packet was overheard,
1543 * so the Ethernet address must be copied to h_dest and
1544 * pkt_type changed from PACKET_OTHERHOST to PACKET_HOST
1545 */
1546 memcpy(ethhdr->h_dest, coded_packet_tmp.second_dest, ETH_ALEN);
1547 skb->pkt_type = PACKET_HOST;
1548
1549 orig_dest = coded_packet_tmp.second_orig_dest;
1550 ttl = coded_packet_tmp.second_ttl;
1551 ttvn = coded_packet_tmp.second_ttvn;
1552 } else {
1553 orig_dest = coded_packet_tmp.first_orig_dest;
1554 ttl = coded_packet_tmp.header.ttl;
1555 ttvn = coded_packet_tmp.first_ttvn;
1556 }
1557
1558 coding_len = ntohs(coded_packet_tmp.coded_len);
1559
1560 if (coding_len > skb->len)
1561 return NULL;
1562
1563 /* Here the magic is reversed:
1564 * extract the missing packet from the received coded packet
1565 */
1566 batadv_nc_memxor(skb->data + h_size,
1567 nc_packet->skb->data + h_size,
1568 coding_len);
1569
1570 /* Resize decoded skb if decoded with larger packet */
1571 if (nc_packet->skb->len > coding_len + h_size)
1572 pskb_trim_rcsum(skb, coding_len + h_size);
1573
1574 /* Create decoded unicast packet */
1575 unicast_packet = (struct batadv_unicast_packet *)skb->data;
1576 unicast_packet->header.packet_type = BATADV_UNICAST;
1577 unicast_packet->header.version = BATADV_COMPAT_VERSION;
1578 unicast_packet->header.ttl = ttl;
1579 memcpy(unicast_packet->dest, orig_dest, ETH_ALEN);
1580 unicast_packet->ttvn = ttvn;
1581
1582 batadv_nc_packet_free(nc_packet);
1583 return unicast_packet;
1584}
1585
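Decoding reverses the size bookkeeping from the coding step: the coded header is pulled off (shrinking the frame by h_diff bytes), the unicast header is rebuilt in place, and the payload is XORed with the buffered partner; if that partner was the longer packet, the result is trimmed back to the decoded packet's real length. A numeric sketch with the same hypothetical header sizes as in the coding example above:

#include <stdio.h>

int main(void)
{
	/* hypothetical header sizes, as in the coding example above */
	int h_size = 10;			/* unicast header */
	int h_diff = 46 - h_size;		/* coded minus unicast */

	int coded_frame = 346;			/* received coded packet */
	int coding_len = 100;			/* from coded_packet->coded_len */

	int after_pull = coded_frame - h_diff;	/* unicast header + payload */

	/* if the buffered partner was the longer packet, trim back */
	if (after_pull > coding_len + h_size)
		after_pull = coding_len + h_size;

	printf("decoded unicast frame: %d bytes\n", after_pull);
	return 0;
}
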
1586/**
1587 * batadv_nc_find_decoding_packet - search through buffered decoding data to
1588 * find the data needed to decode the coded packet
1589 * @bat_priv: the bat priv with all the soft interface information
1590 * @ethhdr: pointer to the ethernet header inside the coded packet
1591 * @coded: coded packet we try to find decode data for
1592 *
1593 * Returns pointer to nc packet if the needed data was found or NULL otherwise.
1594 */
1595static struct batadv_nc_packet *
1596batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
1597 struct ethhdr *ethhdr,
1598 struct batadv_coded_packet *coded)
1599{
1600 struct batadv_hashtable *hash = bat_priv->nc.decoding_hash;
1601 struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL;
1602 struct batadv_nc_path *nc_path, nc_path_key;
1603 uint8_t *dest, *source;
1604 __be32 packet_id;
1605 int index;
1606
1607 if (!hash)
1608 return NULL;
1609
1610 /* Select the correct packet id based on the location of our mac-addr */
1611 dest = ethhdr->h_source;
1612 if (!batadv_is_my_mac(bat_priv, coded->second_dest)) {
1613 source = coded->second_source;
1614 packet_id = coded->second_crc;
1615 } else {
1616 source = coded->first_source;
1617 packet_id = coded->first_crc;
1618 }
1619
1620 batadv_nc_hash_key_gen(&nc_path_key, source, dest);
1621 index = batadv_nc_hash_choose(&nc_path_key, hash->size);
1622
1623 /* Search for matching coding path */
1624 rcu_read_lock();
1625 hlist_for_each_entry_rcu(nc_path, &hash->table[index], hash_entry) {
1626 /* Find matching nc_packet */
1627 spin_lock_bh(&nc_path->packet_list_lock);
1628 list_for_each_entry(tmp_nc_packet,
1629 &nc_path->packet_list, list) {
1630 if (packet_id == tmp_nc_packet->packet_id) {
1631 list_del(&tmp_nc_packet->list);
1632
1633 nc_packet = tmp_nc_packet;
1634 break;
1635 }
1636 }
1637 spin_unlock_bh(&nc_path->packet_list_lock);
1638
1639 if (nc_packet)
1640 break;
1641 }
1642 rcu_read_unlock();
1643
1644 if (!nc_packet)
1645 batadv_dbg(BATADV_DBG_NC, bat_priv,
1646 "No decoding packet found for %u\n", packet_id);
1647
1648 return nc_packet;
1649}
1650
1651/**
1652 * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the
1653 * resulting unicast packet
1654 * @skb: incoming coded packet
1655 * @recv_if: pointer to interface this packet was received on
1656 */
1657static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
1658 struct batadv_hard_iface *recv_if)
1659{
1660 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1661 struct batadv_unicast_packet *unicast_packet;
1662 struct batadv_coded_packet *coded_packet;
1663 struct batadv_nc_packet *nc_packet;
1664 struct ethhdr *ethhdr;
1665 int hdr_size = sizeof(*coded_packet);
1666
1667 /* Check if network coding is enabled */
1668 if (!atomic_read(&bat_priv->network_coding))
1669 return NET_RX_DROP;
1670
1671 /* Make sure we can access (and remove) header */
1672 if (unlikely(!pskb_may_pull(skb, hdr_size)))
1673 return NET_RX_DROP;
1674
1675 coded_packet = (struct batadv_coded_packet *)skb->data;
1676 ethhdr = (struct ethhdr *)skb_mac_header(skb);
1677
1678 /* Verify frame is destined for us */
1679 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) &&
1680 !batadv_is_my_mac(bat_priv, coded_packet->second_dest))
1681 return NET_RX_DROP;
1682
1683 /* Update stat counter */
1684 if (batadv_is_my_mac(bat_priv, coded_packet->second_dest))
1685 batadv_inc_counter(bat_priv, BATADV_CNT_NC_SNIFFED);
1686
1687 nc_packet = batadv_nc_find_decoding_packet(bat_priv, ethhdr,
1688 coded_packet);
1689 if (!nc_packet) {
1690 batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
1691 return NET_RX_DROP;
1692 }
1693
1694	/* Make the skbs linear, because decoding accesses the entire buffer */
1695 if (skb_linearize(skb) < 0)
1696 goto free_nc_packet;
1697
1698 if (skb_linearize(nc_packet->skb) < 0)
1699 goto free_nc_packet;
1700
1701 /* Decode the packet */
1702 unicast_packet = batadv_nc_skb_decode_packet(bat_priv, skb, nc_packet);
1703 if (!unicast_packet) {
1704 batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
1705 goto free_nc_packet;
1706 }
1707
1708 /* Mark packet as decoded to do correct recoding when forwarding */
1709 BATADV_SKB_CB(skb)->decoded = true;
1710 batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE);
1711 batadv_add_counter(bat_priv, BATADV_CNT_NC_DECODE_BYTES,
1712 skb->len + ETH_HLEN);
1713 return batadv_recv_unicast_packet(skb, recv_if);
1714
1715free_nc_packet:
1716 batadv_nc_packet_free(nc_packet);
1717 return NET_RX_DROP;
1718}
1719
1720/**
1721 * batadv_nc_free - clean up network coding memory
1722 * @bat_priv: the bat priv with all the soft interface information
1723 */
1724void batadv_nc_free(struct batadv_priv *bat_priv)
1725{
1726 batadv_recv_handler_unregister(BATADV_CODED);
1727 cancel_delayed_work_sync(&bat_priv->nc.work);
1728
1729 batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL);
1730 batadv_hash_destroy(bat_priv->nc.coding_hash);
1731 batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, NULL);
1732 batadv_hash_destroy(bat_priv->nc.decoding_hash);
1733}
1734
1735/**
1736 * batadv_nc_nodes_seq_print_text - print the nc node information
1737 * @seq: seq file to print on
1738 * @offset: not used
1739 */
1740int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
1741{
1742 struct net_device *net_dev = (struct net_device *)seq->private;
1743 struct batadv_priv *bat_priv = netdev_priv(net_dev);
1744 struct batadv_hashtable *hash = bat_priv->orig_hash;
1745 struct batadv_hard_iface *primary_if;
1746 struct hlist_head *head;
1747 struct batadv_orig_node *orig_node;
1748 struct batadv_nc_node *nc_node;
1749 int i;
1750
1751 primary_if = batadv_seq_print_text_primary_if_get(seq);
1752 if (!primary_if)
1753 goto out;
1754
1755 /* Traverse list of originators */
1756 for (i = 0; i < hash->size; i++) {
1757 head = &hash->table[i];
1758
1759 /* For each orig_node in this bin */
1760 rcu_read_lock();
1761 hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
1762 seq_printf(seq, "Node: %pM\n", orig_node->orig);
1763
1764 seq_puts(seq, " Ingoing: ");
1765 /* For each in_nc_node to this orig_node */
1766 list_for_each_entry_rcu(nc_node,
1767 &orig_node->in_coding_list,
1768 list)
1769 seq_printf(seq, "%pM ",
1770 nc_node->addr);
1771 seq_puts(seq, "\n");
1772
1773 seq_puts(seq, " Outgoing: ");
1774			/* For each out_nc_node to this orig_node */
1775 list_for_each_entry_rcu(nc_node,
1776 &orig_node->out_coding_list,
1777 list)
1778 seq_printf(seq, "%pM ",
1779 nc_node->addr);
1780 seq_puts(seq, "\n\n");
1781 }
1782 rcu_read_unlock();
1783 }
1784
1785out:
1786 if (primary_if)
1787 batadv_hardif_free_ref(primary_if);
1788 return 0;
1789}
1790
1791/**
1792 * batadv_nc_init_debugfs - create nc folder and related files in debugfs
1793 * @bat_priv: the bat priv with all the soft interface information
1794 */
1795int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
1796{
1797 struct dentry *nc_dir, *file;
1798
1799 nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir);
1800 if (!nc_dir)
1801 goto out;
1802
1803 file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir,
1804 &bat_priv->nc.min_tq);
1805 if (!file)
1806 goto out;
1807
1808 file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir,
1809 &bat_priv->nc.max_fwd_delay);
1810 if (!file)
1811 goto out;
1812
1813 file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir,
1814 &bat_priv->nc.max_buffer_time);
1815 if (!file)
1816 goto out;
1817
1818 return 0;
1819
1820out:
1821 return -ENOMEM;
1822}
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
new file mode 100644
index 000000000000..4fa6d0caddbd
--- /dev/null
+++ b/net/batman-adv/network-coding.h
@@ -0,0 +1,123 @@
1/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors:
2 *
3 * Martin Hundebøll, Jeppe Ledet-Pedersen
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_
21#define _NET_BATMAN_ADV_NETWORK_CODING_H_
22
23#ifdef CONFIG_BATMAN_ADV_NC
24
25int batadv_nc_init(struct batadv_priv *bat_priv);
26void batadv_nc_free(struct batadv_priv *bat_priv);
27void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
28 struct batadv_orig_node *orig_node,
29 struct batadv_orig_node *orig_neigh_node,
30 struct batadv_ogm_packet *ogm_packet,
31 int is_single_hop_neigh);
32void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
33 struct batadv_orig_node *orig_node,
34 bool (*to_purge)(struct batadv_priv *,
35 struct batadv_nc_node *));
36void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv);
37void batadv_nc_init_orig(struct batadv_orig_node *orig_node);
38bool batadv_nc_skb_forward(struct sk_buff *skb,
39 struct batadv_neigh_node *neigh_node,
40 struct ethhdr *ethhdr);
41void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
42 struct sk_buff *skb);
43void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
44 struct sk_buff *skb);
45int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset);
46int batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
47
48#else /* ifdef CONFIG_BATMAN_ADV_NC */
49
50static inline int batadv_nc_init(struct batadv_priv *bat_priv)
51{
52 return 0;
53}
54
55static inline void batadv_nc_free(struct batadv_priv *bat_priv)
56{
57 return;
58}
59
60static inline void
61batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
62 struct batadv_orig_node *orig_node,
63 struct batadv_orig_node *orig_neigh_node,
64 struct batadv_ogm_packet *ogm_packet,
65 int is_single_hop_neigh)
66{
67 return;
68}
69
70static inline void
71batadv_nc_purge_orig(struct batadv_priv *bat_priv,
72 struct batadv_orig_node *orig_node,
73 bool (*to_purge)(struct batadv_priv *,
74 struct batadv_nc_node *))
75{
76 return;
77}
78
79static inline void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
80{
81 return;
82}
83
84static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
85{
86 return;
87}
88
89static inline bool batadv_nc_skb_forward(struct sk_buff *skb,
90 struct batadv_neigh_node *neigh_node,
91 struct ethhdr *ethhdr)
92{
93 return false;
94}
95
96static inline void
97batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
98 struct sk_buff *skb)
99{
100 return;
101}
102
103static inline void
104batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
105 struct sk_buff *skb)
106{
107 return;
108}
109
110static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq,
111 void *offset)
112{
113 return 0;
114}
115
116static inline int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
117{
118 return 0;
119}
120
121#endif /* ifdef CONFIG_BATMAN_ADV_NC */
122
123#endif /* _NET_BATMAN_ADV_NETWORK_CODING_H_ */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 96fb80b724dc..2f3452546636 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -28,6 +28,7 @@
 #include "unicast.h"
 #include "soft-interface.h"
 #include "bridge_loop_avoidance.h"
+#include "network-coding.h"
 
 /* hash class keys */
 static struct lock_class_key batadv_orig_hash_lock_class_key;
@@ -142,6 +143,9 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
 
 	spin_unlock_bh(&orig_node->neigh_list_lock);
 
+	/* Free nc_nodes */
+	batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
+
 	batadv_frag_list_free(&orig_node->frag_list);
 	batadv_tt_global_del_orig(orig_node->bat_priv, orig_node,
 				  "originator timed out");
@@ -219,6 +223,8 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
 	spin_lock_init(&orig_node->neigh_list_lock);
 	spin_lock_init(&orig_node->tt_buff_lock);
 
+	batadv_nc_init_orig(orig_node);
+
 	/* extra reference for return */
 	atomic_set(&orig_node->refcount, 2);
 
@@ -459,7 +465,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
 				   neigh_node_tmp->tq_avg);
 		}
 
-		seq_printf(seq, "\n");
+		seq_puts(seq, "\n");
 		batman_count++;
 
 next:
@@ -469,7 +475,7 @@ next:
 	}
 
 	if (batman_count == 0)
-		seq_printf(seq, "No batman nodes in range ...\n");
+		seq_puts(seq, "No batman nodes in range ...\n");
 
 out:
 	if (primary_if)
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index ed0aa89bbf8b..a51ccfc39da4 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -30,6 +30,7 @@ enum batadv_packettype {
 	BATADV_TT_QUERY		= 0x07,
 	BATADV_ROAM_ADV		= 0x08,
 	BATADV_UNICAST_4ADDR	= 0x09,
+	BATADV_CODED		= 0x0a,
 };
 
 /**
@@ -278,4 +279,36 @@ struct batadv_tt_change {
 	uint8_t addr[ETH_ALEN];
 } __packed;
 
+/**
+ * struct batadv_coded_packet - network coded packet
+ * @header: common batman packet header and ttl of first included packet
+ * @reserved: Align following fields to 2-byte boundaries
+ * @first_source: original source of first included packet
+ * @first_orig_dest: original destination of first included packet
+ * @first_crc: checksum of first included packet
+ * @first_ttvn: tt-version number of first included packet
+ * @second_ttl: ttl of second packet
+ * @second_dest: second receiver of this coded packet
+ * @second_source: original source of second included packet
+ * @second_orig_dest: original destination of second included packet
+ * @second_crc: checksum of second included packet
+ * @second_ttvn: tt version number of second included packet
+ * @coded_len: length of network coded part of the payload
+ */
+struct batadv_coded_packet {
+	struct batadv_header header;
+	uint8_t first_ttvn;
+	/* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */
+	uint8_t first_source[ETH_ALEN];
+	uint8_t first_orig_dest[ETH_ALEN];
+	__be32 first_crc;
+	uint8_t second_ttl;
+	uint8_t second_ttvn;
+	uint8_t second_dest[ETH_ALEN];
+	uint8_t second_source[ETH_ALEN];
+	uint8_t second_orig_dest[ETH_ALEN];
+	__be32 second_crc;
+	__be16 coded_len;
+};
+
 #endif /* _NET_BATMAN_ADV_PACKET_H_ */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 5ee21cebbbb0..b27a4d792d15 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -29,6 +29,7 @@
29#include "unicast.h" 29#include "unicast.h"
30#include "bridge_loop_avoidance.h" 30#include "bridge_loop_avoidance.h"
31#include "distributed-arp-table.h" 31#include "distributed-arp-table.h"
32#include "network-coding.h"
32 33
33static int batadv_route_unicast_packet(struct sk_buff *skb, 34static int batadv_route_unicast_packet(struct sk_buff *skb,
34 struct batadv_hard_iface *recv_if); 35 struct batadv_hard_iface *recv_if);
@@ -402,7 +403,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
402 goto out; 403 goto out;
403 404
404 /* not for me */ 405 /* not for me */
405 if (!batadv_is_my_mac(ethhdr->h_dest)) 406 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
406 goto out; 407 goto out;
407 408
408 icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; 409 icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
@@ -416,7 +417,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
416 } 417 }
417 418
418 /* packet for me */ 419 /* packet for me */
419 if (batadv_is_my_mac(icmp_packet->dst)) 420 if (batadv_is_my_mac(bat_priv, icmp_packet->dst))
420 return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size); 421 return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size);
421 422
422 /* TTL exceeded */ 423 /* TTL exceeded */
@@ -548,27 +549,39 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,
548 return router; 549 return router;
549} 550}
550 551
551static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) 552/**
553 * batadv_check_unicast_packet - Check for malformed unicast packets
554 * @bat_priv: the bat priv with all the soft interface information
555 * @skb: packet to check
556 * @hdr_size: size of header to pull
557 *
558 * Check for short header and bad addresses in given packet. Returns negative
559 * value when check fails and 0 otherwise. The negative value depends on the
560 * reason: -ENODATA for bad header, -EBADR for broadcast destination or source,
561 * and -EREMOTE for non-local (other host) destination.
562 */
563static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
564 struct sk_buff *skb, int hdr_size)
552{ 565{
553 struct ethhdr *ethhdr; 566 struct ethhdr *ethhdr;
554 567
555 /* drop packet if it has not necessary minimum size */ 568 /* drop packet if it has not necessary minimum size */
556 if (unlikely(!pskb_may_pull(skb, hdr_size))) 569 if (unlikely(!pskb_may_pull(skb, hdr_size)))
557 return -1; 570 return -ENODATA;
558 571
559 ethhdr = (struct ethhdr *)skb_mac_header(skb); 572 ethhdr = (struct ethhdr *)skb_mac_header(skb);
560 573
561 /* packet with unicast indication but broadcast recipient */ 574 /* packet with unicast indication but broadcast recipient */
562 if (is_broadcast_ether_addr(ethhdr->h_dest)) 575 if (is_broadcast_ether_addr(ethhdr->h_dest))
563 return -1; 576 return -EBADR;
564 577
565 /* packet with broadcast sender address */ 578 /* packet with broadcast sender address */
566 if (is_broadcast_ether_addr(ethhdr->h_source)) 579 if (is_broadcast_ether_addr(ethhdr->h_source))
567 return -1; 580 return -EBADR;
568 581
569 /* not for me */ 582 /* not for me */
570 if (!batadv_is_my_mac(ethhdr->h_dest)) 583 if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
571 return -1; 584 return -EREMOTE;
572 585
573 return 0; 586 return 0;
574} 587}
@@ -582,7 +595,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
 	char tt_flag;
 	size_t packet_size;
 
-	if (batadv_check_unicast_packet(skb, hdr_size) < 0)
+	if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
 		return NET_RX_DROP;
 
 	/* I could need to modify it */
@@ -614,7 +627,7 @@ int batadv_recv_tt_query(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
 	case BATADV_TT_RESPONSE:
 		batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX);
 
-		if (batadv_is_my_mac(tt_query->dst)) {
+		if (batadv_is_my_mac(bat_priv, tt_query->dst)) {
 			/* packet needs to be linearized to access the TT
 			 * changes
 			 */
@@ -657,14 +670,15 @@ int batadv_recv_roam_adv(struct sk_buff *skb, struct batadv_hard_iface *recv_if)
 	struct batadv_roam_adv_packet *roam_adv_packet;
 	struct batadv_orig_node *orig_node;
 
-	if (batadv_check_unicast_packet(skb, sizeof(*roam_adv_packet)) < 0)
+	if (batadv_check_unicast_packet(bat_priv, skb,
+					sizeof(*roam_adv_packet)) < 0)
 		goto out;
 
 	batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX);
 
 	roam_adv_packet = (struct batadv_roam_adv_packet *)skb->data;
 
-	if (!batadv_is_my_mac(roam_adv_packet->dst))
+	if (!batadv_is_my_mac(bat_priv, roam_adv_packet->dst))
 		return batadv_route_unicast_packet(skb, recv_if);
 
 	/* check if it is a backbone gateway. we don't accept
@@ -850,15 +864,18 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
 	/* decrement ttl */
 	unicast_packet->header.ttl--;
 
-	/* Update stats counter */
-	batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
-	batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
-			   skb->len + ETH_HLEN);
-
-	/* route it */
-	if (batadv_send_skb_to_orig(skb, orig_node, recv_if))
+	/* network code packet if possible */
+	if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) {
+		ret = NET_RX_SUCCESS;
+	} else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) {
 		ret = NET_RX_SUCCESS;
 
+		/* Update stats counter */
+		batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
+		batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
+				   skb->len + ETH_HLEN);
+	}
+
 out:
 	if (neigh_node)
 		batadv_neigh_node_free_ref(neigh_node);
@@ -922,7 +939,7 @@ out:
 }
 
 static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
-				     struct sk_buff *skb) {
+				     struct sk_buff *skb, int hdr_len) {
 	uint8_t curr_ttvn, old_ttvn;
 	struct batadv_orig_node *orig_node;
 	struct ethhdr *ethhdr;
@@ -931,7 +948,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 	int is_old_ttvn;
 
 	/* check if there is enough data before accessing it */
-	if (pskb_may_pull(skb, sizeof(*unicast_packet) + ETH_HLEN) < 0)
+	if (pskb_may_pull(skb, hdr_len + ETH_HLEN) < 0)
 		return 0;
 
 	/* create a copy of the skb (in case of for re-routing) to modify it. */
@@ -939,7 +956,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 		return 0;
 
 	unicast_packet = (struct batadv_unicast_packet *)skb->data;
-	ethhdr = (struct ethhdr *)(skb->data + sizeof(*unicast_packet));
+	ethhdr = (struct ethhdr *)(skb->data + hdr_len);
 
 	/* check if the destination client was served by this node and it is now
 	 * roaming. In this case, it means that the node has got a ROAM_ADV
@@ -967,7 +984,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 	 * last time) the packet had an updated information or not
 	 */
 	curr_ttvn = (uint8_t)atomic_read(&bat_priv->tt.vn);
-	if (!batadv_is_my_mac(unicast_packet->dest)) {
+	if (!batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
 		orig_node = batadv_orig_hash_find(bat_priv,
 						  unicast_packet->dest);
 		/* if it is not possible to find the orig_node representing the
@@ -1033,7 +1050,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
 	struct batadv_unicast_4addr_packet *unicast_4addr_packet;
 	uint8_t *orig_addr;
 	struct batadv_orig_node *orig_node = NULL;
-	int hdr_size = sizeof(*unicast_packet);
+	int check, hdr_size = sizeof(*unicast_packet);
 	bool is4addr;
 
 	unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -1044,14 +1061,22 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
 	if (is4addr)
 		hdr_size = sizeof(*unicast_4addr_packet);
 
-	if (batadv_check_unicast_packet(skb, hdr_size) < 0)
-		return NET_RX_DROP;
+	/* function returns -EREMOTE for promiscuous packets */
+	check = batadv_check_unicast_packet(bat_priv, skb, hdr_size);
 
-	if (!batadv_check_unicast_ttvn(bat_priv, skb))
+	/* Even though the packet is not for us, we might save it to use for
+	 * decoding a later received coded packet
+	 */
+	if (check == -EREMOTE)
+		batadv_nc_skb_store_sniffed_unicast(bat_priv, skb);
+
+	if (check < 0)
+		return NET_RX_DROP;
+	if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
 		return NET_RX_DROP;
 
 	/* packet for me */
-	if (batadv_is_my_mac(unicast_packet->dest)) {
+	if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
 		if (is4addr) {
 			batadv_dat_inc_counter(bat_priv,
 					       unicast_4addr_packet->subtype);
@@ -1088,16 +1113,16 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
 	struct sk_buff *new_skb = NULL;
 	int ret;
 
-	if (batadv_check_unicast_packet(skb, hdr_size) < 0)
+	if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
 		return NET_RX_DROP;
 
-	if (!batadv_check_unicast_ttvn(bat_priv, skb))
+	if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
 		return NET_RX_DROP;
 
 	unicast_packet = (struct batadv_unicast_frag_packet *)skb->data;
 
 	/* packet for me */
-	if (batadv_is_my_mac(unicast_packet->dest)) {
+	if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
 		ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb);
 
 		if (ret == NET_RX_DROP)
@@ -1151,13 +1176,13 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
 		goto out;
 
 	/* ignore broadcasts sent by myself */
-	if (batadv_is_my_mac(ethhdr->h_source))
+	if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
 		goto out;
 
 	bcast_packet = (struct batadv_bcast_packet *)skb->data;
 
 	/* ignore broadcasts originated by myself */
-	if (batadv_is_my_mac(bcast_packet->orig))
+	if (batadv_is_my_mac(bat_priv, bcast_packet->orig))
 		goto out;
 
 	if (bcast_packet->header.ttl < 2)
@@ -1243,14 +1268,14 @@ int batadv_recv_vis_packet(struct sk_buff *skb,
 	ethhdr = (struct ethhdr *)skb_mac_header(skb);
 
 	/* not for me */
-	if (!batadv_is_my_mac(ethhdr->h_dest))
+	if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
 		return NET_RX_DROP;
 
 	/* ignore own packets */
-	if (batadv_is_my_mac(vis_packet->vis_orig))
+	if (batadv_is_my_mac(bat_priv, vis_packet->vis_orig))
 		return NET_RX_DROP;
 
-	if (batadv_is_my_mac(vis_packet->sender_orig))
+	if (batadv_is_my_mac(bat_priv, vis_packet->sender_orig))
 		return NET_RX_DROP;
 
 	switch (vis_packet->vis_type) {
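Taken together, the routing.c hunks splice network coding into the unicast forward path: each packet is first offered to the coder, and only transmitted (and counted as forwarded) when no coding opportunity exists. A compressed userspace rendering of that control flow, with stub predicates standing in for batadv_nc_skb_forward() and batadv_send_skb_to_orig():

    #include <stdbool.h>
    #include <stdio.h>

    /* Stubs standing in for the batadv helpers; both report whether
     * they consumed the packet, as in the hunk above.
     */
    static bool nc_forward(int pkt) { return pkt % 2 == 0; }
    static bool send_to_orig(int pkt) { (void)pkt; return true; }

    int main(void)
    {
    	int forwarded = 0;

    	for (int pkt = 0; pkt < 4; pkt++) {
    		if (nc_forward(pkt)) {
    			/* coded: success, but no FORWARD counter bump */
    		} else if (send_to_orig(pkt)) {
    			forwarded++; /* plain forward: update stats */
    		}
    	}
    	printf("%d packets forwarded uncoded\n", forwarded);
    	return 0;
    }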
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index a67cffde37ae..263cfd1ccee7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -27,6 +27,7 @@
 #include "vis.h"
 #include "gateway_common.h"
 #include "originator.h"
+#include "network-coding.h"
 
 #include <linux/if_ether.h>
 
@@ -39,6 +40,7 @@ int batadv_send_skb_packet(struct sk_buff *skb,
 			   struct batadv_hard_iface *hard_iface,
 			   const uint8_t *dst_addr)
 {
+	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	struct ethhdr *ethhdr;
 
 	if (hard_iface->if_status != BATADV_IF_ACTIVE)
@@ -70,6 +72,9 @@ int batadv_send_skb_packet(struct sk_buff *skb,
 
 	skb->dev = hard_iface->net_dev;
 
+	/* Save a clone of the skb to use when decoding coded packets */
+	batadv_nc_skb_store_for_decoding(bat_priv, skb);
+
 	/* dev_queue_xmit() returns a negative result on error. However on
 	 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
 	 * (which is > 0). This will not be treated as an error.
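Storing the clone immediately before transmission keeps a copy of every outgoing frame that a later coded packet might be XORed against, and skb_clone() makes this cheap because the data buffer is shared rather than copied. A sketch of the pattern, assuming a caller-provided queue and a purge worker that are not part of this diff:

    #include <linux/skbuff.h>

    /* Sketch of the store-before-transmit pattern, under the assumption
     * of a caller-provided sk_buff_head pool that a worker purges on
     * timeout.
     */
    static void store_for_decoding_sketch(struct sk_buff_head *pool,
    				      struct sk_buff *skb)
    {
    	struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* shares data */

    	if (!clone)
    		return; /* best effort only; coding is an optimization */

    	skb_queue_tail(pool, clone);
    }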
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 2711e870f557..6f20d339e33a 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -37,6 +37,7 @@
 #include <linux/if_ether.h>
 #include "unicast.h"
 #include "bridge_loop_avoidance.h"
+#include "network-coding.h"
 
 
 static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd);
@@ -401,55 +402,6 @@ static void batadv_set_lockdep_class(struct net_device *dev)
 }
 
 /**
- * batadv_softif_init - Late stage initialization of soft interface
- * @dev: registered network device to modify
- *
- * Returns error code on failures
- */
-static int batadv_softif_init(struct net_device *dev)
-{
-	batadv_set_lockdep_class(dev);
-
-	return 0;
-}
-
-static const struct net_device_ops batadv_netdev_ops = {
-	.ndo_init = batadv_softif_init,
-	.ndo_open = batadv_interface_open,
-	.ndo_stop = batadv_interface_release,
-	.ndo_get_stats = batadv_interface_stats,
-	.ndo_set_mac_address = batadv_interface_set_mac_addr,
-	.ndo_change_mtu = batadv_interface_change_mtu,
-	.ndo_start_xmit = batadv_interface_tx,
-	.ndo_validate_addr = eth_validate_addr
-};
-
-static void batadv_interface_setup(struct net_device *dev)
-{
-	struct batadv_priv *priv = netdev_priv(dev);
-
-	ether_setup(dev);
-
-	dev->netdev_ops = &batadv_netdev_ops;
-	dev->destructor = free_netdev;
-	dev->tx_queue_len = 0;
-
-	/* can't call min_mtu, because the needed variables
-	 * have not been initialized yet
-	 */
-	dev->mtu = ETH_DATA_LEN;
-	/* reserve more space in the skbuff for our header */
-	dev->hard_header_len = BATADV_HEADER_LEN;
-
-	/* generate random address */
-	eth_hw_addr_random(dev);
-
-	SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
-
-	memset(priv, 0, sizeof(*priv));
-}
-
-/**
  * batadv_softif_destroy_finish - cleans up the remains of a softif
  * @work: work queue item
  *
@@ -465,7 +417,6 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
 					 cleanup_work);
 	soft_iface = bat_priv->soft_iface;
 
-	batadv_debugfs_del_meshif(soft_iface);
 	batadv_sysfs_del_meshif(soft_iface);
 
 	rtnl_lock();
@@ -473,21 +424,22 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
 	rtnl_unlock();
 }
 
-struct net_device *batadv_softif_create(const char *name)
+/**
+ * batadv_softif_init_late - late stage initialization of soft interface
+ * @dev: registered network device to modify
+ *
+ * Returns error code on failures
+ */
+static int batadv_softif_init_late(struct net_device *dev)
 {
-	struct net_device *soft_iface;
 	struct batadv_priv *bat_priv;
 	int ret;
 	size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM;
 
-	soft_iface = alloc_netdev(sizeof(*bat_priv), name,
-				  batadv_interface_setup);
-
-	if (!soft_iface)
-		goto out;
+	batadv_set_lockdep_class(dev);
 
-	bat_priv = netdev_priv(soft_iface);
-	bat_priv->soft_iface = soft_iface;
+	bat_priv = netdev_priv(dev);
+	bat_priv->soft_iface = dev;
 	INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);
 
 	/* batadv_interface_stats() needs to be available as soon as
@@ -495,14 +447,7 @@ struct net_device *batadv_softif_create(const char *name)
 	 */
 	bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t));
 	if (!bat_priv->bat_counters)
-		goto free_soft_iface;
-
-	ret = register_netdevice(soft_iface);
-	if (ret < 0) {
-		pr_err("Unable to register the batman interface '%s': %i\n",
-		       name, ret);
-		goto free_bat_counters;
-	}
+		return -ENOMEM;
 
 	atomic_set(&bat_priv->aggregated_ogms, 1);
 	atomic_set(&bat_priv->bonding, 0);
@@ -540,49 +485,196 @@ struct net_device *batadv_softif_create(const char *name)
 	bat_priv->primary_if = NULL;
 	bat_priv->num_ifaces = 0;
 
-	ret = batadv_algo_select(bat_priv, batadv_routing_algo);
-	if (ret < 0)
-		goto unreg_soft_iface;
+	batadv_nc_init_bat_priv(bat_priv);
 
-	ret = batadv_sysfs_add_meshif(soft_iface);
+	ret = batadv_algo_select(bat_priv, batadv_routing_algo);
 	if (ret < 0)
-		goto unreg_soft_iface;
+		goto free_bat_counters;
 
-	ret = batadv_debugfs_add_meshif(soft_iface);
+	ret = batadv_debugfs_add_meshif(dev);
 	if (ret < 0)
-		goto unreg_sysfs;
+		goto free_bat_counters;
 
-	ret = batadv_mesh_init(soft_iface);
+	ret = batadv_mesh_init(dev);
 	if (ret < 0)
 		goto unreg_debugfs;
 
-	return soft_iface;
+	return 0;
 
 unreg_debugfs:
-	batadv_debugfs_del_meshif(soft_iface);
-unreg_sysfs:
-	batadv_sysfs_del_meshif(soft_iface);
-unreg_soft_iface:
-	free_percpu(bat_priv->bat_counters);
-	unregister_netdevice(soft_iface);
-	return NULL;
-
+	batadv_debugfs_del_meshif(dev);
 free_bat_counters:
 	free_percpu(bat_priv->bat_counters);
-free_soft_iface:
-	free_netdev(soft_iface);
+
+	return ret;
+}
+
+/**
+ * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface
+ * @dev: batadv_soft_interface used as master interface
+ * @slave_dev: net_device which should become the slave interface
+ *
+ * Return 0 if successful or error otherwise.
+ */
+static int batadv_softif_slave_add(struct net_device *dev,
+				   struct net_device *slave_dev)
+{
+	struct batadv_hard_iface *hard_iface;
+	int ret = -EINVAL;
+
+	hard_iface = batadv_hardif_get_by_netdev(slave_dev);
+	if (!hard_iface || hard_iface->soft_iface != NULL)
+		goto out;
+
+	ret = batadv_hardif_enable_interface(hard_iface, dev->name);
+
 out:
-	return NULL;
+	if (hard_iface)
+		batadv_hardif_free_ref(hard_iface);
+	return ret;
 }
 
-void batadv_softif_destroy(struct net_device *soft_iface)
+/**
+ * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface
+ * @dev: batadv_soft_interface used as master interface
+ * @slave_dev: net_device which should be removed from the master interface
+ *
+ * Return 0 if successful or error otherwise.
+ */
+static int batadv_softif_slave_del(struct net_device *dev,
+				   struct net_device *slave_dev)
+{
+	struct batadv_hard_iface *hard_iface;
+	int ret = -EINVAL;
+
+	hard_iface = batadv_hardif_get_by_netdev(slave_dev);
+
+	if (!hard_iface || hard_iface->soft_iface != dev)
+		goto out;
+
+	batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_KEEP);
+	ret = 0;
+
+out:
+	if (hard_iface)
+		batadv_hardif_free_ref(hard_iface);
+	return ret;
+}
+
+static const struct net_device_ops batadv_netdev_ops = {
+	.ndo_init = batadv_softif_init_late,
+	.ndo_open = batadv_interface_open,
+	.ndo_stop = batadv_interface_release,
+	.ndo_get_stats = batadv_interface_stats,
+	.ndo_set_mac_address = batadv_interface_set_mac_addr,
+	.ndo_change_mtu = batadv_interface_change_mtu,
+	.ndo_start_xmit = batadv_interface_tx,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_add_slave = batadv_softif_slave_add,
+	.ndo_del_slave = batadv_softif_slave_del,
+};
+
+/**
+ * batadv_softif_free - Deconstructor of batadv_soft_interface
+ * @dev: Device to cleanup and remove
+ */
+static void batadv_softif_free(struct net_device *dev)
+{
+	batadv_debugfs_del_meshif(dev);
+	batadv_mesh_free(dev);
+
+	/* some scheduled RCU callbacks need the bat_priv struct to accomplish
+	 * their tasks. Wait for them all to be finished before freeing the
+	 * netdev and its private data (bat_priv)
+	 */
+	rcu_barrier();
+
+	free_netdev(dev);
+}
+
+/**
+ * batadv_softif_init_early - early stage initialization of soft interface
+ * @dev: registered network device to modify
+ */
+static void batadv_softif_init_early(struct net_device *dev)
+{
+	struct batadv_priv *priv = netdev_priv(dev);
+
+	ether_setup(dev);
+
+	dev->netdev_ops = &batadv_netdev_ops;
+	dev->destructor = batadv_softif_free;
+	dev->tx_queue_len = 0;
+
+	/* can't call min_mtu, because the needed variables
+	 * have not been initialized yet
+	 */
+	dev->mtu = ETH_DATA_LEN;
+	/* reserve more space in the skbuff for our header */
+	dev->hard_header_len = BATADV_HEADER_LEN;
+
+	/* generate random address */
+	eth_hw_addr_random(dev);
+
+	SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
+
+	memset(priv, 0, sizeof(*priv));
+}
+
+struct net_device *batadv_softif_create(const char *name)
+{
+	struct net_device *soft_iface;
+	int ret;
+
+	soft_iface = alloc_netdev(sizeof(struct batadv_priv), name,
+				  batadv_softif_init_early);
+	if (!soft_iface)
+		return NULL;
+
+	soft_iface->rtnl_link_ops = &batadv_link_ops;
+
+	ret = register_netdevice(soft_iface);
+	if (ret < 0) {
+		pr_err("Unable to register the batman interface '%s': %i\n",
+		       name, ret);
+		free_netdev(soft_iface);
+		return NULL;
+	}
+
+	return soft_iface;
+}
+
+/**
+ * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs
+ * @soft_iface: the to-be-removed batman-adv interface
+ */
+void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
 
-	batadv_mesh_free(soft_iface);
 	queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);
 }
 
+/**
+ * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink
+ * @soft_iface: the to-be-removed batman-adv interface
+ * @head: list pointer
+ */
+static void batadv_softif_destroy_netlink(struct net_device *soft_iface,
+					  struct list_head *head)
+{
+	struct batadv_hard_iface *hard_iface;
+
+	list_for_each_entry(hard_iface, &batadv_hardif_list, list) {
+		if (hard_iface->soft_iface == soft_iface)
+			batadv_hardif_disable_interface(hard_iface,
+							BATADV_IF_CLEANUP_KEEP);
+	}
+
+	batadv_sysfs_del_meshif(soft_iface);
+	unregister_netdevice_queue(soft_iface, head);
+}
+
 int batadv_softif_is_valid(const struct net_device *net_dev)
 {
 	if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx)
@@ -591,6 +683,13 @@ int batadv_softif_is_valid(const struct net_device *net_dev)
 	return 0;
 }
 
+struct rtnl_link_ops batadv_link_ops __read_mostly = {
+	.kind = "batadv",
+	.priv_size = sizeof(struct batadv_priv),
+	.setup = batadv_softif_init_early,
+	.dellink = batadv_softif_destroy_netlink,
+};
+
 /* ethtool */
 static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
@@ -662,6 +761,17 @@ static const struct {
 	{ "dat_put_rx" },
 	{ "dat_cached_reply_tx" },
 #endif
+#ifdef CONFIG_BATMAN_ADV_NC
+	{ "nc_code" },
+	{ "nc_code_bytes" },
+	{ "nc_recode" },
+	{ "nc_recode_bytes" },
+	{ "nc_buffer" },
+	{ "nc_decode" },
+	{ "nc_decode_bytes" },
+	{ "nc_decode_failed" },
+	{ "nc_sniffed" },
+#endif
 };
 
 static void batadv_get_strings(struct net_device *dev, uint32_t stringset,
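With ndo_add_slave/ndo_del_slave in batadv_netdev_ops and batadv_link_ops exported, the soft interface can be created, enslaved and deleted with stock iproute2 commands such as `ip link add name bat0 type batadv` followed by `ip link set dev eth0 master bat0`. The registration of the ops presumably happens in main.c, outside this excerpt; a minimal hypothetical wiring would look like:

    #include <linux/init.h>
    #include <net/rtnetlink.h>
    #include "soft-interface.h"

    /* Hypothetical init/exit pair; the real module wiring is not part
     * of this excerpt.
     */
    static int __init batadv_link_demo_init(void)
    {
    	/* makes `ip link add name bat0 type batadv` resolvable */
    	return rtnl_link_register(&batadv_link_ops);
    }

    static void __exit batadv_link_demo_exit(void)
    {
    	rtnl_link_unregister(&batadv_link_ops);
    }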
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index 43182e5e603a..2f2472c2ea0d 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -25,7 +25,8 @@ void batadv_interface_rx(struct net_device *soft_iface,
 			 struct sk_buff *skb, struct batadv_hard_iface *recv_if,
 			 int hdr_size, struct batadv_orig_node *orig_node);
 struct net_device *batadv_softif_create(const char *name);
-void batadv_softif_destroy(struct net_device *soft_iface);
+void batadv_softif_destroy_sysfs(struct net_device *soft_iface);
 int batadv_softif_is_valid(const struct net_device *net_dev);
+extern struct rtnl_link_ops batadv_link_ops;
 
 #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index afbba319d73a..15a22efa9a67 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -442,6 +442,9 @@ static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
 #ifdef CONFIG_BATMAN_ADV_DEBUG
 BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL);
 #endif
+#ifdef CONFIG_BATMAN_ADV_NC
+BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, NULL);
+#endif
 
 static struct batadv_attribute *batadv_mesh_attrs[] = {
 	&batadv_attr_aggregated_ogms,
@@ -464,6 +467,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
 #ifdef CONFIG_BATMAN_ADV_DEBUG
 	&batadv_attr_log_level,
 #endif
+#ifdef CONFIG_BATMAN_ADV_NC
+	&batadv_attr_network_coding,
+#endif
 	NULL,
 };
 
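BATADV_ATTR_SIF_BOOL() generates a show/store pair around the new network_coding atomic, so the switch should surface as a boolean file under the mesh directory of the soft interface, presumably /sys/class/net/<iface>/mesh/network_coding. A small userspace toggle under that path assumption:

    #include <stdio.h>

    int main(void)
    {
    	/* path assumed from how other mesh attributes are exposed */
    	FILE *f = fopen("/sys/class/net/bat0/mesh/network_coding", "w");

    	if (!f) {
    		perror("network_coding");
    		return 1;
    	}
    	fputs("1\n", f);
    	return fclose(f) ? 1 : 0;
    }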
@@ -582,13 +588,15 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
 	}
 
 	if (status_tmp == BATADV_IF_NOT_IN_USE) {
-		batadv_hardif_disable_interface(hard_iface);
+		batadv_hardif_disable_interface(hard_iface,
+						BATADV_IF_CLEANUP_AUTO);
 		goto unlock;
 	}
 
 	/* if the interface already is in use */
 	if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
-		batadv_hardif_disable_interface(hard_iface);
+		batadv_hardif_disable_interface(hard_iface,
+						BATADV_IF_CLEANUP_AUTO);
 
 	ret = batadv_hardif_enable_interface(hard_iface, buff);
 
@@ -688,15 +696,10 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,
 			enum batadv_uev_action action, const char *data)
 {
 	int ret = -ENOMEM;
-	struct batadv_hard_iface *primary_if;
 	struct kobject *bat_kobj;
 	char *uevent_env[4] = { NULL, NULL, NULL, NULL };
 
-	primary_if = batadv_primary_if_get_selected(bat_priv);
-	if (!primary_if)
-		goto out;
-
-	bat_kobj = &primary_if->soft_iface->dev.kobj;
+	bat_kobj = &bat_priv->soft_iface->dev.kobj;
 
 	uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) +
 				strlen(batadv_uev_type_str[type]) + 1,
@@ -732,9 +735,6 @@ out:
 	kfree(uevent_env[1]);
 	kfree(uevent_env[2]);
 
-	if (primary_if)
-		batadv_hardif_free_ref(primary_if);
-
 	if (ret)
 		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
 			   "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n",
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 98a66a021a60..5e89deeb9542 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -385,25 +385,19 @@ static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv,
 					  int *packet_buff_len,
 					  int min_packet_len)
 {
-	struct batadv_hard_iface *primary_if;
 	int req_len;
 
-	primary_if = batadv_primary_if_get_selected(bat_priv);
-
 	req_len = min_packet_len;
 	req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes));
 
 	/* if we have too many changes for one packet don't send any
 	 * and wait for the tt table request which will be fragmented
 	 */
-	if ((!primary_if) || (req_len > primary_if->soft_iface->mtu))
+	if (req_len > bat_priv->soft_iface->mtu)
 		req_len = min_packet_len;
 
 	batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len,
 				      min_packet_len, req_len);
-
-	if (primary_if)
-		batadv_hardif_free_ref(primary_if);
 }
 
 static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv,
@@ -908,7 +902,7 @@ out_remove:
 	/* remove address from local hash if present */
 	local_flags = batadv_tt_local_remove(bat_priv, tt_addr,
 					     "global tt received",
-					     !!(flags & BATADV_TT_CLIENT_ROAM));
+					     flags & BATADV_TT_CLIENT_ROAM);
 	tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI;
 
 	if (!(flags & BATADV_TT_CLIENT_ROAM))
@@ -1580,7 +1574,7 @@ static int batadv_tt_global_valid(const void *entry_ptr,
 static struct sk_buff *
 batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
 			      struct batadv_hashtable *hash,
-			      struct batadv_hard_iface *primary_if,
+			      struct batadv_priv *bat_priv,
 			      int (*valid_cb)(const void *, const void *),
 			      void *cb_data)
 {
@@ -1594,8 +1588,8 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,
 	uint32_t i;
 	size_t len;
 
-	if (tt_query_size + tt_len > primary_if->soft_iface->mtu) {
-		tt_len = primary_if->soft_iface->mtu - tt_query_size;
+	if (tt_query_size + tt_len > bat_priv->soft_iface->mtu) {
+		tt_len = bat_priv->soft_iface->mtu - tt_query_size;
 		tt_len -= tt_len % sizeof(struct batadv_tt_change);
 	}
 	tt_tot = tt_len / sizeof(struct batadv_tt_change);
@@ -1715,7 +1709,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
 {
 	struct batadv_orig_node *req_dst_orig_node;
 	struct batadv_orig_node *res_dst_orig_node = NULL;
-	struct batadv_hard_iface *primary_if = NULL;
 	uint8_t orig_ttvn, req_ttvn, ttvn;
 	int ret = false;
 	unsigned char *tt_buff;
@@ -1740,10 +1733,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
 	if (!res_dst_orig_node)
 		goto out;
 
-	primary_if = batadv_primary_if_get_selected(bat_priv);
-	if (!primary_if)
-		goto out;
-
 	orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);
 	req_ttvn = tt_request->ttvn;
 
@@ -1791,7 +1780,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,
 
 	skb = batadv_tt_response_fill_table(tt_len, ttvn,
 					    bat_priv->tt.global_hash,
-					    primary_if,
+					    bat_priv,
 					    batadv_tt_global_valid,
 					    req_dst_orig_node);
 	if (!skb)
@@ -1828,8 +1817,6 @@ out:
 		batadv_orig_node_free_ref(res_dst_orig_node);
 	if (req_dst_orig_node)
 		batadv_orig_node_free_ref(req_dst_orig_node);
-	if (primary_if)
-		batadv_hardif_free_ref(primary_if);
 	if (!ret)
 		kfree_skb(skb);
 	return ret;
@@ -1907,7 +1894,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,
 
 	skb = batadv_tt_response_fill_table(tt_len, ttvn,
 					    bat_priv->tt.local_hash,
-					    primary_if,
+					    bat_priv,
 					    batadv_tt_local_valid_entry,
 					    NULL);
 	if (!skb)
@@ -1953,7 +1940,7 @@ out:
 bool batadv_send_tt_response(struct batadv_priv *bat_priv,
 			     struct batadv_tt_query_packet *tt_request)
 {
-	if (batadv_is_my_mac(tt_request->dst)) {
+	if (batadv_is_my_mac(bat_priv, tt_request->dst)) {
 		/* don't answer backbone gws! */
 		if (batadv_bla_is_backbone_gw_orig(bat_priv, tt_request->src))
 			return true;
@@ -2528,7 +2515,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,
 	if (!tt_global_entry)
 		goto out;
 
-	ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM);
+	ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM;
 	batadv_tt_global_entry_free_ref(tt_global_entry);
 out:
 	return ret;
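All of the translation-table hunks swap primary_if->soft_iface for bat_priv->soft_iface, which exists from allocation onward, so the NULL check and a get/free pair per call disappear. The MTU clamp in batadv_tt_response_fill_table() is then plain arithmetic; a userspace rendering with made-up sizes:

    #include <stdio.h>

    int main(void)
    {
    	/* sizes are illustrative, not the real struct sizes */
    	unsigned int mtu = 1500, tt_query_size = 26, tt_change = 12;
    	unsigned int tt_len = 4000;

    	if (tt_query_size + tt_len > mtu) {
    		tt_len = mtu - tt_query_size;   /* fit in one packet   */
    		tt_len -= tt_len % tt_change;   /* whole entries only  */
    	}
    	printf("%u entries fit\n", tt_len / tt_change);
    	return 0;
    }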
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 4cd87a0b5b80..aba8364c3689 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -128,6 +128,10 @@ struct batadv_hard_iface {
  * @bond_list: list of bonding candidates
  * @refcount: number of contexts the object is used
  * @rcu: struct used for freeing in an RCU-safe manner
+ * @in_coding_list: list of nodes this orig can hear
+ * @out_coding_list: list of nodes that can hear this orig
+ * @in_coding_list_lock: protects in_coding_list
+ * @out_coding_list_lock: protects out_coding_list
  */
 struct batadv_orig_node {
 	uint8_t orig[ETH_ALEN];
@@ -171,6 +175,12 @@ struct batadv_orig_node {
 	struct list_head bond_list;
 	atomic_t refcount;
 	struct rcu_head rcu;
+#ifdef CONFIG_BATMAN_ADV_NC
+	struct list_head in_coding_list;
+	struct list_head out_coding_list;
+	spinlock_t in_coding_list_lock; /* Protects in_coding_list */
+	spinlock_t out_coding_list_lock; /* Protects out_coding_list */
+#endif
 };
 
 /**
@@ -265,6 +275,17 @@ struct batadv_bcast_duplist_entry {
  * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter
  * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet
  *  counter
+ * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter
+ * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes counter
+ * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet counter
+ * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes counter
+ * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc decoding
+ * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter
+ * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes counter
+ * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic packet
+ *  counter
+ * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in promisc
+ *  mode.
  * @BATADV_CNT_NUM: number of traffic counters
  */
 enum batadv_counters {
@@ -292,6 +313,17 @@ enum batadv_counters {
 	BATADV_CNT_DAT_PUT_RX,
 	BATADV_CNT_DAT_CACHED_REPLY_TX,
 #endif
+#ifdef CONFIG_BATMAN_ADV_NC
+	BATADV_CNT_NC_CODE,
+	BATADV_CNT_NC_CODE_BYTES,
+	BATADV_CNT_NC_RECODE,
+	BATADV_CNT_NC_RECODE_BYTES,
+	BATADV_CNT_NC_BUFFER,
+	BATADV_CNT_NC_DECODE,
+	BATADV_CNT_NC_DECODE_BYTES,
+	BATADV_CNT_NC_DECODE_FAILED,
+	BATADV_CNT_NC_SNIFFED,
+#endif
 	BATADV_CNT_NUM,
 };
 
@@ -428,6 +460,35 @@ struct batadv_priv_dat {
 #endif
 
 /**
+ * struct batadv_priv_nc - per mesh interface network coding private data
+ * @work: work queue callback item for cleanup
+ * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
+ * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq
+ * @max_fwd_delay: maximum packet forward delay to allow coding of packets
+ * @max_buffer_time: buffer time for sniffed packets used to decoding
+ * @timestamp_fwd_flush: timestamp of last forward packet queue flush
+ * @timestamp_sniffed_purge: timestamp of last sniffed packet queue purge
+ * @coding_hash: Hash table used to buffer skbs while waiting for another
+ *  incoming skb to code it with. Skbs are added to the buffer just before being
+ *  forwarded in routing.c
+ * @decoding_hash: Hash table used to buffer skbs that might be needed to decode
+ *  a received coded skb. The buffer is used for 1) skbs arriving on the
+ *  soft-interface; 2) skbs overheard on the hard-interface; and 3) skbs
+ *  forwarded by batman-adv.
+ */
+struct batadv_priv_nc {
+	struct delayed_work work;
+	struct dentry *debug_dir;
+	u8 min_tq;
+	u32 max_fwd_delay;
+	u32 max_buffer_time;
+	unsigned long timestamp_fwd_flush;
+	unsigned long timestamp_sniffed_purge;
+	struct batadv_hashtable *coding_hash;
+	struct batadv_hashtable *decoding_hash;
+};
+
+/**
  * struct batadv_priv - per mesh interface data
  * @mesh_state: current status of the mesh (inactive/active/deactivating)
  * @soft_iface: net device which holds this struct as private data
@@ -470,6 +531,8 @@ struct batadv_priv_dat {
  * @tt: translation table data
  * @vis: vis data
  * @dat: distributed arp table data
+ * @network_coding: bool indicating whether network coding is enabled
+ * @batadv_priv_nc: network coding data
  */
 struct batadv_priv {
 	atomic_t mesh_state;
@@ -522,6 +585,10 @@ struct batadv_priv {
 #ifdef CONFIG_BATMAN_ADV_DAT
 	struct batadv_priv_dat dat;
 #endif
+#ifdef CONFIG_BATMAN_ADV_NC
+	atomic_t network_coding;
+	struct batadv_priv_nc nc;
+#endif /* CONFIG_BATMAN_ADV_NC */
 };
 
 /**
@@ -702,6 +769,75 @@ struct batadv_tt_roam_node {
 };
 
 /**
+ * struct batadv_nc_node - network coding node
+ * @list: next and prev pointer for the list handling
+ * @addr: the node's mac address
+ * @refcount: number of contexts the object is used by
+ * @rcu: struct used for freeing in an RCU-safe manner
+ * @orig_node: pointer to corresponding orig node struct
+ * @last_seen: timestamp of last ogm received from this node
+ */
+struct batadv_nc_node {
+	struct list_head list;
+	uint8_t addr[ETH_ALEN];
+	atomic_t refcount;
+	struct rcu_head rcu;
+	struct batadv_orig_node *orig_node;
+	unsigned long last_seen;
+};
+
+/**
+ * struct batadv_nc_path - network coding path
+ * @hash_entry: next and prev pointer for the list handling
+ * @rcu: struct used for freeing in an RCU-safe manner
+ * @refcount: number of contexts the object is used by
+ * @packet_list: list of buffered packets for this path
+ * @packet_list_lock: access lock for packet list
+ * @next_hop: next hop (destination) of path
+ * @prev_hop: previous hop (source) of path
+ * @last_valid: timestamp for last validation of path
+ */
+struct batadv_nc_path {
+	struct hlist_node hash_entry;
+	struct rcu_head rcu;
+	atomic_t refcount;
+	struct list_head packet_list;
+	spinlock_t packet_list_lock; /* Protects packet_list */
+	uint8_t next_hop[ETH_ALEN];
+	uint8_t prev_hop[ETH_ALEN];
+	unsigned long last_valid;
+};
+
+/**
+ * struct batadv_nc_packet - network coding packet used when coding and
+ *  decoding packets
+ * @list: next and prev pointer for the list handling
+ * @packet_id: crc32 checksum of skb data
+ * @timestamp: field containing the info when the packet was added to path
+ * @neigh_node: pointer to original next hop neighbor of skb
+ * @skb: skb which can be encoded or used for decoding
+ * @nc_path: pointer to path this nc packet is attached to
+ */
+struct batadv_nc_packet {
+	struct list_head list;
+	__be32 packet_id;
+	unsigned long timestamp;
+	struct batadv_neigh_node *neigh_node;
+	struct sk_buff *skb;
+	struct batadv_nc_path *nc_path;
+};
+
+/**
+ * batadv_skb_cb - control buffer structure used to store private data relevant
+ *  to batman-adv in the skb->cb buffer in skbs.
+ * @decoded: Marks a skb as decoded, which is checked when searching for coding
+ *  opportunities in network-coding.c
+ */
+struct batadv_skb_cb {
+	bool decoded;
+};
+
+/**
  * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded
  * @list: list node for batadv_socket_client::queue_list
  * @send_time: execution time for delayed_work (packet sending)
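batadv_nc_node and batadv_nc_path both pair an atomic_t refcount with an RCU head, the usual batman-adv get/put discipline. A sketch of that discipline on a generic object (the helper names are hypothetical; the real ones live in network-coding.c, outside this excerpt):

    #include <linux/atomic.h>
    #include <linux/rcupdate.h>
    #include <linux/types.h>

    struct nc_obj {
    	atomic_t refcount;
    	struct rcu_head rcu;
    };

    /* Takes a reference only if the object is still live. */
    static bool nc_obj_get(struct nc_obj *obj)
    {
    	return atomic_inc_not_zero(&obj->refcount);
    }

    /* Frees after a grace period once the last reference is gone. */
    static void nc_obj_put(struct nc_obj *obj)
    {
    	if (atomic_dec_and_test(&obj->refcount))
    		kfree_rcu(obj, rcu);
    }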
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index 50e079f00be6..0bb3b5982f94 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -122,7 +122,7 @@ batadv_frag_search_packet(struct list_head *head,
 {
 	struct batadv_frag_packet_list_entry *tfp;
 	struct batadv_unicast_frag_packet *tmp_up = NULL;
-	int is_head_tmp, is_head;
+	bool is_head_tmp, is_head;
 	uint16_t search_seqno;
 
 	if (up->flags & BATADV_UNI_FRAG_HEAD)
@@ -130,7 +130,7 @@ batadv_frag_search_packet(struct list_head *head,
 	else
 		search_seqno = ntohs(up->seqno)-1;
 
-	is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD);
+	is_head = up->flags & BATADV_UNI_FRAG_HEAD;
 
 	list_for_each_entry(tfp, head, list) {
 		if (!tfp->skb)
@@ -142,7 +142,7 @@ batadv_frag_search_packet(struct list_head *head,
 		tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data;
 
 		if (tfp->seqno == search_seqno) {
-			is_head_tmp = !!(tmp_up->flags & BATADV_UNI_FRAG_HEAD);
+			is_head_tmp = tmp_up->flags & BATADV_UNI_FRAG_HEAD;
 			if (is_head_tmp != is_head)
 				return tfp;
 			else
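Switching the flags to bool makes the `!!` normalization redundant: converting a masked flag word to bool already collapses any non-zero value to true, while two ints holding different set bits would compare unequal. A small demonstration:

    #include <stdbool.h>
    #include <stdio.h>

    #define FLAG_HEAD 0x04 /* stand-in for BATADV_UNI_FRAG_HEAD */

    int main(void)
    {
    	unsigned char flags = 0x05;
    	int  as_int  = flags & FLAG_HEAD;  /* 4: needs !! before comparing */
    	bool as_bool = flags & FLAG_HEAD;  /* 1: normalized by conversion  */

    	printf("int=%d bool=%d\n", as_int, (int)as_bool);
    	return 0;
    }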
diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c
index c053244b97bd..1625e5793a89 100644
--- a/net/batman-adv/vis.c
+++ b/net/batman-adv/vis.c
@@ -149,7 +149,7 @@ static void batadv_vis_data_read_prim_sec(struct seq_file *seq,
 
 	hlist_for_each_entry(entry, if_list, list) {
 		if (entry->primary)
-			seq_printf(seq, "PRIMARY, ");
+			seq_puts(seq, "PRIMARY, ");
 		else
 			seq_printf(seq, "SEC %pM, ", entry->addr);
 	}
@@ -207,7 +207,7 @@ static void batadv_vis_data_read_entries(struct seq_file *seq,
 		if (batadv_compare_eth(entry->addr, packet->vis_orig))
 			batadv_vis_data_read_prim_sec(seq, list);
 
-		seq_printf(seq, "\n");
+		seq_puts(seq, "\n");
 	}
 }
 
@@ -477,7 +477,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv,
 
 	/* Are we the target for this VIS packet? */
 	if (vis_server == BATADV_VIS_TYPE_SERVER_SYNC &&
-	    batadv_is_my_mac(vis_packet->target_orig))
+	    batadv_is_my_mac(bat_priv, vis_packet->target_orig))
 		are_target = 1;
 
 	spin_lock_bh(&bat_priv->vis.hash_lock);
@@ -496,7 +496,7 @@ void batadv_receive_client_update_packet(struct batadv_priv *bat_priv,
 		batadv_send_list_add(bat_priv, info);
 
 	/* ... we're not the recipient (and thus need to forward). */
-	} else if (!batadv_is_my_mac(packet->target_orig)) {
+	} else if (!batadv_is_my_mac(bat_priv, packet->target_orig)) {
 		batadv_send_list_add(bat_priv, info);
 	}
 
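seq_puts() writes a literal without running the printf format parser, so it is the cheaper call whenever the string contains no conversions, which is the case at both converted sites. The same trade-off expressed with stdio:

    #include <stdio.h>

    int main(void)
    {
    	fputs("PRIMARY, ", stdout);          /* no format parsing */
    	fprintf(stdout, "%s, ", "PRIMARY");  /* parses "%s, " every call */
    	return 0;
    }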
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index eb0f4b16ff09..17f33a62f6db 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -397,13 +397,12 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
 	if (ctrl) {
 		u8 *assoc;
 
-		assoc = kzalloc(assoc_len, GFP_KERNEL);
+		assoc = kmemdup(rsp->amp_assoc, assoc_len, GFP_KERNEL);
 		if (!assoc) {
 			amp_ctrl_put(ctrl);
 			return -ENOMEM;
 		}
 
-		memcpy(assoc, rsp->amp_assoc, assoc_len);
 		ctrl->assoc = assoc;
 		ctrl->assoc_len = assoc_len;
 		ctrl->assoc_rem_len = assoc_len;
@@ -472,13 +471,12 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
 	size_t assoc_len = le16_to_cpu(hdr->len) - sizeof(*req);
 	u8 *assoc;
 
-	assoc = kzalloc(assoc_len, GFP_KERNEL);
+	assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL);
 	if (!assoc) {
 		amp_ctrl_put(ctrl);
 		return -ENOMEM;
 	}
 
-	memcpy(assoc, req->amp_assoc, assoc_len);
 	ctrl->assoc = assoc;
 	ctrl->assoc_len = assoc_len;
 	ctrl->assoc_rem_len = assoc_len;
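kmemdup() folds the allocate-and-copy pair into one call and also drops the needless zeroing that kzalloc() performed on memory that was about to be overwritten in full. For reference, a userspace equivalent of what kmemdup(src, len, gfp) does:

    #include <stdlib.h>
    #include <string.h>

    /* Userspace rendering of kmemdup(), minus the kernel allocator flags. */
    static void *memdup_sketch(const void *src, size_t len)
    {
    	void *p = malloc(len);

    	if (p)
    		memcpy(p, src, len);
    	return p;
    }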
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index d3ee69b35a78..9096137c889c 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -92,23 +92,14 @@ int bt_sock_register(int proto, const struct net_proto_family *ops)
 }
 EXPORT_SYMBOL(bt_sock_register);
 
-int bt_sock_unregister(int proto)
+void bt_sock_unregister(int proto)
 {
-	int err = 0;
-
 	if (proto < 0 || proto >= BT_MAX_PROTO)
-		return -EINVAL;
+		return;
 
 	write_lock(&bt_proto_lock);
-
-	if (!bt_proto[proto])
-		err = -ENOENT;
-	else
-		bt_proto[proto] = NULL;
-
+	bt_proto[proto] = NULL;
 	write_unlock(&bt_proto_lock);
-
-	return err;
 }
 EXPORT_SYMBOL(bt_sock_unregister);
 
@@ -230,6 +221,8 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (flags & (MSG_OOB))
 		return -EOPNOTSUPP;
 
+	msg->msg_namelen = 0;
+
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
 	if (!skb) {
 		if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -237,8 +230,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 			return err;
 	}
 
-	msg->msg_namelen = 0;
-
 	copied = skb->len;
 	if (len < copied) {
 		msg->msg_flags |= MSG_TRUNC;
@@ -422,7 +413,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
 		return bt_accept_poll(sk);
 
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
@@ -617,7 +609,7 @@ static int bt_seq_open(struct inode *inode, struct file *file)
 	struct bt_sock_list *sk_list;
 	struct bt_seq_state *s;
 
-	sk_list = PDE(inode)->data;
+	sk_list = PDE_DATA(inode);
 	s = __seq_open_private(file, &bt_seq_ops,
 			       sizeof(struct bt_seq_state));
 	if (!s)
@@ -627,26 +619,21 @@ static int bt_seq_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-int bt_procfs_init(struct module* module, struct net *net, const char *name,
+static const struct file_operations bt_fops = {
+	.open = bt_seq_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private
+};
+
+int bt_procfs_init(struct net *net, const char *name,
 		   struct bt_sock_list* sk_list,
 		   int (* seq_show)(struct seq_file *, void *))
 {
-	struct proc_dir_entry * pde;
-
 	sk_list->custom_seq_show = seq_show;
 
-	sk_list->fops.owner = module;
-	sk_list->fops.open = bt_seq_open;
-	sk_list->fops.read = seq_read;
-	sk_list->fops.llseek = seq_lseek;
-	sk_list->fops.release = seq_release_private;
-
-	pde = proc_create(name, 0, net->proc_net, &sk_list->fops);
-	if (!pde)
+	if (!proc_create_data(name, 0, net->proc_net, &bt_fops, sk_list))
 		return -ENOMEM;
-
-	pde->data = sk_list;
-
 	return 0;
 }
 
@@ -655,7 +642,7 @@ void bt_procfs_cleanup(struct net *net, const char *name)
 	remove_proc_entry(name, net->proc_net);
 }
 #else
-int bt_procfs_init(struct module* module, struct net *net, const char *name,
+int bt_procfs_init(struct net *net, const char *name,
 		   struct bt_sock_list* sk_list,
 		   int (* seq_show)(struct seq_file *, void *))
 {
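Attaching the per-protocol data at creation time through proc_create_data() is what allows one const, module-wide file_operations instead of a writable copy per socket list, and PDE_DATA(inode) hands the pointer back in ->open. A minimal sketch of the pairing with hypothetical names:

    #include <linux/fs.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>

    static int demo_show(struct seq_file *m, void *v)
    {
    	seq_printf(m, "%s\n", (char *)m->private);
    	return 0;
    }

    static int demo_open(struct inode *inode, struct file *file)
    {
    	/* PDE_DATA() returns the pointer given to proc_create_data() */
    	return single_open(file, demo_show, PDE_DATA(inode));
    }

    static const struct file_operations demo_fops = {
    	.open		= demo_open,
    	.read		= seq_read,
    	.llseek		= seq_lseek,
    	.release	= single_release,
    };

    /* in some init path:
     *	proc_create_data("demo", 0, net->proc_net, &demo_fops, "hello");
     */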
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index e58c8b32589c..4b488ec26105 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -136,7 +136,7 @@ static u16 bnep_net_eth_proto(struct sk_buff *skb)
 	struct ethhdr *eh = (void *) skb->data;
 	u16 proto = ntohs(eh->h_proto);
 
-	if (proto >= 1536)
+	if (proto >= ETH_P_802_3_MIN)
 		return proto;
 
 	if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF))
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index e7154a58465f..5f051290daba 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -234,7 +234,7 @@ int __init bnep_sock_init(void)
 		goto error;
 	}
 
-	err = bt_procfs_init(THIS_MODULE, &init_net, "bnep", &bnep_sk_list, NULL);
+	err = bt_procfs_init(&init_net, "bnep", &bnep_sk_list, NULL);
 	if (err < 0) {
 		BT_ERR("Failed to create BNEP proc file");
 		bt_sock_unregister(BTPROTO_BNEP);
@@ -253,8 +253,6 @@ error:
 void __exit bnep_sock_cleanup(void)
 {
 	bt_procfs_cleanup(&init_net, "bnep");
-	if (bt_sock_unregister(BTPROTO_BNEP) < 0)
-		BT_ERR("Can't unregister BNEP socket");
-
+	bt_sock_unregister(BTPROTO_BNEP);
 	proto_unregister(&bnep_proto);
 }
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index a4a9d4b6816c..cd75e4d64b90 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -539,7 +539,7 @@ static int cmtp_proc_show(struct seq_file *m, void *v)
 
 static int cmtp_proc_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, cmtp_proc_show, PDE(inode)->data);
+	return single_open(file, cmtp_proc_show, PDE_DATA(inode));
 }
 
 static const struct file_operations cmtp_proc_fops = {
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index 1c57482112b6..d82787d417bd 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -245,7 +245,7 @@ int cmtp_init_sockets(void)
 		goto error;
 	}
 
-	err = bt_procfs_init(THIS_MODULE, &init_net, "cmtp", &cmtp_sk_list, NULL);
+	err = bt_procfs_init(&init_net, "cmtp", &cmtp_sk_list, NULL);
 	if (err < 0) {
 		BT_ERR("Failed to create CMTP proc file");
 		bt_sock_unregister(BTPROTO_HIDP);
@@ -264,8 +264,6 @@ error:
264void cmtp_cleanup_sockets(void) 264void cmtp_cleanup_sockets(void)
265{ 265{
266 bt_procfs_cleanup(&init_net, "cmtp"); 266 bt_procfs_cleanup(&init_net, "cmtp");
267 if (bt_sock_unregister(BTPROTO_CMTP) < 0) 267 bt_sock_unregister(BTPROTO_CMTP);
268 BT_ERR("Can't unregister CMTP socket");
269
270 proto_unregister(&cmtp_proto); 268 proto_unregister(&cmtp_proto);
271} 269}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 4925a02ae7e4..6c7f36379722 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -117,7 +117,17 @@ static void hci_acl_create_connection_cancel(struct hci_conn *conn)
117 hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp); 117 hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp);
118} 118}
119 119
120void hci_acl_disconn(struct hci_conn *conn, __u8 reason) 120static void hci_reject_sco(struct hci_conn *conn)
121{
122 struct hci_cp_reject_sync_conn_req cp;
123
124 cp.reason = HCI_ERROR_REMOTE_USER_TERM;
125 bacpy(&cp.bdaddr, &conn->dst);
126
127 hci_send_cmd(conn->hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp);
128}
129
130void hci_disconnect(struct hci_conn *conn, __u8 reason)
121{ 131{
122 struct hci_cp_disconnect cp; 132 struct hci_cp_disconnect cp;
123 133
@@ -253,7 +263,7 @@ static void hci_conn_disconnect(struct hci_conn *conn)
253 hci_amp_disconn(conn, reason); 263 hci_amp_disconn(conn, reason);
254 break; 264 break;
255 default: 265 default:
256 hci_acl_disconn(conn, reason); 266 hci_disconnect(conn, reason);
257 break; 267 break;
258 } 268 }
259} 269}
@@ -276,6 +286,8 @@ static void hci_conn_timeout(struct work_struct *work)
276 hci_acl_create_connection_cancel(conn); 286 hci_acl_create_connection_cancel(conn);
277 else if (conn->type == LE_LINK) 287 else if (conn->type == LE_LINK)
278 hci_le_create_connection_cancel(conn); 288 hci_le_create_connection_cancel(conn);
289 } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
290 hci_reject_sco(conn);
279 } 291 }
280 break; 292 break;
281 case BT_CONFIG: 293 case BT_CONFIG:
@@ -398,8 +410,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
398 if (hdev->notify) 410 if (hdev->notify)
399 hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); 411 hdev->notify(hdev, HCI_NOTIFY_CONN_ADD);
400 412
401 atomic_set(&conn->devref, 0);
402
403 hci_conn_init_sysfs(conn); 413 hci_conn_init_sysfs(conn);
404 414
405 return conn; 415 return conn;
@@ -433,7 +443,7 @@ int hci_conn_del(struct hci_conn *conn)
433 struct hci_conn *acl = conn->link; 443 struct hci_conn *acl = conn->link;
434 if (acl) { 444 if (acl) {
435 acl->link = NULL; 445 acl->link = NULL;
436 hci_conn_put(acl); 446 hci_conn_drop(acl);
437 } 447 }
438 } 448 }
439 449
@@ -448,12 +458,11 @@ int hci_conn_del(struct hci_conn *conn)
448 458
449 skb_queue_purge(&conn->data_q); 459 skb_queue_purge(&conn->data_q);
450 460
451 hci_conn_put_device(conn); 461 hci_conn_del_sysfs(conn);
452 462
453 hci_dev_put(hdev); 463 hci_dev_put(hdev);
454 464
455 if (conn->handle == 0) 465 hci_conn_put(conn);
456 kfree(conn);
457 466
458 return 0; 467 return 0;
459} 468}
@@ -565,7 +574,7 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,
565 if (!sco) { 574 if (!sco) {
566 sco = hci_conn_add(hdev, type, dst); 575 sco = hci_conn_add(hdev, type, dst);
567 if (!sco) { 576 if (!sco) {
568 hci_conn_put(acl); 577 hci_conn_drop(acl);
569 return ERR_PTR(-ENOMEM); 578 return ERR_PTR(-ENOMEM);
570 } 579 }
571 } 580 }
@@ -835,19 +844,6 @@ void hci_conn_check_pending(struct hci_dev *hdev)
835 hci_dev_unlock(hdev); 844 hci_dev_unlock(hdev);
836} 845}
837 846
838void hci_conn_hold_device(struct hci_conn *conn)
839{
840 atomic_inc(&conn->devref);
841}
842EXPORT_SYMBOL(hci_conn_hold_device);
843
844void hci_conn_put_device(struct hci_conn *conn)
845{
846 if (atomic_dec_and_test(&conn->devref))
847 hci_conn_del_sysfs(conn);
848}
849EXPORT_SYMBOL(hci_conn_put_device);
850
851int hci_get_conn_list(void __user *arg) 847int hci_get_conn_list(void __user *arg)
852{ 848{
853 struct hci_conn *c; 849 struct hci_conn *c;
@@ -980,7 +976,7 @@ void hci_chan_del(struct hci_chan *chan)
980 976
981 synchronize_rcu(); 977 synchronize_rcu();
982 978
983 hci_conn_put(conn); 979 hci_conn_drop(conn);
984 980
985 skb_queue_purge(&chan->data_q); 981 skb_queue_purge(&chan->data_q);
986 kfree(chan); 982 kfree(chan);
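The hci_conn.c changes collapse two reference counters into one: the separate devref (hci_conn_hold_device/hci_conn_put_device) is gone, sysfs teardown moves into hci_conn_del(), and call sites that held a usage reference switch from hci_conn_put() to hci_conn_drop(), leaving hci_conn_put() as the final object release. A minimal sketch of the resulting pairing, assuming hci_conn_hold() remains the matching acquire as elsewhere in the tree:

/* Sketch: usage-reference pairing after the devref removal.
 * How the caller obtained conn is out of scope here. */
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

static void use_connection(struct hci_conn *conn)
{
	hci_conn_hold(conn);		/* take a usage reference */

	/* ... queue data, wait for completion events ... */

	hci_conn_drop(conn);		/* release it; may arm idle timeout */
	/* hci_conn_put() is now reserved for the final release path */
}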
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 60793e7b768b..33843c5c4939 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -57,36 +57,9 @@ static void hci_notify(struct hci_dev *hdev, int event)
57 57
58/* ---- HCI requests ---- */ 58/* ---- HCI requests ---- */
59 59
60void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) 60static void hci_req_sync_complete(struct hci_dev *hdev, u8 result)
61{ 61{
62 BT_DBG("%s command 0x%4.4x result 0x%2.2x", hdev->name, cmd, result); 62 BT_DBG("%s result 0x%2.2x", hdev->name, result);
63
64 /* If this is the init phase check if the completed command matches
65 * the last init command, and if not just return.
66 */
67 if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) {
68 struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data;
69 u16 opcode = __le16_to_cpu(sent->opcode);
70 struct sk_buff *skb;
71
72 /* Some CSR based controllers generate a spontaneous
73 * reset complete event during init and any pending
74 * command will never be completed. In such a case we
75 * need to resend whatever was the last sent
76 * command.
77 */
78
79 if (cmd != HCI_OP_RESET || opcode == HCI_OP_RESET)
80 return;
81
82 skb = skb_clone(hdev->sent_cmd, GFP_ATOMIC);
83 if (skb) {
84 skb_queue_head(&hdev->cmd_q, skb);
85 queue_work(hdev->workqueue, &hdev->cmd_work);
86 }
87
88 return;
89 }
90 63
91 if (hdev->req_status == HCI_REQ_PEND) { 64 if (hdev->req_status == HCI_REQ_PEND) {
92 hdev->req_result = result; 65 hdev->req_result = result;
@@ -106,22 +79,158 @@ static void hci_req_cancel(struct hci_dev *hdev, int err)
106 } 79 }
107} 80}
108 81
82static struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
83 u8 event)
84{
85 struct hci_ev_cmd_complete *ev;
86 struct hci_event_hdr *hdr;
87 struct sk_buff *skb;
88
89 hci_dev_lock(hdev);
90
91 skb = hdev->recv_evt;
92 hdev->recv_evt = NULL;
93
94 hci_dev_unlock(hdev);
95
96 if (!skb)
97 return ERR_PTR(-ENODATA);
98
99 if (skb->len < sizeof(*hdr)) {
100 BT_ERR("Too short HCI event");
101 goto failed;
102 }
103
104 hdr = (void *) skb->data;
105 skb_pull(skb, HCI_EVENT_HDR_SIZE);
106
107 if (event) {
108 if (hdr->evt != event)
109 goto failed;
110 return skb;
111 }
112
113 if (hdr->evt != HCI_EV_CMD_COMPLETE) {
114 BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt);
115 goto failed;
116 }
117
118 if (skb->len < sizeof(*ev)) {
119 BT_ERR("Too short cmd_complete event");
120 goto failed;
121 }
122
123 ev = (void *) skb->data;
124 skb_pull(skb, sizeof(*ev));
125
126 if (opcode == __le16_to_cpu(ev->opcode))
127 return skb;
128
129 BT_DBG("opcode doesn't match (0x%2.2x != 0x%2.2x)", opcode,
130 __le16_to_cpu(ev->opcode));
131
132failed:
133 kfree_skb(skb);
134 return ERR_PTR(-ENODATA);
135}
136
137struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen,
138 const void *param, u8 event, u32 timeout)
139{
140 DECLARE_WAITQUEUE(wait, current);
141 struct hci_request req;
142 int err = 0;
143
144 BT_DBG("%s", hdev->name);
145
146 hci_req_init(&req, hdev);
147
148 hci_req_add_ev(&req, opcode, plen, param, event);
149
150 hdev->req_status = HCI_REQ_PEND;
151
152 err = hci_req_run(&req, hci_req_sync_complete);
153 if (err < 0)
154 return ERR_PTR(err);
155
156 add_wait_queue(&hdev->req_wait_q, &wait);
157 set_current_state(TASK_INTERRUPTIBLE);
158
159 schedule_timeout(timeout);
160
161 remove_wait_queue(&hdev->req_wait_q, &wait);
162
163 if (signal_pending(current))
164 return ERR_PTR(-EINTR);
165
166 switch (hdev->req_status) {
167 case HCI_REQ_DONE:
168 err = -bt_to_errno(hdev->req_result);
169 break;
170
171 case HCI_REQ_CANCELED:
172 err = -hdev->req_result;
173 break;
174
175 default:
176 err = -ETIMEDOUT;
177 break;
178 }
179
180 hdev->req_status = hdev->req_result = 0;
181
182 BT_DBG("%s end: err %d", hdev->name, err);
183
184 if (err < 0)
185 return ERR_PTR(err);
186
187 return hci_get_cmd_complete(hdev, opcode, event);
188}
189EXPORT_SYMBOL(__hci_cmd_sync_ev);
190
191struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
192 const void *param, u32 timeout)
193{
194 return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout);
195}
196EXPORT_SYMBOL(__hci_cmd_sync);
197
109/* Execute request and wait for completion. */ 198/* Execute request and wait for completion. */
110static int __hci_request(struct hci_dev *hdev, 199static int __hci_req_sync(struct hci_dev *hdev,
111 void (*req)(struct hci_dev *hdev, unsigned long opt), 200 void (*func)(struct hci_request *req,
112 unsigned long opt, __u32 timeout) 201 unsigned long opt),
202 unsigned long opt, __u32 timeout)
113{ 203{
204 struct hci_request req;
114 DECLARE_WAITQUEUE(wait, current); 205 DECLARE_WAITQUEUE(wait, current);
115 int err = 0; 206 int err = 0;
116 207
117 BT_DBG("%s start", hdev->name); 208 BT_DBG("%s start", hdev->name);
118 209
210 hci_req_init(&req, hdev);
211
119 hdev->req_status = HCI_REQ_PEND; 212 hdev->req_status = HCI_REQ_PEND;
120 213
214 func(&req, opt);
215
216 err = hci_req_run(&req, hci_req_sync_complete);
217 if (err < 0) {
218 hdev->req_status = 0;
219
220 /* ENODATA means the HCI request command queue is empty.
221 * This can happen when a request with conditionals doesn't
222 * trigger any commands to be sent. This is normal behavior
223 * and should not trigger an error return.
224 */
225 if (err == -ENODATA)
226 return 0;
227
228 return err;
229 }
230
121 add_wait_queue(&hdev->req_wait_q, &wait); 231 add_wait_queue(&hdev->req_wait_q, &wait);
122 set_current_state(TASK_INTERRUPTIBLE); 232 set_current_state(TASK_INTERRUPTIBLE);
123 233
124 req(hdev, opt);
125 schedule_timeout(timeout); 234 schedule_timeout(timeout);
126 235
127 remove_wait_queue(&hdev->req_wait_q, &wait); 236 remove_wait_queue(&hdev->req_wait_q, &wait);
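__hci_cmd_sync() and __hci_cmd_sync_ev(), added in the hunk above, give the Bluetooth core a blocking send-and-wait primitive: one command is queued as a request, hci_req_sync_complete() wakes the sleeper, and hci_get_cmd_complete() hands back the matching event skb. A minimal sketch of a caller; it assumes the request lock is already held, as the double-underscore prefix suggests, and abbreviates error handling:

/* Sketch: synchronous HCI command during controller setup. */
#include <linux/err.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

static int read_bd_addr_sync(struct hci_dev *hdev, bdaddr_t *addr)
{
	struct hci_rp_read_bd_addr *rp;
	struct sk_buff *skb;
	u8 status;

	skb = __hci_cmd_sync(hdev, HCI_OP_READ_BD_ADDR, 0, NULL,
			     HCI_INIT_TIMEOUT);
	if (IS_ERR(skb))
		return PTR_ERR(skb);	/* -EINTR, -ETIMEDOUT, -ENODATA */

	if (skb->len < sizeof(*rp)) {
		kfree_skb(skb);
		return -EIO;
	}

	rp = (struct hci_rp_read_bd_addr *) skb->data;
	status = rp->status;
	if (!status)
		bacpy(addr, &rp->bdaddr);

	kfree_skb(skb);			/* caller owns the returned skb */
	return status ? -EIO : 0;
}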
@@ -150,9 +259,10 @@ static int __hci_request(struct hci_dev *hdev,
150 return err; 259 return err;
151} 260}
152 261
153static int hci_request(struct hci_dev *hdev, 262static int hci_req_sync(struct hci_dev *hdev,
154 void (*req)(struct hci_dev *hdev, unsigned long opt), 263 void (*req)(struct hci_request *req,
155 unsigned long opt, __u32 timeout) 264 unsigned long opt),
265 unsigned long opt, __u32 timeout)
156{ 266{
157 int ret; 267 int ret;
158 268
@@ -161,75 +271,66 @@ static int hci_request(struct hci_dev *hdev,
161 271
162 /* Serialize all requests */ 272 /* Serialize all requests */
163 hci_req_lock(hdev); 273 hci_req_lock(hdev);
164 ret = __hci_request(hdev, req, opt, timeout); 274 ret = __hci_req_sync(hdev, req, opt, timeout);
165 hci_req_unlock(hdev); 275 hci_req_unlock(hdev);
166 276
167 return ret; 277 return ret;
168} 278}
169 279
170static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) 280static void hci_reset_req(struct hci_request *req, unsigned long opt)
171{ 281{
172 BT_DBG("%s %ld", hdev->name, opt); 282 BT_DBG("%s %ld", req->hdev->name, opt);
173 283
174 /* Reset device */ 284 /* Reset device */
175 set_bit(HCI_RESET, &hdev->flags); 285 set_bit(HCI_RESET, &req->hdev->flags);
176 hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); 286 hci_req_add(req, HCI_OP_RESET, 0, NULL);
177} 287}
178 288
179static void bredr_init(struct hci_dev *hdev) 289static void bredr_init(struct hci_request *req)
180{ 290{
181 hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; 291 req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED;
182 292
183 /* Read Local Supported Features */ 293 /* Read Local Supported Features */
184 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); 294 hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);
185 295
186 /* Read Local Version */ 296 /* Read Local Version */
187 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); 297 hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
298
299 /* Read BD Address */
300 hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL);
188} 301}
189 302
190static void amp_init(struct hci_dev *hdev) 303static void amp_init(struct hci_request *req)
191{ 304{
192 hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; 305 req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;
193 306
194 /* Read Local Version */ 307 /* Read Local Version */
195 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); 308 hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);
196 309
197 /* Read Local AMP Info */ 310 /* Read Local AMP Info */
198 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); 311 hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
199 312
200 /* Read Data Blk size */ 313 /* Read Data Blk size */
201 hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); 314 hci_req_add(req, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL);
202} 315}
203 316
204static void hci_init_req(struct hci_dev *hdev, unsigned long opt) 317static void hci_init1_req(struct hci_request *req, unsigned long opt)
205{ 318{
206 struct sk_buff *skb; 319 struct hci_dev *hdev = req->hdev;
207 320
208 BT_DBG("%s %ld", hdev->name, opt); 321 BT_DBG("%s %ld", hdev->name, opt);
209 322
210 /* Driver initialization */
211
212 /* Special commands */
213 while ((skb = skb_dequeue(&hdev->driver_init))) {
214 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
215 skb->dev = (void *) hdev;
216
217 skb_queue_tail(&hdev->cmd_q, skb);
218 queue_work(hdev->workqueue, &hdev->cmd_work);
219 }
220 skb_queue_purge(&hdev->driver_init);
221
222 /* Reset */ 323 /* Reset */
223 if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) 324 if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks))
224 hci_reset_req(hdev, 0); 325 hci_reset_req(req, 0);
225 326
226 switch (hdev->dev_type) { 327 switch (hdev->dev_type) {
227 case HCI_BREDR: 328 case HCI_BREDR:
228 bredr_init(hdev); 329 bredr_init(req);
229 break; 330 break;
230 331
231 case HCI_AMP: 332 case HCI_AMP:
232 amp_init(hdev); 333 amp_init(req);
233 break; 334 break;
234 335
235 default: 336 default:
@@ -238,44 +339,347 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)
238 } 339 }
239} 340}
240 341
241static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) 342static void bredr_setup(struct hci_request *req)
343{
344 struct hci_cp_delete_stored_link_key cp;
345 __le16 param;
346 __u8 flt_type;
347
348 /* Read Buffer Size (ACL mtu, max pkt, etc.) */
349 hci_req_add(req, HCI_OP_READ_BUFFER_SIZE, 0, NULL);
350
351 /* Read Class of Device */
352 hci_req_add(req, HCI_OP_READ_CLASS_OF_DEV, 0, NULL);
353
354 /* Read Local Name */
355 hci_req_add(req, HCI_OP_READ_LOCAL_NAME, 0, NULL);
356
357 /* Read Voice Setting */
358 hci_req_add(req, HCI_OP_READ_VOICE_SETTING, 0, NULL);
359
360 /* Clear Event Filters */
361 flt_type = HCI_FLT_CLEAR_ALL;
362 hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
363
364 /* Connection accept timeout ~20 secs */
365 param = __constant_cpu_to_le16(0x7d00);
366 hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
367
368 bacpy(&cp.bdaddr, BDADDR_ANY);
369 cp.delete_all = 0x01;
370 hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
371
372 /* Read page scan parameters */
373 if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) {
374 hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
375 hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
376 }
377}
378
379static void le_setup(struct hci_request *req)
380{
381 struct hci_dev *hdev = req->hdev;
382
383 /* Read LE Buffer Size */
384 hci_req_add(req, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
385
386 /* Read LE Local Supported Features */
387 hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL);
388
389 /* Read LE Advertising Channel TX Power */
390 hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
391
392 /* Read LE White List Size */
393 hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
394
395 /* Read LE Supported States */
396 hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
397
398 /* LE-only controllers have LE implicitly enabled */
399 if (!lmp_bredr_capable(hdev))
400 set_bit(HCI_LE_ENABLED, &hdev->dev_flags);
401}
402
403static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
404{
405 if (lmp_ext_inq_capable(hdev))
406 return 0x02;
407
408 if (lmp_inq_rssi_capable(hdev))
409 return 0x01;
410
411 if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
412 hdev->lmp_subver == 0x0757)
413 return 0x01;
414
415 if (hdev->manufacturer == 15) {
416 if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
417 return 0x01;
418 if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
419 return 0x01;
420 if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
421 return 0x01;
422 }
423
424 if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
425 hdev->lmp_subver == 0x1805)
426 return 0x01;
427
428 return 0x00;
429}
430
431static void hci_setup_inquiry_mode(struct hci_request *req)
432{
433 u8 mode;
434
435 mode = hci_get_inquiry_mode(req->hdev);
436
437 hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
438}
439
440static void hci_setup_event_mask(struct hci_request *req)
441{
442 struct hci_dev *hdev = req->hdev;
443
444 /* The second byte is 0xff instead of 0x9f (two reserved bits
445 * disabled) since a Broadcom 1.2 dongle doesn't respond to the
446 * command otherwise.
447 */
448 u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
449
450 /* CSR 1.1 dongles do not accept any bitfield, so don't try to set
451 * any event mask for pre 1.2 devices.
452 */
453 if (hdev->hci_ver < BLUETOOTH_VER_1_2)
454 return;
455
456 if (lmp_bredr_capable(hdev)) {
457 events[4] |= 0x01; /* Flow Specification Complete */
458 events[4] |= 0x02; /* Inquiry Result with RSSI */
459 events[4] |= 0x04; /* Read Remote Extended Features Complete */
460 events[5] |= 0x08; /* Synchronous Connection Complete */
461 events[5] |= 0x10; /* Synchronous Connection Changed */
462 }
463
464 if (lmp_inq_rssi_capable(hdev))
465 events[4] |= 0x02; /* Inquiry Result with RSSI */
466
467 if (lmp_sniffsubr_capable(hdev))
468 events[5] |= 0x20; /* Sniff Subrating */
469
470 if (lmp_pause_enc_capable(hdev))
471 events[5] |= 0x80; /* Encryption Key Refresh Complete */
472
473 if (lmp_ext_inq_capable(hdev))
474 events[5] |= 0x40; /* Extended Inquiry Result */
475
476 if (lmp_no_flush_capable(hdev))
477 events[7] |= 0x01; /* Enhanced Flush Complete */
478
479 if (lmp_lsto_capable(hdev))
480 events[6] |= 0x80; /* Link Supervision Timeout Changed */
481
482 if (lmp_ssp_capable(hdev)) {
483 events[6] |= 0x01; /* IO Capability Request */
484 events[6] |= 0x02; /* IO Capability Response */
485 events[6] |= 0x04; /* User Confirmation Request */
486 events[6] |= 0x08; /* User Passkey Request */
487 events[6] |= 0x10; /* Remote OOB Data Request */
488 events[6] |= 0x20; /* Simple Pairing Complete */
489 events[7] |= 0x04; /* User Passkey Notification */
490 events[7] |= 0x08; /* Keypress Notification */
491 events[7] |= 0x10; /* Remote Host Supported
492 * Features Notification
493 */
494 }
495
496 if (lmp_le_capable(hdev))
497 events[7] |= 0x20; /* LE Meta-Event */
498
499 hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
500
501 if (lmp_le_capable(hdev)) {
502 memset(events, 0, sizeof(events));
503 events[0] = 0x1f;
504 hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK,
505 sizeof(events), events);
506 }
507}
508
509static void hci_init2_req(struct hci_request *req, unsigned long opt)
510{
511 struct hci_dev *hdev = req->hdev;
512
513 if (lmp_bredr_capable(hdev))
514 bredr_setup(req);
515
516 if (lmp_le_capable(hdev))
517 le_setup(req);
518
519 hci_setup_event_mask(req);
520
521 if (hdev->hci_ver > BLUETOOTH_VER_1_1)
522 hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
523
524 if (lmp_ssp_capable(hdev)) {
525 if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
526 u8 mode = 0x01;
527 hci_req_add(req, HCI_OP_WRITE_SSP_MODE,
528 sizeof(mode), &mode);
529 } else {
530 struct hci_cp_write_eir cp;
531
532 memset(hdev->eir, 0, sizeof(hdev->eir));
533 memset(&cp, 0, sizeof(cp));
534
535 hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
536 }
537 }
538
539 if (lmp_inq_rssi_capable(hdev))
540 hci_setup_inquiry_mode(req);
541
542 if (lmp_inq_tx_pwr_capable(hdev))
543 hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
544
545 if (lmp_ext_feat_capable(hdev)) {
546 struct hci_cp_read_local_ext_features cp;
547
548 cp.page = 0x01;
549 hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES,
550 sizeof(cp), &cp);
551 }
552
553 if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) {
554 u8 enable = 1;
555 hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable),
556 &enable);
557 }
558}
559
560static void hci_setup_link_policy(struct hci_request *req)
561{
562 struct hci_dev *hdev = req->hdev;
563 struct hci_cp_write_def_link_policy cp;
564 u16 link_policy = 0;
565
566 if (lmp_rswitch_capable(hdev))
567 link_policy |= HCI_LP_RSWITCH;
568 if (lmp_hold_capable(hdev))
569 link_policy |= HCI_LP_HOLD;
570 if (lmp_sniff_capable(hdev))
571 link_policy |= HCI_LP_SNIFF;
572 if (lmp_park_capable(hdev))
573 link_policy |= HCI_LP_PARK;
574
575 cp.policy = cpu_to_le16(link_policy);
576 hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp);
577}
578
579static void hci_set_le_support(struct hci_request *req)
580{
581 struct hci_dev *hdev = req->hdev;
582 struct hci_cp_write_le_host_supported cp;
583
584 /* LE-only devices do not support explicit enablement */
585 if (!lmp_bredr_capable(hdev))
586 return;
587
588 memset(&cp, 0, sizeof(cp));
589
590 if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
591 cp.le = 0x01;
592 cp.simul = lmp_le_br_capable(hdev);
593 }
594
595 if (cp.le != lmp_host_le_capable(hdev))
596 hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp),
597 &cp);
598}
599
600static void hci_init3_req(struct hci_request *req, unsigned long opt)
601{
602 struct hci_dev *hdev = req->hdev;
603 u8 p;
604
605 if (hdev->commands[5] & 0x10)
606 hci_setup_link_policy(req);
607
608 if (lmp_le_capable(hdev)) {
609 hci_set_le_support(req);
610 hci_update_ad(req);
611 }
612
613 /* Read features beyond page 1 if available */
614 for (p = 2; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) {
615 struct hci_cp_read_local_ext_features cp;
616
617 cp.page = p;
618 hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES,
619 sizeof(cp), &cp);
620 }
621}
622
623static int __hci_init(struct hci_dev *hdev)
624{
625 int err;
626
627 err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT);
628 if (err < 0)
629 return err;
630
631 /* HCI_BREDR covers single-mode LE, single-mode BR/EDR and dual-mode
632 * BR/EDR/LE type controllers. AMP controllers only need the
633 * first stage init.
634 */
635 if (hdev->dev_type != HCI_BREDR)
636 return 0;
637
638 err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT);
639 if (err < 0)
640 return err;
641
642 return __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT);
643}
644
645static void hci_scan_req(struct hci_request *req, unsigned long opt)
242{ 646{
243 __u8 scan = opt; 647 __u8 scan = opt;
244 648
245 BT_DBG("%s %x", hdev->name, scan); 649 BT_DBG("%s %x", req->hdev->name, scan);
246 650
247 /* Inquiry and Page scans */ 651 /* Inquiry and Page scans */
248 hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); 652 hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
249} 653}
250 654
251static void hci_auth_req(struct hci_dev *hdev, unsigned long opt) 655static void hci_auth_req(struct hci_request *req, unsigned long opt)
252{ 656{
253 __u8 auth = opt; 657 __u8 auth = opt;
254 658
255 BT_DBG("%s %x", hdev->name, auth); 659 BT_DBG("%s %x", req->hdev->name, auth);
256 660
257 /* Authentication */ 661 /* Authentication */
258 hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); 662 hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);
259} 663}
260 664
261static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) 665static void hci_encrypt_req(struct hci_request *req, unsigned long opt)
262{ 666{
263 __u8 encrypt = opt; 667 __u8 encrypt = opt;
264 668
265 BT_DBG("%s %x", hdev->name, encrypt); 669 BT_DBG("%s %x", req->hdev->name, encrypt);
266 670
267 /* Encryption */ 671 /* Encryption */
268 hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); 672 hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);
269} 673}
270 674
271static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) 675static void hci_linkpol_req(struct hci_request *req, unsigned long opt)
272{ 676{
273 __le16 policy = cpu_to_le16(opt); 677 __le16 policy = cpu_to_le16(opt);
274 678
275 BT_DBG("%s %x", hdev->name, policy); 679 BT_DBG("%s %x", req->hdev->name, policy);
276 680
277 /* Default link policy */ 681 /* Default link policy */
278 hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); 682 hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);
279} 683}
280 684
281/* Get HCI device by index. 685/* Get HCI device by index.
@@ -512,9 +916,10 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf)
512 return copied; 916 return copied;
513} 917}
514 918
515static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) 919static void hci_inq_req(struct hci_request *req, unsigned long opt)
516{ 920{
517 struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; 921 struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt;
922 struct hci_dev *hdev = req->hdev;
518 struct hci_cp_inquiry cp; 923 struct hci_cp_inquiry cp;
519 924
520 BT_DBG("%s", hdev->name); 925 BT_DBG("%s", hdev->name);
@@ -526,7 +931,13 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt)
526 memcpy(&cp.lap, &ir->lap, 3); 931 memcpy(&cp.lap, &ir->lap, 3);
527 cp.length = ir->length; 932 cp.length = ir->length;
528 cp.num_rsp = ir->num_rsp; 933 cp.num_rsp = ir->num_rsp;
529 hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); 934 hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp);
935}
936
937static int wait_inquiry(void *word)
938{
939 schedule();
940 return signal_pending(current);
530} 941}
531 942
532int hci_inquiry(void __user *arg) 943int hci_inquiry(void __user *arg)
@@ -556,9 +967,17 @@ int hci_inquiry(void __user *arg)
556 timeo = ir.length * msecs_to_jiffies(2000); 967 timeo = ir.length * msecs_to_jiffies(2000);
557 968
558 if (do_inquiry) { 969 if (do_inquiry) {
559 err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo); 970 err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir,
971 timeo);
560 if (err < 0) 972 if (err < 0)
561 goto done; 973 goto done;
974
975 /* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is
976 * cleared). If it is interrupted by a signal, return -EINTR.
977 */
978 if (wait_on_bit(&hdev->flags, HCI_INQUIRY, wait_inquiry,
979 TASK_INTERRUPTIBLE))
980 return -EINTR;
562 } 981 }
563 982
564 /* for unlimited number of responses we will use buffer with 983 /* for unlimited number of responses we will use buffer with
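hci_inquiry() now blocks until the controller actually clears HCI_INQUIRY instead of returning as soon as the command is issued. In this kernel wait_on_bit() takes an action callback that performs the sleep, so wait_inquiry() just schedules and reports pending signals, making the wait interruptible. A minimal standalone sketch of the waiter side of that idiom; the bit name and flags word are hypothetical:

/* Sketch: interruptible wait on a flag bit, as hci_inquiry() now does. */
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/bitops.h>

#define MY_FLAG_BUSY	0	/* hypothetical bit number */

static int my_wait_action(void *word)
{
	schedule();			/* actually sleep */
	return signal_pending(current);	/* nonzero aborts the wait */
}

static int wait_until_idle(unsigned long *flags)
{
	if (wait_on_bit(flags, MY_FLAG_BUSY, my_wait_action,
			TASK_INTERRUPTIBLE))
		return -EINTR;		/* a signal arrived first */
	return 0;
}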
@@ -654,39 +1073,29 @@ static u8 create_ad(struct hci_dev *hdev, u8 *ptr)
654 return ad_len; 1073 return ad_len;
655} 1074}
656 1075
657int hci_update_ad(struct hci_dev *hdev) 1076void hci_update_ad(struct hci_request *req)
658{ 1077{
1078 struct hci_dev *hdev = req->hdev;
659 struct hci_cp_le_set_adv_data cp; 1079 struct hci_cp_le_set_adv_data cp;
660 u8 len; 1080 u8 len;
661 int err;
662
663 hci_dev_lock(hdev);
664 1081
665 if (!lmp_le_capable(hdev)) { 1082 if (!lmp_le_capable(hdev))
666 err = -EINVAL; 1083 return;
667 goto unlock;
668 }
669 1084
670 memset(&cp, 0, sizeof(cp)); 1085 memset(&cp, 0, sizeof(cp));
671 1086
672 len = create_ad(hdev, cp.data); 1087 len = create_ad(hdev, cp.data);
673 1088
674 if (hdev->adv_data_len == len && 1089 if (hdev->adv_data_len == len &&
675 memcmp(cp.data, hdev->adv_data, len) == 0) { 1090 memcmp(cp.data, hdev->adv_data, len) == 0)
676 err = 0; 1091 return;
677 goto unlock;
678 }
679 1092
680 memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); 1093 memcpy(hdev->adv_data, cp.data, sizeof(cp.data));
681 hdev->adv_data_len = len; 1094 hdev->adv_data_len = len;
682 1095
683 cp.length = len; 1096 cp.length = len;
684 err = hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
685 1097
686unlock: 1098 hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);
687 hci_dev_unlock(hdev);
688
689 return err;
690} 1099}
691 1100
692/* ---- HCI ioctl helpers ---- */ 1101/* ---- HCI ioctl helpers ---- */
@@ -719,34 +1128,37 @@ int hci_dev_open(__u16 dev)
719 goto done; 1128 goto done;
720 } 1129 }
721 1130
722 if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
723 set_bit(HCI_RAW, &hdev->flags);
724
725 /* Treat all non BR/EDR controllers as raw devices if
726 enable_hs is not set */
727 if (hdev->dev_type != HCI_BREDR && !enable_hs)
728 set_bit(HCI_RAW, &hdev->flags);
729
730 if (hdev->open(hdev)) { 1131 if (hdev->open(hdev)) {
731 ret = -EIO; 1132 ret = -EIO;
732 goto done; 1133 goto done;
733 } 1134 }
734 1135
735 if (!test_bit(HCI_RAW, &hdev->flags)) { 1136 atomic_set(&hdev->cmd_cnt, 1);
736 atomic_set(&hdev->cmd_cnt, 1); 1137 set_bit(HCI_INIT, &hdev->flags);
737 set_bit(HCI_INIT, &hdev->flags);
738 hdev->init_last_cmd = 0;
739 1138
740 ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT); 1139 if (hdev->setup && test_bit(HCI_SETUP, &hdev->dev_flags))
1140 ret = hdev->setup(hdev);
741 1141
742 clear_bit(HCI_INIT, &hdev->flags); 1142 if (!ret) {
1143 /* Treat all non BR/EDR controllers as raw devices if
1144 * enable_hs is not set.
1145 */
1146 if (hdev->dev_type != HCI_BREDR && !enable_hs)
1147 set_bit(HCI_RAW, &hdev->flags);
1148
1149 if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks))
1150 set_bit(HCI_RAW, &hdev->flags);
1151
1152 if (!test_bit(HCI_RAW, &hdev->flags))
1153 ret = __hci_init(hdev);
743 } 1154 }
744 1155
1156 clear_bit(HCI_INIT, &hdev->flags);
1157
745 if (!ret) { 1158 if (!ret) {
746 hci_dev_hold(hdev); 1159 hci_dev_hold(hdev);
747 set_bit(HCI_UP, &hdev->flags); 1160 set_bit(HCI_UP, &hdev->flags);
748 hci_notify(hdev, HCI_DEV_UP); 1161 hci_notify(hdev, HCI_DEV_UP);
749 hci_update_ad(hdev);
750 if (!test_bit(HCI_SETUP, &hdev->dev_flags) && 1162 if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&
751 mgmt_valid_hdev(hdev)) { 1163 mgmt_valid_hdev(hdev)) {
752 hci_dev_lock(hdev); 1164 hci_dev_lock(hdev);
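hci_dev_open() now calls an optional hdev->setup() hook exactly once, while HCI_SETUP is still set and before the staged __hci_init(), which is where transport drivers load firmware or fix up the public address. A minimal sketch of a driver wiring it up; the driver functions are hypothetical, only the hdev->setup field is real:

/* Sketch: driver-provided one-time setup hook. */
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>

static int my_driver_setup(struct hci_dev *hdev)
{
	/* Runs once per controller, under HCI_INIT, before __hci_init().
	 * Typical uses: patch firmware, read or correct the BD address. */
	return 0;
}

static void my_driver_init_hdev(struct hci_dev *hdev)
{
	hdev->setup = my_driver_setup;	/* optional; may stay NULL */
}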
@@ -828,7 +1240,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
828 if (!test_bit(HCI_RAW, &hdev->flags) && 1240 if (!test_bit(HCI_RAW, &hdev->flags) &&
829 test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { 1241 test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
830 set_bit(HCI_INIT, &hdev->flags); 1242 set_bit(HCI_INIT, &hdev->flags);
831 __hci_request(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); 1243 __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
832 clear_bit(HCI_INIT, &hdev->flags); 1244 clear_bit(HCI_INIT, &hdev->flags);
833 } 1245 }
834 1246
@@ -847,10 +1259,17 @@ static int hci_dev_do_close(struct hci_dev *hdev)
847 hdev->sent_cmd = NULL; 1259 hdev->sent_cmd = NULL;
848 } 1260 }
849 1261
1262 kfree_skb(hdev->recv_evt);
1263 hdev->recv_evt = NULL;
1264
850 /* After this point our queues are empty 1265 /* After this point our queues are empty
851 * and no tasks are scheduled. */ 1266 * and no tasks are scheduled. */
852 hdev->close(hdev); 1267 hdev->close(hdev);
853 1268
1269 /* Clear flags */
1270 hdev->flags = 0;
1271 hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
1272
854 if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) && 1273 if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&
855 mgmt_valid_hdev(hdev)) { 1274 mgmt_valid_hdev(hdev)) {
856 hci_dev_lock(hdev); 1275 hci_dev_lock(hdev);
@@ -858,9 +1277,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
858 hci_dev_unlock(hdev); 1277 hci_dev_unlock(hdev);
859 } 1278 }
860 1279
861 /* Clear flags */
862 hdev->flags = 0;
863
864 /* Controller radio is available but is currently powered down */ 1280 /* Controller radio is available but is currently powered down */
865 hdev->amp_status = 0; 1281 hdev->amp_status = 0;
866 1282
@@ -921,7 +1337,7 @@ int hci_dev_reset(__u16 dev)
921 hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; 1337 hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;
922 1338
923 if (!test_bit(HCI_RAW, &hdev->flags)) 1339 if (!test_bit(HCI_RAW, &hdev->flags))
924 ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); 1340 ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);
925 1341
926done: 1342done:
927 hci_req_unlock(hdev); 1343 hci_req_unlock(hdev);
@@ -960,8 +1376,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
960 1376
961 switch (cmd) { 1377 switch (cmd) {
962 case HCISETAUTH: 1378 case HCISETAUTH:
963 err = hci_request(hdev, hci_auth_req, dr.dev_opt, 1379 err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
964 HCI_INIT_TIMEOUT); 1380 HCI_INIT_TIMEOUT);
965 break; 1381 break;
966 1382
967 case HCISETENCRYPT: 1383 case HCISETENCRYPT:
@@ -972,24 +1388,24 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)
972 1388
973 if (!test_bit(HCI_AUTH, &hdev->flags)) { 1389 if (!test_bit(HCI_AUTH, &hdev->flags)) {
974 /* Auth must be enabled first */ 1390 /* Auth must be enabled first */
975 err = hci_request(hdev, hci_auth_req, dr.dev_opt, 1391 err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt,
976 HCI_INIT_TIMEOUT); 1392 HCI_INIT_TIMEOUT);
977 if (err) 1393 if (err)
978 break; 1394 break;
979 } 1395 }
980 1396
981 err = hci_request(hdev, hci_encrypt_req, dr.dev_opt, 1397 err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt,
982 HCI_INIT_TIMEOUT); 1398 HCI_INIT_TIMEOUT);
983 break; 1399 break;
984 1400
985 case HCISETSCAN: 1401 case HCISETSCAN:
986 err = hci_request(hdev, hci_scan_req, dr.dev_opt, 1402 err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt,
987 HCI_INIT_TIMEOUT); 1403 HCI_INIT_TIMEOUT);
988 break; 1404 break;
989 1405
990 case HCISETLINKPOL: 1406 case HCISETLINKPOL:
991 err = hci_request(hdev, hci_linkpol_req, dr.dev_opt, 1407 err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt,
992 HCI_INIT_TIMEOUT); 1408 HCI_INIT_TIMEOUT);
993 break; 1409 break;
994 1410
995 case HCISETLINKMODE: 1411 case HCISETLINKMODE:
@@ -1566,7 +1982,7 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
1566 return mgmt_device_unblocked(hdev, bdaddr, type); 1982 return mgmt_device_unblocked(hdev, bdaddr, type);
1567} 1983}
1568 1984
1569static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) 1985static void le_scan_param_req(struct hci_request *req, unsigned long opt)
1570{ 1986{
1571 struct le_scan_params *param = (struct le_scan_params *) opt; 1987 struct le_scan_params *param = (struct le_scan_params *) opt;
1572 struct hci_cp_le_set_scan_param cp; 1988 struct hci_cp_le_set_scan_param cp;
@@ -1576,18 +1992,18 @@ static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt)
1576 cp.interval = cpu_to_le16(param->interval); 1992 cp.interval = cpu_to_le16(param->interval);
1577 cp.window = cpu_to_le16(param->window); 1993 cp.window = cpu_to_le16(param->window);
1578 1994
1579 hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); 1995 hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp);
1580} 1996}
1581 1997
1582static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt) 1998static void le_scan_enable_req(struct hci_request *req, unsigned long opt)
1583{ 1999{
1584 struct hci_cp_le_set_scan_enable cp; 2000 struct hci_cp_le_set_scan_enable cp;
1585 2001
1586 memset(&cp, 0, sizeof(cp)); 2002 memset(&cp, 0, sizeof(cp));
1587 cp.enable = 1; 2003 cp.enable = LE_SCAN_ENABLE;
1588 cp.filter_dup = 1; 2004 cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE;
1589 2005
1590 hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); 2006 hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);
1591} 2007}
1592 2008
1593static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, 2009static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,
@@ -1608,10 +2024,10 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,
1608 2024
1609 hci_req_lock(hdev); 2025 hci_req_lock(hdev);
1610 2026
1611 err = __hci_request(hdev, le_scan_param_req, (unsigned long) &param, 2027 err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) &param,
1612 timeo); 2028 timeo);
1613 if (!err) 2029 if (!err)
1614 err = __hci_request(hdev, le_scan_enable_req, 0, timeo); 2030 err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo);
1615 2031
1616 hci_req_unlock(hdev); 2032 hci_req_unlock(hdev);
1617 2033
@@ -1619,7 +2035,7 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,
1619 return err; 2035 return err;
1620 2036
1621 queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, 2037 queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable,
1622 msecs_to_jiffies(timeout)); 2038 timeout);
1623 2039
1624 return 0; 2040 return 0;
1625} 2041}
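Dropping msecs_to_jiffies() here changes the contract of hci_do_le_scan(): its timeout argument is now expected in jiffies, matching what queue_delayed_work() consumes, so callers convert once at the boundary. A minimal sketch of a call site under that assumption; hci_do_le_scan() is static to hci_core.c, so this is written as if local, and the scan parameters are illustrative:

/* Sketch: caller passing the LE scan timeout in jiffies. */
#include <linux/jiffies.h>

static int start_le_scan_10s(struct hci_dev *hdev)
{
	/* interval/window in 0.625 ms units; 0x01 requests active scan */
	return hci_do_le_scan(hdev, 0x01, 0x0010, 0x0010,
			      msecs_to_jiffies(10000));
}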
@@ -1729,7 +2145,6 @@ struct hci_dev *hci_alloc_dev(void)
1729 INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); 2145 INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off);
1730 INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); 2146 INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work);
1731 2147
1732 skb_queue_head_init(&hdev->driver_init);
1733 skb_queue_head_init(&hdev->rx_q); 2148 skb_queue_head_init(&hdev->rx_q);
1734 skb_queue_head_init(&hdev->cmd_q); 2149 skb_queue_head_init(&hdev->cmd_q);
1735 skb_queue_head_init(&hdev->raw_q); 2150 skb_queue_head_init(&hdev->raw_q);
@@ -1748,8 +2163,6 @@ EXPORT_SYMBOL(hci_alloc_dev);
1748/* Free HCI device */ 2163/* Free HCI device */
1749void hci_free_dev(struct hci_dev *hdev) 2164void hci_free_dev(struct hci_dev *hdev)
1750{ 2165{
1751 skb_queue_purge(&hdev->driver_init);
1752
1753 /* will free via device release */ 2166 /* will free via device release */
1754 put_device(&hdev->dev); 2167 put_device(&hdev->dev);
1755} 2168}
@@ -2160,20 +2573,55 @@ static int hci_send_frame(struct sk_buff *skb)
2160 return hdev->send(skb); 2573 return hdev->send(skb);
2161} 2574}
2162 2575
2163/* Send HCI command */ 2576void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
2164int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) 2577{
2578 skb_queue_head_init(&req->cmd_q);
2579 req->hdev = hdev;
2580 req->err = 0;
2581}
2582
2583int hci_req_run(struct hci_request *req, hci_req_complete_t complete)
2584{
2585 struct hci_dev *hdev = req->hdev;
2586 struct sk_buff *skb;
2587 unsigned long flags;
2588
2589 BT_DBG("length %u", skb_queue_len(&req->cmd_q));
2590
2591 /* If an error occurred during request building, remove all HCI
2592 * commands queued on the HCI request queue.
2593 */
2594 if (req->err) {
2595 skb_queue_purge(&req->cmd_q);
2596 return req->err;
2597 }
2598
2599 /* Do not allow empty requests */
2600 if (skb_queue_empty(&req->cmd_q))
2601 return -ENODATA;
2602
2603 skb = skb_peek_tail(&req->cmd_q);
2604 bt_cb(skb)->req.complete = complete;
2605
2606 spin_lock_irqsave(&hdev->cmd_q.lock, flags);
2607 skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q);
2608 spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
2609
2610 queue_work(hdev->workqueue, &hdev->cmd_work);
2611
2612 return 0;
2613}
2614
2615static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode,
2616 u32 plen, const void *param)
2165{ 2617{
2166 int len = HCI_COMMAND_HDR_SIZE + plen; 2618 int len = HCI_COMMAND_HDR_SIZE + plen;
2167 struct hci_command_hdr *hdr; 2619 struct hci_command_hdr *hdr;
2168 struct sk_buff *skb; 2620 struct sk_buff *skb;
2169 2621
2170 BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
2171
2172 skb = bt_skb_alloc(len, GFP_ATOMIC); 2622 skb = bt_skb_alloc(len, GFP_ATOMIC);
2173 if (!skb) { 2623 if (!skb)
2174 BT_ERR("%s no memory for command", hdev->name); 2624 return NULL;
2175 return -ENOMEM;
2176 }
2177 2625
2178 hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); 2626 hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);
2179 hdr->opcode = cpu_to_le16(opcode); 2627 hdr->opcode = cpu_to_le16(opcode);
@@ -2187,8 +2635,27 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
2187 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; 2635 bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;
2188 skb->dev = (void *) hdev; 2636 skb->dev = (void *) hdev;
2189 2637
2190 if (test_bit(HCI_INIT, &hdev->flags)) 2638 return skb;
2191 hdev->init_last_cmd = opcode; 2639}
2640
2641/* Send HCI command */
2642int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
2643 const void *param)
2644{
2645 struct sk_buff *skb;
2646
2647 BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
2648
2649 skb = hci_prepare_cmd(hdev, opcode, plen, param);
2650 if (!skb) {
2651 BT_ERR("%s no memory for command", hdev->name);
2652 return -ENOMEM;
2653 }
2654
2655 /* Stand-alone HCI commands must be flagged as
2656 * single-command requests.
2657 */
2658 bt_cb(skb)->req.start = true;
2192 2659
2193 skb_queue_tail(&hdev->cmd_q, skb); 2660 skb_queue_tail(&hdev->cmd_q, skb);
2194 queue_work(hdev->workqueue, &hdev->cmd_work); 2661 queue_work(hdev->workqueue, &hdev->cmd_work);
@@ -2196,6 +2663,43 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)
2196 return 0; 2663 return 0;
2197} 2664}
2198 2665
2666/* Queue a command to an asynchronous HCI request */
2667void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen,
2668 const void *param, u8 event)
2669{
2670 struct hci_dev *hdev = req->hdev;
2671 struct sk_buff *skb;
2672
2673 BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);
2674
2675 /* If an error occurred during request building, there is no point in
2676 * queueing the HCI command. We can simply return.
2677 */
2678 if (req->err)
2679 return;
2680
2681 skb = hci_prepare_cmd(hdev, opcode, plen, param);
2682 if (!skb) {
2683 BT_ERR("%s no memory for command (opcode 0x%4.4x)",
2684 hdev->name, opcode);
2685 req->err = -ENOMEM;
2686 return;
2687 }
2688
2689 if (skb_queue_empty(&req->cmd_q))
2690 bt_cb(skb)->req.start = true;
2691
2692 bt_cb(skb)->req.event = event;
2693
2694 skb_queue_tail(&req->cmd_q, skb);
2695}
2696
2697void hci_req_add(struct hci_request *req, u16 opcode, u32 plen,
2698 const void *param)
2699{
2700 hci_req_add_ev(req, opcode, plen, param, 0);
2701}
2702
2199/* Get data from the previously sent command */ 2703/* Get data from the previously sent command */
2200void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) 2704void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
2201{ 2705{
@@ -2398,7 +2902,7 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)
2398 if (c->type == type && c->sent) { 2902 if (c->type == type && c->sent) {
2399 BT_ERR("%s killing stalled connection %pMR", 2903 BT_ERR("%s killing stalled connection %pMR",
2400 hdev->name, &c->dst); 2904 hdev->name, &c->dst);
2401 hci_acl_disconn(c, HCI_ERROR_REMOTE_USER_TERM); 2905 hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);
2402 } 2906 }
2403 } 2907 }
2404 2908
@@ -2860,6 +3364,97 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
2860 kfree_skb(skb); 3364 kfree_skb(skb);
2861} 3365}
2862 3366
3367static bool hci_req_is_complete(struct hci_dev *hdev)
3368{
3369 struct sk_buff *skb;
3370
3371 skb = skb_peek(&hdev->cmd_q);
3372 if (!skb)
3373 return true;
3374
3375 return bt_cb(skb)->req.start;
3376}
3377
3378static void hci_resend_last(struct hci_dev *hdev)
3379{
3380 struct hci_command_hdr *sent;
3381 struct sk_buff *skb;
3382 u16 opcode;
3383
3384 if (!hdev->sent_cmd)
3385 return;
3386
3387 sent = (void *) hdev->sent_cmd->data;
3388 opcode = __le16_to_cpu(sent->opcode);
3389 if (opcode == HCI_OP_RESET)
3390 return;
3391
3392 skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
3393 if (!skb)
3394 return;
3395
3396 skb_queue_head(&hdev->cmd_q, skb);
3397 queue_work(hdev->workqueue, &hdev->cmd_work);
3398}
3399
3400void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status)
3401{
3402 hci_req_complete_t req_complete = NULL;
3403 struct sk_buff *skb;
3404 unsigned long flags;
3405
3406 BT_DBG("opcode 0x%04x status 0x%02x", opcode, status);
3407
3408 /* If the completed command doesn't match the last one that was
3409 * sent, we need to do special handling of it.
3410 */
3411 if (!hci_sent_cmd_data(hdev, opcode)) {
3412 /* Some CSR based controllers generate a spontaneous
3413 * reset complete event during init and any pending
3414 * command will never be completed. In such a case we
3415 * need to resend whatever was the last sent
3416 * command.
3417 */
3418 if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET)
3419 hci_resend_last(hdev);
3420
3421 return;
3422 }
3423
3424 /* If the command succeeded and there are still more commands in
3425 * this request, the request is not yet complete.
3426 */
3427 if (!status && !hci_req_is_complete(hdev))
3428 return;
3429
3430 /* If this was the last command in a request, the complete
3431 * callback would be found in hdev->sent_cmd instead of the
3432 * command queue (hdev->cmd_q).
3433 */
3434 if (hdev->sent_cmd) {
3435 req_complete = bt_cb(hdev->sent_cmd)->req.complete;
3436 if (req_complete)
3437 goto call_complete;
3438 }
3439
3440 /* Remove all pending commands belonging to this request */
3441 spin_lock_irqsave(&hdev->cmd_q.lock, flags);
3442 while ((skb = __skb_dequeue(&hdev->cmd_q))) {
3443 if (bt_cb(skb)->req.start) {
3444 __skb_queue_head(&hdev->cmd_q, skb);
3445 break;
3446 }
3447
3448 req_complete = bt_cb(skb)->req.complete;
3449 kfree_skb(skb);
3450 }
3451 spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
3452
3453call_complete:
3454 if (req_complete)
3455 req_complete(hdev, status);
3456}
3457
2863static void hci_rx_work(struct work_struct *work) 3458static void hci_rx_work(struct work_struct *work)
2864{ 3459{
2865 struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); 3460 struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work);
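Request boundaries on the shared command queue are tracked through bt_cb(skb)->req.start: the first command of every request carries the flag, so hci_req_cmd_complete() can tell whether the current request has more commands pending and, on failure, flush exactly the remainder of the failing request. A small standalone simulation of that boundary scan, with an array standing in for the sk_buff queue:

/* Sketch: how 'start' flags delimit requests on one command queue.
 * Userspace simulation of the failure path in hci_req_cmd_complete(). */
#include <stdbool.h>
#include <stdio.h>

struct cmd { const char *name; bool start; };

static struct cmd q[] = {
	{ "reset",        true  },	/* request A starts here */
	{ "read_feats",   false },
	{ "read_version", false },
	{ "set_evt_mask", true  },	/* request B starts here */
	{ "single_cmd",   true  },	/* stand-alone = its own request */
};

int main(void)
{
	size_t n = sizeof(q) / sizeof(q[0]);
	size_t i = 1;	/* index 0 was the command that just failed */

	/* Discard the rest of request A, stopping at the next start. */
	while (i < n && !q[i].start) {
		printf("discard %s\n", q[i].name);
		i++;
	}
	if (i < n)
		printf("next request begins at %s\n", q[i].name);
	return 0;
}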
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 477726a63512..b93cd2eb5d58 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -48,13 +48,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
48 } 48 }
49 49
50 clear_bit(HCI_INQUIRY, &hdev->flags); 50 clear_bit(HCI_INQUIRY, &hdev->flags);
51 smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
52 wake_up_bit(&hdev->flags, HCI_INQUIRY);
51 53
52 hci_dev_lock(hdev); 54 hci_dev_lock(hdev);
53 hci_discovery_set_state(hdev, DISCOVERY_STOPPED); 55 hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
54 hci_dev_unlock(hdev); 56 hci_dev_unlock(hdev);
55 57
56 hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status);
57
58 hci_conn_check_pending(hdev); 58 hci_conn_check_pending(hdev);
59} 59}
60 60
@@ -183,8 +183,6 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev,
183 183
184 if (!status) 184 if (!status)
185 hdev->link_policy = get_unaligned_le16(sent); 185 hdev->link_policy = get_unaligned_le16(sent);
186
187 hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status);
188} 186}
189 187
190static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) 188static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
@@ -195,11 +193,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)
195 193
196 clear_bit(HCI_RESET, &hdev->flags); 194 clear_bit(HCI_RESET, &hdev->flags);
197 195
198 hci_req_complete(hdev, HCI_OP_RESET, status);
199
200 /* Reset all non-persistent flags */ 196 /* Reset all non-persistent flags */
201 hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) | 197 hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
202 BIT(HCI_PERIODIC_INQ));
203 198
204 hdev->discovery.state = DISCOVERY_STOPPED; 199 hdev->discovery.state = DISCOVERY_STOPPED;
205 hdev->inq_tx_power = HCI_TX_POWER_INVALID; 200 hdev->inq_tx_power = HCI_TX_POWER_INVALID;
@@ -228,11 +223,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb)
228 memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH); 223 memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH);
229 224
230 hci_dev_unlock(hdev); 225 hci_dev_unlock(hdev);
231
232 if (!status && !test_bit(HCI_INIT, &hdev->flags))
233 hci_update_ad(hdev);
234
235 hci_req_complete(hdev, HCI_OP_WRITE_LOCAL_NAME, status);
236} 226}
237 227
238static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) 228static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb)
@@ -270,8 +260,6 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
270 260
271 if (test_bit(HCI_MGMT, &hdev->dev_flags)) 261 if (test_bit(HCI_MGMT, &hdev->dev_flags))
272 mgmt_auth_enable_complete(hdev, status); 262 mgmt_auth_enable_complete(hdev, status);
273
274 hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status);
275} 263}
276 264
277static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) 265static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
@@ -293,8 +281,6 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
293 else 281 else
294 clear_bit(HCI_ENCRYPT, &hdev->flags); 282 clear_bit(HCI_ENCRYPT, &hdev->flags);
295 } 283 }
296
297 hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status);
298} 284}
299 285
300static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) 286static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
@@ -343,7 +329,6 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)
343 329
344done: 330done:
345 hci_dev_unlock(hdev); 331 hci_dev_unlock(hdev);
346 hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status);
347} 332}
348 333
349static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) 334static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb)
@@ -435,15 +420,6 @@ static void hci_cc_write_voice_setting(struct hci_dev *hdev,
435 hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING); 420 hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING);
436} 421}
437 422
438static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
439{
440 __u8 status = *((__u8 *) skb->data);
441
442 BT_DBG("%s status 0x%2.2x", hdev->name, status);
443
444 hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status);
445}
446
447static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) 423static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
448{ 424{
449 __u8 status = *((__u8 *) skb->data); 425 __u8 status = *((__u8 *) skb->data);
@@ -457,9 +433,9 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
457 433
458 if (!status) { 434 if (!status) {
459 if (sent->mode) 435 if (sent->mode)
460 hdev->host_features[0] |= LMP_HOST_SSP; 436 hdev->features[1][0] |= LMP_HOST_SSP;
461 else 437 else
462 hdev->host_features[0] &= ~LMP_HOST_SSP; 438 hdev->features[1][0] &= ~LMP_HOST_SSP;
463 } 439 }
464 440
465 if (test_bit(HCI_MGMT, &hdev->dev_flags)) 441 if (test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -472,211 +448,6 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)
472 } 448 }
473} 449}
474 450
475static u8 hci_get_inquiry_mode(struct hci_dev *hdev)
476{
477 if (lmp_ext_inq_capable(hdev))
478 return 2;
479
480 if (lmp_inq_rssi_capable(hdev))
481 return 1;
482
483 if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 &&
484 hdev->lmp_subver == 0x0757)
485 return 1;
486
487 if (hdev->manufacturer == 15) {
488 if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963)
489 return 1;
490 if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963)
491 return 1;
492 if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965)
493 return 1;
494 }
495
496 if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 &&
497 hdev->lmp_subver == 0x1805)
498 return 1;
499
500 return 0;
501}
502
503static void hci_setup_inquiry_mode(struct hci_dev *hdev)
504{
505 u8 mode;
506
507 mode = hci_get_inquiry_mode(hdev);
508
509 hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode);
510}
511
512static void hci_setup_event_mask(struct hci_dev *hdev)
513{
514 /* The second byte is 0xff instead of 0x9f (two reserved bits
515 * disabled) since a Broadcom 1.2 dongle doesn't respond to the
516 * command otherwise */
517 u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 };
518
519 /* CSR 1.1 dongles do not accept any bitfield, so don't try to set
520 * any event mask for pre 1.2 devices */
521 if (hdev->hci_ver < BLUETOOTH_VER_1_2)
522 return;
523
524 if (lmp_bredr_capable(hdev)) {
525 events[4] |= 0x01; /* Flow Specification Complete */
526 events[4] |= 0x02; /* Inquiry Result with RSSI */
527 events[4] |= 0x04; /* Read Remote Extended Features Complete */
528 events[5] |= 0x08; /* Synchronous Connection Complete */
529 events[5] |= 0x10; /* Synchronous Connection Changed */
530 }
531
532 if (lmp_inq_rssi_capable(hdev))
533 events[4] |= 0x02; /* Inquiry Result with RSSI */
534
535 if (lmp_sniffsubr_capable(hdev))
536 events[5] |= 0x20; /* Sniff Subrating */
537
538 if (lmp_pause_enc_capable(hdev))
539 events[5] |= 0x80; /* Encryption Key Refresh Complete */
540
541 if (lmp_ext_inq_capable(hdev))
542 events[5] |= 0x40; /* Extended Inquiry Result */
543
544 if (lmp_no_flush_capable(hdev))
545 events[7] |= 0x01; /* Enhanced Flush Complete */
546
547 if (lmp_lsto_capable(hdev))
548 events[6] |= 0x80; /* Link Supervision Timeout Changed */
549
550 if (lmp_ssp_capable(hdev)) {
551 events[6] |= 0x01; /* IO Capability Request */
552 events[6] |= 0x02; /* IO Capability Response */
553 events[6] |= 0x04; /* User Confirmation Request */
554 events[6] |= 0x08; /* User Passkey Request */
555 events[6] |= 0x10; /* Remote OOB Data Request */
556 events[6] |= 0x20; /* Simple Pairing Complete */
557 events[7] |= 0x04; /* User Passkey Notification */
558 events[7] |= 0x08; /* Keypress Notification */
559 events[7] |= 0x10; /* Remote Host Supported
560 * Features Notification */
561 }
562
563 if (lmp_le_capable(hdev))
564 events[7] |= 0x20; /* LE Meta-Event */
565
566 hci_send_cmd(hdev, HCI_OP_SET_EVENT_MASK, sizeof(events), events);
567
568 if (lmp_le_capable(hdev)) {
569 memset(events, 0, sizeof(events));
570 events[0] = 0x1f;
571 hci_send_cmd(hdev, HCI_OP_LE_SET_EVENT_MASK,
572 sizeof(events), events);
573 }
574}
575
576static void bredr_setup(struct hci_dev *hdev)
577{
578 struct hci_cp_delete_stored_link_key cp;
579 __le16 param;
580 __u8 flt_type;
581
582 /* Read Buffer Size (ACL mtu, max pkt, etc.) */
583 hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL);
584
585 /* Read Class of Device */
586 hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL);
587
588 /* Read Local Name */
589 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL);
590
591 /* Read Voice Setting */
592 hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL);
593
594 /* Clear Event Filters */
595 flt_type = HCI_FLT_CLEAR_ALL;
596 hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type);
597
598 /* Connection accept timeout ~20 secs */
599 param = __constant_cpu_to_le16(0x7d00);
600 hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
601
602 bacpy(&cp.bdaddr, BDADDR_ANY);
603 cp.delete_all = 1;
604 hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp);
605}
606
607static void le_setup(struct hci_dev *hdev)
608{
609 /* Read LE Buffer Size */
610 hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL);
611
612 /* Read LE Local Supported Features */
613 hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL);
614
615 /* Read LE Advertising Channel TX Power */
616 hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL);
617
618 /* Read LE White List Size */
619 hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL);
620
621 /* Read LE Supported States */
622 hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL);
623}
624
625static void hci_setup(struct hci_dev *hdev)
626{
627 if (hdev->dev_type != HCI_BREDR)
628 return;
629
630 /* Read BD Address */
631 hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL);
632
633 if (lmp_bredr_capable(hdev))
634 bredr_setup(hdev);
635
636 if (lmp_le_capable(hdev))
637 le_setup(hdev);
638
639 hci_setup_event_mask(hdev);
640
641 if (hdev->hci_ver > BLUETOOTH_VER_1_1)
642 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
643
644 if (lmp_ssp_capable(hdev)) {
645 if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) {
646 u8 mode = 0x01;
647 hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE,
648 sizeof(mode), &mode);
649 } else {
650 struct hci_cp_write_eir cp;
651
652 memset(hdev->eir, 0, sizeof(hdev->eir));
653 memset(&cp, 0, sizeof(cp));
654
655 hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
656 }
657 }
658
659 if (lmp_inq_rssi_capable(hdev))
660 hci_setup_inquiry_mode(hdev);
661
662 if (lmp_inq_tx_pwr_capable(hdev))
663 hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL);
664
665 if (lmp_ext_feat_capable(hdev)) {
666 struct hci_cp_read_local_ext_features cp;
667
668 cp.page = 0x01;
669 hci_send_cmd(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp),
670 &cp);
671 }
672
673 if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) {
674 u8 enable = 1;
675 hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable),
676 &enable);
677 }
678}
679
 static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
 {
 	struct hci_rp_read_local_version *rp = (void *) skb->data;
@@ -684,7 +455,7 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
 	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
 
 	if (rp->status)
-		goto done;
+		return;
 
 	hdev->hci_ver = rp->hci_ver;
 	hdev->hci_rev = __le16_to_cpu(rp->hci_rev);
@@ -694,30 +465,6 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
 
 	BT_DBG("%s manufacturer 0x%4.4x hci ver %d:%d", hdev->name,
 	       hdev->manufacturer, hdev->hci_ver, hdev->hci_rev);
-
-	if (test_bit(HCI_INIT, &hdev->flags))
-		hci_setup(hdev);
-
-done:
-	hci_req_complete(hdev, HCI_OP_READ_LOCAL_VERSION, rp->status);
-}
-
-static void hci_setup_link_policy(struct hci_dev *hdev)
-{
-	struct hci_cp_write_def_link_policy cp;
-	u16 link_policy = 0;
-
-	if (lmp_rswitch_capable(hdev))
-		link_policy |= HCI_LP_RSWITCH;
-	if (lmp_hold_capable(hdev))
-		link_policy |= HCI_LP_HOLD;
-	if (lmp_sniff_capable(hdev))
-		link_policy |= HCI_LP_SNIFF;
-	if (lmp_park_capable(hdev))
-		link_policy |= HCI_LP_PARK;
-
-	cp.policy = cpu_to_le16(link_policy);
-	hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp);
 }
 
 static void hci_cc_read_local_commands(struct hci_dev *hdev,
@@ -727,16 +474,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev,
 
 	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
 
-	if (rp->status)
-		goto done;
-
-	memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
-
-	if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10))
-		hci_setup_link_policy(hdev);
-
-done:
-	hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status);
+	if (!rp->status)
+		memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));
 }
 
 static void hci_cc_read_local_features(struct hci_dev *hdev,
@@ -754,18 +493,18 @@ static void hci_cc_read_local_features(struct hci_dev *hdev,
 	/* Adjust default settings according to features
 	 * supported by device. */
 
-	if (hdev->features[0] & LMP_3SLOT)
+	if (hdev->features[0][0] & LMP_3SLOT)
 		hdev->pkt_type |= (HCI_DM3 | HCI_DH3);
 
-	if (hdev->features[0] & LMP_5SLOT)
+	if (hdev->features[0][0] & LMP_5SLOT)
 		hdev->pkt_type |= (HCI_DM5 | HCI_DH5);
 
-	if (hdev->features[1] & LMP_HV2) {
+	if (hdev->features[0][1] & LMP_HV2) {
 		hdev->pkt_type |= (HCI_HV2);
 		hdev->esco_type |= (ESCO_HV2);
 	}
 
-	if (hdev->features[1] & LMP_HV3) {
+	if (hdev->features[0][1] & LMP_HV3) {
 		hdev->pkt_type |= (HCI_HV3);
 		hdev->esco_type |= (ESCO_HV3);
 	}
@@ -773,42 +512,26 @@ static void hci_cc_read_local_features(struct hci_dev *hdev,
 	if (lmp_esco_capable(hdev))
 		hdev->esco_type |= (ESCO_EV3);
 
-	if (hdev->features[4] & LMP_EV4)
+	if (hdev->features[0][4] & LMP_EV4)
 		hdev->esco_type |= (ESCO_EV4);
 
-	if (hdev->features[4] & LMP_EV5)
+	if (hdev->features[0][4] & LMP_EV5)
 		hdev->esco_type |= (ESCO_EV5);
 
-	if (hdev->features[5] & LMP_EDR_ESCO_2M)
+	if (hdev->features[0][5] & LMP_EDR_ESCO_2M)
 		hdev->esco_type |= (ESCO_2EV3);
 
-	if (hdev->features[5] & LMP_EDR_ESCO_3M)
+	if (hdev->features[0][5] & LMP_EDR_ESCO_3M)
 		hdev->esco_type |= (ESCO_3EV3);
 
-	if (hdev->features[5] & LMP_EDR_3S_ESCO)
+	if (hdev->features[0][5] & LMP_EDR_3S_ESCO)
 		hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5);
 
 	BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name,
-	       hdev->features[0], hdev->features[1],
-	       hdev->features[2], hdev->features[3],
-	       hdev->features[4], hdev->features[5],
-	       hdev->features[6], hdev->features[7]);
-}
-
-static void hci_set_le_support(struct hci_dev *hdev)
-{
-	struct hci_cp_write_le_host_supported cp;
-
-	memset(&cp, 0, sizeof(cp));
-
-	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
-		cp.le = 1;
-		cp.simul = lmp_le_br_capable(hdev);
-	}
-
-	if (cp.le != lmp_host_le_capable(hdev))
-		hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp),
-			     &cp);
+	       hdev->features[0][0], hdev->features[0][1],
+	       hdev->features[0][2], hdev->features[0][3],
+	       hdev->features[0][4], hdev->features[0][5],
+	       hdev->features[0][6], hdev->features[0][7]);
 }
 
 static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
@@ -819,22 +542,12 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev,
 	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
 
 	if (rp->status)
-		goto done;
-
-	switch (rp->page) {
-	case 0:
-		memcpy(hdev->features, rp->features, 8);
-		break;
-	case 1:
-		memcpy(hdev->host_features, rp->features, 8);
-		break;
-	}
+		return;
 
-	if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev))
-		hci_set_le_support(hdev);
+	hdev->max_page = rp->max_page;
 
-done:
-	hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status);
+	if (rp->page < HCI_MAX_PAGES)
+		memcpy(hdev->features[rp->page], rp->features, 8);
 }
 
 static void hci_cc_read_flow_control_mode(struct hci_dev *hdev,
@@ -844,12 +557,8 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev,
 
 	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
 
-	if (rp->status)
-		return;
-
-	hdev->flow_ctl_mode = rp->mode;
-
-	hci_req_complete(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, rp->status);
+	if (!rp->status)
+		hdev->flow_ctl_mode = rp->mode;
 }
 
 static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb)
@@ -886,8 +595,65 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb)
 
 	if (!rp->status)
 		bacpy(&hdev->bdaddr, &rp->bdaddr);
+}
+
+static void hci_cc_read_page_scan_activity(struct hci_dev *hdev,
+					   struct sk_buff *skb)
+{
+	struct hci_rp_read_page_scan_activity *rp = (void *) skb->data;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+	if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) {
+		hdev->page_scan_interval = __le16_to_cpu(rp->interval);
+		hdev->page_scan_window = __le16_to_cpu(rp->window);
+	}
+}
+
+static void hci_cc_write_page_scan_activity(struct hci_dev *hdev,
+					    struct sk_buff *skb)
+{
+	u8 status = *((u8 *) skb->data);
+	struct hci_cp_write_page_scan_activity *sent;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY);
+	if (!sent)
+		return;
+
+	hdev->page_scan_interval = __le16_to_cpu(sent->interval);
+	hdev->page_scan_window = __le16_to_cpu(sent->window);
+}
+
+static void hci_cc_read_page_scan_type(struct hci_dev *hdev,
+				       struct sk_buff *skb)
+{
+	struct hci_rp_read_page_scan_type *rp = (void *) skb->data;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+	if (test_bit(HCI_INIT, &hdev->flags) && !rp->status)
+		hdev->page_scan_type = rp->type;
+}
+
+static void hci_cc_write_page_scan_type(struct hci_dev *hdev,
+				       struct sk_buff *skb)
+{
+	u8 status = *((u8 *) skb->data);
+	u8 *type;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+	if (status)
+		return;
+
+	type = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE);
+	if (type)
+		hdev->page_scan_type = *type;
-
-	hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status);
 }
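The page scan interval and window cached by the new handlers above are, per the HCI specification, also counts of 0.625 ms slots; for example a controller reporting the common default interval of 0x0800 (a typical value, not one this diff establishes) is scanning every 2048 x 0.625 ms = 1.28 s.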
892 658
893static void hci_cc_read_data_block_size(struct hci_dev *hdev, 659static void hci_cc_read_data_block_size(struct hci_dev *hdev,
@@ -908,17 +674,6 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev,
908 674
909 BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu, 675 BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu,
910 hdev->block_cnt, hdev->block_len); 676 hdev->block_cnt, hdev->block_len);
911
912 hci_req_complete(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, rp->status);
913}
914
915static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb)
916{
917 __u8 status = *((__u8 *) skb->data);
918
919 BT_DBG("%s status 0x%2.2x", hdev->name, status);
920
921 hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status);
922} 677}
923 678
924static void hci_cc_read_local_amp_info(struct hci_dev *hdev, 679static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
@@ -942,8 +697,6 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
942 hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to); 697 hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to);
943 hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); 698 hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to);
944 699
945 hci_req_complete(hdev, HCI_OP_READ_LOCAL_AMP_INFO, rp->status);
946
947a2mp_rsp: 700a2mp_rsp:
948 a2mp_send_getinfo_rsp(hdev); 701 a2mp_send_getinfo_rsp(hdev);
949} 702}
@@ -985,35 +738,6 @@ a2mp_rsp:
985 a2mp_send_create_phy_link_req(hdev, rp->status); 738 a2mp_send_create_phy_link_req(hdev, rp->status);
986} 739}
987 740
988static void hci_cc_delete_stored_link_key(struct hci_dev *hdev,
989 struct sk_buff *skb)
990{
991 __u8 status = *((__u8 *) skb->data);
992
993 BT_DBG("%s status 0x%2.2x", hdev->name, status);
994
995 hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status);
996}
997
998static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
999{
1000 __u8 status = *((__u8 *) skb->data);
1001
1002 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1003
1004 hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status);
1005}
1006
1007static void hci_cc_write_inquiry_mode(struct hci_dev *hdev,
1008 struct sk_buff *skb)
1009{
1010 __u8 status = *((__u8 *) skb->data);
1011
1012 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1013
1014 hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status);
1015}
1016
1017static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev, 741static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
1018 struct sk_buff *skb) 742 struct sk_buff *skb)
1019{ 743{
@@ -1023,17 +747,6 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
1023 747
1024 if (!rp->status) 748 if (!rp->status)
1025 hdev->inq_tx_power = rp->tx_power; 749 hdev->inq_tx_power = rp->tx_power;
1026
1027 hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, rp->status);
1028}
1029
1030static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb)
1031{
1032 __u8 status = *((__u8 *) skb->data);
1033
1034 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1035
1036 hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status);
1037} 750}
1038 751
1039static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) 752static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1095,8 +808,6 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev,
1095 hdev->le_cnt = hdev->le_pkts; 808 hdev->le_cnt = hdev->le_pkts;
1096 809
1097 BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); 810 BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts);
1098
1099 hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status);
1100} 811}
1101 812
1102static void hci_cc_le_read_local_features(struct hci_dev *hdev, 813static void hci_cc_le_read_local_features(struct hci_dev *hdev,
@@ -1108,8 +819,6 @@ static void hci_cc_le_read_local_features(struct hci_dev *hdev,
1108 819
1109 if (!rp->status) 820 if (!rp->status)
1110 memcpy(hdev->le_features, rp->features, 8); 821 memcpy(hdev->le_features, rp->features, 8);
1111
1112 hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status);
1113} 822}
1114 823
 static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,
@@ -1119,22 +828,8 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,
 
 	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
 
-	if (!rp->status) {
+	if (!rp->status)
 		hdev->adv_tx_power = rp->tx_power;
-		if (!test_bit(HCI_INIT, &hdev->flags))
-			hci_update_ad(hdev);
-	}
-
-	hci_req_complete(hdev, HCI_OP_LE_READ_ADV_TX_POWER, rp->status);
-}
-
-static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb)
-{
-	__u8 status = *((__u8 *) skb->data);
-
-	BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
-	hci_req_complete(hdev, HCI_OP_LE_SET_EVENT_MASK, status);
 }
 
 static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1231,12 +926,15 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
 		clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags);
 	}
 
-	hci_dev_unlock(hdev);
+	if (!test_bit(HCI_INIT, &hdev->flags)) {
+		struct hci_request req;
 
-	if (!test_bit(HCI_INIT, &hdev->flags))
-		hci_update_ad(hdev);
+		hci_req_init(&req, hdev);
+		hci_update_ad(&req);
+		hci_req_run(&req, NULL);
+	}
 
-	hci_req_complete(hdev, HCI_OP_LE_SET_ADV_ENABLE, status);
+	hci_dev_unlock(hdev);
 }
 
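The hunk above shows the request pattern the rest of this series converts callers to: initialize an hci_request, queue commands on it, then run it. A minimal sketch using only the calls visible in this diff (error handling and the completion callback omitted):

	static void example_refresh_ad(struct hci_dev *hdev)
	{
		struct hci_request req;

		hci_req_init(&req, hdev);	/* bind an empty request to hdev */
		hci_update_ad(&req);		/* queue commands on the request */
		hci_req_run(&req, NULL);	/* submit; NULL = no callback */
	}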
1242static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) 940static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1245,8 +943,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)
1245 943
1246 BT_DBG("%s status 0x%2.2x", hdev->name, status); 944 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1247 945
1248 hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_PARAM, status);
1249
1250 if (status) { 946 if (status) {
1251 hci_dev_lock(hdev); 947 hci_dev_lock(hdev);
1252 mgmt_start_discovery_failed(hdev, status); 948 mgmt_start_discovery_failed(hdev, status);
@@ -1268,9 +964,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 		return;
 
 	switch (cp->enable) {
-	case LE_SCANNING_ENABLED:
-		hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_ENABLE, status);
-
+	case LE_SCAN_ENABLE:
 		if (status) {
 			hci_dev_lock(hdev);
 			mgmt_start_discovery_failed(hdev, status);
@@ -1285,7 +979,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,
 		hci_dev_unlock(hdev);
 		break;
 
-	case LE_SCANNING_DISABLED:
+	case LE_SCAN_DISABLE:
 		if (status) {
 			hci_dev_lock(hdev);
 			mgmt_stop_discovery_failed(hdev, status);
@@ -1321,32 +1015,6 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
1321 1015
1322 if (!rp->status) 1016 if (!rp->status)
1323 hdev->le_white_list_size = rp->size; 1017 hdev->le_white_list_size = rp->size;
1324
1325 hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status);
1326}
1327
1328static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb)
1329{
1330 struct hci_rp_le_ltk_reply *rp = (void *) skb->data;
1331
1332 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
1333
1334 if (rp->status)
1335 return;
1336
1337 hci_req_complete(hdev, HCI_OP_LE_LTK_REPLY, rp->status);
1338}
1339
1340static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb)
1341{
1342 struct hci_rp_le_ltk_neg_reply *rp = (void *) skb->data;
1343
1344 BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
1345
1346 if (rp->status)
1347 return;
1348
1349 hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status);
1350} 1018}
1351 1019
1352static void hci_cc_le_read_supported_states(struct hci_dev *hdev, 1020static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
@@ -1358,8 +1026,6 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev,
1358 1026
1359 if (!rp->status) 1027 if (!rp->status)
1360 memcpy(hdev->le_states, rp->le_states, 8); 1028 memcpy(hdev->le_states, rp->le_states, 8);
1361
1362 hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status);
1363} 1029}
1364 1030
 static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
@@ -1376,21 +1042,19 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,
 
 	if (!status) {
 		if (sent->le)
-			hdev->host_features[0] |= LMP_HOST_LE;
+			hdev->features[1][0] |= LMP_HOST_LE;
 		else
-			hdev->host_features[0] &= ~LMP_HOST_LE;
+			hdev->features[1][0] &= ~LMP_HOST_LE;
 
 		if (sent->simul)
-			hdev->host_features[0] |= LMP_HOST_LE_BREDR;
+			hdev->features[1][0] |= LMP_HOST_LE_BREDR;
 		else
-			hdev->host_features[0] &= ~LMP_HOST_LE_BREDR;
+			hdev->features[1][0] &= ~LMP_HOST_LE_BREDR;
 	}
 
 	if (test_bit(HCI_MGMT, &hdev->dev_flags) &&
 	    !test_bit(HCI_INIT, &hdev->flags))
 		mgmt_le_enable_complete(hdev, sent->le, status);
-
-	hci_req_complete(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, status);
 }
 
 static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev,
@@ -1412,7 +1076,6 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
1412 BT_DBG("%s status 0x%2.2x", hdev->name, status); 1076 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1413 1077
1414 if (status) { 1078 if (status) {
1415 hci_req_complete(hdev, HCI_OP_INQUIRY, status);
1416 hci_conn_check_pending(hdev); 1079 hci_conn_check_pending(hdev);
1417 hci_dev_lock(hdev); 1080 hci_dev_lock(hdev);
1418 if (test_bit(HCI_MGMT, &hdev->dev_flags)) 1081 if (test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -1523,7 +1186,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status)
 	if (conn) {
 		if (conn->state == BT_CONFIG) {
 			hci_proto_connect_cfm(conn, status);
-			hci_conn_put(conn);
+			hci_conn_drop(conn);
 		}
 	}
 
@@ -1550,7 +1213,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)
 	if (conn) {
 		if (conn->state == BT_CONFIG) {
 			hci_proto_connect_cfm(conn, status);
-			hci_conn_put(conn);
+			hci_conn_drop(conn);
 		}
 	}
 
@@ -1712,7 +1375,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status)
 	if (conn) {
 		if (conn->state == BT_CONFIG) {
 			hci_proto_connect_cfm(conn, status);
-			hci_conn_put(conn);
+			hci_conn_drop(conn);
 		}
 	}
 
@@ -1739,7 +1402,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status)
 	if (conn) {
 		if (conn->state == BT_CONFIG) {
 			hci_proto_connect_cfm(conn, status);
-			hci_conn_put(conn);
+			hci_conn_drop(conn);
 		}
 	}
 
@@ -1884,11 +1547,6 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)
1884 } 1547 }
1885} 1548}
1886 1549
1887static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status)
1888{
1889 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1890}
1891
1892static void hci_cs_create_phylink(struct hci_dev *hdev, u8 status) 1550static void hci_cs_create_phylink(struct hci_dev *hdev, u8 status)
1893{ 1551{
1894 struct hci_cp_create_phy_link *cp; 1552 struct hci_cp_create_phy_link *cp;
@@ -1930,11 +1588,6 @@ static void hci_cs_accept_phylink(struct hci_dev *hdev, u8 status)
1930 amp_write_remote_assoc(hdev, cp->phy_handle); 1588 amp_write_remote_assoc(hdev, cp->phy_handle);
1931} 1589}
1932 1590
1933static void hci_cs_create_logical_link(struct hci_dev *hdev, u8 status)
1934{
1935 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1936}
1937
1938static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) 1591static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
1939{ 1592{
1940 __u8 status = *((__u8 *) skb->data); 1593 __u8 status = *((__u8 *) skb->data);
@@ -1943,13 +1596,14 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
1943 1596
1944 BT_DBG("%s status 0x%2.2x", hdev->name, status); 1597 BT_DBG("%s status 0x%2.2x", hdev->name, status);
1945 1598
1946 hci_req_complete(hdev, HCI_OP_INQUIRY, status);
1947
1948 hci_conn_check_pending(hdev); 1599 hci_conn_check_pending(hdev);
1949 1600
1950 if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) 1601 if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags))
1951 return; 1602 return;
1952 1603
1604 smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
1605 wake_up_bit(&hdev->flags, HCI_INQUIRY);
1606
1953 if (!test_bit(HCI_MGMT, &hdev->dev_flags)) 1607 if (!test_bit(HCI_MGMT, &hdev->dev_flags))
1954 return; 1608 return;
1955 1609
@@ -2048,7 +1702,6 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2048 } else 1702 } else
2049 conn->state = BT_CONNECTED; 1703 conn->state = BT_CONNECTED;
2050 1704
2051 hci_conn_hold_device(conn);
2052 hci_conn_add_sysfs(conn); 1705 hci_conn_add_sysfs(conn);
2053 1706
2054 if (test_bit(HCI_AUTH, &hdev->flags)) 1707 if (test_bit(HCI_AUTH, &hdev->flags))
@@ -2095,42 +1748,6 @@ unlock:
2095 hci_conn_check_pending(hdev); 1748 hci_conn_check_pending(hdev);
2096} 1749}
2097 1750
2098void hci_conn_accept(struct hci_conn *conn, int mask)
2099{
2100 struct hci_dev *hdev = conn->hdev;
2101
2102 BT_DBG("conn %p", conn);
2103
2104 conn->state = BT_CONFIG;
2105
2106 if (!lmp_esco_capable(hdev)) {
2107 struct hci_cp_accept_conn_req cp;
2108
2109 bacpy(&cp.bdaddr, &conn->dst);
2110
2111 if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
2112 cp.role = 0x00; /* Become master */
2113 else
2114 cp.role = 0x01; /* Remain slave */
2115
2116 hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
2117 } else /* lmp_esco_capable(hdev)) */ {
2118 struct hci_cp_accept_sync_conn_req cp;
2119
2120 bacpy(&cp.bdaddr, &conn->dst);
2121 cp.pkt_type = cpu_to_le16(conn->pkt_type);
2122
2123 cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
2124 cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
2125 cp.max_latency = __constant_cpu_to_le16(0xffff);
2126 cp.content_format = cpu_to_le16(hdev->voice_setting);
2127 cp.retrans_effort = 0xff;
2128
2129 hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
2130 sizeof(cp), &cp);
2131 }
2132}
2133
2134static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) 1751static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
2135{ 1752{
2136 struct hci_ev_conn_request *ev = (void *) skb->data; 1753 struct hci_ev_conn_request *ev = (void *) skb->data;
@@ -2202,7 +1819,6 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
2202 } else { 1819 } else {
2203 conn->state = BT_CONNECT2; 1820 conn->state = BT_CONNECT2;
2204 hci_proto_connect_cfm(conn, 0); 1821 hci_proto_connect_cfm(conn, 0);
2205 hci_conn_put(conn);
2206 } 1822 }
2207 } else { 1823 } else {
2208 /* Connection rejected */ 1824 /* Connection rejected */
@@ -2309,14 +1925,14 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		} else {
 			conn->state = BT_CONNECTED;
 			hci_proto_connect_cfm(conn, ev->status);
-			hci_conn_put(conn);
+			hci_conn_drop(conn);
 		}
 	} else {
 		hci_auth_cfm(conn, ev->status);
 
 		hci_conn_hold(conn);
 		conn->disc_timeout = HCI_DISCONN_TIMEOUT;
-		hci_conn_put(conn);
+		hci_conn_drop(conn);
 	}
 
 	if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) {
@@ -2399,8 +2015,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
 
 	if (ev->status && conn->state == BT_CONNECTED) {
-		hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE);
-		hci_conn_put(conn);
+		hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
+		hci_conn_drop(conn);
 		goto unlock;
 	}
 
@@ -2409,7 +2025,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
2409 conn->state = BT_CONNECTED; 2025 conn->state = BT_CONNECTED;
2410 2026
2411 hci_proto_connect_cfm(conn, ev->status); 2027 hci_proto_connect_cfm(conn, ev->status);
2412 hci_conn_put(conn); 2028 hci_conn_drop(conn);
2413 } else 2029 } else
2414 hci_encrypt_cfm(conn, ev->status, ev->encrypt); 2030 hci_encrypt_cfm(conn, ev->status, ev->encrypt);
2415 } 2031 }
@@ -2456,7 +2072,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev,
 		goto unlock;
 
 	if (!ev->status)
-		memcpy(conn->features, ev->features, 8);
+		memcpy(conn->features[0], ev->features, 8);
 
 	if (conn->state != BT_CONFIG)
 		goto unlock;
@@ -2484,27 +2100,17 @@ static void hci_remote_features_evt(struct hci_dev *hdev,
2484 if (!hci_outgoing_auth_needed(hdev, conn)) { 2100 if (!hci_outgoing_auth_needed(hdev, conn)) {
2485 conn->state = BT_CONNECTED; 2101 conn->state = BT_CONNECTED;
2486 hci_proto_connect_cfm(conn, ev->status); 2102 hci_proto_connect_cfm(conn, ev->status);
2487 hci_conn_put(conn); 2103 hci_conn_drop(conn);
2488 } 2104 }
2489 2105
2490unlock: 2106unlock:
2491 hci_dev_unlock(hdev); 2107 hci_dev_unlock(hdev);
2492} 2108}
2493 2109
2494static void hci_remote_version_evt(struct hci_dev *hdev, struct sk_buff *skb)
2495{
2496 BT_DBG("%s", hdev->name);
2497}
2498
2499static void hci_qos_setup_complete_evt(struct hci_dev *hdev,
2500 struct sk_buff *skb)
2501{
2502 BT_DBG("%s", hdev->name);
2503}
2504
2505static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) 2110static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2506{ 2111{
2507 struct hci_ev_cmd_complete *ev = (void *) skb->data; 2112 struct hci_ev_cmd_complete *ev = (void *) skb->data;
2113 u8 status = skb->data[sizeof(*ev)];
2508 __u16 opcode; 2114 __u16 opcode;
2509 2115
2510 skb_pull(skb, sizeof(*ev)); 2116 skb_pull(skb, sizeof(*ev));
@@ -2588,10 +2194,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2588 hci_cc_write_voice_setting(hdev, skb); 2194 hci_cc_write_voice_setting(hdev, skb);
2589 break; 2195 break;
2590 2196
2591 case HCI_OP_HOST_BUFFER_SIZE:
2592 hci_cc_host_buffer_size(hdev, skb);
2593 break;
2594
2595 case HCI_OP_WRITE_SSP_MODE: 2197 case HCI_OP_WRITE_SSP_MODE:
2596 hci_cc_write_ssp_mode(hdev, skb); 2198 hci_cc_write_ssp_mode(hdev, skb);
2597 break; 2199 break;
@@ -2620,46 +2222,42 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_cc_read_bd_addr(hdev, skb);
 		break;
 
-	case HCI_OP_READ_DATA_BLOCK_SIZE:
-		hci_cc_read_data_block_size(hdev, skb);
+	case HCI_OP_READ_PAGE_SCAN_ACTIVITY:
+		hci_cc_read_page_scan_activity(hdev, skb);
 		break;
 
-	case HCI_OP_WRITE_CA_TIMEOUT:
-		hci_cc_write_ca_timeout(hdev, skb);
+	case HCI_OP_WRITE_PAGE_SCAN_ACTIVITY:
+		hci_cc_write_page_scan_activity(hdev, skb);
 		break;
 
-	case HCI_OP_READ_FLOW_CONTROL_MODE:
-		hci_cc_read_flow_control_mode(hdev, skb);
+	case HCI_OP_READ_PAGE_SCAN_TYPE:
+		hci_cc_read_page_scan_type(hdev, skb);
 		break;
 
-	case HCI_OP_READ_LOCAL_AMP_INFO:
-		hci_cc_read_local_amp_info(hdev, skb);
+	case HCI_OP_WRITE_PAGE_SCAN_TYPE:
+		hci_cc_write_page_scan_type(hdev, skb);
 		break;
 
-	case HCI_OP_READ_LOCAL_AMP_ASSOC:
-		hci_cc_read_local_amp_assoc(hdev, skb);
+	case HCI_OP_READ_DATA_BLOCK_SIZE:
+		hci_cc_read_data_block_size(hdev, skb);
 		break;
 
-	case HCI_OP_DELETE_STORED_LINK_KEY:
-		hci_cc_delete_stored_link_key(hdev, skb);
+	case HCI_OP_READ_FLOW_CONTROL_MODE:
+		hci_cc_read_flow_control_mode(hdev, skb);
 		break;
 
-	case HCI_OP_SET_EVENT_MASK:
-		hci_cc_set_event_mask(hdev, skb);
+	case HCI_OP_READ_LOCAL_AMP_INFO:
+		hci_cc_read_local_amp_info(hdev, skb);
 		break;
 
-	case HCI_OP_WRITE_INQUIRY_MODE:
-		hci_cc_write_inquiry_mode(hdev, skb);
+	case HCI_OP_READ_LOCAL_AMP_ASSOC:
+		hci_cc_read_local_amp_assoc(hdev, skb);
 		break;
 
 	case HCI_OP_READ_INQ_RSP_TX_POWER:
 		hci_cc_read_inq_rsp_tx_power(hdev, skb);
 		break;
 
-	case HCI_OP_SET_EVENT_FLT:
-		hci_cc_set_event_flt(hdev, skb);
-		break;
-
 	case HCI_OP_PIN_CODE_REPLY:
 		hci_cc_pin_code_reply(hdev, skb);
 		break;
@@ -2684,10 +2282,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2684 hci_cc_le_read_adv_tx_power(hdev, skb); 2282 hci_cc_le_read_adv_tx_power(hdev, skb);
2685 break; 2283 break;
2686 2284
2687 case HCI_OP_LE_SET_EVENT_MASK:
2688 hci_cc_le_set_event_mask(hdev, skb);
2689 break;
2690
2691 case HCI_OP_USER_CONFIRM_REPLY: 2285 case HCI_OP_USER_CONFIRM_REPLY:
2692 hci_cc_user_confirm_reply(hdev, skb); 2286 hci_cc_user_confirm_reply(hdev, skb);
2693 break; 2287 break;
@@ -2720,14 +2314,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
2720 hci_cc_le_read_white_list_size(hdev, skb); 2314 hci_cc_le_read_white_list_size(hdev, skb);
2721 break; 2315 break;
2722 2316
2723 case HCI_OP_LE_LTK_REPLY:
2724 hci_cc_le_ltk_reply(hdev, skb);
2725 break;
2726
2727 case HCI_OP_LE_LTK_NEG_REPLY:
2728 hci_cc_le_ltk_neg_reply(hdev, skb);
2729 break;
2730
2731 case HCI_OP_LE_READ_SUPPORTED_STATES: 2317 case HCI_OP_LE_READ_SUPPORTED_STATES:
2732 hci_cc_le_read_supported_states(hdev, skb); 2318 hci_cc_le_read_supported_states(hdev, skb);
2733 break; 2319 break;
@@ -2745,9 +2331,11 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		break;
 	}
 
-	if (ev->opcode != HCI_OP_NOP)
+	if (opcode != HCI_OP_NOP)
 		del_timer(&hdev->cmd_timer);
 
+	hci_req_cmd_complete(hdev, opcode, status);
+
 	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
 		atomic_set(&hdev->cmd_cnt, 1);
 		if (!skb_queue_empty(&hdev->cmd_q))
@@ -2817,10 +2405,6 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
2817 hci_cs_le_create_conn(hdev, ev->status); 2405 hci_cs_le_create_conn(hdev, ev->status);
2818 break; 2406 break;
2819 2407
2820 case HCI_OP_LE_START_ENC:
2821 hci_cs_le_start_enc(hdev, ev->status);
2822 break;
2823
2824 case HCI_OP_CREATE_PHY_LINK: 2408 case HCI_OP_CREATE_PHY_LINK:
2825 hci_cs_create_phylink(hdev, ev->status); 2409 hci_cs_create_phylink(hdev, ev->status);
2826 break; 2410 break;
@@ -2829,18 +2413,18 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_cs_accept_phylink(hdev, ev->status);
 		break;
 
-	case HCI_OP_CREATE_LOGICAL_LINK:
-		hci_cs_create_logical_link(hdev, ev->status);
-		break;
-
 	default:
 		BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);
 		break;
 	}
 
-	if (ev->opcode != HCI_OP_NOP)
+	if (opcode != HCI_OP_NOP)
 		del_timer(&hdev->cmd_timer);
 
+	if (ev->status ||
+	    (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
+		hci_req_cmd_complete(hdev, opcode, ev->status);
+
 	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {
 		atomic_set(&hdev->cmd_cnt, 1);
 		if (!skb_queue_empty(&hdev->cmd_q))
@@ -3056,7 +2640,7 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
3056 if (conn->state == BT_CONNECTED) { 2640 if (conn->state == BT_CONNECTED) {
3057 hci_conn_hold(conn); 2641 hci_conn_hold(conn);
3058 conn->disc_timeout = HCI_PAIRING_TIMEOUT; 2642 conn->disc_timeout = HCI_PAIRING_TIMEOUT;
3059 hci_conn_put(conn); 2643 hci_conn_drop(conn);
3060 } 2644 }
3061 2645
3062 if (!test_bit(HCI_PAIRABLE, &hdev->dev_flags)) 2646 if (!test_bit(HCI_PAIRABLE, &hdev->dev_flags))
@@ -3159,7 +2743,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
3159 if (ev->key_type != HCI_LK_CHANGED_COMBINATION) 2743 if (ev->key_type != HCI_LK_CHANGED_COMBINATION)
3160 conn->key_type = ev->key_type; 2744 conn->key_type = ev->key_type;
3161 2745
3162 hci_conn_put(conn); 2746 hci_conn_drop(conn);
3163 } 2747 }
3164 2748
3165 if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) 2749 if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags))
@@ -3300,6 +2884,9 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
 	if (!conn)
 		goto unlock;
 
+	if (ev->page < HCI_MAX_PAGES)
+		memcpy(conn->features[ev->page], ev->features, 8);
+
 	if (!ev->status && ev->page == 0x01) {
 		struct inquiry_entry *ie;
 
@@ -3307,8 +2894,19 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
 		if (ie)
 			ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP);
 
-		if (ev->features[0] & LMP_HOST_SSP)
+		if (ev->features[0] & LMP_HOST_SSP) {
 			set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+		} else {
+			/* The Bluetooth specification mandates that
+			 * Extended Inquiry Results are only used when
+			 * Secure Simple Pairing is enabled, but some
+			 * devices violate this.
+			 *
+			 * To make these devices work, the internal SSP
+			 * enabled flag needs to be cleared if the remote
+			 * host features do not indicate SSP support. */
+			clear_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+		}
 	}
 
3314 if (conn->state != BT_CONFIG) 2912 if (conn->state != BT_CONFIG)
@@ -3328,7 +2926,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,
3328 if (!hci_outgoing_auth_needed(hdev, conn)) { 2926 if (!hci_outgoing_auth_needed(hdev, conn)) {
3329 conn->state = BT_CONNECTED; 2927 conn->state = BT_CONNECTED;
3330 hci_proto_connect_cfm(conn, ev->status); 2928 hci_proto_connect_cfm(conn, ev->status);
3331 hci_conn_put(conn); 2929 hci_conn_drop(conn);
3332 } 2930 }
3333 2931
3334unlock: 2932unlock:
@@ -3362,7 +2960,6 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
3362 conn->handle = __le16_to_cpu(ev->handle); 2960 conn->handle = __le16_to_cpu(ev->handle);
3363 conn->state = BT_CONNECTED; 2961 conn->state = BT_CONNECTED;
3364 2962
3365 hci_conn_hold_device(conn);
3366 hci_conn_add_sysfs(conn); 2963 hci_conn_add_sysfs(conn);
3367 break; 2964 break;
3368 2965
@@ -3391,18 +2988,6 @@ unlock:
3391 hci_dev_unlock(hdev); 2988 hci_dev_unlock(hdev);
3392} 2989}
3393 2990
3394static void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buff *skb)
3395{
3396 BT_DBG("%s", hdev->name);
3397}
3398
3399static void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb)
3400{
3401 struct hci_ev_sniff_subrate *ev = (void *) skb->data;
3402
3403 BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
3404}
3405
3406static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, 2991static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,
3407 struct sk_buff *skb) 2992 struct sk_buff *skb)
3408{ 2993{
@@ -3472,8 +3057,8 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
3472 clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags); 3057 clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
3473 3058
3474 if (ev->status && conn->state == BT_CONNECTED) { 3059 if (ev->status && conn->state == BT_CONNECTED) {
3475 hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); 3060 hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
3476 hci_conn_put(conn); 3061 hci_conn_drop(conn);
3477 goto unlock; 3062 goto unlock;
3478 } 3063 }
3479 3064
@@ -3482,13 +3067,13 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,
3482 conn->state = BT_CONNECTED; 3067 conn->state = BT_CONNECTED;
3483 3068
3484 hci_proto_connect_cfm(conn, ev->status); 3069 hci_proto_connect_cfm(conn, ev->status);
3485 hci_conn_put(conn); 3070 hci_conn_drop(conn);
3486 } else { 3071 } else {
3487 hci_auth_cfm(conn, ev->status); 3072 hci_auth_cfm(conn, ev->status);
3488 3073
3489 hci_conn_hold(conn); 3074 hci_conn_hold(conn);
3490 conn->disc_timeout = HCI_DISCONN_TIMEOUT; 3075 conn->disc_timeout = HCI_DISCONN_TIMEOUT;
3491 hci_conn_put(conn); 3076 hci_conn_drop(conn);
3492 } 3077 }
3493 3078
3494unlock: 3079unlock:
@@ -3749,7 +3334,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev,
3749 mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type, 3334 mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type,
3750 ev->status); 3335 ev->status);
3751 3336
3752 hci_conn_put(conn); 3337 hci_conn_drop(conn);
3753 3338
3754unlock: 3339unlock:
3755 hci_dev_unlock(hdev); 3340 hci_dev_unlock(hdev);
@@ -3760,11 +3345,16 @@ static void hci_remote_host_features_evt(struct hci_dev *hdev,
3760{ 3345{
3761 struct hci_ev_remote_host_features *ev = (void *) skb->data; 3346 struct hci_ev_remote_host_features *ev = (void *) skb->data;
3762 struct inquiry_entry *ie; 3347 struct inquiry_entry *ie;
3348 struct hci_conn *conn;
3763 3349
3764 BT_DBG("%s", hdev->name); 3350 BT_DBG("%s", hdev->name);
3765 3351
3766 hci_dev_lock(hdev); 3352 hci_dev_lock(hdev);
3767 3353
3354 conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
3355 if (conn)
3356 memcpy(conn->features[1], ev->features, 8);
3357
3768 ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr); 3358 ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);
3769 if (ie) 3359 if (ie)
3770 ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP); 3360 ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP);
@@ -3837,9 +3427,8 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev,
3837 3427
3838 hci_conn_hold(hcon); 3428 hci_conn_hold(hcon);
3839 hcon->disc_timeout = HCI_DISCONN_TIMEOUT; 3429 hcon->disc_timeout = HCI_DISCONN_TIMEOUT;
3840 hci_conn_put(hcon); 3430 hci_conn_drop(hcon);
3841 3431
3842 hci_conn_hold_device(hcon);
3843 hci_conn_add_sysfs(hcon); 3432 hci_conn_add_sysfs(hcon);
3844 3433
3845 amp_physical_cfm(bredr_hcon, hcon); 3434 amp_physical_cfm(bredr_hcon, hcon);
@@ -3973,7 +3562,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
3973 conn->handle = __le16_to_cpu(ev->handle); 3562 conn->handle = __le16_to_cpu(ev->handle);
3974 conn->state = BT_CONNECTED; 3563 conn->state = BT_CONNECTED;
3975 3564
3976 hci_conn_hold_device(conn);
3977 hci_conn_add_sysfs(conn); 3565 hci_conn_add_sysfs(conn);
3978 3566
3979 hci_proto_connect_cfm(conn, ev->status); 3567 hci_proto_connect_cfm(conn, ev->status);
@@ -4087,8 +3675,27 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 	struct hci_event_hdr *hdr = (void *) skb->data;
 	__u8 event = hdr->evt;
 
+	hci_dev_lock(hdev);
+
+	/* Received events are (currently) only needed when a request is
+	 * ongoing so avoid unnecessary memory allocation.
+	 */
+	if (hdev->req_status == HCI_REQ_PEND) {
+		kfree_skb(hdev->recv_evt);
+		hdev->recv_evt = skb_clone(skb, GFP_KERNEL);
+	}
+
+	hci_dev_unlock(hdev);
+
 	skb_pull(skb, HCI_EVENT_HDR_SIZE);
 
+	if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) {
+		struct hci_command_hdr *hdr = (void *) hdev->sent_cmd->data;
+		u16 opcode = __le16_to_cpu(hdr->opcode);
+
+		hci_req_cmd_complete(hdev, opcode, 0);
+	}
+
 	switch (event) {
4093 case HCI_EV_INQUIRY_COMPLETE: 3700 case HCI_EV_INQUIRY_COMPLETE:
4094 hci_inquiry_complete_evt(hdev, skb); 3701 hci_inquiry_complete_evt(hdev, skb);
@@ -4130,14 +3737,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
4130 hci_remote_features_evt(hdev, skb); 3737 hci_remote_features_evt(hdev, skb);
4131 break; 3738 break;
4132 3739
4133 case HCI_EV_REMOTE_VERSION:
4134 hci_remote_version_evt(hdev, skb);
4135 break;
4136
4137 case HCI_EV_QOS_SETUP_COMPLETE:
4138 hci_qos_setup_complete_evt(hdev, skb);
4139 break;
4140
4141 case HCI_EV_CMD_COMPLETE: 3740 case HCI_EV_CMD_COMPLETE:
4142 hci_cmd_complete_evt(hdev, skb); 3741 hci_cmd_complete_evt(hdev, skb);
4143 break; 3742 break;
@@ -4194,14 +3793,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
4194 hci_sync_conn_complete_evt(hdev, skb); 3793 hci_sync_conn_complete_evt(hdev, skb);
4195 break; 3794 break;
4196 3795
4197 case HCI_EV_SYNC_CONN_CHANGED:
4198 hci_sync_conn_changed_evt(hdev, skb);
4199 break;
4200
4201 case HCI_EV_SNIFF_SUBRATE:
4202 hci_sniff_subrate_evt(hdev, skb);
4203 break;
4204
4205 case HCI_EV_EXTENDED_INQUIRY_RESULT: 3796 case HCI_EV_EXTENDED_INQUIRY_RESULT:
4206 hci_extended_inquiry_result_evt(hdev, skb); 3797 hci_extended_inquiry_result_evt(hdev, skb);
4207 break; 3798 break;
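Taken together, the hci_event.c hunks replace the per-opcode hci_req_complete() calls that were scattered through the hci_cc_*()/hci_cs_*() handlers with one central notification per dispatcher. In outline (a sketch of the shape only; hci_req_cmd_complete() is defined elsewhere in this series):

	/* Command Complete: status is the first parameter byte */
	u8 status = skb->data[sizeof(*ev)];
	...
	if (opcode != HCI_OP_NOP)
		del_timer(&hdev->cmd_timer);
	hci_req_cmd_complete(hdev, opcode, status);

	/* Command Status: notify only on failure, or when no follow-up
	 * event is expected for the pending command */
	if (ev->status ||
	    (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event))
		hci_req_cmd_complete(hdev, opcode, ev->status);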
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 6a93614f2c49..9bd7d959e384 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -854,6 +854,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 		skb_queue_tail(&hdev->raw_q, skb);
 		queue_work(hdev->workqueue, &hdev->tx_work);
 	} else {
+		/* Stand-alone HCI commands must be flagged as
+		 * single-command requests.
+		 */
+		bt_cb(skb)->req.start = true;
+
 		skb_queue_tail(&hdev->cmd_q, skb);
 		queue_work(hdev->workqueue, &hdev->cmd_work);
 	}
@@ -1102,7 +1107,7 @@ int __init hci_sock_init(void)
 		goto error;
 	}
 
-	err = bt_procfs_init(THIS_MODULE, &init_net, "hci", &hci_sk_list, NULL);
+	err = bt_procfs_init(&init_net, "hci", &hci_sk_list, NULL);
 	if (err < 0) {
 		BT_ERR("Failed to create HCI proc file");
 		bt_sock_unregister(BTPROTO_HCI);
@@ -1121,8 +1126,6 @@ error:
 void hci_sock_cleanup(void)
 {
 	bt_procfs_cleanup(&init_net, "hci");
-	if (bt_sock_unregister(BTPROTO_HCI) < 0)
-		BT_ERR("HCI socket unregistration failed");
-
+	bt_sock_unregister(BTPROTO_HCI);
 	proto_unregister(&hci_sk_proto);
 }
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 23b4e242a31a..7ad6ecf36f20 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -48,10 +48,10 @@ static ssize_t show_link_features(struct device *dev,
 	struct hci_conn *conn = to_hci_conn(dev);
 
 	return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
-		       conn->features[0], conn->features[1],
-		       conn->features[2], conn->features[3],
-		       conn->features[4], conn->features[5],
-		       conn->features[6], conn->features[7]);
+		       conn->features[0][0], conn->features[0][1],
+		       conn->features[0][2], conn->features[0][3],
+		       conn->features[0][4], conn->features[0][5],
+		       conn->features[0][6], conn->features[0][7]);
 }
56 56
57#define LINK_ATTR(_name, _mode, _show, _store) \ 57#define LINK_ATTR(_name, _mode, _show, _store) \
@@ -146,7 +146,6 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
 	}
 
 	device_del(&conn->dev);
-	put_device(&conn->dev);
 
 	hci_dev_put(hdev);
 }
@@ -234,10 +233,10 @@ static ssize_t show_features(struct device *dev,
 	struct hci_dev *hdev = to_hci_dev(dev);
 
 	return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
-		       hdev->features[0], hdev->features[1],
-		       hdev->features[2], hdev->features[3],
-		       hdev->features[4], hdev->features[5],
-		       hdev->features[6], hdev->features[7]);
+		       hdev->features[0][0], hdev->features[0][1],
+		       hdev->features[0][2], hdev->features[0][3],
+		       hdev->features[0][4], hdev->features[0][5],
+		       hdev->features[0][6], hdev->features[0][7]);
 }
242 241
243static ssize_t show_manufacturer(struct device *dev, 242static ssize_t show_manufacturer(struct device *dev,
@@ -590,10 +589,8 @@ int __init bt_sysfs_init(void)
 	bt_debugfs = debugfs_create_dir("bluetooth", NULL);
 
 	bt_class = class_create(THIS_MODULE, "bluetooth");
-	if (IS_ERR(bt_class))
-		return PTR_ERR(bt_class);
 
-	return 0;
+	return PTR_RET(bt_class);
 }
598 595
599void bt_sysfs_cleanup(void) 596void bt_sysfs_cleanup(void)
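The features[0][...] indexing in the sysfs hunks reflects the per-page feature storage this series introduces: roughly

	u8 features[HCI_MAX_PAGES][8];	/* HCI_MAX_PAGES assumed to be 3 */

in struct hci_dev and struct hci_conn, with page 0 holding the classic LMP feature bits and page 1 the host features, which is why the earlier hci_event.c hunks turn host_features[0] accesses into features[1][0].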
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index a7352ff3fd1e..940f5acb6694 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -1,6 +1,7 @@
 /*
    HIDP implementation for Linux Bluetooth stack (BlueZ).
    Copyright (C) 2003-2004 Marcel Holtmann <marcel@holtmann.org>
+   Copyright (C) 2013 David Herrmann <dh.herrmann@gmail.com>
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License version 2 as
@@ -20,6 +21,7 @@
    SOFTWARE IS DISCLAIMED.
 */
 
+#include <linux/kref.h>
 #include <linux/module.h>
 #include <linux/file.h>
 #include <linux/kthread.h>
@@ -59,39 +61,20 @@ static unsigned char hidp_keycode[256] = {
 
 static unsigned char hidp_mkeyspat[] = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 };
 
-static struct hidp_session *__hidp_get_session(bdaddr_t *bdaddr)
-{
-	struct hidp_session *session;
-
-	BT_DBG("");
-
-	list_for_each_entry(session, &hidp_session_list, list) {
-		if (!bacmp(bdaddr, &session->bdaddr))
-			return session;
-	}
-
-	return NULL;
-}
-
-static void __hidp_link_session(struct hidp_session *session)
-{
-	list_add(&session->list, &hidp_session_list);
-}
-
-static void __hidp_unlink_session(struct hidp_session *session)
-{
-	hci_conn_put_device(session->conn);
-
-	list_del(&session->list);
-}
-
-static void __hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci)
+static int hidp_session_probe(struct l2cap_conn *conn,
+			      struct l2cap_user *user);
+static void hidp_session_remove(struct l2cap_conn *conn,
+				struct l2cap_user *user);
+static int hidp_session_thread(void *arg);
+static void hidp_session_terminate(struct hidp_session *s);
+
+static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci)
 {
 	memset(ci, 0, sizeof(*ci));
 	bacpy(&ci->bdaddr, &session->bdaddr);
 
 	ci->flags = session->flags;
-	ci->state = session->state;
+	ci->state = BT_CONNECTED;
 
 	ci->vendor  = 0x0000;
 	ci->product = 0x0000;
@@ -115,58 +98,80 @@ static void __hidp_copy_session(struct hidp_session *session, struct hidp_connin
 	}
 }
 
-static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev,
-			    unsigned int type, unsigned int code, int value)
+/* assemble skb, queue message on @transmit and wake up the session thread */
+static int hidp_send_message(struct hidp_session *session, struct socket *sock,
+			     struct sk_buff_head *transmit, unsigned char hdr,
+			     const unsigned char *data, int size)
 {
-	unsigned char newleds;
 	struct sk_buff *skb;
+	struct sock *sk = sock->sk;
 
-	BT_DBG("session %p type %d code %d value %d", session, type, code, value);
-
-	if (type != EV_LED)
-		return -1;
-
-	newleds = (!!test_bit(LED_KANA, dev->led) << 3) |
-		  (!!test_bit(LED_COMPOSE, dev->led) << 3) |
-		  (!!test_bit(LED_SCROLLL, dev->led) << 2) |
-		  (!!test_bit(LED_CAPSL, dev->led) << 1) |
-		  (!!test_bit(LED_NUML, dev->led));
-
-	if (session->leds == newleds)
-		return 0;
+	BT_DBG("session %p data %p size %d", session, data, size);
 
-	session->leds = newleds;
+	if (atomic_read(&session->terminate))
+		return -EIO;
 
-	skb = alloc_skb(3, GFP_ATOMIC);
+	skb = alloc_skb(size + 1, GFP_ATOMIC);
 	if (!skb) {
 		BT_ERR("Can't allocate memory for new frame");
 		return -ENOMEM;
 	}
 
-	*skb_put(skb, 1) = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
-	*skb_put(skb, 1) = 0x01;
-	*skb_put(skb, 1) = newleds;
-
-	skb_queue_tail(&session->intr_transmit, skb);
-
-	hidp_schedule(session);
+	*skb_put(skb, 1) = hdr;
+	if (data && size > 0)
+		memcpy(skb_put(skb, size), data, size);
+
+	skb_queue_tail(transmit, skb);
+	wake_up_interruptible(sk_sleep(sk));
 
 	return 0;
 }
 
-static int hidp_hidinput_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
+static int hidp_send_ctrl_message(struct hidp_session *session,
+				  unsigned char hdr, const unsigned char *data,
+				  int size)
 {
-	struct hid_device *hid = input_get_drvdata(dev);
-	struct hidp_session *session = hid->driver_data;
+	return hidp_send_message(session, session->ctrl_sock,
+				 &session->ctrl_transmit, hdr, data, size);
+}
 
-	return hidp_queue_event(session, dev, type, code, value);
+static int hidp_send_intr_message(struct hidp_session *session,
+				  unsigned char hdr, const unsigned char *data,
+				  int size)
+{
+	return hidp_send_message(session, session->intr_sock,
+				 &session->intr_transmit, hdr, data, size);
 }
 
-static int hidp_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
+static int hidp_input_event(struct input_dev *dev, unsigned int type,
+			    unsigned int code, int value)
 {
 	struct hidp_session *session = input_get_drvdata(dev);
+	unsigned char newleds;
+	unsigned char hdr, data[2];
+
+	BT_DBG("session %p type %d code %d value %d",
+	       session, type, code, value);
+
+	if (type != EV_LED)
+		return -1;
+
+	newleds = (!!test_bit(LED_KANA, dev->led) << 3) |
+		  (!!test_bit(LED_COMPOSE, dev->led) << 3) |
+		  (!!test_bit(LED_SCROLLL, dev->led) << 2) |
+		  (!!test_bit(LED_CAPSL, dev->led) << 1) |
+		  (!!test_bit(LED_NUML, dev->led));
 
-	return hidp_queue_event(session, dev, type, code, value);
+	if (session->leds == newleds)
+		return 0;
+
+	session->leds = newleds;
+
+	hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
+	data[0] = 0x01;
+	data[1] = newleds;
+
+	return hidp_send_intr_message(session, hdr, data, 2);
 }
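Usage after the refactor above: callers choose a channel helper instead of queueing skbs themselves. The LED output report, for instance, now goes out as

	hidp_send_intr_message(session, HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT,
			       data, 2);

and hidp_send_message() wakes the session thread through sk_sleep() instead of the old hidp_schedule() call.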
171 176
172static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) 177static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)
@@ -224,71 +229,9 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)
 	input_sync(dev);
 }
 
-static int __hidp_send_ctrl_message(struct hidp_session *session,
-				    unsigned char hdr, unsigned char *data,
-				    int size)
-{
-	struct sk_buff *skb;
-
-	BT_DBG("session %p data %p size %d", session, data, size);
-
-	if (atomic_read(&session->terminate))
-		return -EIO;
-
-	skb = alloc_skb(size + 1, GFP_ATOMIC);
-	if (!skb) {
-		BT_ERR("Can't allocate memory for new frame");
-		return -ENOMEM;
-	}
-
-	*skb_put(skb, 1) = hdr;
-	if (data && size > 0)
-		memcpy(skb_put(skb, size), data, size);
-
-	skb_queue_tail(&session->ctrl_transmit, skb);
-
-	return 0;
-}
-
-static int hidp_send_ctrl_message(struct hidp_session *session,
-				  unsigned char hdr, unsigned char *data, int size)
-{
-	int err;
-
-	err = __hidp_send_ctrl_message(session, hdr, data, size);
-
-	hidp_schedule(session);
-
-	return err;
-}
-
-static int hidp_queue_report(struct hidp_session *session,
-			     unsigned char *data, int size)
-{
-	struct sk_buff *skb;
-
-	BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size);
-
-	skb = alloc_skb(size + 1, GFP_ATOMIC);
-	if (!skb) {
-		BT_ERR("Can't allocate memory for new frame");
-		return -ENOMEM;
-	}
-
-	*skb_put(skb, 1) = 0xa2;
-	if (size > 0)
-		memcpy(skb_put(skb, size), data, size);
-
-	skb_queue_tail(&session->intr_transmit, skb);
-
-	hidp_schedule(session);
-
-	return 0;
-}
-
 static int hidp_send_report(struct hidp_session *session, struct hid_report *report)
 {
-	unsigned char buf[32];
+	unsigned char buf[32], hdr;
 	int rsize;
 
 	rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0);
@@ -296,8 +239,9 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *report)
 		return -EIO;
 
 	hid_output_report(report, buf);
+	hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
 
-	return hidp_queue_report(session, buf, rsize);
+	return hidp_send_intr_message(session, hdr, buf, rsize);
 }
 
 static int hidp_get_raw_report(struct hid_device *hid,
@@ -311,6 +255,9 @@ static int hidp_get_raw_report(struct hid_device *hid,
 	int numbered_reports = hid->report_enum[report_type].numbered;
 	int ret;
 
+	if (atomic_read(&session->terminate))
+		return -EIO;
+
 	switch (report_type) {
 	case HID_FEATURE_REPORT:
 		report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE;
@@ -333,17 +280,19 @@ static int hidp_get_raw_report(struct hid_device *hid,
 	session->waiting_report_number = numbered_reports ? report_number : -1;
 	set_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
 	data[0] = report_number;
-	ret = hidp_send_ctrl_message(hid->driver_data, report_type, data, 1);
+	ret = hidp_send_ctrl_message(session, report_type, data, 1);
 	if (ret)
 		goto err;
 
 	/* Wait for the return of the report. The returned report
 	   gets put in session->report_return. */
-	while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) {
+	while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags) &&
+	       !atomic_read(&session->terminate)) {
 		int res;
 
 		res = wait_event_interruptible_timeout(session->report_queue,
-			!test_bit(HIDP_WAITING_FOR_RETURN, &session->flags),
+			!test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)
+				|| atomic_read(&session->terminate),
 			5*HZ);
 		if (res == 0) {
 			/* timeout */
@@ -386,14 +335,11 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count, unsigned char report_type)
 	struct hidp_session *session = hid->driver_data;
 	int ret;
 
-	switch (report_type) {
-	case HID_FEATURE_REPORT:
-		report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE;
-		break;
-	case HID_OUTPUT_REPORT:
-		report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_OUPUT;
-		break;
-	default:
+	if (report_type == HID_OUTPUT_REPORT) {
+		report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
+		return hidp_send_intr_message(session, report_type,
+					      data, count);
+	} else if (report_type != HID_FEATURE_REPORT) {
 		return -EINVAL;
 	}
 
@@ -402,17 +348,19 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, size_t count, unsigned char report_type)
 
 	/* Set up our wait, and send the report request to the device. */
 	set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
-	ret = hidp_send_ctrl_message(hid->driver_data, report_type, data,
-				     count);
+	report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE;
+	ret = hidp_send_ctrl_message(session, report_type, data, count);
 	if (ret)
 		goto err;
 
 	/* Wait for the ACK from the device. */
-	while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) {
+	while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags) &&
+	       !atomic_read(&session->terminate)) {
 		int res;
 
 		res = wait_event_interruptible_timeout(session->report_queue,
-			!test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags),
+			!test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)
+				|| atomic_read(&session->terminate),
 			10*HZ);
 		if (res == 0) {
 			/* timeout */
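Both report paths above now share one wait discipline: set a HIDP_WAITING_* flag, queue the control message, then sleep on session->report_queue until the response handler clears the flag, the session terminates, or the timeout fires. A condensed sketch of that loop follows; the helper name is invented for illustration (the driver open-codes the loop in both callers):

/* Sketch only: hidp_wait_for_flag() is not a real in-tree helper. */
static int hidp_wait_for_flag(struct hidp_session *session, long flag,
			      unsigned long timeout)
{
	while (test_bit(flag, &session->flags) &&
	       !atomic_read(&session->terminate)) {
		/* wake up early when the flag clears or the session dies */
		int res = wait_event_interruptible_timeout(
				session->report_queue,
				!test_bit(flag, &session->flags) ||
					atomic_read(&session->terminate),
				timeout);
		if (res == 0)
			return -EIO;	/* timeout */
		if (res < 0)
			return res;	/* interrupted by a signal */
	}

	return 0;
}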
@@ -443,8 +391,7 @@ static void hidp_idle_timeout(unsigned long arg)
 {
 	struct hidp_session *session = (struct hidp_session *) arg;
 
-	atomic_inc(&session->terminate);
-	wake_up_process(session->task);
+	hidp_session_terminate(session);
 }
 
 static void hidp_set_timer(struct hidp_session *session)
@@ -487,12 +434,12 @@ static void hidp_process_handshake(struct hidp_session *session,
 	case HIDP_HSHK_ERR_FATAL:
 		/* Device requests a reboot, as this is the only way this error
 		 * can be recovered. */
-		__hidp_send_ctrl_message(session,
+		hidp_send_ctrl_message(session,
 			HIDP_TRANS_HID_CONTROL | HIDP_CTRL_SOFT_RESET, NULL, 0);
 		break;
 
 	default:
-		__hidp_send_ctrl_message(session,
+		hidp_send_ctrl_message(session,
 			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
 		break;
 	}
@@ -512,8 +459,7 @@ static void hidp_process_hid_control(struct hidp_session *session,
 		skb_queue_purge(&session->ctrl_transmit);
 		skb_queue_purge(&session->intr_transmit);
 
-		atomic_inc(&session->terminate);
-		wake_up_process(current);
+		hidp_session_terminate(session);
 	}
 }
 
@@ -541,7 +487,7 @@ static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
 		break;
 
 	default:
-		__hidp_send_ctrl_message(session,
+		hidp_send_ctrl_message(session,
 			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);
 	}
 
@@ -588,7 +534,7 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session,
 		break;
 
 	default:
-		__hidp_send_ctrl_message(session,
+		hidp_send_ctrl_message(session,
 			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_UNSUPPORTED_REQUEST, NULL, 0);
 		break;
 	}
@@ -639,32 +585,24 @@ static int hidp_send_frame(struct socket *sock, unsigned char *data, int len)
 	return kernel_sendmsg(sock, &msg, &iv, 1, len);
 }
 
-static void hidp_process_intr_transmit(struct hidp_session *session)
+/* dequeue message from @transmit and send via @sock */
+static void hidp_process_transmit(struct hidp_session *session,
+				  struct sk_buff_head *transmit,
+				  struct socket *sock)
 {
 	struct sk_buff *skb;
+	int ret;
 
 	BT_DBG("session %p", session);
 
-	while ((skb = skb_dequeue(&session->intr_transmit))) {
-		if (hidp_send_frame(session->intr_sock, skb->data, skb->len) < 0) {
-			skb_queue_head(&session->intr_transmit, skb);
+	while ((skb = skb_dequeue(transmit))) {
+		ret = hidp_send_frame(sock, skb->data, skb->len);
+		if (ret == -EAGAIN) {
+			skb_queue_head(transmit, skb);
 			break;
-		}
-
-		hidp_set_timer(session);
-		kfree_skb(skb);
-	}
-}
-
-static void hidp_process_ctrl_transmit(struct hidp_session *session)
-{
-	struct sk_buff *skb;
-
-	BT_DBG("session %p", session);
-
-	while ((skb = skb_dequeue(&session->ctrl_transmit))) {
-		if (hidp_send_frame(session->ctrl_sock, skb->data, skb->len) < 0) {
-			skb_queue_head(&session->ctrl_transmit, skb);
+		} else if (ret < 0) {
+			hidp_session_terminate(session);
+			kfree_skb(skb);
 			break;
 		}
 
@@ -673,121 +611,6 @@ static void hidp_process_ctrl_transmit(struct hidp_session *session)
 	}
 }
 
-static int hidp_session(void *arg)
-{
-	struct hidp_session *session = arg;
-	struct sock *ctrl_sk = session->ctrl_sock->sk;
-	struct sock *intr_sk = session->intr_sock->sk;
-	struct sk_buff *skb;
-	wait_queue_t ctrl_wait, intr_wait;
-
-	BT_DBG("session %p", session);
-
-	__module_get(THIS_MODULE);
-	set_user_nice(current, -15);
-
-	init_waitqueue_entry(&ctrl_wait, current);
-	init_waitqueue_entry(&intr_wait, current);
-	add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
-	add_wait_queue(sk_sleep(intr_sk), &intr_wait);
-	session->waiting_for_startup = 0;
-	wake_up_interruptible(&session->startup_queue);
-	set_current_state(TASK_INTERRUPTIBLE);
-	while (!atomic_read(&session->terminate)) {
-		if (ctrl_sk->sk_state != BT_CONNECTED ||
-		    intr_sk->sk_state != BT_CONNECTED)
-			break;
-
-		while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) {
-			skb_orphan(skb);
-			if (!skb_linearize(skb))
-				hidp_recv_intr_frame(session, skb);
-			else
-				kfree_skb(skb);
-		}
-
-		hidp_process_intr_transmit(session);
-
-		while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) {
-			skb_orphan(skb);
-			if (!skb_linearize(skb))
-				hidp_recv_ctrl_frame(session, skb);
-			else
-				kfree_skb(skb);
-		}
-
-		hidp_process_ctrl_transmit(session);
-
-		schedule();
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk_sleep(intr_sk), &intr_wait);
-	remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
-
-	clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags);
-	clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags);
-	wake_up_interruptible(&session->report_queue);
-
-	down_write(&hidp_session_sem);
-
-	hidp_del_timer(session);
-
-	if (session->input) {
-		input_unregister_device(session->input);
-		session->input = NULL;
-	}
-
-	if (session->hid) {
-		hid_destroy_device(session->hid);
-		session->hid = NULL;
-	}
-
-	/* Wakeup user-space polling for socket errors */
-	session->intr_sock->sk->sk_err = EUNATCH;
-	session->ctrl_sock->sk->sk_err = EUNATCH;
-
-	hidp_schedule(session);
-
-	fput(session->intr_sock->file);
-
-	wait_event_timeout(*(sk_sleep(ctrl_sk)),
-		(ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500));
-
-	fput(session->ctrl_sock->file);
-
-	__hidp_unlink_session(session);
-
-	up_write(&hidp_session_sem);
-
-	kfree(session->rd_data);
-	kfree(session);
-	module_put_and_exit(0);
-	return 0;
-}
-
-static struct hci_conn *hidp_get_connection(struct hidp_session *session)
-{
-	bdaddr_t *src = &bt_sk(session->ctrl_sock->sk)->src;
-	bdaddr_t *dst = &bt_sk(session->ctrl_sock->sk)->dst;
-	struct hci_conn *conn;
-	struct hci_dev *hdev;
-
-	hdev = hci_get_route(dst, src);
-	if (!hdev)
-		return NULL;
-
-	hci_dev_lock(hdev);
-	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
-	if (conn)
-		hci_conn_hold_device(conn);
-	hci_dev_unlock(hdev);
-
-	hci_dev_put(hdev);
-
-	return conn;
-}
-
 static int hidp_setup_input(struct hidp_session *session,
 			    struct hidp_connadd_req *req)
 {
@@ -835,7 +658,7 @@ static int hidp_setup_input(struct hidp_session *session,
 		input->relbit[0] |= BIT_MASK(REL_WHEEL);
 	}
 
-	input->dev.parent = &session->conn->dev;
+	input->dev.parent = &session->conn->hcon->dev;
 
 	input->event = hidp_input_event;
 
@@ -894,7 +717,6 @@ static struct hid_ll_driver hidp_hid_driver = {
 	.stop = hidp_stop,
 	.open = hidp_open,
 	.close = hidp_close,
-	.hidinput_input_event = hidp_hidinput_event,
 };
 
 /* This function sets up the hid device. It does not add it
@@ -939,7 +761,7 @@ static int hidp_setup_hid(struct hidp_session *session,
 	snprintf(hid->uniq, sizeof(hid->uniq), "%pMR",
 		 &bt_sk(session->ctrl_sock->sk)->dst);
 
-	hid->dev.parent = &session->conn->dev;
+	hid->dev.parent = &session->conn->hcon->dev;
 	hid->ll_driver = &hidp_hid_driver;
 
 	hid->hid_get_raw_report = hidp_get_raw_report;
@@ -961,80 +783,217 @@ fault:
 	return err;
 }
 
-int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock)
+/* initialize session devices */
+static int hidp_session_dev_init(struct hidp_session *session,
+				 struct hidp_connadd_req *req)
 {
-	struct hidp_session *session, *s;
-	int vendor, product;
-	int err;
+	int ret;
 
-	BT_DBG("");
+	if (req->rd_size > 0) {
+		ret = hidp_setup_hid(session, req);
+		if (ret && ret != -ENODEV)
+			return ret;
+	}
 
-	if (bacmp(&bt_sk(ctrl_sock->sk)->src, &bt_sk(intr_sock->sk)->src) ||
-	    bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst))
-		return -ENOTUNIQ;
+	if (!session->hid) {
+		ret = hidp_setup_input(session, req);
+		if (ret < 0)
+			return ret;
+	}
 
-	BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size);
+	return 0;
+}
 
-	down_write(&hidp_session_sem);
+/* destroy session devices */
+static void hidp_session_dev_destroy(struct hidp_session *session)
+{
+	if (session->hid)
+		put_device(&session->hid->dev);
+	else if (session->input)
+		input_put_device(session->input);
 
-	s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst);
-	if (s && s->state == BT_CONNECTED) {
-		up_write(&hidp_session_sem);
-		return -EEXIST;
-	}
+	kfree(session->rd_data);
+	session->rd_data = NULL;
+}
 
-	session = kzalloc(sizeof(struct hidp_session), GFP_KERNEL);
-	if (!session) {
-		up_write(&hidp_session_sem);
-		return -ENOMEM;
-	}
+/* add HID/input devices to their underlying bus systems */
+static int hidp_session_dev_add(struct hidp_session *session)
+{
+	int ret;
 
-	bacpy(&session->bdaddr, &bt_sk(ctrl_sock->sk)->dst);
+	/* Both HID and input systems drop a ref-count when unregistering the
+	 * device but they don't take a ref-count when registering them. Work
+	 * around this by explicitly taking a refcount during registration
+	 * which is dropped automatically by unregistering the devices. */
 
-	session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl_sock->sk)->chan->omtu,
-				  l2cap_pi(ctrl_sock->sk)->chan->imtu);
-	session->intr_mtu = min_t(uint, l2cap_pi(intr_sock->sk)->chan->omtu,
-				  l2cap_pi(intr_sock->sk)->chan->imtu);
+	if (session->hid) {
+		ret = hid_add_device(session->hid);
+		if (ret)
+			return ret;
+		get_device(&session->hid->dev);
+	} else if (session->input) {
+		ret = input_register_device(session->input);
+		if (ret)
+			return ret;
+		input_get_device(session->input);
+	}
 
-	BT_DBG("ctrl mtu %d intr mtu %d", session->ctrl_mtu, session->intr_mtu);
+	return 0;
+}
 
-	session->ctrl_sock = ctrl_sock;
-	session->intr_sock = intr_sock;
-	session->state = BT_CONNECTED;
+/* remove HID/input devices from their bus systems */
+static void hidp_session_dev_del(struct hidp_session *session)
+{
+	if (session->hid)
+		hid_destroy_device(session->hid);
+	else if (session->input)
+		input_unregister_device(session->input);
+}
 
-	session->conn = hidp_get_connection(session);
-	if (!session->conn) {
-		err = -ENOTCONN;
-		goto failed;
-	}
+/*
+ * Create new session object
+ * Allocate session object, initialize static fields, copy input data into the
+ * object and take a reference to all sub-objects.
+ * This returns 0 on success and puts a pointer to the new session object in
+ * \out. Otherwise, an error code is returned.
+ * The new session object has an initial ref-count of 1.
+ */
+static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr,
+			    struct socket *ctrl_sock,
+			    struct socket *intr_sock,
+			    struct hidp_connadd_req *req,
+			    struct l2cap_conn *conn)
+{
+	struct hidp_session *session;
+	int ret;
+	struct bt_sock *ctrl, *intr;
+
+	ctrl = bt_sk(ctrl_sock->sk);
+	intr = bt_sk(intr_sock->sk);
 
-	setup_timer(&session->timer, hidp_idle_timeout, (unsigned long)session);
+	session = kzalloc(sizeof(*session), GFP_KERNEL);
+	if (!session)
+		return -ENOMEM;
 
+	/* object and runtime management */
+	kref_init(&session->ref);
+	atomic_set(&session->state, HIDP_SESSION_IDLING);
+	init_waitqueue_head(&session->state_queue);
+	session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
+
+	/* connection management */
+	bacpy(&session->bdaddr, bdaddr);
+	session->conn = conn;
+	session->user.probe = hidp_session_probe;
+	session->user.remove = hidp_session_remove;
+	session->ctrl_sock = ctrl_sock;
+	session->intr_sock = intr_sock;
 	skb_queue_head_init(&session->ctrl_transmit);
 	skb_queue_head_init(&session->intr_transmit);
+	session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl)->chan->omtu,
+				  l2cap_pi(ctrl)->chan->imtu);
+	session->intr_mtu = min_t(uint, l2cap_pi(intr)->chan->omtu,
+				  l2cap_pi(intr)->chan->imtu);
+	session->idle_to = req->idle_to;
 
+	/* device management */
+	setup_timer(&session->timer, hidp_idle_timeout,
+		    (unsigned long)session);
+
+	/* session data */
 	mutex_init(&session->report_mutex);
 	init_waitqueue_head(&session->report_queue);
-	init_waitqueue_head(&session->startup_queue);
-	session->waiting_for_startup = 1;
-	session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID);
-	session->idle_to = req->idle_to;
 
-	__hidp_link_session(session);
+	ret = hidp_session_dev_init(session, req);
+	if (ret)
+		goto err_free;
 
-	if (req->rd_size > 0) {
-		err = hidp_setup_hid(session, req);
-		if (err && err != -ENODEV)
-			goto purge;
-	}
+	l2cap_conn_get(session->conn);
+	get_file(session->intr_sock->file);
+	get_file(session->ctrl_sock->file);
+	*out = session;
+	return 0;
 
-	if (!session->hid) {
-		err = hidp_setup_input(session, req);
-		if (err < 0)
-			goto purge;
+err_free:
+	kfree(session);
+	return ret;
+}
+
+/* increase ref-count of the given session by one */
+static void hidp_session_get(struct hidp_session *session)
+{
+	kref_get(&session->ref);
+}
+
+/* release callback */
+static void session_free(struct kref *ref)
+{
+	struct hidp_session *session = container_of(ref, struct hidp_session,
+						    ref);
+
+	hidp_session_dev_destroy(session);
+	skb_queue_purge(&session->ctrl_transmit);
+	skb_queue_purge(&session->intr_transmit);
+	fput(session->intr_sock->file);
+	fput(session->ctrl_sock->file);
+	l2cap_conn_put(session->conn);
+	kfree(session);
+}
+
+/* decrease ref-count of the given session by one */
+static void hidp_session_put(struct hidp_session *session)
+{
+	kref_put(&session->ref, session_free);
+}
+
+/*
+ * Search the list of active sessions for a session with target address
+ * \bdaddr. You must hold at least a read-lock on \hidp_session_sem. As long as
+ * you do not release this lock, the session objects cannot vanish and you can
+ * safely take a reference to the session yourself.
+ */
+static struct hidp_session *__hidp_session_find(const bdaddr_t *bdaddr)
+{
+	struct hidp_session *session;
+
+	list_for_each_entry(session, &hidp_session_list, list) {
+		if (!bacmp(bdaddr, &session->bdaddr))
+			return session;
 	}
 
-	hidp_set_timer(session);
+	return NULL;
+}
+
+/*
+ * Same as __hidp_session_find() but no locks must be held. This also takes a
+ * reference of the returned session (if non-NULL) so you must drop this
+ * reference if you no longer use the object.
+ */
+static struct hidp_session *hidp_session_find(const bdaddr_t *bdaddr)
+{
+	struct hidp_session *session;
+
+	down_read(&hidp_session_sem);
+
+	session = __hidp_session_find(bdaddr);
+	if (session)
+		hidp_session_get(session);
+
+	up_read(&hidp_session_sem);
+
+	return session;
+}
+
+/*
+ * Start session synchronously
+ * This starts a session thread and waits until initialization
+ * is done or returns an error if it couldn't be started.
+ * If this returns 0 the session thread is up and running. You must call
+ * hidp_session_stop_sync() before deleting any runtime resources.
+ */
+static int hidp_session_start_sync(struct hidp_session *session)
+{
+	unsigned int vendor, product;
 
 	if (session->hid) {
 		vendor = session->hid->vendor;
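The hunk above replaces the old ad-hoc session lifetime with a kref: hidp_session_new() hands out the initial reference, every further holder pairs hidp_session_get() with hidp_session_put(), and session_free() runs exactly once when the last reference drops. A minimal, hypothetical caller to show the intended discipline (hidp_example_user() is not part of the patch):

/* Illustration only, not in-tree code. */
static void hidp_example_user(const bdaddr_t *bdaddr)
{
	struct hidp_session *session;

	session = hidp_session_find(bdaddr);	/* takes a reference */
	if (!session)
		return;

	/* session (and its sockets, conn and devices) cannot vanish here */

	hidp_session_put(session);	/* last put invokes session_free() */
}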
@@ -1047,98 +1006,320 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock)
 		product = 0x0000;
 	}
 
-	session->task = kthread_run(hidp_session, session, "khidpd_%04x%04x",
-				    vendor, product);
-	if (IS_ERR(session->task)) {
-		err = PTR_ERR(session->task);
-		goto unlink;
-	}
+	session->task = kthread_run(hidp_session_thread, session,
+				    "khidpd_%04x%04x", vendor, product);
+	if (IS_ERR(session->task))
+		return PTR_ERR(session->task);
 
-	while (session->waiting_for_startup) {
-		wait_event_interruptible(session->startup_queue,
-					 !session->waiting_for_startup);
-	}
+	while (atomic_read(&session->state) <= HIDP_SESSION_IDLING)
+		wait_event(session->state_queue,
+			   atomic_read(&session->state) > HIDP_SESSION_IDLING);
 
-	if (session->hid)
-		err = hid_add_device(session->hid);
-	else
-		err = input_register_device(session->input);
+	return 0;
+}
 
-	if (err < 0) {
-		atomic_inc(&session->terminate);
-		wake_up_process(session->task);
-		up_write(&hidp_session_sem);
-		return err;
-	}
+/*
+ * Terminate session thread
+ * Wake up session thread and notify it to stop. This is asynchronous and
+ * returns immediately. Call this whenever a runtime error occurs and you want
+ * the session to stop.
+ * Note: wake_up_process() performs any necessary memory-barriers for us.
+ */
+static void hidp_session_terminate(struct hidp_session *session)
+{
+	atomic_inc(&session->terminate);
+	wake_up_process(session->task);
+}
 
-	if (session->input) {
-		hidp_send_ctrl_message(session,
-			HIDP_TRANS_SET_PROTOCOL | HIDP_PROTO_BOOT, NULL, 0);
-		session->flags |= (1 << HIDP_BOOT_PROTOCOL_MODE);
+/*
+ * Probe HIDP session
+ * This is called from the l2cap_conn core when our l2cap_user object is bound
+ * to the hci-connection. We get the session via the \user object and can now
+ * start the session thread, register the HID/input devices and link it into
+ * the global session list.
+ * The global session-list owns its own reference to the session object so you
+ * can drop your own reference after registering the l2cap_user object.
+ */
+static int hidp_session_probe(struct l2cap_conn *conn,
+			      struct l2cap_user *user)
+{
+	struct hidp_session *session = container_of(user,
+						    struct hidp_session,
+						    user);
+	struct hidp_session *s;
+	int ret;
 
-		session->leds = 0xff;
-		hidp_input_event(session->input, EV_LED, 0, 0);
+	down_write(&hidp_session_sem);
+
+	/* check that no other session for this device exists */
+	s = __hidp_session_find(&session->bdaddr);
+	if (s) {
+		ret = -EEXIST;
+		goto out_unlock;
 	}
 
+	ret = hidp_session_start_sync(session);
+	if (ret)
+		goto out_unlock;
+
+	ret = hidp_session_dev_add(session);
+	if (ret)
+		goto out_stop;
+
+	hidp_session_get(session);
+	list_add(&session->list, &hidp_session_list);
+	ret = 0;
+	goto out_unlock;
+
+out_stop:
+	hidp_session_terminate(session);
+out_unlock:
 	up_write(&hidp_session_sem);
-	return 0;
+	return ret;
+}
 
-unlink:
+/*
+ * Remove HIDP session
+ * Called from the l2cap_conn core when either we explicitly unregistered
+ * the l2cap_user object or if the underlying connection is shut down.
+ * We signal the hidp-session thread to shut down, unregister the HID/input
+ * devices and unlink the session from the global list.
+ * This drops the reference to the session that is owned by the global
+ * session-list.
+ * Note: We _must_ not synchronously wait for the session-thread to shut down.
+ * This is because the session-thread might be waiting for an HCI lock that is
+ * held while we are called. Therefore, we only unregister the devices and
+ * notify the session-thread to terminate. The thread itself owns a reference
+ * to the session object so it can safely shut down.
+ */
+static void hidp_session_remove(struct l2cap_conn *conn,
+				struct l2cap_user *user)
+{
+	struct hidp_session *session = container_of(user,
+						    struct hidp_session,
+						    user);
+
+	down_write(&hidp_session_sem);
+
+	hidp_session_terminate(session);
+	hidp_session_dev_del(session);
+	list_del(&session->list);
+
+	up_write(&hidp_session_sem);
+
+	hidp_session_put(session);
+}
+
1113/*
1114 * Session Worker
1115 * This performs the actual main-loop of the HIDP worker. We first check
1116 * whether the underlying connection is still alive, then parse all pending
1117 * messages and finally send all outstanding messages.
1118 */
1119static void hidp_session_run(struct hidp_session *session)
1120{
1121 struct sock *ctrl_sk = session->ctrl_sock->sk;
1122 struct sock *intr_sk = session->intr_sock->sk;
1123 struct sk_buff *skb;
1124
1125 for (;;) {
1126 /*
1127 * This thread can be woken up two ways:
1128 * - You call hidp_session_terminate() which sets the
1129 * session->terminate flag and wakes this thread up.
1130 * - Via modifying the socket state of ctrl/intr_sock. This
1131 * thread is woken up by ->sk_state_changed().
1132 *
1133 * Note: set_current_state() performs any necessary
1134 * memory-barriers for us.
1135 */
1136 set_current_state(TASK_INTERRUPTIBLE);
1137
1138 if (atomic_read(&session->terminate))
1139 break;
1140
1141 if (ctrl_sk->sk_state != BT_CONNECTED ||
1142 intr_sk->sk_state != BT_CONNECTED)
1143 break;
1144
1145 /* parse incoming intr-skbs */
1146 while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) {
1147 skb_orphan(skb);
1148 if (!skb_linearize(skb))
1149 hidp_recv_intr_frame(session, skb);
1150 else
1151 kfree_skb(skb);
1152 }
1153
1154 /* send pending intr-skbs */
1155 hidp_process_transmit(session, &session->intr_transmit,
1156 session->intr_sock);
1157
1158 /* parse incoming ctrl-skbs */
1159 while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) {
1160 skb_orphan(skb);
1161 if (!skb_linearize(skb))
1162 hidp_recv_ctrl_frame(session, skb);
1163 else
1164 kfree_skb(skb);
1165 }
1166
1167 /* send pending ctrl-skbs */
1168 hidp_process_transmit(session, &session->ctrl_transmit,
1169 session->ctrl_sock);
1170
1171 schedule();
1172 }
1173
1174 atomic_inc(&session->terminate);
1175 set_current_state(TASK_RUNNING);
1176}
1177
1178/*
1179 * HIDP session thread
1180 * This thread runs the I/O for a single HIDP session. Startup is synchronous
1181 * which allows us to take references to ourself here instead of doing that in
1182 * the caller.
1183 * When we are ready to run we notify the caller and call hidp_session_run().
1184 */
1185static int hidp_session_thread(void *arg)
1186{
1187 struct hidp_session *session = arg;
1188 wait_queue_t ctrl_wait, intr_wait;
1189
1190 BT_DBG("session %p", session);
1191
1192 /* initialize runtime environment */
1193 hidp_session_get(session);
1194 __module_get(THIS_MODULE);
1195 set_user_nice(current, -15);
1196 hidp_set_timer(session);
1197
1198 init_waitqueue_entry(&ctrl_wait, current);
1199 init_waitqueue_entry(&intr_wait, current);
1200 add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
1201 add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
1202 /* This memory barrier is paired with wq_has_sleeper(). See
1203 * sock_poll_wait() for more information why this is needed. */
1204 smp_mb();
1205
1206 /* notify synchronous startup that we're ready */
1207 atomic_inc(&session->state);
1208 wake_up(&session->state_queue);
1209
1210 /* run session */
1211 hidp_session_run(session);
1212
1213 /* cleanup runtime environment */
1214 remove_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
1215 remove_wait_queue(sk_sleep(session->intr_sock->sk), &ctrl_wait);
1216 wake_up_interruptible(&session->report_queue);
1087 hidp_del_timer(session); 1217 hidp_del_timer(session);
1088 1218
1089 if (session->input) { 1219 /*
1090 input_unregister_device(session->input); 1220 * If we stopped ourself due to any internal signal, we should try to
1091 session->input = NULL; 1221 * unregister our own session here to avoid having it linger until the
1222 * parent l2cap_conn dies or user-space cleans it up.
1223 * This does not deadlock as we don't do any synchronous shutdown.
1224 * Instead, this call has the same semantics as if user-space tried to
1225 * delete the session.
1226 */
1227 l2cap_unregister_user(session->conn, &session->user);
1228 hidp_session_put(session);
1229
1230 module_put_and_exit(0);
1231 return 0;
1232}
1233
+static int hidp_verify_sockets(struct socket *ctrl_sock,
+			       struct socket *intr_sock)
+{
+	struct bt_sock *ctrl, *intr;
+	struct hidp_session *session;
+
+	if (!l2cap_is_socket(ctrl_sock) || !l2cap_is_socket(intr_sock))
+		return -EINVAL;
+
+	ctrl = bt_sk(ctrl_sock->sk);
+	intr = bt_sk(intr_sock->sk);
+
+	if (bacmp(&ctrl->src, &intr->src) || bacmp(&ctrl->dst, &intr->dst))
+		return -ENOTUNIQ;
+	if (ctrl->sk.sk_state != BT_CONNECTED ||
+	    intr->sk.sk_state != BT_CONNECTED)
+		return -EBADFD;
+
+	/* early session check, we check again during session registration */
+	session = hidp_session_find(&ctrl->dst);
+	if (session) {
+		hidp_session_put(session);
+		return -EEXIST;
 	}
 
-	if (session->hid) {
-		hid_destroy_device(session->hid);
-		session->hid = NULL;
+	return 0;
+}
+
+int hidp_connection_add(struct hidp_connadd_req *req,
+			struct socket *ctrl_sock,
+			struct socket *intr_sock)
+{
+	struct hidp_session *session;
+	struct l2cap_conn *conn;
+	struct l2cap_chan *chan = l2cap_pi(ctrl_sock->sk)->chan;
+	int ret;
+
+	ret = hidp_verify_sockets(ctrl_sock, intr_sock);
+	if (ret)
+		return ret;
+
+	conn = NULL;
+	l2cap_chan_lock(chan);
+	if (chan->conn) {
+		l2cap_conn_get(chan->conn);
+		conn = chan->conn;
 	}
+	l2cap_chan_unlock(chan);
 
-	kfree(session->rd_data);
-	session->rd_data = NULL;
+	if (!conn)
+		return -EBADFD;
 
-purge:
-	__hidp_unlink_session(session);
+	ret = hidp_session_new(&session, &bt_sk(ctrl_sock->sk)->dst, ctrl_sock,
+			       intr_sock, req, conn);
+	if (ret)
+		goto out_conn;
 
-	skb_queue_purge(&session->ctrl_transmit);
-	skb_queue_purge(&session->intr_transmit);
+	ret = l2cap_register_user(conn, &session->user);
+	if (ret)
+		goto out_session;
 
-failed:
-	up_write(&hidp_session_sem);
+	ret = 0;
 
-	kfree(session);
-	return err;
+out_session:
+	hidp_session_put(session);
+out_conn:
+	l2cap_conn_put(conn);
+	return ret;
 }
 
-int hidp_del_connection(struct hidp_conndel_req *req)
+int hidp_connection_del(struct hidp_conndel_req *req)
 {
 	struct hidp_session *session;
-	int err = 0;
 
-	BT_DBG("");
+	session = hidp_session_find(&req->bdaddr);
+	if (!session)
+		return -ENOENT;
 
-	down_read(&hidp_session_sem);
+	if (req->flags & (1 << HIDP_VIRTUAL_CABLE_UNPLUG))
+		hidp_send_ctrl_message(session,
+				       HIDP_TRANS_HID_CONTROL |
+					 HIDP_CTRL_VIRTUAL_CABLE_UNPLUG,
+				       NULL, 0);
+	else
+		l2cap_unregister_user(session->conn, &session->user);
 
-	session = __hidp_get_session(&req->bdaddr);
-	if (session) {
-		if (req->flags & (1 << HIDP_VIRTUAL_CABLE_UNPLUG)) {
-			hidp_send_ctrl_message(session,
-				HIDP_TRANS_HID_CONTROL | HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, NULL, 0);
-		} else {
-			/* Flush the transmit queues */
-			skb_queue_purge(&session->ctrl_transmit);
-			skb_queue_purge(&session->intr_transmit);
-
-			atomic_inc(&session->terminate);
-			wake_up_process(session->task);
-		}
-	} else
-		err = -ENOENT;
+	hidp_session_put(session);
 
-	up_read(&hidp_session_sem);
-	return err;
+	return 0;
 }
 
 int hidp_get_connlist(struct hidp_connlist_req *req)
@@ -1153,7 +1334,7 @@ int hidp_get_connlist(struct hidp_connlist_req *req)
 	list_for_each_entry(session, &hidp_session_list, list) {
 		struct hidp_conninfo ci;
 
-		__hidp_copy_session(session, &ci);
+		hidp_copy_session(session, &ci);
 
 		if (copy_to_user(req->ci, &ci, sizeof(ci))) {
 			err = -EFAULT;
@@ -1174,18 +1355,14 @@ int hidp_get_connlist(struct hidp_connlist_req *req)
 int hidp_get_conninfo(struct hidp_conninfo *ci)
 {
 	struct hidp_session *session;
-	int err = 0;
-
-	down_read(&hidp_session_sem);
 
-	session = __hidp_get_session(&ci->bdaddr);
-	if (session)
-		__hidp_copy_session(session, ci);
-	else
-		err = -ENOENT;
+	session = hidp_session_find(&ci->bdaddr);
+	if (session) {
+		hidp_copy_session(session, ci);
+		hidp_session_put(session);
+	}
 
-	up_read(&hidp_session_sem);
-	return err;
+	return session ? 0 : -ENOENT;
 }
 
 static int __init hidp_init(void)
@@ -1204,6 +1381,7 @@ module_init(hidp_init);
 module_exit(hidp_exit);
 
 MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
+MODULE_AUTHOR("David Herrmann <dh.herrmann@gmail.com>");
 MODULE_DESCRIPTION("Bluetooth HIDP ver " VERSION);
 MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
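hidp_session_run() and hidp_session_thread() above stop the worker with an atomic terminate counter plus wake_up_process() rather than kthread_stop(): the loop publishes TASK_INTERRUPTIBLE before re-checking the counter, so a wake-up arriving between the check and the sleep cannot be lost. Stripped of the HIDP specifics, the pattern looks like this (illustrative sketch, not from the patch):

/* Generic form of the stop protocol used by the session thread above. */
static int example_worker(void *arg)
{
	atomic_t *stop = arg;

	for (;;) {
		/* publish "about to sleep" before testing the stop flag;
		 * a concurrent wake_up_process() then makes schedule()
		 * return immediately instead of sleeping forever */
		set_current_state(TASK_INTERRUPTIBLE);

		if (atomic_read(stop))
			break;

		/* ... one round of work ... */

		schedule();
	}
	set_current_state(TASK_RUNNING);

	return 0;
}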
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index af1bcc823f26..6162ce8606ac 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -24,7 +24,9 @@
 #define __HIDP_H
 
 #include <linux/types.h>
+#include <linux/kref.h>
 #include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/l2cap.h>
 
 /* HIDP header masks */
 #define HIDP_HEADER_TRANS_MASK	0xf0
@@ -119,43 +121,52 @@ struct hidp_connlist_req {
 	struct hidp_conninfo __user *ci;
 };
 
-int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
-int hidp_del_connection(struct hidp_conndel_req *req);
+int hidp_connection_add(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock);
+int hidp_connection_del(struct hidp_conndel_req *req);
 int hidp_get_connlist(struct hidp_connlist_req *req);
 int hidp_get_conninfo(struct hidp_conninfo *ci);
 
+enum hidp_session_state {
+	HIDP_SESSION_IDLING,
+	HIDP_SESSION_RUNNING,
+};
+
 /* HIDP session defines */
 struct hidp_session {
 	struct list_head list;
+	struct kref ref;
 
-	struct hci_conn *conn;
+	/* runtime management */
+	atomic_t state;
+	wait_queue_head_t state_queue;
+	atomic_t terminate;
+	struct task_struct *task;
+	unsigned long flags;
 
+	/* connection management */
+	bdaddr_t bdaddr;
+	struct l2cap_conn *conn;
+	struct l2cap_user user;
 	struct socket *ctrl_sock;
 	struct socket *intr_sock;
-
-	bdaddr_t bdaddr;
-
-	unsigned long state;
-	unsigned long flags;
-	unsigned long idle_to;
-
+	struct sk_buff_head ctrl_transmit;
+	struct sk_buff_head intr_transmit;
 	uint ctrl_mtu;
 	uint intr_mtu;
+	unsigned long idle_to;
 
-	atomic_t terminate;
-	struct task_struct *task;
-
-	unsigned char keys[8];
-	unsigned char leds;
-
+	/* device management */
 	struct input_dev *input;
-
 	struct hid_device *hid;
-
 	struct timer_list timer;
 
-	struct sk_buff_head ctrl_transmit;
-	struct sk_buff_head intr_transmit;
+	/* Report descriptor */
+	__u8 *rd_data;
+	uint rd_size;
+
+	/* session data */
+	unsigned char keys[8];
+	unsigned char leds;
 
 	/* Used in hidp_get_raw_report() */
 	int waiting_report_type; /* HIDP_DATA_RTYPE_* */
@@ -166,24 +177,8 @@ struct hidp_session {
 
 	/* Used in hidp_output_raw_report() */
 	int output_report_success; /* boolean */
-
-	/* Report descriptor */
-	__u8 *rd_data;
-	uint rd_size;
-
-	wait_queue_head_t startup_queue;
-	int waiting_for_startup;
 };
 
-static inline void hidp_schedule(struct hidp_session *session)
-{
-	struct sock *ctrl_sk = session->ctrl_sock->sk;
-	struct sock *intr_sk = session->intr_sock->sk;
-
-	wake_up_interruptible(sk_sleep(ctrl_sk));
-	wake_up_interruptible(sk_sleep(intr_sk));
-}
-
 /* HIDP init defines */
 extern int __init hidp_init_sockets(void);
 extern void __exit hidp_cleanup_sockets(void);
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 82a829d90b0f..cb3fdde1968a 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -77,21 +77,12 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 			return err;
 		}
 
-		if (csock->sk->sk_state != BT_CONNECTED ||
-		    isock->sk->sk_state != BT_CONNECTED) {
-			sockfd_put(csock);
-			sockfd_put(isock);
-			return -EBADFD;
-		}
+		err = hidp_connection_add(&ca, csock, isock);
+		if (!err && copy_to_user(argp, &ca, sizeof(ca)))
+			err = -EFAULT;
 
-		err = hidp_add_connection(&ca, csock, isock);
-		if (!err) {
-			if (copy_to_user(argp, &ca, sizeof(ca)))
-				err = -EFAULT;
-		} else {
-			sockfd_put(csock);
-			sockfd_put(isock);
-		}
+		sockfd_put(csock);
+		sockfd_put(isock);
 
 		return err;
 
@@ -102,7 +93,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 		if (copy_from_user(&cd, argp, sizeof(cd)))
 			return -EFAULT;
 
-		return hidp_del_connection(&cd);
+		return hidp_connection_del(&cd);
 
 	case HIDPGETCONNLIST:
 		if (copy_from_user(&cl, argp, sizeof(cl)))
@@ -284,7 +275,7 @@ int __init hidp_init_sockets(void)
 		goto error;
 	}
 
-	err = bt_procfs_init(THIS_MODULE, &init_net, "hidp", &hidp_sk_list, NULL);
+	err = bt_procfs_init(&init_net, "hidp", &hidp_sk_list, NULL);
 	if (err < 0) {
 		BT_ERR("Failed to create HIDP proc file");
 		bt_sock_unregister(BTPROTO_HIDP);
@@ -296,7 +287,6 @@ int __init hidp_init_sockets(void)
 	return 0;
 
 error:
-	BT_ERR("Can't register HIDP socket");
 	proto_unregister(&hidp_proto);
 	return err;
 }
@@ -304,8 +294,6 @@ error:
 void __exit hidp_cleanup_sockets(void)
 {
 	bt_procfs_cleanup(&init_net, "hidp");
-	if (bt_sock_unregister(BTPROTO_HIDP) < 0)
-		BT_ERR("Can't unregister HIDP socket");
-
+	bt_sock_unregister(BTPROTO_HIDP);
 	proto_unregister(&hidp_proto);
 }
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 7c7e9321f1ea..a76d1ac0321b 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -571,7 +571,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
 	chan->conn = NULL;
 
 	if (chan->chan_type != L2CAP_CHAN_CONN_FIX_A2MP)
-		hci_conn_put(conn->hcon);
+		hci_conn_drop(conn->hcon);
 
 	if (mgr && mgr->bredr_chan == chan)
 		mgr->bredr_chan = NULL;
@@ -1446,6 +1446,89 @@ static void l2cap_info_timeout(struct work_struct *work)
 	l2cap_conn_start(conn);
 }
 
+/*
+ * l2cap_user
+ * External modules can register l2cap_user objects on l2cap_conn. The ->probe
+ * callback is called during registration. The ->remove callback is called
+ * during unregistration.
+ * An l2cap_user object is unregistered either explicitly or automatically when
+ * the underlying l2cap_conn object is deleted. This guarantees that l2cap->hcon,
+ * l2cap->hchan, .. are valid as long as the remove callback hasn't been called.
+ * External modules must own a reference to the l2cap_conn object if they intend
+ * to call l2cap_unregister_user(). The l2cap_conn object might get destroyed at
+ * any time if they don't.
+ */
+
+int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user)
+{
+	struct hci_dev *hdev = conn->hcon->hdev;
+	int ret;
+
+	/* We need to check whether l2cap_conn is registered. If it is not, we
+	 * must not register the l2cap_user. l2cap_conn_del() is used to
+	 * unregister l2cap_conn objects, but doesn't provide its own locking.
+	 * Instead, it relies on the parent hci_conn object to be locked. This
+	 * itself relies on the hci_dev object to be locked. So we must lock
+	 * the hci device here, too. */
+
+	hci_dev_lock(hdev);
+
+	if (user->list.next || user->list.prev) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* conn->hchan is NULL after l2cap_conn_del() was called */
+	if (!conn->hchan) {
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	ret = user->probe(conn, user);
+	if (ret)
+		goto out_unlock;
+
+	list_add(&user->list, &conn->users);
+	ret = 0;
+
+out_unlock:
+	hci_dev_unlock(hdev);
+	return ret;
+}
+EXPORT_SYMBOL(l2cap_register_user);
+
+void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user)
+{
+	struct hci_dev *hdev = conn->hcon->hdev;
+
+	hci_dev_lock(hdev);
+
+	if (!user->list.next || !user->list.prev)
+		goto out_unlock;
+
+	list_del(&user->list);
+	user->list.next = NULL;
+	user->list.prev = NULL;
+	user->remove(conn, user);
+
+out_unlock:
+	hci_dev_unlock(hdev);
+}
+EXPORT_SYMBOL(l2cap_unregister_user);
+
+static void l2cap_unregister_all_users(struct l2cap_conn *conn)
+{
+	struct l2cap_user *user;
+
+	while (!list_empty(&conn->users)) {
+		user = list_first_entry(&conn->users, struct l2cap_user, list);
+		list_del(&user->list);
+		user->list.next = NULL;
+		user->list.prev = NULL;
+		user->remove(conn, user);
+	}
+}
+
 static void l2cap_conn_del(struct hci_conn *hcon, int err)
 {
 	struct l2cap_conn *conn = hcon->l2cap_data;
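For readers outside the Bluetooth core, the registration contract spelled out in the comment above looks like this from a consumer's side. The names below are hypothetical; hidp_session_probe()/hidp_session_remove() earlier in this commit are the real in-tree user:

/* Hypothetical consumer of the new l2cap_user API (sketch only). */
static int example_probe(struct l2cap_conn *conn, struct l2cap_user *user)
{
	/* set up per-connection state; a non-zero return aborts
	 * l2cap_register_user() */
	return 0;
}

static void example_remove(struct l2cap_conn *conn, struct l2cap_user *user)
{
	/* called on explicit unregister or when the connection dies;
	 * conn->hcon and conn->hchan are still valid at this point */
}

static int example_attach(struct l2cap_conn *conn, struct l2cap_user *user)
{
	int err;

	user->probe  = example_probe;
	user->remove = example_remove;

	/* hold a conn reference as long as we may unregister later */
	l2cap_conn_get(conn);
	err = l2cap_register_user(conn, user);
	if (err)
		l2cap_conn_put(conn);

	return err;
}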
@@ -1458,6 +1541,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
 
 	kfree_skb(conn->rx_skb);
 
+	l2cap_unregister_all_users(conn);
+
 	mutex_lock(&conn->chan_lock);
 
 	/* Kill channels */
@@ -1486,7 +1571,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
 	}
 
 	hcon->l2cap_data = NULL;
-	kfree(conn);
+	conn->hchan = NULL;
+	l2cap_conn_put(conn);
 }
 
 static void security_timeout(struct work_struct *work)
@@ -1502,12 +1588,12 @@ static void security_timeout(struct work_struct *work)
 	}
 }
 
-static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
+static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
 {
 	struct l2cap_conn *conn = hcon->l2cap_data;
 	struct hci_chan *hchan;
 
-	if (conn || status)
+	if (conn)
 		return conn;
 
 	hchan = hci_chan_create(hcon);
@@ -1520,8 +1606,10 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 		return NULL;
 	}
 
+	kref_init(&conn->ref);
 	hcon->l2cap_data = conn;
 	conn->hcon = hcon;
+	hci_conn_get(conn->hcon);
 	conn->hchan = hchan;
 
 	BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
@@ -1547,6 +1635,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 	mutex_init(&conn->chan_lock);
 
 	INIT_LIST_HEAD(&conn->chan_l);
+	INIT_LIST_HEAD(&conn->users);
 
 	if (hcon->type == LE_LINK)
 		INIT_DELAYED_WORK(&conn->security_timer, security_timeout);
@@ -1558,6 +1647,26 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 	return conn;
 }
 
+static void l2cap_conn_free(struct kref *ref)
+{
+	struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref);
+
+	hci_conn_put(conn->hcon);
+	kfree(conn);
+}
+
+void l2cap_conn_get(struct l2cap_conn *conn)
+{
+	kref_get(&conn->ref);
+}
+EXPORT_SYMBOL(l2cap_conn_get);
+
+void l2cap_conn_put(struct l2cap_conn *conn)
+{
+	kref_put(&conn->ref, l2cap_conn_free);
+}
+EXPORT_SYMBOL(l2cap_conn_put);
+
 /* ---- Socket interface ---- */
 
 /* Find socket with psm and source / destination bdaddr.
@@ -1695,9 +1804,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
 		goto done;
 	}
 
-	conn = l2cap_conn_add(hcon, 0);
+	conn = l2cap_conn_add(hcon);
 	if (!conn) {
-		hci_conn_put(hcon);
+		hci_conn_drop(hcon);
 		err = -ENOMEM;
 		goto done;
 	}
@@ -1707,7 +1816,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
 
 	if (!list_empty(&conn->chan_l)) {
 		err = -EBUSY;
-		hci_conn_put(hcon);
+		hci_conn_drop(hcon);
 	}
 
 	if (err)
@@ -6205,12 +6314,13 @@ drop:
 	kfree_skb(skb);
 }
 
-static void l2cap_att_channel(struct l2cap_conn *conn, u16 cid,
+static void l2cap_att_channel(struct l2cap_conn *conn,
 			      struct sk_buff *skb)
 {
 	struct l2cap_chan *chan;
 
-	chan = l2cap_global_chan_by_scid(0, cid, conn->src, conn->dst);
+	chan = l2cap_global_chan_by_scid(0, L2CAP_CID_LE_DATA,
+					 conn->src, conn->dst);
 	if (!chan)
 		goto drop;
 
@@ -6259,7 +6369,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)
 		break;
 
 	case L2CAP_CID_LE_DATA:
-		l2cap_att_channel(conn, cid, skb);
+		l2cap_att_channel(conn, skb);
 		break;
 
 	case L2CAP_CID_SMP:
@@ -6313,7 +6423,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
 	BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);
 
 	if (!status) {
-		conn = l2cap_conn_add(hcon, status);
+		conn = l2cap_conn_add(hcon);
 		if (conn)
 			l2cap_conn_ready(conn);
 	} else {
@@ -6482,7 +6592,7 @@ int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)
 		goto drop;
 
 	if (!conn)
-		conn = l2cap_conn_add(hcon, 0);
+		conn = l2cap_conn_add(hcon);
 
 	if (!conn)
 		goto drop;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 1bcfb8422fdc..36fed40c162c 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -43,6 +43,12 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent);
 static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
 				     int proto, gfp_t prio);
 
+bool l2cap_is_socket(struct socket *sock)
+{
+	return sock && sock->ops == &l2cap_sock_ops;
+}
+EXPORT_SYMBOL(l2cap_is_socket);
+
 static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
 {
 	struct sock *sk = sock->sk;
@@ -1292,7 +1298,7 @@ int __init l2cap_init_sockets(void)
 		goto error;
 	}
 
-	err = bt_procfs_init(THIS_MODULE, &init_net, "l2cap", &l2cap_sk_list,
+	err = bt_procfs_init(&init_net, "l2cap", &l2cap_sk_list,
 			     NULL);
 	if (err < 0) {
 		BT_ERR("Failed to create L2CAP proc file");
@@ -1312,8 +1318,6 @@ error:
 void l2cap_cleanup_sockets(void)
 {
 	bt_procfs_cleanup(&init_net, "l2cap");
-	if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
-		BT_ERR("L2CAP socket unregistration failed");
-
+	bt_sock_unregister(BTPROTO_L2CAP);
 	proto_unregister(&l2cap_proto);
 }
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 39395c7144aa..35fef22703e9 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -106,11 +106,10 @@ static const u16 mgmt_events[] = {
106 * These LE scan and inquiry parameters were chosen according to LE General 106 * These LE scan and inquiry parameters were chosen according to LE General
107 * Discovery Procedure specification. 107 * Discovery Procedure specification.
108 */ 108 */
109#define LE_SCAN_TYPE 0x01
110#define LE_SCAN_WIN 0x12 109#define LE_SCAN_WIN 0x12
111#define LE_SCAN_INT 0x12 110#define LE_SCAN_INT 0x12
112#define LE_SCAN_TIMEOUT_LE_ONLY 10240 /* TGAP(gen_disc_scan_min) */ 111#define LE_SCAN_TIMEOUT_LE_ONLY msecs_to_jiffies(10240)
113#define LE_SCAN_TIMEOUT_BREDR_LE 5120 /* TGAP(100)/2 */ 112#define LE_SCAN_TIMEOUT_BREDR_LE msecs_to_jiffies(5120)
114 113
115#define INQUIRY_LEN_BREDR 0x08 /* TGAP(100) */ 114#define INQUIRY_LEN_BREDR 0x08 /* TGAP(100) */
116#define INQUIRY_LEN_BREDR_LE 0x04 /* TGAP(100)/2 */ 115#define INQUIRY_LEN_BREDR_LE 0x04 /* TGAP(100)/2 */
@@ -384,7 +383,8 @@ static u32 get_supported_settings(struct hci_dev *hdev)
384 383
385 if (lmp_bredr_capable(hdev)) { 384 if (lmp_bredr_capable(hdev)) {
386 settings |= MGMT_SETTING_CONNECTABLE; 385 settings |= MGMT_SETTING_CONNECTABLE;
387 settings |= MGMT_SETTING_FAST_CONNECTABLE; 386 if (hdev->hci_ver >= BLUETOOTH_VER_1_2)
387 settings |= MGMT_SETTING_FAST_CONNECTABLE;
388 settings |= MGMT_SETTING_DISCOVERABLE; 388 settings |= MGMT_SETTING_DISCOVERABLE;
389 settings |= MGMT_SETTING_BREDR; 389 settings |= MGMT_SETTING_BREDR;
390 settings |= MGMT_SETTING_LINK_SECURITY; 390 settings |= MGMT_SETTING_LINK_SECURITY;
@@ -409,6 +409,9 @@ static u32 get_current_settings(struct hci_dev *hdev)
409 if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags)) 409 if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
410 settings |= MGMT_SETTING_CONNECTABLE; 410 settings |= MGMT_SETTING_CONNECTABLE;
411 411
412 if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
413 settings |= MGMT_SETTING_FAST_CONNECTABLE;
414
412 if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) 415 if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
413 settings |= MGMT_SETTING_DISCOVERABLE; 416 settings |= MGMT_SETTING_DISCOVERABLE;
414 417
@@ -591,32 +594,33 @@ static void create_eir(struct hci_dev *hdev, u8 *data)
 	ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));
 }
 
-static int update_eir(struct hci_dev *hdev)
+static void update_eir(struct hci_request *req)
 {
+	struct hci_dev *hdev = req->hdev;
 	struct hci_cp_write_eir cp;
 
 	if (!hdev_is_powered(hdev))
-		return 0;
+		return;
 
 	if (!lmp_ext_inq_capable(hdev))
-		return 0;
+		return;
 
 	if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
-		return 0;
+		return;
 
 	if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
-		return 0;
+		return;
 
 	memset(&cp, 0, sizeof(cp));
 
 	create_eir(hdev, cp.data);
 
 	if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0)
-		return 0;
+		return;
 
 	memcpy(hdev->eir, cp.data, sizeof(cp.data));
 
-	return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
+	hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
 }
 
 static u8 get_service_classes(struct hci_dev *hdev)
@@ -630,47 +634,48 @@ static u8 get_service_classes(struct hci_dev *hdev)
 	return val;
 }
 
-static int update_class(struct hci_dev *hdev)
+static void update_class(struct hci_request *req)
 {
+	struct hci_dev *hdev = req->hdev;
 	u8 cod[3];
-	int err;
 
 	BT_DBG("%s", hdev->name);
 
 	if (!hdev_is_powered(hdev))
-		return 0;
+		return;
 
 	if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
-		return 0;
+		return;
 
 	cod[0] = hdev->minor_class;
 	cod[1] = hdev->major_class;
 	cod[2] = get_service_classes(hdev);
 
 	if (memcmp(cod, hdev->dev_class, 3) == 0)
-		return 0;
-
-	err = hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
-	if (err == 0)
-		set_bit(HCI_PENDING_CLASS, &hdev->dev_flags);
+		return;
 
-	return err;
+	hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);
 }
 
 static void service_cache_off(struct work_struct *work)
 {
 	struct hci_dev *hdev = container_of(work, struct hci_dev,
 					    service_cache.work);
+	struct hci_request req;
 
 	if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))
 		return;
 
+	hci_req_init(&req, hdev);
+
 	hci_dev_lock(hdev);
 
-	update_eir(hdev);
-	update_class(hdev);
+	update_eir(&req);
+	update_class(&req);
 
 	hci_dev_unlock(hdev);
+
+	hci_req_run(&req, NULL);
 }
 
 static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev)
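These two hunks are the core of the conversion: update_eir() and update_class() no longer send HCI commands themselves, they only append commands to a caller-owned struct hci_request, and nothing reaches the controller until hci_req_run() flushes the whole batch with a single completion callback. A minimal standalone sketch of the pattern, with my_update_complete as a hypothetical callback name:

static void my_update_complete(struct hci_dev *hdev, u8 status)
{
	/* runs once, after the last queued command completes */
	BT_DBG("%s status 0x%02x", hdev->name, status);
}

static int queue_updates(struct hci_dev *hdev)
{
	struct hci_request req;

	hci_req_init(&req, hdev);	/* bind the request to hdev */
	update_eir(&req);		/* helpers only queue commands */
	update_class(&req);
	return hci_req_run(&req, my_update_complete);
}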
@@ -994,11 +999,64 @@ failed:
 	return err;
 }
 
+static void write_fast_connectable(struct hci_request *req, bool enable)
+{
+	struct hci_dev *hdev = req->hdev;
+	struct hci_cp_write_page_scan_activity acp;
+	u8 type;
+
+	if (hdev->hci_ver < BLUETOOTH_VER_1_2)
+		return;
+
+	if (enable) {
+		type = PAGE_SCAN_TYPE_INTERLACED;
+
+		/* 160 msec page scan interval */
+		acp.interval = __constant_cpu_to_le16(0x0100);
+	} else {
+		type = PAGE_SCAN_TYPE_STANDARD;	/* default */
+
+		/* default 1.28 sec page scan */
+		acp.interval = __constant_cpu_to_le16(0x0800);
+	}
+
+	acp.window = __constant_cpu_to_le16(0x0012);
+
+	if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval ||
+	    __cpu_to_le16(hdev->page_scan_window) != acp.window)
+		hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY,
+			    sizeof(acp), &acp);
+
+	if (hdev->page_scan_type != type)
+		hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
+}
+
+static void set_connectable_complete(struct hci_dev *hdev, u8 status)
+{
+	struct pending_cmd *cmd;
+
+	BT_DBG("status 0x%02x", status);
+
+	hci_dev_lock(hdev);
+
+	cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);
+	if (!cmd)
+		goto unlock;
+
+	send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev);
+
+	mgmt_pending_remove(cmd);
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
 static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
 			   u16 len)
 {
 	struct mgmt_mode *cp = data;
 	struct pending_cmd *cmd;
+	struct hci_request req;
 	u8 scan;
 	int err;
 
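The constants in write_fast_connectable() are Bluetooth baseband slots of 0.625 ms each, which is where the figures in the comments come from:

/* 0x0100 =  256 slots * 0.625 ms = 160 ms   (interlaced, fast)
 * 0x0800 = 2048 slots * 0.625 ms = 1.28 s   (standard default)
 * 0x0012 =   18 slots * 0.625 ms = 11.25 ms (page scan window)
 */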
@@ -1065,7 +1123,20 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
 			cancel_delayed_work(&hdev->discov_off);
 	}
 
-	err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+	hci_req_init(&req, hdev);
+
+	hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+
+	/* If we're going from non-connectable to connectable or
+	 * vice-versa when fast connectable is enabled ensure that fast
+	 * connectable gets disabled. write_fast_connectable won't do
+	 * anything if the page scan parameters are already what they
+	 * should be.
+	 */
+	if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags))
+		write_fast_connectable(&req, false);
+
+	err = hci_req_run(&req, set_connectable_complete);
 	if (err < 0)
 		mgmt_pending_remove(cmd);
 
@@ -1280,6 +1351,11 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 		return cmd_status(sk, hdev->id, MGMT_OP_SET_LE,
 				  MGMT_STATUS_INVALID_PARAMS);
 
+	/* LE-only devices do not allow toggling LE on/off */
+	if (!lmp_bredr_capable(hdev))
+		return cmd_status(sk, hdev->id, MGMT_OP_SET_LE,
+				  MGMT_STATUS_REJECTED);
+
 	hci_dev_lock(hdev);
 
 	val = !!cp->val;
@@ -1332,6 +1408,29 @@ unlock:
 	return err;
 }
 
+/* This is a helper function to test for pending mgmt commands that can
+ * cause CoD or EIR HCI commands. We can only allow one such pending
+ * mgmt command at a time since otherwise we cannot easily track what
+ * the current values are, will be, and based on that calculate if a new
+ * HCI command needs to be sent and if yes with what value.
+ */
+static bool pending_eir_or_class(struct hci_dev *hdev)
+{
+	struct pending_cmd *cmd;
+
+	list_for_each_entry(cmd, &hdev->mgmt_pending, list) {
+		switch (cmd->opcode) {
+		case MGMT_OP_ADD_UUID:
+		case MGMT_OP_REMOVE_UUID:
+		case MGMT_OP_SET_DEV_CLASS:
+		case MGMT_OP_SET_POWERED:
+			return true;
+		}
+	}
+
+	return false;
+}
+
 static const u8 bluetooth_base_uuid[] = {
 			0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80,
 			0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
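Callers treat the helper as a busy-gate before touching the class of device or EIR data; a usage sketch, with opcode standing in for the caller's own mgmt opcode:

	if (pending_eir_or_class(hdev))
		return cmd_status(sk, hdev->id, opcode, MGMT_STATUS_BUSY);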
@@ -1351,10 +1450,37 @@ static u8 get_uuid_size(const u8 *uuid)
 	return 16;
 }
 
+static void mgmt_class_complete(struct hci_dev *hdev, u16 mgmt_op, u8 status)
+{
+	struct pending_cmd *cmd;
+
+	hci_dev_lock(hdev);
+
+	cmd = mgmt_pending_find(mgmt_op, hdev);
+	if (!cmd)
+		goto unlock;
+
+	cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status),
+		     hdev->dev_class, 3);
+
+	mgmt_pending_remove(cmd);
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
+static void add_uuid_complete(struct hci_dev *hdev, u8 status)
+{
+	BT_DBG("status 0x%02x", status);
+
+	mgmt_class_complete(hdev, MGMT_OP_ADD_UUID, status);
+}
+
 static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 {
 	struct mgmt_cp_add_uuid *cp = data;
 	struct pending_cmd *cmd;
+	struct hci_request req;
 	struct bt_uuid *uuid;
 	int err;
 
@@ -1362,7 +1488,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 
 	hci_dev_lock(hdev);
 
-	if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
+	if (pending_eir_or_class(hdev)) {
 		err = cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID,
 				 MGMT_STATUS_BUSY);
 		goto failed;
@@ -1380,23 +1506,28 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 
 	list_add_tail(&uuid->list, &hdev->uuids);
 
-	err = update_class(hdev);
-	if (err < 0)
-		goto failed;
+	hci_req_init(&req, hdev);
 
-	err = update_eir(hdev);
-	if (err < 0)
-		goto failed;
+	update_class(&req);
+	update_eir(&req);
+
+	err = hci_req_run(&req, add_uuid_complete);
+	if (err < 0) {
+		if (err != -ENODATA)
+			goto failed;
 
-	if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
 		err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0,
 				   hdev->dev_class, 3);
 		goto failed;
 	}
 
 	cmd = mgmt_pending_add(sk, MGMT_OP_ADD_UUID, hdev, data, len);
-	if (!cmd)
+	if (!cmd) {
 		err = -ENOMEM;
+		goto failed;
+	}
+
+	err = 0;
 
 failed:
 	hci_dev_unlock(hdev);
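The err != -ENODATA test above relies on hci_req_run() returning -ENODATA when no commands were queued, meaning the class and EIR values were already up to date; in that case add_uuid() can complete immediately instead of waiting for a callback. Condensed sketch of the convention:

	err = hci_req_run(&req, add_uuid_complete);
	if (err == -ENODATA)	/* empty request: nothing needed to change */
		err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0,
				   hdev->dev_class, 3);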
@@ -1417,6 +1548,13 @@ static bool enable_service_cache(struct hci_dev *hdev)
 	return false;
 }
 
+static void remove_uuid_complete(struct hci_dev *hdev, u8 status)
+{
+	BT_DBG("status 0x%02x", status);
+
+	mgmt_class_complete(hdev, MGMT_OP_REMOVE_UUID, status);
+}
+
 static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
 		       u16 len)
 {
@@ -1424,13 +1562,14 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct pending_cmd *cmd;
 	struct bt_uuid *match, *tmp;
 	u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+	struct hci_request req;
 	int err, found;
 
 	BT_DBG("request for %s", hdev->name);
 
 	hci_dev_lock(hdev);
 
-	if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
+	if (pending_eir_or_class(hdev)) {
 		err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID,
 				 MGMT_STATUS_BUSY);
 		goto unlock;
@@ -1466,34 +1605,47 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
 	}
 
 update_class:
-	err = update_class(hdev);
-	if (err < 0)
-		goto unlock;
+	hci_req_init(&req, hdev);
 
-	err = update_eir(hdev);
-	if (err < 0)
-		goto unlock;
+	update_class(&req);
+	update_eir(&req);
+
+	err = hci_req_run(&req, remove_uuid_complete);
+	if (err < 0) {
+		if (err != -ENODATA)
+			goto unlock;
 
-	if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
 		err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0,
 				   hdev->dev_class, 3);
 		goto unlock;
 	}
 
 	cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_UUID, hdev, data, len);
-	if (!cmd)
+	if (!cmd) {
 		err = -ENOMEM;
+		goto unlock;
+	}
+
+	err = 0;
 
 unlock:
 	hci_dev_unlock(hdev);
 	return err;
 }
 
+static void set_class_complete(struct hci_dev *hdev, u8 status)
+{
+	BT_DBG("status 0x%02x", status);
+
+	mgmt_class_complete(hdev, MGMT_OP_SET_DEV_CLASS, status);
+}
+
 static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
 			 u16 len)
 {
 	struct mgmt_cp_set_dev_class *cp = data;
 	struct pending_cmd *cmd;
+	struct hci_request req;
 	int err;
 
 	BT_DBG("request for %s", hdev->name);
@@ -1502,15 +1654,19 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
 		return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
 				  MGMT_STATUS_NOT_SUPPORTED);
 
-	if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags))
-		return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
-				  MGMT_STATUS_BUSY);
+	hci_dev_lock(hdev);
 
-	if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0)
-		return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
-				  MGMT_STATUS_INVALID_PARAMS);
+	if (pending_eir_or_class(hdev)) {
+		err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
+				 MGMT_STATUS_BUSY);
+		goto unlock;
+	}
 
-	hci_dev_lock(hdev);
+	if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) {
+		err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
+				 MGMT_STATUS_INVALID_PARAMS);
+		goto unlock;
+	}
 
 	hdev->major_class = cp->major;
 	hdev->minor_class = cp->minor;
@@ -1521,26 +1677,34 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
 		goto unlock;
 	}
 
+	hci_req_init(&req, hdev);
+
 	if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) {
 		hci_dev_unlock(hdev);
 		cancel_delayed_work_sync(&hdev->service_cache);
 		hci_dev_lock(hdev);
-		update_eir(hdev);
+		update_eir(&req);
 	}
 
-	err = update_class(hdev);
-	if (err < 0)
-		goto unlock;
+	update_class(&req);
+
+	err = hci_req_run(&req, set_class_complete);
+	if (err < 0) {
+		if (err != -ENODATA)
+			goto unlock;
 
-	if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {
 		err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0,
 				   hdev->dev_class, 3);
 		goto unlock;
 	}
 
 	cmd = mgmt_pending_add(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len);
-	if (!cmd)
+	if (!cmd) {
 		err = -ENOMEM;
+		goto unlock;
+	}
+
+	err = 0;
 
 unlock:
 	hci_dev_unlock(hdev);
@@ -1971,7 +2135,7 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status)
 	conn->security_cfm_cb = NULL;
 	conn->disconn_cfm_cb = NULL;
 
-	hci_conn_put(conn);
+	hci_conn_drop(conn);
 
 	mgmt_pending_remove(cmd);
 }
@@ -2062,7 +2226,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 	}
 
 	if (conn->connect_cfm_cb) {
-		hci_conn_put(conn);
+		hci_conn_drop(conn);
 		err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE,
 				   MGMT_STATUS_BUSY, &rp, sizeof(rp));
 		goto unlock;
@@ -2071,7 +2235,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 	cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, hdev, data, len);
 	if (!cmd) {
 		err = -ENOMEM;
-		hci_conn_put(conn);
+		hci_conn_drop(conn);
 		goto unlock;
 	}
 
@@ -2140,7 +2304,7 @@ unlock:
 }
 
 static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
-			     bdaddr_t *bdaddr, u8 type, u16 mgmt_op,
+			     struct mgmt_addr_info *addr, u16 mgmt_op,
 			     u16 hci_op, __le32 passkey)
 {
 	struct pending_cmd *cmd;
@@ -2150,37 +2314,41 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
 	hci_dev_lock(hdev);
 
 	if (!hdev_is_powered(hdev)) {
-		err = cmd_status(sk, hdev->id, mgmt_op,
-				 MGMT_STATUS_NOT_POWERED);
+		err = cmd_complete(sk, hdev->id, mgmt_op,
+				   MGMT_STATUS_NOT_POWERED, addr,
+				   sizeof(*addr));
 		goto done;
 	}
 
-	if (type == BDADDR_BREDR)
-		conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, bdaddr);
+	if (addr->type == BDADDR_BREDR)
+		conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr);
 	else
-		conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr);
+		conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr);
 
 	if (!conn) {
-		err = cmd_status(sk, hdev->id, mgmt_op,
-				 MGMT_STATUS_NOT_CONNECTED);
+		err = cmd_complete(sk, hdev->id, mgmt_op,
+				   MGMT_STATUS_NOT_CONNECTED, addr,
+				   sizeof(*addr));
 		goto done;
 	}
 
-	if (type == BDADDR_LE_PUBLIC || type == BDADDR_LE_RANDOM) {
+	if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) {
 		/* Continue with pairing via SMP */
 		err = smp_user_confirm_reply(conn, mgmt_op, passkey);
 
 		if (!err)
-			err = cmd_status(sk, hdev->id, mgmt_op,
-					 MGMT_STATUS_SUCCESS);
+			err = cmd_complete(sk, hdev->id, mgmt_op,
+					   MGMT_STATUS_SUCCESS, addr,
+					   sizeof(*addr));
 		else
-			err = cmd_status(sk, hdev->id, mgmt_op,
-					 MGMT_STATUS_FAILED);
+			err = cmd_complete(sk, hdev->id, mgmt_op,
+					   MGMT_STATUS_FAILED, addr,
+					   sizeof(*addr));
 
 		goto done;
 	}
 
-	cmd = mgmt_pending_add(sk, mgmt_op, hdev, bdaddr, sizeof(*bdaddr));
+	cmd = mgmt_pending_add(sk, mgmt_op, hdev, addr, sizeof(*addr));
 	if (!cmd) {
 		err = -ENOMEM;
 		goto done;
@@ -2190,11 +2358,12 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,
 	if (hci_op == HCI_OP_USER_PASSKEY_REPLY) {
 		struct hci_cp_user_passkey_reply cp;
 
-		bacpy(&cp.bdaddr, bdaddr);
+		bacpy(&cp.bdaddr, &addr->bdaddr);
 		cp.passkey = passkey;
 		err = hci_send_cmd(hdev, hci_op, sizeof(cp), &cp);
 	} else
-		err = hci_send_cmd(hdev, hci_op, sizeof(*bdaddr), bdaddr);
+		err = hci_send_cmd(hdev, hci_op, sizeof(addr->bdaddr),
+				   &addr->bdaddr);
 
 	if (err < 0)
 		mgmt_pending_remove(cmd);
@@ -2211,7 +2380,7 @@ static int pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev,
 
 	BT_DBG("");
 
-	return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+	return user_pairing_resp(sk, hdev, &cp->addr,
 				 MGMT_OP_PIN_CODE_NEG_REPLY,
 				 HCI_OP_PIN_CODE_NEG_REPLY, 0);
 }
@@ -2227,7 +2396,7 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data,
 		return cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY,
 				  MGMT_STATUS_INVALID_PARAMS);
 
-	return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+	return user_pairing_resp(sk, hdev, &cp->addr,
 				 MGMT_OP_USER_CONFIRM_REPLY,
 				 HCI_OP_USER_CONFIRM_REPLY, 0);
 }
@@ -2239,7 +2408,7 @@ static int user_confirm_neg_reply(struct sock *sk, struct hci_dev *hdev,
 
 	BT_DBG("");
 
-	return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+	return user_pairing_resp(sk, hdev, &cp->addr,
 				 MGMT_OP_USER_CONFIRM_NEG_REPLY,
 				 HCI_OP_USER_CONFIRM_NEG_REPLY, 0);
 }
@@ -2251,7 +2420,7 @@ static int user_passkey_reply(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	BT_DBG("");
 
-	return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+	return user_pairing_resp(sk, hdev, &cp->addr,
 				 MGMT_OP_USER_PASSKEY_REPLY,
 				 HCI_OP_USER_PASSKEY_REPLY, cp->passkey);
 }
@@ -2263,18 +2432,47 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev,
 
 	BT_DBG("");
 
-	return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+	return user_pairing_resp(sk, hdev, &cp->addr,
 				 MGMT_OP_USER_PASSKEY_NEG_REPLY,
 				 HCI_OP_USER_PASSKEY_NEG_REPLY, 0);
 }
 
-static int update_name(struct hci_dev *hdev, const char *name)
+static void update_name(struct hci_request *req)
 {
+	struct hci_dev *hdev = req->hdev;
 	struct hci_cp_write_local_name cp;
 
-	memcpy(cp.name, name, sizeof(cp.name));
+	memcpy(cp.name, hdev->dev_name, sizeof(cp.name));
+
+	hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
+}
+
+static void set_name_complete(struct hci_dev *hdev, u8 status)
+{
+	struct mgmt_cp_set_local_name *cp;
+	struct pending_cmd *cmd;
+
+	BT_DBG("status 0x%02x", status);
+
+	hci_dev_lock(hdev);
 
-	return hci_send_cmd(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp);
+	cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev);
+	if (!cmd)
+		goto unlock;
+
+	cp = cmd->param;
+
+	if (status)
+		cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
+			   mgmt_status(status));
+	else
+		cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0,
+			     cp, sizeof(*cp));
+
+	mgmt_pending_remove(cmd);
+
+unlock:
+	hci_dev_unlock(hdev);
 }
 
 static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -2282,12 +2480,24 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
 {
 	struct mgmt_cp_set_local_name *cp = data;
 	struct pending_cmd *cmd;
+	struct hci_request req;
 	int err;
 
 	BT_DBG("");
 
 	hci_dev_lock(hdev);
 
+	/* If the old values are the same as the new ones just return a
+	 * direct command complete event.
+	 */
+	if (!memcmp(hdev->dev_name, cp->name, sizeof(hdev->dev_name)) &&
+	    !memcmp(hdev->short_name, cp->short_name,
+		    sizeof(hdev->short_name))) {
+		err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0,
+				   data, len);
+		goto failed;
+	}
+
 	memcpy(hdev->short_name, cp->short_name, sizeof(hdev->short_name));
 
 	if (!hdev_is_powered(hdev)) {
@@ -2310,7 +2520,19 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
 		goto failed;
 	}
 
-	err = update_name(hdev, cp->name);
+	memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name));
+
+	hci_req_init(&req, hdev);
+
+	if (lmp_bredr_capable(hdev)) {
+		update_name(&req);
+		update_eir(&req);
+	}
+
+	if (lmp_le_capable(hdev))
+		hci_update_ad(&req);
+
+	err = hci_req_run(&req, set_name_complete);
 	if (err < 0)
 		mgmt_pending_remove(cmd);
 
@@ -2485,7 +2707,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
 			goto failed;
 		}
 
-		err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT,
+		err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT,
 				  LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY);
 		break;
 
@@ -2497,8 +2719,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,
 			goto failed;
 		}
 
-		err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, LE_SCAN_WIN,
-				  LE_SCAN_TIMEOUT_BREDR_LE);
+		err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT,
+				  LE_SCAN_WIN, LE_SCAN_TIMEOUT_BREDR_LE);
 		break;
 
 	default:
@@ -2698,6 +2920,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
 			 u16 len)
 {
 	struct mgmt_cp_set_device_id *cp = data;
+	struct hci_request req;
 	int err;
 	__u16 source;
 
@@ -2718,24 +2941,59 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0);
 
-	update_eir(hdev);
+	hci_req_init(&req, hdev);
+	update_eir(&req);
+	hci_req_run(&req, NULL);
 
 	hci_dev_unlock(hdev);
 
 	return err;
 }
 
+static void fast_connectable_complete(struct hci_dev *hdev, u8 status)
+{
+	struct pending_cmd *cmd;
+
+	BT_DBG("status 0x%02x", status);
+
+	hci_dev_lock(hdev);
+
+	cmd = mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev);
+	if (!cmd)
+		goto unlock;
+
+	if (status) {
+		cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
+			   mgmt_status(status));
+	} else {
+		struct mgmt_mode *cp = cmd->param;
+
+		if (cp->val)
+			set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
+		else
+			clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags);
+
+		send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev);
+		new_settings(hdev, cmd->sk);
+	}
+
+	mgmt_pending_remove(cmd);
+
+unlock:
+	hci_dev_unlock(hdev);
+}
+
 static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
 				void *data, u16 len)
 {
 	struct mgmt_mode *cp = data;
-	struct hci_cp_write_page_scan_activity acp;
-	u8 type;
+	struct pending_cmd *cmd;
+	struct hci_request req;
 	int err;
 
 	BT_DBG("%s", hdev->name);
 
-	if (!lmp_bredr_capable(hdev))
+	if (!lmp_bredr_capable(hdev) || hdev->hci_ver < BLUETOOTH_VER_1_2)
 		return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
 				  MGMT_STATUS_NOT_SUPPORTED);
 
@@ -2753,40 +3011,39 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
 
 	hci_dev_lock(hdev);
 
-	if (cp->val) {
-		type = PAGE_SCAN_TYPE_INTERLACED;
+	if (mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) {
+		err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
+				 MGMT_STATUS_BUSY);
+		goto unlock;
+	}
 
-		/* 160 msec page scan interval */
-		acp.interval = __constant_cpu_to_le16(0x0100);
-	} else {
-		type = PAGE_SCAN_TYPE_STANDARD;	/* default */
+	if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) {
+		err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE,
+					hdev);
+		goto unlock;
+	}
 
-		/* default 1.28 sec page scan */
-		acp.interval = __constant_cpu_to_le16(0x0800);
+	cmd = mgmt_pending_add(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev,
+			       data, len);
+	if (!cmd) {
+		err = -ENOMEM;
+		goto unlock;
 	}
 
-	/* default 11.25 msec page scan window */
-	acp.window = __constant_cpu_to_le16(0x0012);
+	hci_req_init(&req, hdev);
 
-	err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, sizeof(acp),
-			   &acp);
-	if (err < 0) {
-		err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
-				 MGMT_STATUS_FAILED);
-		goto done;
-	}
+	write_fast_connectable(&req, cp->val);
 
-	err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type);
+	err = hci_req_run(&req, fast_connectable_complete);
 	if (err < 0) {
 		err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,
 				 MGMT_STATUS_FAILED);
-		goto done;
+		mgmt_pending_remove(cmd);
 	}
 
-	err = cmd_complete(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, 0,
-			   NULL, 0);
-done:
+unlock:
 	hci_dev_unlock(hdev);
+
 	return err;
 }
 
@@ -3043,79 +3300,116 @@ static void settings_rsp(struct pending_cmd *cmd, void *data)
 	mgmt_pending_free(cmd);
 }
 
-static int set_bredr_scan(struct hci_dev *hdev)
+static void set_bredr_scan(struct hci_request *req)
 {
+	struct hci_dev *hdev = req->hdev;
 	u8 scan = 0;
 
+	/* Ensure that fast connectable is disabled. This function will
+	 * not do anything if the page scan parameters are already what
+	 * they should be.
+	 */
+	write_fast_connectable(req, false);
+
 	if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))
 		scan |= SCAN_PAGE;
 	if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))
 		scan |= SCAN_INQUIRY;
 
-	if (!scan)
-		return 0;
-
-	return hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
+	if (scan)
+		hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
 }
 
-int mgmt_powered(struct hci_dev *hdev, u8 powered)
+static void powered_complete(struct hci_dev *hdev, u8 status)
 {
 	struct cmd_lookup match = { NULL, hdev };
-	int err;
 
-	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
-		return 0;
+	BT_DBG("status 0x%02x", status);
+
+	hci_dev_lock(hdev);
 
 	mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
 
-	if (powered) {
-		u8 link_sec;
+	new_settings(hdev, match.sk);
 
-		if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) &&
-		    !lmp_host_ssp_capable(hdev)) {
-			u8 ssp = 1;
+	hci_dev_unlock(hdev);
 
-			hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
-		}
+	if (match.sk)
+		sock_put(match.sk);
+}
+
+static int powered_update_hci(struct hci_dev *hdev)
+{
+	struct hci_request req;
+	u8 link_sec;
 
-		if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
-			struct hci_cp_write_le_host_supported cp;
+	hci_req_init(&req, hdev);
 
-			cp.le = 1;
-			cp.simul = lmp_le_br_capable(hdev);
+	if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) &&
+	    !lmp_host_ssp_capable(hdev)) {
+		u8 ssp = 1;
 
-			/* Check first if we already have the right
-			 * host state (host features set)
-			 */
-			if (cp.le != lmp_host_le_capable(hdev) ||
-			    cp.simul != lmp_host_le_br_capable(hdev))
-				hci_send_cmd(hdev,
-					     HCI_OP_WRITE_LE_HOST_SUPPORTED,
-					     sizeof(cp), &cp);
-		}
+		hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
+	}
 
-		link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
-		if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
-			hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE,
-				     sizeof(link_sec), &link_sec);
+	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) &&
+	    lmp_bredr_capable(hdev)) {
+		struct hci_cp_write_le_host_supported cp;
 
-		if (lmp_bredr_capable(hdev)) {
-			set_bredr_scan(hdev);
-			update_class(hdev);
-			update_name(hdev, hdev->dev_name);
-			update_eir(hdev);
-		}
-	} else {
-		u8 status = MGMT_STATUS_NOT_POWERED;
-		u8 zero_cod[] = { 0, 0, 0 };
+		cp.le = 1;
+		cp.simul = lmp_le_br_capable(hdev);
 
-		mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status);
+		/* Check first if we already have the right
+		 * host state (host features set)
+		 */
+		if (cp.le != lmp_host_le_capable(hdev) ||
+		    cp.simul != lmp_host_le_br_capable(hdev))
+			hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED,
+				    sizeof(cp), &cp);
+	}
+
+	link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags);
+	if (link_sec != test_bit(HCI_AUTH, &hdev->flags))
+		hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE,
+			    sizeof(link_sec), &link_sec);
 
-		if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
-			mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
-				   zero_cod, sizeof(zero_cod), NULL);
+	if (lmp_bredr_capable(hdev)) {
+		set_bredr_scan(&req);
+		update_class(&req);
+		update_name(&req);
+		update_eir(&req);
 	}
 
+	return hci_req_run(&req, powered_complete);
+}
+
+int mgmt_powered(struct hci_dev *hdev, u8 powered)
+{
+	struct cmd_lookup match = { NULL, hdev };
+	u8 status_not_powered = MGMT_STATUS_NOT_POWERED;
+	u8 zero_cod[] = { 0, 0, 0 };
+	int err;
+
+	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
+		return 0;
+
+	if (powered) {
+		if (powered_update_hci(hdev) == 0)
+			return 0;
+
+		mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp,
+				     &match);
+		goto new_settings;
+	}
+
+	mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
+	mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status_not_powered);
+
+	if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
+		mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
+			   zero_cod, sizeof(zero_cod), NULL);
+
+new_settings:
 	err = new_settings(hdev, match.sk);
 
 	if (match.sk)
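The rewritten mgmt_powered() defers the power-on path: a zero return from powered_update_hci() means the initialization commands were queued and powered_complete() will emit the settings responses later, while any error, including -ENODATA for an empty request, falls through to answering synchronously. Condensed control-flow sketch:

	if (powered && powered_update_hci(hdev) == 0)
		return 0;	/* responses deferred to powered_complete() */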
@@ -3152,7 +3446,7 @@ int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable)
 
 int mgmt_connectable(struct hci_dev *hdev, u8 connectable)
 {
-	struct cmd_lookup match = { NULL, hdev };
+	struct pending_cmd *cmd;
 	bool changed = false;
 	int err = 0;
 
@@ -3164,14 +3458,10 @@ int mgmt_connectable(struct hci_dev *hdev, u8 connectable)
 		changed = true;
 	}
 
-	mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev, settings_rsp,
-			     &match);
+	cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);
 
 	if (changed)
-		err = new_settings(hdev, match.sk);
-
-	if (match.sk)
-		sock_put(match.sk);
+		err = new_settings(hdev, cmd ? cmd->sk : NULL);
 
 	return err;
 }
@@ -3555,23 +3845,25 @@ int mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
 	return err;
 }
 
-static int clear_eir(struct hci_dev *hdev)
+static void clear_eir(struct hci_request *req)
 {
+	struct hci_dev *hdev = req->hdev;
 	struct hci_cp_write_eir cp;
 
 	if (!lmp_ext_inq_capable(hdev))
-		return 0;
+		return;
 
 	memset(hdev->eir, 0, sizeof(hdev->eir));
 
 	memset(&cp, 0, sizeof(cp));
 
-	return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
+	hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);
 }
 
 int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
 {
 	struct cmd_lookup match = { NULL, hdev };
+	struct hci_request req;
 	bool changed = false;
 	int err = 0;
 
@@ -3604,29 +3896,26 @@ int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)
 	if (match.sk)
 		sock_put(match.sk);
 
+	hci_req_init(&req, hdev);
+
 	if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags))
-		update_eir(hdev);
+		update_eir(&req);
 	else
-		clear_eir(hdev);
+		clear_eir(&req);
+
+	hci_req_run(&req, NULL);
 
 	return err;
 }
 
-static void class_rsp(struct pending_cmd *cmd, void *data)
+static void sk_lookup(struct pending_cmd *cmd, void *data)
 {
 	struct cmd_lookup *match = data;
 
-	cmd_complete(cmd->sk, cmd->index, cmd->opcode, match->mgmt_status,
-		     match->hdev->dev_class, 3);
-
-	list_del(&cmd->list);
-
 	if (match->sk == NULL) {
 		match->sk = cmd->sk;
 		sock_hold(match->sk);
 	}
-
-	mgmt_pending_free(cmd);
 }
 
 int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
@@ -3635,11 +3924,9 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
 	struct cmd_lookup match = { NULL, hdev, mgmt_status(status) };
 	int err = 0;
 
-	clear_bit(HCI_PENDING_CLASS, &hdev->dev_flags);
-
-	mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, class_rsp, &match);
-	mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, class_rsp, &match);
-	mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, class_rsp, &match);
+	mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match);
+	mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match);
+	mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);
 
 	if (!status)
 		err = mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class,
@@ -3653,55 +3940,29 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,
 
 int mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status)
 {
-	struct pending_cmd *cmd;
 	struct mgmt_cp_set_local_name ev;
-	bool changed = false;
-	int err = 0;
+	struct pending_cmd *cmd;
 
-	if (memcmp(name, hdev->dev_name, sizeof(hdev->dev_name)) != 0) {
-		memcpy(hdev->dev_name, name, sizeof(hdev->dev_name));
-		changed = true;
-	}
+	if (status)
+		return 0;
 
 	memset(&ev, 0, sizeof(ev));
 	memcpy(ev.name, name, HCI_MAX_NAME_LENGTH);
 	memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH);
 
 	cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev);
-	if (!cmd)
-		goto send_event;
-
-	/* Always assume that either the short or the complete name has
-	 * changed if there was a pending mgmt command */
-	changed = true;
+	if (!cmd) {
+		memcpy(hdev->dev_name, name, sizeof(hdev->dev_name));
 
-	if (status) {
-		err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
-				 mgmt_status(status));
-		goto failed;
+		/* If this is a HCI command related to powering on the
+		 * HCI dev don't send any mgmt signals.
+		 */
+		if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev))
+			return 0;
 	}
 
-	err = cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, &ev,
-			   sizeof(ev));
-	if (err < 0)
-		goto failed;
-
-send_event:
-	if (changed)
-		err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev,
-				 sizeof(ev), cmd ? cmd->sk : NULL);
-
-	/* EIR is taken care of separately when powering on the
-	 * adapter so only update them here if this is a name change
-	 * unrelated to power on.
-	 */
-	if (!test_bit(HCI_INIT, &hdev->flags))
-		update_eir(hdev);
-
-failed:
-	if (cmd)
-		mgmt_pending_remove(cmd);
-	return err;
+	return mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev),
+			  cmd ? cmd->sk : NULL);
 }
 
 int mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash,
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index b23e2713fea8..ca957d34b0c8 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -69,7 +69,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
 							u8 sec_level,
 							int *err);
 static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst);
-static void rfcomm_session_del(struct rfcomm_session *s);
+static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s);
 
 /* ---- RFCOMM frame parsing macros ---- */
 #define __get_dlci(b)	((b & 0xfc) >> 2)
@@ -108,12 +108,6 @@ static void rfcomm_schedule(void)
 	wake_up_process(rfcomm_thread);
 }
 
-static void rfcomm_session_put(struct rfcomm_session *s)
-{
-	if (atomic_dec_and_test(&s->refcnt))
-		rfcomm_session_del(s);
-}
-
 /* ---- RFCOMM FCS computation ---- */
 
 /* reversed, 8-bit, poly=0x07 */
@@ -249,16 +243,14 @@ static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout)
 {
 	BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout);
 
-	if (!mod_timer(&s->timer, jiffies + timeout))
-		rfcomm_session_hold(s);
+	mod_timer(&s->timer, jiffies + timeout);
 }
 
 static void rfcomm_session_clear_timer(struct rfcomm_session *s)
 {
 	BT_DBG("session %p state %ld", s, s->state);
 
-	if (del_timer(&s->timer))
-		rfcomm_session_put(s);
+	del_timer_sync(&s->timer);
 }
 
 /* ---- RFCOMM DLCs ---- */
@@ -336,8 +328,6 @@ static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d)
 {
 	BT_DBG("dlc %p session %p", d, s);
 
-	rfcomm_session_hold(s);
-
 	rfcomm_session_clear_timer(s);
 	rfcomm_dlc_hold(d);
 	list_add(&d->list, &s->dlcs);
@@ -356,8 +346,6 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d)
 
 	if (list_empty(&s->dlcs))
 		rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT);
-
-	rfcomm_session_put(s);
 }
 
 static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci)
@@ -493,12 +481,34 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
 
 int rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
 {
-	int r;
+	int r = 0;
+	struct rfcomm_dlc *d_list;
+	struct rfcomm_session *s, *s_list;
+
+	BT_DBG("dlc %p state %ld dlci %d err %d", d, d->state, d->dlci, err);
 
 	rfcomm_lock();
 
-	r = __rfcomm_dlc_close(d, err);
+	s = d->session;
+	if (!s)
+		goto no_session;
+
+	/* after waiting on the mutex check the session still exists
+	 * then check the dlc still exists
+	 */
+	list_for_each_entry(s_list, &session_list, list) {
+		if (s_list == s) {
+			list_for_each_entry(d_list, &s->dlcs, list) {
+				if (d_list == d) {
+					r = __rfcomm_dlc_close(d, err);
+					break;
+				}
+			}
+			break;
+		}
+	}
 
+no_session:
 	rfcomm_unlock();
 	return r;
 }
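The revalidation loop exists because rfcomm_lock() can sleep and the RFCOMM thread may free the session in the meantime, so the saved pointers must be proven live before they are dereferenced again. The session half of the check could equally be factored into a helper along these lines (session_still_exists is hypothetical, not part of the patch):

static bool session_still_exists(struct rfcomm_session *s)
{
	struct rfcomm_session *cur;

	list_for_each_entry(cur, &session_list, list)
		if (cur == s)	/* still on the global session list */
			return true;
	return false;
}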
@@ -609,7 +619,7 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)
 	return s;
 }
 
-static void rfcomm_session_del(struct rfcomm_session *s)
+static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s)
 {
 	int state = s->state;
 
@@ -617,15 +627,14 @@ static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s)
 
 	list_del(&s->list);
 
-	if (state == BT_CONNECTED)
-		rfcomm_send_disc(s, 0);
-
 	rfcomm_session_clear_timer(s);
 	sock_release(s->sock);
 	kfree(s);
 
 	if (state != BT_LISTEN)
 		module_put(THIS_MODULE);
+
+	return NULL;
 }
 
 static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
@@ -644,17 +653,16 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)
 	return NULL;
 }
 
-static void rfcomm_session_close(struct rfcomm_session *s, int err)
+static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s,
+						   int err)
 {
 	struct rfcomm_dlc *d;
 	struct list_head *p, *n;
 
-	BT_DBG("session %p state %ld err %d", s, s->state, err);
-
-	rfcomm_session_hold(s);
-
 	s->state = BT_CLOSED;
 
+	BT_DBG("session %p state %ld err %d", s, s->state, err);
+
 	/* Close all dlcs */
 	list_for_each_safe(p, n, &s->dlcs) {
 		d = list_entry(p, struct rfcomm_dlc, list);
@@ -663,7 +671,7 @@ static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s,
 	}
 
 	rfcomm_session_clear_timer(s);
-	rfcomm_session_put(s);
+	return rfcomm_session_del(s);
 }
 
 static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
@@ -715,8 +723,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,
 	if (*err == 0 || *err == -EINPROGRESS)
 		return s;
 
-	rfcomm_session_del(s);
-	return NULL;
+	return rfcomm_session_del(s);
 
 failed:
 	sock_release(sock);
@@ -1105,7 +1112,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr)
 }
 
 /* ---- RFCOMM frame reception ---- */
-static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
+static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
 {
 	BT_DBG("session %p state %ld dlci %d", s, s->state, dlci);
 
@@ -1114,7 +1121,7 @@ static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
 		struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci);
 		if (!d) {
 			rfcomm_send_dm(s, dlci);
-			return 0;
+			return s;
 		}
 
 		switch (d->state) {
@@ -1150,25 +1157,14 @@ static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)
 			break;
 
 		case BT_DISCONN:
-			/* rfcomm_session_put is called later so don't do
-			 * anything here otherwise we will mess up the session
-			 * reference counter:
-			 *
-			 * (a) when we are the initiator dlc_unlink will drive
-			 * the reference counter to 0 (there is no initial put
-			 * after session_add)
-			 *
-			 * (b) when we are not the initiator rfcomm_rx_process
-			 * will explicitly call put to balance the initial hold
-			 * done after session add.
-			 */
+			s = rfcomm_session_close(s, ECONNRESET);
 			break;
 		}
 	}
-	return 0;
+	return s;
 }
 
-static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
+static struct rfcomm_session *rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
 {
 	int err = 0;
 
@@ -1192,13 +1188,13 @@ static struct rfcomm_session *rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)
 		else
 			err = ECONNRESET;
 
-		s->state = BT_CLOSED;
-		rfcomm_session_close(s, err);
+		s = rfcomm_session_close(s, err);
 	}
-	return 0;
+	return s;
 }
 
-static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
+static struct rfcomm_session *rfcomm_recv_disc(struct rfcomm_session *s,
+					       u8 dlci)
 {
 	int err = 0;
 
@@ -1227,11 +1223,9 @@ static struct rfcomm_session *rfcomm_recv_disc(struct rfcomm_session *s,
 		else
 			err = ECONNRESET;
 
-		s->state = BT_CLOSED;
-		rfcomm_session_close(s, err);
+		s = rfcomm_session_close(s, err);
 	}
-
-	return 0;
+	return s;
 }
 
 void rfcomm_dlc_accept(struct rfcomm_dlc *d)
@@ -1652,11 +1646,18 @@ drop:
 	return 0;
 }
 
-static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
+static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s,
+						struct sk_buff *skb)
 {
 	struct rfcomm_hdr *hdr = (void *) skb->data;
 	u8 type, dlci, fcs;
 
+	if (!s) {
+		/* no session, so free socket data */
+		kfree_skb(skb);
+		return s;
+	}
+
 	dlci = __get_dlci(hdr->addr);
 	type = __get_type(hdr->ctrl);
 
@@ -1667,7 +1668,7 @@ static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s,
 	if (__check_fcs(skb->data, type, fcs)) {
 		BT_ERR("bad checksum in packet");
 		kfree_skb(skb);
-		return -EILSEQ;
+		return s;
 	}
 
 	if (__test_ea(hdr->len))
@@ -1683,22 +1684,23 @@ static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s,
 
 	case RFCOMM_DISC:
 		if (__test_pf(hdr->ctrl))
-			rfcomm_recv_disc(s, dlci);
+			s = rfcomm_recv_disc(s, dlci);
 		break;
 
 	case RFCOMM_UA:
 		if (__test_pf(hdr->ctrl))
-			rfcomm_recv_ua(s, dlci);
+			s = rfcomm_recv_ua(s, dlci);
 		break;
 
 	case RFCOMM_DM:
-		rfcomm_recv_dm(s, dlci);
+		s = rfcomm_recv_dm(s, dlci);
 		break;
 
 	case RFCOMM_UIH:
-		if (dlci)
-			return rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb);
-
+		if (dlci) {
+			rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb);
+			return s;
+		}
 		rfcomm_recv_mcc(s, skb);
 		break;
 
@@ -1707,7 +1709,7 @@ static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s,
 		break;
 	}
 	kfree_skb(skb);
-	return 0;
+	return s;
 }
 
 /* ---- Connection and data processing ---- */
@@ -1844,7 +1846,7 @@ static void rfcomm_process_dlcs(struct rfcomm_session *s)
 	}
 }
 
-static void rfcomm_process_rx(struct rfcomm_session *s)
+static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
 {
 	struct socket *sock = s->sock;
 	struct sock *sk = sock->sk;
@@ -1856,17 +1858,15 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
 	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
 		skb_orphan(skb);
 		if (!skb_linearize(skb))
-			rfcomm_recv_frame(s, skb);
+			s = rfcomm_recv_frame(s, skb);
 		else
 			kfree_skb(skb);
 	}
 
-	if (sk->sk_state == BT_CLOSED) {
-		if (!s->initiator)
-			rfcomm_session_put(s);
+	if (s && (sk->sk_state == BT_CLOSED))
+		s = rfcomm_session_close(s, sk->sk_err);
 
-		rfcomm_session_close(s, sk->sk_err);
-	}
+	return s;
 }
 
 static void rfcomm_accept_connection(struct rfcomm_session *s)
@@ -1891,8 +1891,6 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)
1891 1891
1892 s = rfcomm_session_add(nsock, BT_OPEN); 1892 s = rfcomm_session_add(nsock, BT_OPEN);
1893 if (s) { 1893 if (s) {
1894 rfcomm_session_hold(s);
1895
1896 /* We should adjust MTU on incoming sessions. 1894 /* We should adjust MTU on incoming sessions.
1897 * L2CAP MTU minus UIH header and FCS. */ 1895 * L2CAP MTU minus UIH header and FCS. */
1898 s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu, 1896 s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu,
@@ -1903,7 +1901,7 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)
1903 sock_release(nsock); 1901 sock_release(nsock);
1904} 1902}
1905 1903
1906static void rfcomm_check_connection(struct rfcomm_session *s) 1904static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s)
1907{ 1905{
1908 struct sock *sk = s->sock->sk; 1906 struct sock *sk = s->sock->sk;
1909 1907
@@ -1921,10 +1919,10 @@ static void rfcomm_check_connection(struct rfcomm_session *s)
1921 break; 1919 break;
1922 1920
1923 case BT_CLOSED: 1921 case BT_CLOSED:
1924 s->state = BT_CLOSED; 1922 s = rfcomm_session_close(s, sk->sk_err);
1925 rfcomm_session_close(s, sk->sk_err);
1926 break; 1923 break;
1927 } 1924 }
1925 return s;
1928} 1926}
1929 1927
1930static void rfcomm_process_sessions(void) 1928static void rfcomm_process_sessions(void)
@@ -1940,7 +1938,6 @@ static void rfcomm_process_sessions(void)
1940 if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { 1938 if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) {
1941 s->state = BT_DISCONN; 1939 s->state = BT_DISCONN;
1942 rfcomm_send_disc(s, 0); 1940 rfcomm_send_disc(s, 0);
1943 rfcomm_session_put(s);
1944 continue; 1941 continue;
1945 } 1942 }
1946 1943
@@ -1949,21 +1946,18 @@ static void rfcomm_process_sessions(void)
1949 continue; 1946 continue;
1950 } 1947 }
1951 1948
1952 rfcomm_session_hold(s);
1953
1954 switch (s->state) { 1949 switch (s->state) {
1955 case BT_BOUND: 1950 case BT_BOUND:
1956 rfcomm_check_connection(s); 1951 s = rfcomm_check_connection(s);
1957 break; 1952 break;
1958 1953
1959 default: 1954 default:
1960 rfcomm_process_rx(s); 1955 s = rfcomm_process_rx(s);
1961 break; 1956 break;
1962 } 1957 }
1963 1958
1964 rfcomm_process_dlcs(s); 1959 if (s)
1965 1960 rfcomm_process_dlcs(s);
1966 rfcomm_session_put(s);
1967 } 1961 }
1968 1962
1969 rfcomm_unlock(); 1963 rfcomm_unlock();
@@ -2010,10 +2004,11 @@ static int rfcomm_add_listener(bdaddr_t *ba)
2010 2004
2011 /* Add listening session */ 2005 /* Add listening session */
2012 s = rfcomm_session_add(sock, BT_LISTEN); 2006 s = rfcomm_session_add(sock, BT_LISTEN);
2013 if (!s) 2007 if (!s) {
2008 err = -ENOMEM;
2014 goto failed; 2009 goto failed;
2010 }
2015 2011
2016 rfcomm_session_hold(s);
2017 return 0; 2012 return 0;
2018failed: 2013failed:
2019 sock_release(sock); 2014 sock_release(sock);
@@ -2071,8 +2066,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
2071 if (!s) 2066 if (!s)
2072 return; 2067 return;
2073 2068
2074 rfcomm_session_hold(s);
2075
2076 list_for_each_safe(p, n, &s->dlcs) { 2069 list_for_each_safe(p, n, &s->dlcs) {
2077 d = list_entry(p, struct rfcomm_dlc, list); 2070 d = list_entry(p, struct rfcomm_dlc, list);
2078 2071
@@ -2104,8 +2097,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
2104 set_bit(RFCOMM_AUTH_REJECT, &d->flags); 2097 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
2105 } 2098 }
2106 2099
2107 rfcomm_session_put(s);
2108
2109 rfcomm_schedule(); 2100 rfcomm_schedule();
2110} 2101}
2111 2102
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index c23bae86263b..30b3721dc6d7 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -608,6 +608,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
608 608
609 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { 609 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
610 rfcomm_dlc_accept(d); 610 rfcomm_dlc_accept(d);
611 msg->msg_namelen = 0;
611 return 0; 612 return 0;
612 } 613 }
613 614
@@ -1036,7 +1037,7 @@ int __init rfcomm_init_sockets(void)
1036 goto error; 1037 goto error;
1037 } 1038 }
1038 1039
1039 err = bt_procfs_init(THIS_MODULE, &init_net, "rfcomm", &rfcomm_sk_list, NULL); 1040 err = bt_procfs_init(&init_net, "rfcomm", &rfcomm_sk_list, NULL);
1040 if (err < 0) { 1041 if (err < 0) {
1041 BT_ERR("Failed to create RFCOMM proc file"); 1042 BT_ERR("Failed to create RFCOMM proc file");
1042 bt_sock_unregister(BTPROTO_RFCOMM); 1043 bt_sock_unregister(BTPROTO_RFCOMM);
@@ -1065,8 +1066,7 @@ void __exit rfcomm_cleanup_sockets(void)
1065 1066
1066 debugfs_remove(rfcomm_sock_debugfs); 1067 debugfs_remove(rfcomm_sock_debugfs);
1067 1068
1068 if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) 1069 bt_sock_unregister(BTPROTO_RFCOMM);
1069 BT_ERR("RFCOMM socket layer unregistration failed");
1070 1070
1071 proto_unregister(&rfcomm_proto); 1071 proto_unregister(&rfcomm_proto);
1072} 1072}
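
The one-line msg->msg_namelen = 0 addition in rfcomm_sock_recvmsg() plugs an information leak: recvmsg() callers copy msg_name/msg_namelen back to user space, so any early-return path that never writes the source address must zero the length, or stale kernel stack bytes escape. The rule as a hedged fragment (kernel context assumed; example_recvmsg is an illustrative name, not the bluetooth code):

/* assumes <linux/socket.h> for struct msghdr */
static int example_recvmsg(struct msghdr *msg, bool deferred_setup)
{
        if (deferred_setup) {
                msg->msg_namelen = 0;   /* no address filled in: say so */
                return 0;
        }
        /* normal path fills msg->msg_name and sets msg->msg_namelen */
        return 0;
}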
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index fad0302bdb32..e7bd4eea575c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -83,7 +83,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
83 if (conn) 83 if (conn)
84 return conn; 84 return conn;
85 85
86 conn = kzalloc(sizeof(struct sco_conn), GFP_ATOMIC); 86 conn = kzalloc(sizeof(struct sco_conn), GFP_KERNEL);
87 if (!conn) 87 if (!conn)
88 return NULL; 88 return NULL;
89 89
@@ -185,7 +185,7 @@ static int sco_connect(struct sock *sk)
185 185
186 conn = sco_conn_add(hcon); 186 conn = sco_conn_add(hcon);
187 if (!conn) { 187 if (!conn) {
188 hci_conn_put(hcon); 188 hci_conn_drop(hcon);
189 err = -ENOMEM; 189 err = -ENOMEM;
190 goto done; 190 goto done;
191 } 191 }
@@ -353,7 +353,7 @@ static void __sco_sock_close(struct sock *sk)
353 if (sco_pi(sk)->conn->hcon) { 353 if (sco_pi(sk)->conn->hcon) {
354 sk->sk_state = BT_DISCONN; 354 sk->sk_state = BT_DISCONN;
355 sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); 355 sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT);
356 hci_conn_put(sco_pi(sk)->conn->hcon); 356 hci_conn_drop(sco_pi(sk)->conn->hcon);
357 sco_pi(sk)->conn->hcon = NULL; 357 sco_pi(sk)->conn->hcon = NULL;
358 } else 358 } else
359 sco_chan_del(sk, ECONNRESET); 359 sco_chan_del(sk, ECONNRESET);
@@ -481,8 +481,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
481{ 481{
482 struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; 482 struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
483 struct sock *sk = sock->sk; 483 struct sock *sk = sock->sk;
484 int err = 0; 484 int err;
485
486 485
487 BT_DBG("sk %p", sk); 486 BT_DBG("sk %p", sk);
488 487
@@ -653,6 +652,42 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
653 return err; 652 return err;
654} 653}
655 654
655static void sco_conn_defer_accept(struct hci_conn *conn, int mask)
656{
657 struct hci_dev *hdev = conn->hdev;
658
659 BT_DBG("conn %p", conn);
660
661 conn->state = BT_CONFIG;
662
663 if (!lmp_esco_capable(hdev)) {
664 struct hci_cp_accept_conn_req cp;
665
666 bacpy(&cp.bdaddr, &conn->dst);
667
668 if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER))
669 cp.role = 0x00; /* Become master */
670 else
671 cp.role = 0x01; /* Remain slave */
672
673 hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp);
674 } else {
675 struct hci_cp_accept_sync_conn_req cp;
676
677 bacpy(&cp.bdaddr, &conn->dst);
678 cp.pkt_type = cpu_to_le16(conn->pkt_type);
679
680 cp.tx_bandwidth = __constant_cpu_to_le32(0x00001f40);
681 cp.rx_bandwidth = __constant_cpu_to_le32(0x00001f40);
682 cp.max_latency = __constant_cpu_to_le16(0xffff);
683 cp.content_format = cpu_to_le16(hdev->voice_setting);
684 cp.retrans_effort = 0xff;
685
686 hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
687 sizeof(cp), &cp);
688 }
689}
690
656static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, 691static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
657 struct msghdr *msg, size_t len, int flags) 692 struct msghdr *msg, size_t len, int flags)
658{ 693{
@@ -663,8 +698,9 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
663 698
664 if (sk->sk_state == BT_CONNECT2 && 699 if (sk->sk_state == BT_CONNECT2 &&
665 test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { 700 test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
666 hci_conn_accept(pi->conn->hcon, 0); 701 sco_conn_defer_accept(pi->conn->hcon, 0);
667 sk->sk_state = BT_CONFIG; 702 sk->sk_state = BT_CONFIG;
703 msg->msg_namelen = 0;
668 704
669 release_sock(sk); 705 release_sock(sk);
670 return 0; 706 return 0;
@@ -882,7 +918,7 @@ static void sco_chan_del(struct sock *sk, int err)
882 sco_conn_unlock(conn); 918 sco_conn_unlock(conn);
883 919
884 if (conn->hcon) 920 if (conn->hcon)
885 hci_conn_put(conn->hcon); 921 hci_conn_drop(conn->hcon);
886 } 922 }
887 923
888 sk->sk_state = BT_CLOSED; 924 sk->sk_state = BT_CLOSED;
@@ -1083,7 +1119,7 @@ int __init sco_init(void)
1083 goto error; 1119 goto error;
1084 } 1120 }
1085 1121
1086 err = bt_procfs_init(THIS_MODULE, &init_net, "sco", &sco_sk_list, NULL); 1122 err = bt_procfs_init(&init_net, "sco", &sco_sk_list, NULL);
1087 if (err < 0) { 1123 if (err < 0) {
1088 BT_ERR("Failed to create SCO proc file"); 1124 BT_ERR("Failed to create SCO proc file");
1089 bt_sock_unregister(BTPROTO_SCO); 1125 bt_sock_unregister(BTPROTO_SCO);
@@ -1112,8 +1148,7 @@ void __exit sco_exit(void)
1112 1148
1113 debugfs_remove(sco_debugfs); 1149 debugfs_remove(sco_debugfs);
1114 1150
1115 if (bt_sock_unregister(BTPROTO_SCO) < 0) 1151 bt_sock_unregister(BTPROTO_SCO);
1116 BT_ERR("SCO socket unregistration failed");
1117 1152
1118 proto_unregister(&sco_proto); 1153 proto_unregister(&sco_proto);
1119} 1154}
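
Two of the sco.c changes are worth calling out. sco_conn_add() switches from GFP_ATOMIC to GFP_KERNEL because it is only reached from process context, where a sleeping allocation is legal and far more likely to succeed; and sco_conn_defer_accept() moves into sco.c so the HCI core no longer carries SCO-specific accept logic. The allocation-context rule, as an illustrative fragment (assumes <linux/slab.h>):

        /* process context: sleeping allocation allowed and preferred */
        conn = kzalloc(sizeof(*conn), GFP_KERNEL);

        /* softirq or under a spinlock: must not sleep */
        conn = kzalloc(sizeof(*conn), GFP_ATOMIC);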
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 5abefb12891d..b2296d3857a0 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -522,7 +522,7 @@ void smp_chan_destroy(struct l2cap_conn *conn)
522 kfree(smp); 522 kfree(smp);
523 conn->smp_chan = NULL; 523 conn->smp_chan = NULL;
524 conn->hcon->smp_conn = NULL; 524 conn->hcon->smp_conn = NULL;
525 hci_conn_put(conn->hcon); 525 hci_conn_drop(conn->hcon);
526} 526}
527 527
528int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) 528int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey)
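
The hci_conn_put() -> hci_conn_drop() rename here and in sco.c separates "I am done using this connection" from the final refcount put: dropping the last use arms the disconnect timer rather than freeing anything. A refcount sketch of that semantic, with illustrative struct and field names (assumes <linux/atomic.h> and <linux/timer.h>):

struct conn {
        atomic_t use_cnt;
        struct timer_list disc_timer;
        unsigned long disc_timeout;
};

static void conn_drop(struct conn *c)
{
        /* last user gone: schedule disconnect, do not free here */
        if (atomic_dec_and_test(&c->use_cnt))
                mod_timer(&c->disc_timer, jiffies + c->disc_timeout);
}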
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 314c73ed418f..967312803e41 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -348,10 +348,10 @@ void br_dev_setup(struct net_device *dev)
348 348
349 dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | 349 dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
350 NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | 350 NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX |
351 NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX; 351 NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX;
352 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | 352 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
353 NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | 353 NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
354 NETIF_F_HW_VLAN_TX; 354 NETIF_F_HW_VLAN_CTAG_TX;
355 355
356 br->dev = dev; 356 br->dev = dev;
357 spin_lock_init(&br->lock); 357 spin_lock_init(&br->lock);
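
The NETIF_F_HW_VLAN_TX -> NETIF_F_HW_VLAN_CTAG_TX rename is part of the tree-wide 3.10 split of VLAN offload flags by tag protocol: the generic names gained a _CTAG_ (802.1Q customer tag, TPID 0x8100) infix so matching _STAG_ (802.1ad service tag, TPID 0x88a8) flags could be added alongside. Fragment, for contrast:

        /* before: protocol-blind */
        dev->features |= NETIF_F_HW_VLAN_TX;

        /* after: customer-tag (802.1Q) offload, leaving room for _STAG_ */
        dev->features |= NETIF_F_HW_VLAN_CTAG_TX;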
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index bab338e6270d..ebfa4443c69b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -161,9 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
161 if (!pv) 161 if (!pv)
162 return; 162 return;
163 163
164 for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid); 164 for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
165 vid < BR_VLAN_BITMAP_LEN;
166 vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) {
167 f = __br_fdb_get(br, br->dev->dev_addr, vid); 165 f = __br_fdb_get(br, br->dev->dev_addr, vid);
168 if (f && f->is_local && !f->dst) 166 if (f && f->is_local && !f->dst)
169 fdb_delete(br, f); 167 fdb_delete(br, f);
@@ -617,6 +615,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
617 struct net_bridge *br = source->br; 615 struct net_bridge *br = source->br;
618 struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; 616 struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
619 struct net_bridge_fdb_entry *fdb; 617 struct net_bridge_fdb_entry *fdb;
618 bool modified = false;
620 619
621 fdb = fdb_find(head, addr, vid); 620 fdb = fdb_find(head, addr, vid);
622 if (fdb == NULL) { 621 if (fdb == NULL) {
@@ -626,10 +625,16 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
626 fdb = fdb_create(head, source, addr, vid); 625 fdb = fdb_create(head, source, addr, vid);
627 if (!fdb) 626 if (!fdb)
628 return -ENOMEM; 627 return -ENOMEM;
629 fdb_notify(br, fdb, RTM_NEWNEIGH); 628
629 modified = true;
630 } else { 630 } else {
631 if (flags & NLM_F_EXCL) 631 if (flags & NLM_F_EXCL)
632 return -EEXIST; 632 return -EEXIST;
633
634 if (fdb->dst != source) {
635 fdb->dst = source;
636 modified = true;
637 }
633 } 638 }
634 639
635 if (fdb_to_nud(fdb) != state) { 640 if (fdb_to_nud(fdb) != state) {
@@ -641,7 +646,12 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
641 } else 646 } else
642 fdb->is_local = fdb->is_static = 0; 647 fdb->is_local = fdb->is_static = 0;
643 648
644 fdb->updated = fdb->used = jiffies; 649 modified = true;
650 }
651
652 fdb->used = jiffies;
653 if (modified) {
654 fdb->updated = jiffies;
645 fdb_notify(br, fdb, RTM_NEWNEIGH); 655 fdb_notify(br, fdb, RTM_NEWNEIGH);
646 } 656 }
647 657
@@ -724,13 +734,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
724 * specify a VLAN. To be nice, add/update entry for every 734 * specify a VLAN. To be nice, add/update entry for every
725 * vlan on this port. 735 * vlan on this port.
726 */ 736 */
727 vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); 737 for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
728 while (vid < BR_VLAN_BITMAP_LEN) {
729 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); 738 err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
730 if (err) 739 if (err)
731 goto out; 740 goto out;
732 vid = find_next_bit(pv->vlan_bitmap,
733 BR_VLAN_BITMAP_LEN, vid+1);
734 } 741 }
735 } 742 }
736 743
@@ -815,11 +822,8 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
815 * vlan on this port. 822 * vlan on this port.
816 */ 823 */
817 err = -ENOENT; 824 err = -ENOENT;
818 vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); 825 for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
819 while (vid < BR_VLAN_BITMAP_LEN) {
820 err &= __br_fdb_delete(p, addr, vid); 826 err &= __br_fdb_delete(p, addr, vid);
821 vid = find_next_bit(pv->vlan_bitmap,
822 BR_VLAN_BITMAP_LEN, vid+1);
823 } 827 }
824 } 828 }
825out: 829out:
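
Three open-coded find_first_bit()/find_next_bit() loops in br_fdb.c collapse into for_each_set_bit() and for_each_set_bit_from(), which walk the set bits of a bitmap in ascending order; the _from variant resumes from the iterator's current value instead of bit 0. A sketch (assumes <linux/bitops.h>; walk_vids is an illustrative name):

static void walk_vids(unsigned long *bitmap, unsigned int nbits)
{
        unsigned int vid;

        for_each_set_bit(vid, bitmap, nbits) {
                /* body runs once per set bit, vid ascending from 0 */
        }

        vid = 100;
        for_each_set_bit_from(vid, bitmap, nbits) {
                /* same walk, but only bits >= 100 */
        }
}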
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ef1b91431c6b..4cdba60926ff 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -67,7 +67,8 @@ void br_port_carrier_check(struct net_bridge_port *p)
67 struct net_device *dev = p->dev; 67 struct net_device *dev = p->dev;
68 struct net_bridge *br = p->br; 68 struct net_bridge *br = p->br;
69 69
70 if (netif_running(dev) && netif_oper_up(dev)) 70 if (!(p->flags & BR_ADMIN_COST) &&
71 netif_running(dev) && netif_oper_up(dev))
71 p->path_cost = port_cost(dev); 72 p->path_cost = port_cost(dev);
72 73
73 if (!netif_running(br->dev)) 74 if (!netif_running(br->dev))
@@ -148,7 +149,6 @@ static void del_nbp(struct net_bridge_port *p)
148 dev->priv_flags &= ~IFF_BRIDGE_PORT; 149 dev->priv_flags &= ~IFF_BRIDGE_PORT;
149 150
150 netdev_rx_handler_unregister(dev); 151 netdev_rx_handler_unregister(dev);
151 synchronize_net();
152 152
153 netdev_upper_dev_unlink(dev, br->dev); 153 netdev_upper_dev_unlink(dev, br->dev);
154 154
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index ee79f3f20383..19942e38fd2d 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -382,7 +382,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
382 return ret; 382 return ret;
383} 383}
384 384
385static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 385static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
386{ 386{
387 struct net *net = sock_net(skb->sk); 387 struct net *net = sock_net(skb->sk);
388 struct br_mdb_entry *entry; 388 struct br_mdb_entry *entry;
@@ -458,7 +458,7 @@ unlock:
458 return err; 458 return err;
459} 459}
460 460
461static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 461static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
462{ 462{
463 struct net_device *dev; 463 struct net_device *dev;
464 struct br_mdb_entry *entry; 464 struct br_mdb_entry *entry;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 923fbeaf7afd..81f2389f78eb 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1369,7 +1369,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
1369 return -EINVAL; 1369 return -EINVAL;
1370 1370
1371 if (iph->protocol != IPPROTO_IGMP) { 1371 if (iph->protocol != IPPROTO_IGMP) {
1372 if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP) 1372 if (!ipv4_is_local_multicast(iph->daddr))
1373 BR_INPUT_SKB_CB(skb)->mrouters_only = 1; 1373 BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
1374 return 0; 1374 return 0;
1375 } 1375 }
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index fe43bc7b063f..1ed75bfd8d1d 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -535,7 +535,8 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
535 if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) 535 if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
536 return br; 536 return br;
537 537
538 vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK); 538 vlan = __vlan_find_dev_deep(br, skb->vlan_proto,
539 vlan_tx_tag_get(skb) & VLAN_VID_MASK);
539 540
540 return vlan ? vlan : br; 541 return vlan ? vlan : br;
541} 542}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 299fc5f40a26..8e3abf564798 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -136,10 +136,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
136 goto nla_put_failure; 136 goto nla_put_failure;
137 137
138 pvid = br_get_pvid(pv); 138 pvid = br_get_pvid(pv);
139 for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); 139 for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
140 vid < BR_VLAN_BITMAP_LEN;
141 vid = find_next_bit(pv->vlan_bitmap,
142 BR_VLAN_BITMAP_LEN, vid+1)) {
143 vinfo.vid = vid; 140 vinfo.vid = vid;
144 vinfo.flags = 0; 141 vinfo.flags = 0;
145 if (vid == pvid) 142 if (vid == pvid)
@@ -355,17 +352,14 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
355/* Change state and parameters on port. */ 352/* Change state and parameters on port. */
356int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) 353int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
357{ 354{
358 struct ifinfomsg *ifm;
359 struct nlattr *protinfo; 355 struct nlattr *protinfo;
360 struct nlattr *afspec; 356 struct nlattr *afspec;
361 struct net_bridge_port *p; 357 struct net_bridge_port *p;
362 struct nlattr *tb[IFLA_BRPORT_MAX + 1]; 358 struct nlattr *tb[IFLA_BRPORT_MAX + 1];
363 int err; 359 int err = 0;
364
365 ifm = nlmsg_data(nlh);
366 360
367 protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); 361 protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
368 afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); 362 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
369 if (!protinfo && !afspec) 363 if (!protinfo && !afspec)
370 return 0; 364 return 0;
371 365
@@ -373,7 +367,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
373 /* We want to accept dev as bridge itself if the AF_SPEC 367 /* We want to accept dev as bridge itself if the AF_SPEC
374 * is set to see if someone is setting vlan info on the bridge 368 * is set to see if someone is setting vlan info on the bridge
375 */ 369 */
376 if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) 370 if (!p && !afspec)
377 return -EINVAL; 371 return -EINVAL;
378 372
379 if (p && protinfo) { 373 if (p && protinfo) {
@@ -414,14 +408,11 @@ out:
414/* Delete port information */ 408/* Delete port information */
415int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) 409int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
416{ 410{
417 struct ifinfomsg *ifm;
418 struct nlattr *afspec; 411 struct nlattr *afspec;
419 struct net_bridge_port *p; 412 struct net_bridge_port *p;
420 int err; 413 int err;
421 414
422 ifm = nlmsg_data(nlh); 415 afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
423
424 afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
425 if (!afspec) 416 if (!afspec)
426 return 0; 417 return 0;
427 418
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3cbf5beb3d4b..d2c043a857b6 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -156,6 +156,7 @@ struct net_bridge_port
156#define BR_BPDU_GUARD 0x00000002 156#define BR_BPDU_GUARD 0x00000002
157#define BR_ROOT_BLOCK 0x00000004 157#define BR_ROOT_BLOCK 0x00000004
158#define BR_MULTICAST_FAST_LEAVE 0x00000008 158#define BR_MULTICAST_FAST_LEAVE 0x00000008
159#define BR_ADMIN_COST 0x00000010
159 160
160#ifdef CONFIG_BRIDGE_IGMP_SNOOPING 161#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
161 u32 multicast_startup_queries_sent; 162 u32 multicast_startup_queries_sent;
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index b01849a74310..1c0a50f13229 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -225,7 +225,14 @@ static void br_record_config_timeout_values(struct net_bridge *br,
225/* called under bridge lock */ 225/* called under bridge lock */
226void br_transmit_tcn(struct net_bridge *br) 226void br_transmit_tcn(struct net_bridge *br)
227{ 227{
228 br_send_tcn_bpdu(br_get_port(br, br->root_port)); 228 struct net_bridge_port *p;
229
230 p = br_get_port(br, br->root_port);
231 if (p)
232 br_send_tcn_bpdu(p);
233 else
234 br_notice(br, "root port %u not found for topology notice\n",
235 br->root_port);
229} 236}
230 237
231/* called under bridge lock */ 238/* called under bridge lock */
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 0bdb4ebd362b..d45e760141bb 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -288,6 +288,7 @@ int br_stp_set_path_cost(struct net_bridge_port *p, unsigned long path_cost)
288 path_cost > BR_MAX_PATH_COST) 288 path_cost > BR_MAX_PATH_COST)
289 return -ERANGE; 289 return -ERANGE;
290 290
291 p->flags |= BR_ADMIN_COST;
291 p->path_cost = path_cost; 292 p->path_cost = path_cost;
292 br_configuration_update(p->br); 293 br_configuration_update(p->br);
293 br_port_state_selection(p->br); 294 br_port_state_selection(p->br);
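
BR_ADMIN_COST makes a user-configured STP path cost sticky: br_stp_set_path_cost() sets the flag, and br_port_carrier_check() (see the br_if.c hunk above) recomputes port_cost() only when the flag is clear, so carrier flaps no longer clobber an explicit setting. The shape of the idiom, with illustrative names:

#define ADMIN_COST 0x10

struct port { unsigned long flags; unsigned int path_cost; };

static void set_cost_by_admin(struct port *p, unsigned int cost)
{
        p->flags |= ADMIN_COST;          /* remember it was explicit */
        p->path_cost = cost;
}

static void on_carrier_event(struct port *p, unsigned int auto_cost)
{
        if (!(p->flags & ADMIN_COST))    /* only auto values may move */
                p->path_cost = auto_cost;
}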
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index c3530a81a33b..950663d4d330 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -107,7 +107,7 @@ static void br_tcn_timer_expired(unsigned long arg)
107 107
108 br_debug(br, "tcn timer expired\n"); 108 br_debug(br, "tcn timer expired\n");
109 spin_lock(&br->lock); 109 spin_lock(&br->lock);
110 if (br->dev->flags & IFF_UP) { 110 if (!br_is_root_bridge(br) && (br->dev->flags & IFF_UP)) {
111 br_transmit_tcn(br); 111 br_transmit_tcn(br);
112 112
113 mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time); 113 mod_timer(&br->tcn_timer,jiffies + br->bridge_hello_time);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 93dde75923f0..bd58b45f5f90 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -34,6 +34,7 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags)
34 34
35static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) 35static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
36{ 36{
37 const struct net_device_ops *ops;
37 struct net_bridge_port *p = NULL; 38 struct net_bridge_port *p = NULL;
38 struct net_bridge *br; 39 struct net_bridge *br;
39 struct net_device *dev; 40 struct net_device *dev;
@@ -53,15 +54,17 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
53 br = v->parent.br; 54 br = v->parent.br;
54 dev = br->dev; 55 dev = br->dev;
55 } 56 }
57 ops = dev->netdev_ops;
56 58
57 if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) { 59 if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
58 /* Add VLAN to the device filter if it is supported. 60 /* Add VLAN to the device filter if it is supported.
 59 * Strictly speaking, this is not necessary now, since 61 * Strictly speaking, this is not necessary now, since
60 * devices are made promiscuous by the bridge, but if 62 * devices are made promiscuous by the bridge, but if
61 * that ever changes this code will allow tagged 63 * that ever changes this code will allow tagged
62 * traffic to enter the bridge. 64 * traffic to enter the bridge.
63 */ 65 */
64 err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid); 66 err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q),
67 vid);
65 if (err) 68 if (err)
66 return err; 69 return err;
67 } 70 }
@@ -82,8 +85,8 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
82 return 0; 85 return 0;
83 86
84out_filt: 87out_filt:
85 if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) 88 if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
86 dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); 89 ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid);
87 return err; 90 return err;
88} 91}
89 92
@@ -97,9 +100,10 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
97 100
98 if (v->port_idx && vid) { 101 if (v->port_idx && vid) {
99 struct net_device *dev = v->parent.port->dev; 102 struct net_device *dev = v->parent.port->dev;
103 const struct net_device_ops *ops = dev->netdev_ops;
100 104
101 if (dev->features & NETIF_F_HW_VLAN_FILTER) 105 if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
102 dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); 106 ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid);
103 } 107 }
104 108
105 clear_bit(vid, v->vlan_bitmap); 109 clear_bit(vid, v->vlan_bitmap);
@@ -171,7 +175,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
171 * mac header. 175 * mac header.
172 */ 176 */
173 skb_push(skb, ETH_HLEN); 177 skb_push(skb, ETH_HLEN);
174 skb = __vlan_put_tag(skb, skb->vlan_tci); 178 skb = __vlan_put_tag(skb, skb->vlan_proto, skb->vlan_tci);
175 if (!skb) 179 if (!skb)
176 goto out; 180 goto out;
177 /* put skb->data back to where it was */ 181 /* put skb->data back to where it was */
@@ -213,7 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
213 /* PVID is set on this port. Any untagged ingress 217 /* PVID is set on this port. Any untagged ingress
214 * frame is considered to belong to this vlan. 218 * frame is considered to belong to this vlan.
215 */ 219 */
216 __vlan_hwaccel_put_tag(skb, pvid); 220 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
217 return true; 221 return true;
218 } 222 }
219 223
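
Every VLAN acceleration hook touched in br_vlan.c now carries the tag protocol as a __be16 second argument: ndo_vlan_rx_add_vid()/ndo_vlan_rx_kill_vid() as well as the __vlan_put_tag() and __vlan_hwaccel_put_tag() helpers. The bridge pins the customer tag explicitly, while forwarding paths propagate whatever skb->vlan_proto arrived with. The calling convention, as a fragment lifted from the hunks above:

        err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), vid);
        ...
        skb = __vlan_put_tag(skb, skb->vlan_proto, skb->vlan_tci);
        ...
        __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);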
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 92de5e5f9db2..9878eb8204c5 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -78,6 +78,11 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
78 const char *prefix) 78 const char *prefix)
79{ 79{
80 unsigned int bitmask; 80 unsigned int bitmask;
81 struct net *net = dev_net(in ? in : out);
82
83 /* FIXME: Disabled from containers until syslog ns is supported */
84 if (!net_eq(net, &init_net))
85 return;
81 86
82 spin_lock_bh(&ebt_log_lock); 87 spin_lock_bh(&ebt_log_lock);
83 printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", 88 printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
@@ -176,17 +181,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
176{ 181{
177 const struct ebt_log_info *info = par->targinfo; 182 const struct ebt_log_info *info = par->targinfo;
178 struct nf_loginfo li; 183 struct nf_loginfo li;
184 struct net *net = dev_net(par->in ? par->in : par->out);
179 185
180 li.type = NF_LOG_TYPE_LOG; 186 li.type = NF_LOG_TYPE_LOG;
181 li.u.log.level = info->loglevel; 187 li.u.log.level = info->loglevel;
182 li.u.log.logflags = info->bitmask; 188 li.u.log.logflags = info->bitmask;
183 189
184 if (info->bitmask & EBT_LOG_NFLOG) 190 if (info->bitmask & EBT_LOG_NFLOG)
185 nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, 191 nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
186 par->out, &li, "%s", info->prefix); 192 par->in, par->out, &li, "%s", info->prefix);
187 else 193 else
188 ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, 194 ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in,
189 par->out, &li, info->prefix); 195 par->out, &li, info->prefix);
190 return EBT_CONTINUE; 196 return EBT_CONTINUE;
191} 197}
192 198
@@ -206,19 +212,47 @@ static struct nf_logger ebt_log_logger __read_mostly = {
206 .me = THIS_MODULE, 212 .me = THIS_MODULE,
207}; 213};
208 214
215static int __net_init ebt_log_net_init(struct net *net)
216{
217 nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger);
218 return 0;
219}
220
221static void __net_exit ebt_log_net_fini(struct net *net)
222{
223 nf_log_unset(net, &ebt_log_logger);
224}
225
226static struct pernet_operations ebt_log_net_ops = {
227 .init = ebt_log_net_init,
228 .exit = ebt_log_net_fini,
229};
230
209static int __init ebt_log_init(void) 231static int __init ebt_log_init(void)
210{ 232{
211 int ret; 233 int ret;
212 234
235 ret = register_pernet_subsys(&ebt_log_net_ops);
236 if (ret < 0)
237 goto err_pernet;
238
213 ret = xt_register_target(&ebt_log_tg_reg); 239 ret = xt_register_target(&ebt_log_tg_reg);
214 if (ret < 0) 240 if (ret < 0)
215 return ret; 241 goto err_target;
242
216 nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); 243 nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger);
217 return 0; 244
245 return ret;
246
247err_target:
248 unregister_pernet_subsys(&ebt_log_net_ops);
249err_pernet:
250 return ret;
218} 251}
219 252
220static void __exit ebt_log_fini(void) 253static void __exit ebt_log_fini(void)
221{ 254{
255 unregister_pernet_subsys(&ebt_log_net_ops);
222 nf_log_unregister(&ebt_log_logger); 256 nf_log_unregister(&ebt_log_logger);
223 xt_unregister_target(&ebt_log_tg_reg); 257 xt_unregister_target(&ebt_log_tg_reg);
224} 258}
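
With the pernet registration added, ebt_log's init path has two failure points, so it adopts the standard register-in-order/unwind-in-reverse shape, and the logger binding becomes per-namespace via nf_log_set()/nf_log_unset(). The skeleton of that error-unwinding idiom (example_* names are illustrative; field initializers elided):

static struct pernet_operations example_net_ops = { /* .init/.exit elided */ };
static struct xt_target example_tg_reg = { /* fields elided */ };

static int __init example_init(void)
{
        int ret;

        ret = register_pernet_subsys(&example_net_ops);
        if (ret < 0)
                goto err_pernet;

        ret = xt_register_target(&example_tg_reg);
        if (ret < 0)
                goto err_target;

        return 0;

err_target:
        unregister_pernet_subsys(&example_net_ops);
err_pernet:
        return ret;
}

static void __exit example_fini(void)
{
        /* tear down in the reverse of registration order */
        xt_unregister_target(&example_tg_reg);
        unregister_pernet_subsys(&example_net_ops);
}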
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 5be68bbcc341..59ac7952010d 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
24{ 24{
25 const struct ebt_nflog_info *info = par->targinfo; 25 const struct ebt_nflog_info *info = par->targinfo;
26 struct nf_loginfo li; 26 struct nf_loginfo li;
27 struct net *net = dev_net(par->in ? par->in : par->out);
27 28
28 li.type = NF_LOG_TYPE_ULOG; 29 li.type = NF_LOG_TYPE_ULOG;
29 li.u.ulog.copy_len = info->len; 30 li.u.ulog.copy_len = info->len;
30 li.u.ulog.group = info->group; 31 li.u.ulog.group = info->group;
31 li.u.ulog.qthreshold = info->threshold; 32 li.u.ulog.qthreshold = info->threshold;
32 33
33 nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, 34 nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in,
34 &li, "%s", info->prefix); 35 par->out, &li, "%s", info->prefix);
35 return EBT_CONTINUE; 36 return EBT_CONTINUE;
36} 37}
37 38
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 3bf43f7bb9d4..fc1905c51417 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -35,12 +35,13 @@
35#include <linux/skbuff.h> 35#include <linux/skbuff.h>
36#include <linux/kernel.h> 36#include <linux/kernel.h>
37#include <linux/timer.h> 37#include <linux/timer.h>
38#include <linux/netlink.h> 38#include <net/netlink.h>
39#include <linux/netdevice.h> 39#include <linux/netdevice.h>
40#include <linux/netfilter/x_tables.h> 40#include <linux/netfilter/x_tables.h>
41#include <linux/netfilter_bridge/ebtables.h> 41#include <linux/netfilter_bridge/ebtables.h>
42#include <linux/netfilter_bridge/ebt_ulog.h> 42#include <linux/netfilter_bridge/ebt_ulog.h>
43#include <net/netfilter/nf_log.h> 43#include <net/netfilter/nf_log.h>
44#include <net/netns/generic.h>
44#include <net/sock.h> 45#include <net/sock.h>
45#include "../br_private.h" 46#include "../br_private.h"
46 47
@@ -62,13 +63,22 @@ typedef struct {
62 spinlock_t lock; /* the per-queue lock */ 63 spinlock_t lock; /* the per-queue lock */
63} ebt_ulog_buff_t; 64} ebt_ulog_buff_t;
64 65
65static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; 66static int ebt_ulog_net_id __read_mostly;
66static struct sock *ebtulognl; 67struct ebt_ulog_net {
68 unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS];
69 ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
70 struct sock *ebtulognl;
71};
72
73static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net)
74{
75 return net_generic(net, ebt_ulog_net_id);
76}
67 77
68/* send one ulog_buff_t to userspace */ 78/* send one ulog_buff_t to userspace */
69static void ulog_send(unsigned int nlgroup) 79static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup)
70{ 80{
71 ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; 81 ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup];
72 82
73 del_timer(&ub->timer); 83 del_timer(&ub->timer);
74 84
@@ -80,7 +90,7 @@ static void ulog_send(unsigned int nlgroup)
80 ub->lastnlh->nlmsg_type = NLMSG_DONE; 90 ub->lastnlh->nlmsg_type = NLMSG_DONE;
81 91
82 NETLINK_CB(ub->skb).dst_group = nlgroup + 1; 92 NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
83 netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); 93 netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
84 94
85 ub->qlen = 0; 95 ub->qlen = 0;
86 ub->skb = NULL; 96 ub->skb = NULL;
@@ -89,10 +99,15 @@ static void ulog_send(unsigned int nlgroup)
89/* timer function to flush queue in flushtimeout time */ 99/* timer function to flush queue in flushtimeout time */
90static void ulog_timer(unsigned long data) 100static void ulog_timer(unsigned long data)
91{ 101{
92 spin_lock_bh(&ulog_buffers[data].lock); 102 struct ebt_ulog_net *ebt = container_of((void *)data,
93 if (ulog_buffers[data].skb) 103 struct ebt_ulog_net,
94 ulog_send(data); 104 nlgroup[*(unsigned int *)data]);
95 spin_unlock_bh(&ulog_buffers[data].lock); 105
106 ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data];
107 spin_lock_bh(&ub->lock);
108 if (ub->skb)
109 ulog_send(ebt, *(unsigned int *)data);
110 spin_unlock_bh(&ub->lock);
96} 111}
97 112
98static struct sk_buff *ulog_alloc_skb(unsigned int size) 113static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -123,8 +138,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
123 ebt_ulog_packet_msg_t *pm; 138 ebt_ulog_packet_msg_t *pm;
124 size_t size, copy_len; 139 size_t size, copy_len;
125 struct nlmsghdr *nlh; 140 struct nlmsghdr *nlh;
141 struct net *net = dev_net(in ? in : out);
142 struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
126 unsigned int group = uloginfo->nlgroup; 143 unsigned int group = uloginfo->nlgroup;
127 ebt_ulog_buff_t *ub = &ulog_buffers[group]; 144 ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];
128 spinlock_t *lock = &ub->lock; 145 spinlock_t *lock = &ub->lock;
129 ktime_t kt; 146 ktime_t kt;
130 147
@@ -134,7 +151,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
134 else 151 else
135 copy_len = uloginfo->cprange; 152 copy_len = uloginfo->cprange;
136 153
137 size = NLMSG_SPACE(sizeof(*pm) + copy_len); 154 size = nlmsg_total_size(sizeof(*pm) + copy_len);
138 if (size > nlbufsiz) { 155 if (size > nlbufsiz) {
139 pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz); 156 pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz);
140 return; 157 return;
@@ -146,7 +163,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
146 if (!(ub->skb = ulog_alloc_skb(size))) 163 if (!(ub->skb = ulog_alloc_skb(size)))
147 goto unlock; 164 goto unlock;
148 } else if (size > skb_tailroom(ub->skb)) { 165 } else if (size > skb_tailroom(ub->skb)) {
149 ulog_send(group); 166 ulog_send(ebt, group);
150 167
151 if (!(ub->skb = ulog_alloc_skb(size))) 168 if (!(ub->skb = ulog_alloc_skb(size)))
152 goto unlock; 169 goto unlock;
@@ -205,7 +222,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
205 ub->lastnlh = nlh; 222 ub->lastnlh = nlh;
206 223
207 if (ub->qlen >= uloginfo->qthreshold) 224 if (ub->qlen >= uloginfo->qthreshold)
208 ulog_send(group); 225 ulog_send(ebt, group);
209 else if (!timer_pending(&ub->timer)) { 226 else if (!timer_pending(&ub->timer)) {
210 ub->timer.expires = jiffies + flushtimeout * HZ / 100; 227 ub->timer.expires = jiffies + flushtimeout * HZ / 100;
211 add_timer(&ub->timer); 228 add_timer(&ub->timer);
@@ -277,56 +294,89 @@ static struct nf_logger ebt_ulog_logger __read_mostly = {
277 .me = THIS_MODULE, 294 .me = THIS_MODULE,
278}; 295};
279 296
280static int __init ebt_ulog_init(void) 297static int __net_init ebt_ulog_net_init(struct net *net)
281{ 298{
282 int ret;
283 int i; 299 int i;
300 struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
301
284 struct netlink_kernel_cfg cfg = { 302 struct netlink_kernel_cfg cfg = {
285 .groups = EBT_ULOG_MAXNLGROUPS, 303 .groups = EBT_ULOG_MAXNLGROUPS,
286 }; 304 };
287 305
288 if (nlbufsiz >= 128*1024) {
289 pr_warning("Netlink buffer has to be <= 128kB,"
290 " please try a smaller nlbufsiz parameter.\n");
291 return -EINVAL;
292 }
293
294 /* initialize ulog_buffers */ 306 /* initialize ulog_buffers */
295 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { 307 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
296 setup_timer(&ulog_buffers[i].timer, ulog_timer, i); 308 ebt->nlgroup[i] = i;
297 spin_lock_init(&ulog_buffers[i].lock); 309 setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer,
310 (unsigned long)&ebt->nlgroup[i]);
311 spin_lock_init(&ebt->ulog_buffers[i].lock);
298 } 312 }
299 313
300 ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); 314 ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
301 if (!ebtulognl) 315 if (!ebt->ebtulognl)
302 ret = -ENOMEM; 316 return -ENOMEM;
303 else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0)
304 netlink_kernel_release(ebtulognl);
305 317
306 if (ret == 0) 318 nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger);
307 nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); 319 return 0;
308
309 return ret;
310} 320}
311 321
312static void __exit ebt_ulog_fini(void) 322static void __net_exit ebt_ulog_net_fini(struct net *net)
313{ 323{
314 ebt_ulog_buff_t *ub;
315 int i; 324 int i;
325 struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
316 326
317 nf_log_unregister(&ebt_ulog_logger); 327 nf_log_unset(net, &ebt_ulog_logger);
318 xt_unregister_target(&ebt_ulog_tg_reg);
319 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { 328 for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
320 ub = &ulog_buffers[i]; 329 ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i];
321 del_timer(&ub->timer); 330 del_timer(&ub->timer);
322 spin_lock_bh(&ub->lock); 331
323 if (ub->skb) { 332 if (ub->skb) {
324 kfree_skb(ub->skb); 333 kfree_skb(ub->skb);
325 ub->skb = NULL; 334 ub->skb = NULL;
326 } 335 }
327 spin_unlock_bh(&ub->lock);
328 } 336 }
329 netlink_kernel_release(ebtulognl); 337 netlink_kernel_release(ebt->ebtulognl);
338}
339
340static struct pernet_operations ebt_ulog_net_ops = {
341 .init = ebt_ulog_net_init,
342 .exit = ebt_ulog_net_fini,
343 .id = &ebt_ulog_net_id,
344 .size = sizeof(struct ebt_ulog_net),
345};
346
347static int __init ebt_ulog_init(void)
348{
349 int ret;
350
351 if (nlbufsiz >= 128*1024) {
352 pr_warn("Netlink buffer has to be <= 128kB,"
353 "please try a smaller nlbufsiz parameter.\n");
354 return -EINVAL;
355 }
356
357 ret = register_pernet_subsys(&ebt_ulog_net_ops);
358 if (ret)
359 goto out_pernet;
360
361 ret = xt_register_target(&ebt_ulog_tg_reg);
362 if (ret)
363 goto out_target;
364
365 nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
366
367 return 0;
368
369out_target:
370 unregister_pernet_subsys(&ebt_ulog_net_ops);
371out_pernet:
372 return ret;
373}
374
375static void __exit ebt_ulog_fini(void)
376{
377 nf_log_unregister(&ebt_ulog_logger);
378 xt_unregister_target(&ebt_ulog_tg_reg);
379 unregister_pernet_subsys(&ebt_ulog_net_ops);
330} 380}
331 381
332module_init(ebt_ulog_init); 382module_init(ebt_ulog_init);
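
The larger rewrite in ebt_ulog.c moves all module-global state (the netlink socket and the per-group buffers) into a per-namespace slot: setting pernet_operations .id/.size makes the core allocate a struct ebt_ulog_net for every struct net, and net_generic() fetches it back. A minimal skeleton of the mechanism (example_* names are illustrative):

#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int example_net_id __read_mostly;

struct example_net {
        struct sock *nlsk;              /* was a file-scope global */
};

static struct example_net *example_pernet(struct net *net)
{
        return net_generic(net, example_net_id);
}

static struct pernet_operations example_net_ops = {
        .id   = &example_net_id,        /* filled in at registration */
        .size = sizeof(struct example_net),
};

One awkward consequence is visible in ulog_timer(): setup_timer() still takes an unsigned long, so the patch stores each group index in a per-net nlgroup[] array and uses container_of() on that slot's address to recover the owning struct ebt_ulog_net.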
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 40d8258bf74f..70f656ce0f4a 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -64,9 +64,7 @@ static int ebt_broute(struct sk_buff *skb)
64static int __net_init broute_net_init(struct net *net) 64static int __net_init broute_net_init(struct net *net)
65{ 65{
66 net->xt.broute_table = ebt_register_table(net, &broute_table); 66 net->xt.broute_table = ebt_register_table(net, &broute_table);
67 if (IS_ERR(net->xt.broute_table)) 67 return PTR_RET(net->xt.broute_table);
68 return PTR_ERR(net->xt.broute_table);
69 return 0;
70} 68}
71 69
72static void __net_exit broute_net_exit(struct net *net) 70static void __net_exit broute_net_exit(struct net *net)
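
PTR_RET() (later renamed PTR_ERR_OR_ZERO()) is just the common tail of table registration folded into one call:

        /* equivalent to the three lines it replaces */
        return IS_ERR(p) ? PTR_ERR(p) : 0;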
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 8d493c91a562..3d110c4fc787 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -138,7 +138,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
138 ethproto = h->h_proto; 138 ethproto = h->h_proto;
139 139
140 if (e->bitmask & EBT_802_3) { 140 if (e->bitmask & EBT_802_3) {
141 if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO)) 141 if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO))
142 return 1; 142 return 1;
143 } else if (!(e->bitmask & EBT_NOPROTO) && 143 } else if (!(e->bitmask & EBT_NOPROTO) &&
144 FWINV2(e->ethproto != ethproto, EBT_IPROTO)) 144 FWINV2(e->ethproto != ethproto, EBT_IPROTO))
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 21760f008974..1f9ece1a9c34 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * CAIF Interface registration. 2 * CAIF Interface registration.
3 * Copyright (C) ST-Ericsson AB 2010 3 * Copyright (C) ST-Ericsson AB 2010
4 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 4 * Author: Sjur Brendeland
5 * License terms: GNU General Public License (GPL) version 2 5 * License terms: GNU General Public License (GPL) version 2
6 * 6 *
7 * Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont 7 * Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont
@@ -301,10 +301,11 @@ static void dev_flowctrl(struct net_device *dev, int on)
301} 301}
302 302
303void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, 303void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
304 struct cflayer *link_support, int head_room, 304 struct cflayer *link_support, int head_room,
305 struct cflayer **layer, int (**rcv_func)( 305 struct cflayer **layer,
306 struct sk_buff *, struct net_device *, 306 int (**rcv_func)(struct sk_buff *, struct net_device *,
307 struct packet_type *, struct net_device *)) 307 struct packet_type *,
308 struct net_device *))
308{ 309{
309 struct caif_device_entry *caifd; 310 struct caif_device_entry *caifd;
310 enum cfcnfg_phy_preference pref; 311 enum cfcnfg_phy_preference pref;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 095259f83902..05a41c7ec304 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
@@ -197,8 +197,8 @@ static void cfsk_put(struct cflayer *layr)
197 197
198/* Packet Control Callback function called from CAIF */ 198/* Packet Control Callback function called from CAIF */
199static void caif_ctrl_cb(struct cflayer *layr, 199static void caif_ctrl_cb(struct cflayer *layr,
200 enum caif_ctrlcmd flow, 200 enum caif_ctrlcmd flow,
201 int phyid) 201 int phyid)
202{ 202{
203 struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); 203 struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
204 switch (flow) { 204 switch (flow) {
@@ -274,7 +274,7 @@ static void caif_check_flow_release(struct sock *sk)
274 * changed locking, address handling and added MSG_TRUNC. 274 * changed locking, address handling and added MSG_TRUNC.
275 */ 275 */
276static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, 276static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
277 struct msghdr *m, size_t len, int flags) 277 struct msghdr *m, size_t len, int flags)
278 278
279{ 279{
280 struct sock *sk = sock->sk; 280 struct sock *sk = sock->sk;
@@ -286,6 +286,8 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
286 if (m->msg_flags&MSG_OOB) 286 if (m->msg_flags&MSG_OOB)
287 goto read_error; 287 goto read_error;
288 288
289 m->msg_namelen = 0;
290
289 skb = skb_recv_datagram(sk, flags, 0 , &ret); 291 skb = skb_recv_datagram(sk, flags, 0 , &ret);
290 if (!skb) 292 if (!skb)
291 goto read_error; 293 goto read_error;
@@ -346,8 +348,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
346 * changed locking calls, changed address handling. 348 * changed locking calls, changed address handling.
347 */ 349 */
348static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, 350static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
349 struct msghdr *msg, size_t size, 351 struct msghdr *msg, size_t size,
350 int flags) 352 int flags)
351{ 353{
352 struct sock *sk = sock->sk; 354 struct sock *sk = sock->sk;
353 int copied = 0; 355 int copied = 0;
@@ -462,7 +464,7 @@ out:
462 * CAIF flow-on and sock_writable. 464 * CAIF flow-on and sock_writable.
463 */ 465 */
464static long caif_wait_for_flow_on(struct caifsock *cf_sk, 466static long caif_wait_for_flow_on(struct caifsock *cf_sk,
465 int wait_writeable, long timeo, int *err) 467 int wait_writeable, long timeo, int *err)
466{ 468{
467 struct sock *sk = &cf_sk->sk; 469 struct sock *sk = &cf_sk->sk;
468 DEFINE_WAIT(wait); 470 DEFINE_WAIT(wait);
@@ -516,7 +518,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
516 518
517/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ 519/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */
518static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, 520static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
519 struct msghdr *msg, size_t len) 521 struct msghdr *msg, size_t len)
520{ 522{
521 struct sock *sk = sock->sk; 523 struct sock *sk = sock->sk;
522 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 524 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -591,7 +593,7 @@ err:
591 * and other minor adaptations. 593 * and other minor adaptations.
592 */ 594 */
593static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, 595static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
594 struct msghdr *msg, size_t len) 596 struct msghdr *msg, size_t len)
595{ 597{
596 struct sock *sk = sock->sk; 598 struct sock *sk = sock->sk;
597 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 599 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -670,7 +672,7 @@ out_err:
670} 672}
671 673
672static int setsockopt(struct socket *sock, 674static int setsockopt(struct socket *sock,
673 int lvl, int opt, char __user *ov, unsigned int ol) 675 int lvl, int opt, char __user *ov, unsigned int ol)
674{ 676{
675 struct sock *sk = sock->sk; 677 struct sock *sk = sock->sk;
676 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); 678 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
@@ -932,7 +934,7 @@ static int caif_release(struct socket *sock)
932 934
933/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ 935/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
934static unsigned int caif_poll(struct file *file, 936static unsigned int caif_poll(struct file *file,
935 struct socket *sock, poll_table *wait) 937 struct socket *sock, poll_table *wait)
936{ 938{
937 struct sock *sk = sock->sk; 939 struct sock *sk = sock->sk;
938 unsigned int mask; 940 unsigned int mask;
@@ -1022,7 +1024,7 @@ static void caif_sock_destructor(struct sock *sk)
1022} 1024}
1023 1025
1024static int caif_create(struct net *net, struct socket *sock, int protocol, 1026static int caif_create(struct net *net, struct socket *sock, int protocol,
1025 int kern) 1027 int kern)
1026{ 1028{
1027 struct sock *sk = NULL; 1029 struct sock *sk = NULL;
1028 struct caifsock *cf_sk = NULL; 1030 struct caifsock *cf_sk = NULL;
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index ef8ebaa993cf..942e00a425fd 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * CAIF USB handler 2 * CAIF USB handler
3 * Copyright (C) ST-Ericsson AB 2011 3 * Copyright (C) ST-Ericsson AB 2011
4 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 4 * Author: Sjur Brendeland
5 * License terms: GNU General Public License (GPL) version 2 5 * License terms: GNU General Public License (GPL) version 2
6 * 6 *
7 */ 7 */
@@ -75,7 +75,7 @@ static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt)
75} 75}
76 76
77static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 77static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
78 int phyid) 78 int phyid)
79{ 79{
80 if (layr->up && layr->up->ctrlcmd) 80 if (layr->up && layr->up->ctrlcmd)
81 layr->up->ctrlcmd(layr->up, ctrl, layr->id); 81 layr->up->ctrlcmd(layr->up, ctrl, layr->id);
@@ -121,7 +121,7 @@ static struct packet_type caif_usb_type __read_mostly = {
121}; 121};
122 122
123static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, 123static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
124 void *arg) 124 void *arg)
125{ 125{
126 struct net_device *dev = arg; 126 struct net_device *dev = arg;
127 struct caif_dev_common common; 127 struct caif_dev_common common;
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index f1dbddb95a6c..fa39fc298708 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
@@ -61,11 +61,11 @@ struct cfcnfg {
61}; 61};
62 62
63static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, 63static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id,
64 enum cfctrl_srv serv, u8 phyid, 64 enum cfctrl_srv serv, u8 phyid,
65 struct cflayer *adapt_layer); 65 struct cflayer *adapt_layer);
66static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id); 66static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id);
67static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, 67static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
68 struct cflayer *adapt_layer); 68 struct cflayer *adapt_layer);
69static void cfctrl_resp_func(void); 69static void cfctrl_resp_func(void);
70static void cfctrl_enum_resp(void); 70static void cfctrl_enum_resp(void);
71 71
@@ -131,7 +131,7 @@ static void cfctrl_resp_func(void)
131} 131}
132 132
133static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg, 133static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg,
134 u8 phyid) 134 u8 phyid)
135{ 135{
136 struct cfcnfg_phyinfo *phy; 136 struct cfcnfg_phyinfo *phy;
137 137
@@ -216,8 +216,8 @@ static const int protohead[CFCTRL_SRV_MASK] = {
216 216
217 217
218static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, 218static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
219 struct caif_connect_request *s, 219 struct caif_connect_request *s,
220 struct cfctrl_link_param *l) 220 struct cfctrl_link_param *l)
221{ 221{
222 struct dev_info *dev_info; 222 struct dev_info *dev_info;
223 enum cfcnfg_phy_preference pref; 223 enum cfcnfg_phy_preference pref;
@@ -301,8 +301,7 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,
301 301
302int caif_connect_client(struct net *net, struct caif_connect_request *conn_req, 302int caif_connect_client(struct net *net, struct caif_connect_request *conn_req,
303 struct cflayer *adap_layer, int *ifindex, 303 struct cflayer *adap_layer, int *ifindex,
304 int *proto_head, 304 int *proto_head, int *proto_tail)
305 int *proto_tail)
306{ 305{
307 struct cflayer *frml; 306 struct cflayer *frml;
308 struct cfcnfg_phyinfo *phy; 307 struct cfcnfg_phyinfo *phy;
@@ -364,7 +363,7 @@ unlock:
364EXPORT_SYMBOL(caif_connect_client); 363EXPORT_SYMBOL(caif_connect_client);
365 364
366static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, 365static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
367 struct cflayer *adapt_layer) 366 struct cflayer *adapt_layer)
368{ 367{
369 if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) 368 if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL)
370 adapt_layer->ctrlcmd(adapt_layer, 369 adapt_layer->ctrlcmd(adapt_layer,
@@ -526,7 +525,7 @@ out_err:
526EXPORT_SYMBOL(cfcnfg_add_phy_layer); 525EXPORT_SYMBOL(cfcnfg_add_phy_layer);
527 526
528int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer, 527int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer,
529 bool up) 528 bool up)
530{ 529{
531 struct cfcnfg_phyinfo *phyinfo; 530 struct cfcnfg_phyinfo *phyinfo;
532 531
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index a376ec1ac0a7..2bd4b58f4372 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
@@ -20,12 +20,12 @@
20 20
21#ifdef CAIF_NO_LOOP 21#ifdef CAIF_NO_LOOP
22static int handle_loop(struct cfctrl *ctrl, 22static int handle_loop(struct cfctrl *ctrl,
23 int cmd, struct cfpkt *pkt){ 23 int cmd, struct cfpkt *pkt){
24 return -1; 24 return -1;
25} 25}
26#else 26#else
27static int handle_loop(struct cfctrl *ctrl, 27static int handle_loop(struct cfctrl *ctrl,
28 int cmd, struct cfpkt *pkt); 28 int cmd, struct cfpkt *pkt);
29#endif 29#endif
30static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt); 30static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt);
31static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 31static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
@@ -72,7 +72,7 @@ void cfctrl_remove(struct cflayer *layer)
72} 72}
73 73
74static bool param_eq(const struct cfctrl_link_param *p1, 74static bool param_eq(const struct cfctrl_link_param *p1,
75 const struct cfctrl_link_param *p2) 75 const struct cfctrl_link_param *p2)
76{ 76{
77 bool eq = 77 bool eq =
78 p1->linktype == p2->linktype && 78 p1->linktype == p2->linktype &&
@@ -197,8 +197,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
197} 197}
198 198
199int cfctrl_linkup_request(struct cflayer *layer, 199int cfctrl_linkup_request(struct cflayer *layer,
200 struct cfctrl_link_param *param, 200 struct cfctrl_link_param *param,
201 struct cflayer *user_layer) 201 struct cflayer *user_layer)
202{ 202{
203 struct cfctrl *cfctrl = container_obj(layer); 203 struct cfctrl *cfctrl = container_obj(layer);
204 u32 tmp32; 204 u32 tmp32;
@@ -301,7 +301,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
301} 301}
302 302
303int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, 303int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
304 struct cflayer *client) 304 struct cflayer *client)
305{ 305{
306 int ret; 306 int ret;
307 struct cfpkt *pkt; 307 struct cfpkt *pkt;
@@ -555,7 +555,7 @@ error:
555} 555}
556 556
557static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 557static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
558 int phyid) 558 int phyid)
559{ 559{
560 struct cfctrl *this = container_obj(layr); 560 struct cfctrl *this = container_obj(layr);
561 switch (ctrl) { 561 switch (ctrl) {
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 2914659eb9b2..7aae0b56829e 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index a63f4a5f5aff..3bdddb32d55a 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index 0a7df7ef062d..8bc7caa28e64 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -2,7 +2,7 @@
2 * CAIF Framing Layer. 2 * CAIF Framing Layer.
3 * 3 *
4 * Copyright (C) ST-Ericsson AB 2010 4 * Copyright (C) ST-Ericsson AB 2010
5 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 5 * Author: Sjur Brendeland
6 * License terms: GNU General Public License (GPL) version 2 6 * License terms: GNU General Public License (GPL) version 2
7 */ 7 */
8 8
@@ -28,7 +28,7 @@ struct cffrml {
28static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt); 28static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt);
29static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt); 29static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt);
30static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 30static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
31 int phyid); 31 int phyid);
32 32
33static u32 cffrml_rcv_error; 33static u32 cffrml_rcv_error;
34static u32 cffrml_rcv_checsum_error; 34static u32 cffrml_rcv_checsum_error;
@@ -167,7 +167,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
167} 167}
168 168
169static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 169static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
170 int phyid) 170 int phyid)
171{ 171{
172 if (layr->up && layr->up->ctrlcmd) 172 if (layr->up && layr->up->ctrlcmd)
173 layr->up->ctrlcmd(layr->up, ctrl, layr->id); 173 layr->up->ctrlcmd(layr->up, ctrl, layr->id);
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 94b08612a4d8..8c5d6386319f 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
@@ -42,7 +42,7 @@ struct cfmuxl {
42static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt); 42static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt);
43static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt); 43static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt);
44static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 44static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
45 int phyid); 45 int phyid);
46static struct cflayer *get_up(struct cfmuxl *muxl, u16 id); 46static struct cflayer *get_up(struct cfmuxl *muxl, u16 id);
47 47
48struct cflayer *cfmuxl_create(void) 48struct cflayer *cfmuxl_create(void)
@@ -244,7 +244,7 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
244} 244}
245 245
246static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 246static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
247 int phyid) 247 int phyid)
248{ 248{
249 struct cfmuxl *muxl = container_obj(layr); 249 struct cfmuxl *muxl = container_obj(layr);
250 struct cflayer *layer; 250 struct cflayer *layer;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 863dedd91bb6..6493351f39c6 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
@@ -266,8 +266,8 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt)
266} 266}
267 267
268inline u16 cfpkt_iterate(struct cfpkt *pkt, 268inline u16 cfpkt_iterate(struct cfpkt *pkt,
269 u16 (*iter_func)(u16, void *, u16), 269 u16 (*iter_func)(u16, void *, u16),
270 u16 data) 270 u16 data)
271{ 271{
272 /* 272 /*
273 * Don't care about the performance hit of linearizing, 273 * Don't care about the performance hit of linearizing,
@@ -307,8 +307,8 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
307} 307}
308 308
309struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, 309struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
310 struct cfpkt *addpkt, 310 struct cfpkt *addpkt,
311 u16 expectlen) 311 u16 expectlen)
312{ 312{
313 struct sk_buff *dst = pkt_to_skb(dstpkt); 313 struct sk_buff *dst = pkt_to_skb(dstpkt);
314 struct sk_buff *add = pkt_to_skb(addpkt); 314 struct sk_buff *add = pkt_to_skb(addpkt);
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 2b563ad04597..61d7617d9249 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
@@ -43,7 +43,7 @@ static void cfrfml_release(struct cflayer *layer)
43} 43}
44 44
45struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, 45struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
46 int mtu_size) 46 int mtu_size)
47{ 47{
48 int tmp; 48 int tmp;
49 struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); 49 struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
@@ -69,7 +69,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
69} 69}
70 70
71static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead, 71static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead,
72 struct cfpkt *pkt, int *err) 72 struct cfpkt *pkt, int *err)
73{ 73{
74 struct cfpkt *tmppkt; 74 struct cfpkt *tmppkt;
75 *err = -EPROTO; 75 *err = -EPROTO;
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 8e68b97f13ee..ce60f06d76de 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
@@ -29,7 +29,7 @@ struct cfserl {
29static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); 29static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
30static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); 30static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
31static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 31static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
32 int phyid); 32 int phyid);
33 33
34struct cflayer *cfserl_create(int instance, bool use_stx) 34struct cflayer *cfserl_create(int instance, bool use_stx)
35{ 35{
@@ -182,7 +182,7 @@ static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt)
182} 182}
183 183
184static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 184static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
185 int phyid) 185 int phyid)
186{ 186{
187 layr->up->ctrlcmd(layr->up, ctrl, phyid); 187 layr->up->ctrlcmd(layr->up, ctrl, phyid);
188} 188}
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index ba217e90765e..353f793d1b3b 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
@@ -25,7 +25,7 @@
25#define container_obj(layr) container_of(layr, struct cfsrvl, layer) 25#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
26 26
27static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, 27static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
28 int phyid) 28 int phyid)
29{ 29{
30 struct cfsrvl *service = container_obj(layr); 30 struct cfsrvl *service = container_obj(layr);
31 31
@@ -158,10 +158,9 @@ static void cfsrvl_release(struct cflayer *layer)
158} 158}
159 159
160void cfsrvl_init(struct cfsrvl *service, 160void cfsrvl_init(struct cfsrvl *service,
161 u8 channel_id, 161 u8 channel_id,
162 struct dev_info *dev_info, 162 struct dev_info *dev_info,
163 bool supports_flowctrl 163 bool supports_flowctrl)
164 )
165{ 164{
166 caif_assert(offsetof(struct cfsrvl, layer) == 0); 165 caif_assert(offsetof(struct cfsrvl, layer) == 0);
167 service->open = false; 166 service->open = false;
@@ -207,8 +206,8 @@ void caif_free_client(struct cflayer *adap_layer)
207EXPORT_SYMBOL(caif_free_client); 206EXPORT_SYMBOL(caif_free_client);
208 207
209void caif_client_register_refcnt(struct cflayer *adapt_layer, 208void caif_client_register_refcnt(struct cflayer *adapt_layer,
210 void (*hold)(struct cflayer *lyr), 209 void (*hold)(struct cflayer *lyr),
211 void (*put)(struct cflayer *lyr)) 210 void (*put)(struct cflayer *lyr))
212{ 211{
213 struct cfsrvl *service; 212 struct cfsrvl *service;
214 213
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 86d2dadb4b73..1728fa4471cf 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 910ab0661f66..262224581efa 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index a8e2a2d758a5..b3b110e8a350 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index e597733affb8..7344a8fa1bb0 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Copyright (C) ST-Ericsson AB 2010 2 * Copyright (C) ST-Ericsson AB 2010
3 * Authors: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Authors: Sjur Brendeland
4 * Daniel Martensson / Daniel.Martensson@stericsson.com 4 * Daniel Martensson
5 * License terms: GNU General Public License (GPL) version 2 5 * License terms: GNU General Public License (GPL) version 2
6 */ 6 */
7 7
@@ -167,7 +167,7 @@ static void chnl_put(struct cflayer *lyr)
167} 167}
168 168
169static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, 169static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
170 int phyid) 170 int phyid)
171{ 171{
172 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); 172 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
173 pr_debug("NET flowctrl func called flow: %s\n", 173 pr_debug("NET flowctrl func called flow: %s\n",
@@ -443,7 +443,7 @@ nla_put_failure:
443} 443}
444 444
445static void caif_netlink_parms(struct nlattr *data[], 445static void caif_netlink_parms(struct nlattr *data[],
446 struct caif_connect_request *conn_req) 446 struct caif_connect_request *conn_req)
447{ 447{
448 if (!data) { 448 if (!data) {
449 pr_warn("no params data found\n"); 449 pr_warn("no params data found\n");
@@ -488,7 +488,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
488} 488}
489 489
490static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], 490static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[],
491 struct nlattr *data[]) 491 struct nlattr *data[])
492{ 492{
493 struct chnl_net *caifdev; 493 struct chnl_net *caifdev;
494 ASSERT_RTNL(); 494 ASSERT_RTNL();
diff --git a/net/can/af_can.c b/net/can/af_can.c
index c48e5220bbac..c4e50852c9f4 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -525,7 +525,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
525 525
526 d = find_dev_rcv_lists(dev); 526 d = find_dev_rcv_lists(dev);
527 if (!d) { 527 if (!d) {
528 printk(KERN_ERR "BUG: receive list not found for " 528 pr_err("BUG: receive list not found for "
529 "dev %s, id %03X, mask %03X\n", 529 "dev %s, id %03X, mask %03X\n",
530 DNAME(dev), can_id, mask); 530 DNAME(dev), can_id, mask);
531 goto out; 531 goto out;
@@ -546,16 +546,13 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
546 } 546 }
547 547
548 /* 548 /*
549 * Check for bugs in CAN protocol implementations: 549 * Check for bugs in CAN protocol implementations using af_can.c:
550 * If no matching list item was found, the list cursor variable next 550 * 'r' will be NULL if no matching list item was found for removal.
551 * will be NULL, while r will point to the last item of the list.
552 */ 551 */
553 552
554 if (!r) { 553 if (!r) {
555 printk(KERN_ERR "BUG: receive list entry not found for " 554 WARN(1, "BUG: receive list entry not found for dev %s, "
556 "dev %s, id %03X, mask %03X\n", 555 "id %03X, mask %03X\n", DNAME(dev), can_id, mask);
557 DNAME(dev), can_id, mask);
558 r = NULL;
559 goto out; 556 goto out;
560 } 557 }
561 558
@@ -749,8 +746,7 @@ int can_proto_register(const struct can_proto *cp)
749 int err = 0; 746 int err = 0;
750 747
751 if (proto < 0 || proto >= CAN_NPROTO) { 748 if (proto < 0 || proto >= CAN_NPROTO) {
752 printk(KERN_ERR "can: protocol number %d out of range\n", 749 pr_err("can: protocol number %d out of range\n", proto);
753 proto);
754 return -EINVAL; 750 return -EINVAL;
755 } 751 }
756 752
@@ -761,8 +757,7 @@ int can_proto_register(const struct can_proto *cp)
761 mutex_lock(&proto_tab_lock); 757 mutex_lock(&proto_tab_lock);
762 758
763 if (proto_tab[proto]) { 759 if (proto_tab[proto]) {
764 printk(KERN_ERR "can: protocol %d already registered\n", 760 pr_err("can: protocol %d already registered\n", proto);
765 proto);
766 err = -EBUSY; 761 err = -EBUSY;
767 } else 762 } else
768 RCU_INIT_POINTER(proto_tab[proto], cp); 763 RCU_INIT_POINTER(proto_tab[proto], cp);
@@ -816,11 +811,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
816 811
817 /* create new dev_rcv_lists for this device */ 812 /* create new dev_rcv_lists for this device */
818 d = kzalloc(sizeof(*d), GFP_KERNEL); 813 d = kzalloc(sizeof(*d), GFP_KERNEL);
819 if (!d) { 814 if (!d)
820 printk(KERN_ERR
821 "can: allocation of receive list failed\n");
822 return NOTIFY_DONE; 815 return NOTIFY_DONE;
823 }
824 BUG_ON(dev->ml_priv); 816 BUG_ON(dev->ml_priv);
825 dev->ml_priv = d; 817 dev->ml_priv = d;
826 818
@@ -838,8 +830,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
838 dev->ml_priv = NULL; 830 dev->ml_priv = NULL;
839 } 831 }
840 } else 832 } else
841 printk(KERN_ERR "can: notifier: receive list not " 833 pr_err("can: notifier: receive list not found for dev "
842 "found for dev %s\n", dev->name); 834 "%s\n", dev->name);
843 835
844 spin_unlock(&can_rcvlists_lock); 836 spin_unlock(&can_rcvlists_lock);
845 837
@@ -927,7 +919,7 @@ static __exit void can_exit(void)
927 /* remove created dev_rcv_lists from still registered CAN devices */ 919 /* remove created dev_rcv_lists from still registered CAN devices */
928 rcu_read_lock(); 920 rcu_read_lock();
929 for_each_netdev_rcu(&init_net, dev) { 921 for_each_netdev_rcu(&init_net, dev) {
930 if (dev->type == ARPHRD_CAN && dev->ml_priv){ 922 if (dev->type == ARPHRD_CAN && dev->ml_priv) {
931 923
932 struct dev_rcv_lists *d = dev->ml_priv; 924 struct dev_rcv_lists *d = dev->ml_priv;
933 925
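A note on the af_can.c hunks above: the open-coded printk(KERN_ERR ...) calls are collapsed into pr_err(), and the unreachable-state report becomes a WARN() so it also produces a backtrace. The sketch below emulates the two macros in userspace so it compiles standalone; pr_err, WARN, and the values used are stand-ins for the kernel definitions, and the macros assume GCC/Clang extensions (##__VA_ARGS__, statement expressions).

/* Userspace emulation of the logging idiom, for illustration only. */
#include <stdio.h>

#define pr_err(fmt, ...)  fprintf(stderr, "ERROR: " fmt, ##__VA_ARGS__)

/* WARN(cond, fmt...): report when cond is true, evaluate to cond */
#define WARN(cond, fmt, ...) \
	({ int __c = !!(cond); if (__c) pr_err(fmt, ##__VA_ARGS__); __c; })

int main(void)
{
	int proto = 42, nproto = 32;	/* stand-ins for the CAN_NPROTO check */

	if (proto < 0 || proto >= nproto)
		pr_err("can: protocol number %d out of range\n", proto);

	if (WARN(proto >= nproto, "BUG: receive list entry not found\n"))
		return 1;
	return 0;
}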
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 5dcb20076f39..8f113e6ff327 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -226,7 +226,7 @@ static int bcm_proc_show(struct seq_file *m, void *v)
226 226
227static int bcm_proc_open(struct inode *inode, struct file *file) 227static int bcm_proc_open(struct inode *inode, struct file *file)
228{ 228{
229 return single_open(file, bcm_proc_show, PDE(inode)->data); 229 return single_open(file, bcm_proc_show, PDE_DATA(inode));
230} 230}
231 231
232static const struct file_operations bcm_proc_fops = { 232static const struct file_operations bcm_proc_fops = {
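The bcm.c hunk above (and the matching one in net/can/proc.c further down) swaps PDE(inode)->data for PDE_DATA(inode): callers stop reaching into struct proc_dir_entry and use an accessor instead, which lets procfs keep the structure layout private. A minimal sketch of that opaque-handle pattern, with illustrative names (struct pde and pde_data() here are not the kernel's definitions):

#include <stdlib.h>

struct pde {			/* layout private to the owning subsystem */
	void *data;
	/* ... fields callers must not depend on ... */
};

static void *pde_data(const struct pde *p)	/* the sanctioned access */
{
	return p->data;
}

int main(void)
{
	struct pde *p = calloc(1, sizeof(*p));
	int payload = 7;

	if (!p)
		return 1;
	p->data = &payload;		/* set by the core at registration */
	int *d = pde_data(p);		/* users go through the accessor */
	(void)d;
	free(p);
	return 0;
}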
diff --git a/net/can/gw.c b/net/can/gw.c
index 2d117dc5ebea..3ee690e8c7d3 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -466,7 +466,7 @@ static int cgw_notifier(struct notifier_block *nb,
466 if (gwj->src.dev == dev || gwj->dst.dev == dev) { 466 if (gwj->src.dev == dev || gwj->dst.dev == dev) {
467 hlist_del(&gwj->list); 467 hlist_del(&gwj->list);
468 cgw_unregister_filter(gwj); 468 cgw_unregister_filter(gwj);
469 kfree(gwj); 469 kmem_cache_free(cgw_cache, gwj);
470 } 470 }
471 } 471 }
472 } 472 }
@@ -778,8 +778,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
778 return 0; 778 return 0;
779} 779}
780 780
781static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, 781static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh)
782 void *arg)
783{ 782{
784 struct rtcanmsg *r; 783 struct rtcanmsg *r;
785 struct cgw_job *gwj; 784 struct cgw_job *gwj;
@@ -864,11 +863,11 @@ static void cgw_remove_all_jobs(void)
864 hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) { 863 hlist_for_each_entry_safe(gwj, nx, &cgw_list, list) {
865 hlist_del(&gwj->list); 864 hlist_del(&gwj->list);
866 cgw_unregister_filter(gwj); 865 cgw_unregister_filter(gwj);
867 kfree(gwj); 866 kmem_cache_free(cgw_cache, gwj);
868 } 867 }
869} 868}
870 869
871static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 870static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
872{ 871{
873 struct cgw_job *gwj = NULL; 872 struct cgw_job *gwj = NULL;
874 struct hlist_node *nx; 873 struct hlist_node *nx;
@@ -920,7 +919,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
920 919
921 hlist_del(&gwj->list); 920 hlist_del(&gwj->list);
922 cgw_unregister_filter(gwj); 921 cgw_unregister_filter(gwj);
923 kfree(gwj); 922 kmem_cache_free(cgw_cache, gwj);
924 err = 0; 923 err = 0;
925 break; 924 break;
926 } 925 }
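The gw.c change above is a correctness fix rather than a cleanup: cgw_job objects come from the dedicated cgw_cache slab, so they are now returned with kmem_cache_free() and allocation and release go through the same cache. The standalone sketch below illustrates the pairing rule with a toy freelist standing in for a kmem_cache; pool_alloc/pool_free are hypothetical names.

#include <assert.h>
#include <stdlib.h>

struct pool { void *slots[16]; int top; };	/* toy kmem_cache */
struct job { int id; };				/* toy cgw_job */

static void *pool_alloc(struct pool *p, size_t sz)
{
	return p->top ? p->slots[--p->top] : malloc(sz);
}

static void pool_free(struct pool *p, void *obj)
{
	if (p->top < 16)
		p->slots[p->top++] = obj;	/* recycled, not free()d */
	else
		free(obj);
}

int main(void)
{
	struct pool cache = { .top = 0 };
	struct job *j = pool_alloc(&cache, sizeof(*j));

	assert(j);
	/* free(j); */		/* the mismatched-release pattern fixed above */
	pool_free(&cache, j);	/* release through the matching API */
	return 0;
}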
diff --git a/net/can/proc.c b/net/can/proc.c
index 1ab8c888f102..b543470c8f8b 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -378,7 +378,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
378 378
379static int can_rcvlist_proc_open(struct inode *inode, struct file *file) 379static int can_rcvlist_proc_open(struct inode *inode, struct file *file)
380{ 380{
381 return single_open(file, can_rcvlist_proc_show, PDE(inode)->data); 381 return single_open(file, can_rcvlist_proc_show, PDE_DATA(inode));
382} 382}
383 383
384static const struct file_operations can_rcvlist_proc_fops = { 384static const struct file_operations can_rcvlist_proc_fops = {
diff --git a/net/can/raw.c b/net/can/raw.c
index c1764e41ddaf..1085e65f848e 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -711,9 +711,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
711 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 711 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
712 if (err < 0) 712 if (err < 0)
713 goto free_skb; 713 goto free_skb;
714 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 714
715 if (err < 0) 715 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
716 goto free_skb;
717 716
718 skb->dev = dev; 717 skb->dev = dev;
719 skb->sk = sk; 718 skb->sk = sk;
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index e87ef435e11b..958d9856912c 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -11,5 +11,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
11 crypto.o armor.o \ 11 crypto.o armor.o \
12 auth_x.o \ 12 auth_x.o \
13 ceph_fs.o ceph_strings.o ceph_hash.o \ 13 ceph_fs.o ceph_strings.o ceph_hash.o \
14 pagevec.o 14 pagevec.o snapshot.o
15 15
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index b4bf4ac090f1..6b923bcaa2a4 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -47,6 +47,7 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_cryp
47 if (!ac) 47 if (!ac)
48 goto out; 48 goto out;
49 49
50 mutex_init(&ac->mutex);
50 ac->negotiating = true; 51 ac->negotiating = true;
51 if (name) 52 if (name)
52 ac->name = name; 53 ac->name = name;
@@ -73,10 +74,12 @@ void ceph_auth_destroy(struct ceph_auth_client *ac)
73 */ 74 */
74void ceph_auth_reset(struct ceph_auth_client *ac) 75void ceph_auth_reset(struct ceph_auth_client *ac)
75{ 76{
77 mutex_lock(&ac->mutex);
76 dout("auth_reset %p\n", ac); 78 dout("auth_reset %p\n", ac);
77 if (ac->ops && !ac->negotiating) 79 if (ac->ops && !ac->negotiating)
78 ac->ops->reset(ac); 80 ac->ops->reset(ac);
79 ac->negotiating = true; 81 ac->negotiating = true;
82 mutex_unlock(&ac->mutex);
80} 83}
81 84
82int ceph_entity_name_encode(const char *name, void **p, void *end) 85int ceph_entity_name_encode(const char *name, void **p, void *end)
@@ -102,6 +105,7 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
102 int i, num; 105 int i, num;
103 int ret; 106 int ret;
104 107
108 mutex_lock(&ac->mutex);
105 dout("auth_build_hello\n"); 109 dout("auth_build_hello\n");
106 monhdr->have_version = 0; 110 monhdr->have_version = 0;
107 monhdr->session_mon = cpu_to_le16(-1); 111 monhdr->session_mon = cpu_to_le16(-1);
@@ -122,15 +126,19 @@ int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
122 126
123 ret = ceph_entity_name_encode(ac->name, &p, end); 127 ret = ceph_entity_name_encode(ac->name, &p, end);
124 if (ret < 0) 128 if (ret < 0)
125 return ret; 129 goto out;
126 ceph_decode_need(&p, end, sizeof(u64), bad); 130 ceph_decode_need(&p, end, sizeof(u64), bad);
127 ceph_encode_64(&p, ac->global_id); 131 ceph_encode_64(&p, ac->global_id);
128 132
129 ceph_encode_32(&lenp, p - lenp - sizeof(u32)); 133 ceph_encode_32(&lenp, p - lenp - sizeof(u32));
130 return p - buf; 134 ret = p - buf;
135out:
136 mutex_unlock(&ac->mutex);
137 return ret;
131 138
132bad: 139bad:
133 return -ERANGE; 140 ret = -ERANGE;
141 goto out;
134} 142}
135 143
136static int ceph_build_auth_request(struct ceph_auth_client *ac, 144static int ceph_build_auth_request(struct ceph_auth_client *ac,
@@ -151,11 +159,13 @@ static int ceph_build_auth_request(struct ceph_auth_client *ac,
151 if (ret < 0) { 159 if (ret < 0) {
152 pr_err("error %d building auth method %s request\n", ret, 160 pr_err("error %d building auth method %s request\n", ret,
153 ac->ops->name); 161 ac->ops->name);
154 return ret; 162 goto out;
155 } 163 }
156 dout(" built request %d bytes\n", ret); 164 dout(" built request %d bytes\n", ret);
157 ceph_encode_32(&p, ret); 165 ceph_encode_32(&p, ret);
158 return p + ret - msg_buf; 166 ret = p + ret - msg_buf;
167out:
168 return ret;
159} 169}
160 170
161/* 171/*
@@ -176,6 +186,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
176 int result_msg_len; 186 int result_msg_len;
177 int ret = -EINVAL; 187 int ret = -EINVAL;
178 188
189 mutex_lock(&ac->mutex);
179 dout("handle_auth_reply %p %p\n", p, end); 190 dout("handle_auth_reply %p %p\n", p, end);
180 ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad); 191 ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad);
181 protocol = ceph_decode_32(&p); 192 protocol = ceph_decode_32(&p);
@@ -227,33 +238,103 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
227 238
228 ret = ac->ops->handle_reply(ac, result, payload, payload_end); 239 ret = ac->ops->handle_reply(ac, result, payload, payload_end);
229 if (ret == -EAGAIN) { 240 if (ret == -EAGAIN) {
230 return ceph_build_auth_request(ac, reply_buf, reply_len); 241 ret = ceph_build_auth_request(ac, reply_buf, reply_len);
231 } else if (ret) { 242 } else if (ret) {
232 pr_err("auth method '%s' error %d\n", ac->ops->name, ret); 243 pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
233 return ret;
234 } 244 }
235 return 0;
236 245
237bad:
238 pr_err("failed to decode auth msg\n");
239out: 246out:
247 mutex_unlock(&ac->mutex);
240 return ret; 248 return ret;
249
250bad:
251 pr_err("failed to decode auth msg\n");
252 ret = -EINVAL;
253 goto out;
241} 254}
242 255
243int ceph_build_auth(struct ceph_auth_client *ac, 256int ceph_build_auth(struct ceph_auth_client *ac,
244 void *msg_buf, size_t msg_len) 257 void *msg_buf, size_t msg_len)
245{ 258{
259 int ret = 0;
260
261 mutex_lock(&ac->mutex);
246 if (!ac->protocol) 262 if (!ac->protocol)
247 return ceph_auth_build_hello(ac, msg_buf, msg_len); 263 ret = ceph_auth_build_hello(ac, msg_buf, msg_len);
248 BUG_ON(!ac->ops); 264 else if (ac->ops->should_authenticate(ac))
249 if (ac->ops->should_authenticate(ac)) 265 ret = ceph_build_auth_request(ac, msg_buf, msg_len);
250 return ceph_build_auth_request(ac, msg_buf, msg_len); 266 mutex_unlock(&ac->mutex);
251 return 0; 267 return ret;
252} 268}
253 269
254int ceph_auth_is_authenticated(struct ceph_auth_client *ac) 270int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
255{ 271{
256 if (!ac->ops) 272 int ret = 0;
257 return 0; 273
258 return ac->ops->is_authenticated(ac); 274 mutex_lock(&ac->mutex);
275 if (ac->ops)
276 ret = ac->ops->is_authenticated(ac);
277 mutex_unlock(&ac->mutex);
278 return ret;
279}
280EXPORT_SYMBOL(ceph_auth_is_authenticated);
281
282int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
283 int peer_type,
284 struct ceph_auth_handshake *auth)
285{
286 int ret = 0;
287
288 mutex_lock(&ac->mutex);
289 if (ac->ops && ac->ops->create_authorizer)
290 ret = ac->ops->create_authorizer(ac, peer_type, auth);
291 mutex_unlock(&ac->mutex);
292 return ret;
293}
294EXPORT_SYMBOL(ceph_auth_create_authorizer);
295
296void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac,
297 struct ceph_authorizer *a)
298{
299 mutex_lock(&ac->mutex);
300 if (ac->ops && ac->ops->destroy_authorizer)
301 ac->ops->destroy_authorizer(ac, a);
302 mutex_unlock(&ac->mutex);
303}
304EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
305
306int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
307 int peer_type,
308 struct ceph_auth_handshake *a)
309{
310 int ret = 0;
311
312 mutex_lock(&ac->mutex);
313 if (ac->ops && ac->ops->update_authorizer)
314 ret = ac->ops->update_authorizer(ac, peer_type, a);
315 mutex_unlock(&ac->mutex);
316 return ret;
317}
318EXPORT_SYMBOL(ceph_auth_update_authorizer);
319
320int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
321 struct ceph_authorizer *a, size_t len)
322{
323 int ret = 0;
324
325 mutex_lock(&ac->mutex);
326 if (ac->ops && ac->ops->verify_authorizer_reply)
327 ret = ac->ops->verify_authorizer_reply(ac, a, len);
328 mutex_unlock(&ac->mutex);
329 return ret;
330}
331EXPORT_SYMBOL(ceph_auth_verify_authorizer_reply);
332
333void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type)
334{
335 mutex_lock(&ac->mutex);
336 if (ac->ops && ac->ops->invalidate_authorizer)
337 ac->ops->invalidate_authorizer(ac, peer_type);
338 mutex_unlock(&ac->mutex);
259} 339}
340EXPORT_SYMBOL(ceph_auth_invalidate_authorizer);
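The auth.c hunks above retrofit a mutex onto struct ceph_auth_client and route every operation through it; the newly exported ceph_auth_* helpers all follow the same shape (lock, check the op table, call, unlock), so callers such as the messenger no longer dereference ac->ops themselves. A compilable userspace sketch of that locked-wrapper shape, using pthreads in place of the kernel mutex and illustrative type names:

#include <pthread.h>
#include <stdio.h>

struct client_ops { int (*is_authenticated)(void *priv); };

struct client {
	pthread_mutex_t mutex;
	const struct client_ops *ops;	/* may change while negotiating */
	void *priv;
};

/* every exported entry point takes this form */
static int client_is_authenticated(struct client *c)
{
	int ret = 0;

	pthread_mutex_lock(&c->mutex);
	if (c->ops && c->ops->is_authenticated)
		ret = c->ops->is_authenticated(c->priv);
	pthread_mutex_unlock(&c->mutex);
	return ret;
}

static int always_yes(void *priv) { (void)priv; return 1; }
static const struct client_ops ops = { .is_authenticated = always_yes };

int main(void)
{
	struct client c = { PTHREAD_MUTEX_INITIALIZER, &ops, NULL };

	printf("%d\n", client_is_authenticated(&c));	/* prints 1 */
	return 0;
}

The ops-may-be-NULL check matters because the auth protocol is only selected after negotiation, which is also why the wrappers default ret to 0 rather than an error.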
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index a16bf14eb027..96238ba95f2b 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -298,6 +298,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
298 return -ENOMEM; 298 return -ENOMEM;
299 } 299 }
300 au->service = th->service; 300 au->service = th->service;
301 au->secret_id = th->secret_id;
301 302
302 msg_a = au->buf->vec.iov_base; 303 msg_a = au->buf->vec.iov_base;
303 msg_a->struct_v = 1; 304 msg_a->struct_v = 1;
@@ -555,6 +556,26 @@ static int ceph_x_create_authorizer(
555 return 0; 556 return 0;
556} 557}
557 558
559static int ceph_x_update_authorizer(
560 struct ceph_auth_client *ac, int peer_type,
561 struct ceph_auth_handshake *auth)
562{
563 struct ceph_x_authorizer *au;
564 struct ceph_x_ticket_handler *th;
565
566 th = get_ticket_handler(ac, peer_type);
567 if (IS_ERR(th))
568 return PTR_ERR(th);
569
570 au = (struct ceph_x_authorizer *)auth->authorizer;
571 if (au->secret_id < th->secret_id) {
572 dout("ceph_x_update_authorizer service %u secret %llu < %llu\n",
573 au->service, au->secret_id, th->secret_id);
574 return ceph_x_build_authorizer(ac, th, au);
575 }
576 return 0;
577}
578
558static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, 579static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
559 struct ceph_authorizer *a, size_t len) 580 struct ceph_authorizer *a, size_t len)
560{ 581{
@@ -630,7 +651,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
630 651
631 th = get_ticket_handler(ac, peer_type); 652 th = get_ticket_handler(ac, peer_type);
632 if (!IS_ERR(th)) 653 if (!IS_ERR(th))
633 remove_ticket_handler(ac, th); 654 memset(&th->validity, 0, sizeof(th->validity));
634} 655}
635 656
636 657
@@ -641,6 +662,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
641 .build_request = ceph_x_build_request, 662 .build_request = ceph_x_build_request,
642 .handle_reply = ceph_x_handle_reply, 663 .handle_reply = ceph_x_handle_reply,
643 .create_authorizer = ceph_x_create_authorizer, 664 .create_authorizer = ceph_x_create_authorizer,
665 .update_authorizer = ceph_x_update_authorizer,
644 .verify_authorizer_reply = ceph_x_verify_authorizer_reply, 666 .verify_authorizer_reply = ceph_x_verify_authorizer_reply,
645 .destroy_authorizer = ceph_x_destroy_authorizer, 667 .destroy_authorizer = ceph_x_destroy_authorizer,
646 .invalidate_authorizer = ceph_x_invalidate_authorizer, 668 .invalidate_authorizer = ceph_x_invalidate_authorizer,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index f459e93b774f..c5a058da7ac8 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -29,6 +29,7 @@ struct ceph_x_authorizer {
29 struct ceph_buffer *buf; 29 struct ceph_buffer *buf;
30 unsigned int service; 30 unsigned int service;
31 u64 nonce; 31 u64 nonce;
32 u64 secret_id;
32 char reply_buf[128]; /* big enough for encrypted blob */ 33 char reply_buf[128]; /* big enough for encrypted blob */
33}; 34};
34 35
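Taken together, the auth_x.c and auth_x.h hunks above stamp each authorizer with the secret_id of the ticket it was built against, add an update_authorizer op that rebuilds the authorizer once the ticket has rotated past it, and make invalidate_authorizer clear the ticket's validity instead of removing the handler outright. A minimal sketch of that generation-stamp refresh pattern (types and helpers here are illustrative):

#include <stdint.h>
#include <stdio.h>

struct ticket { uint64_t secret_id; };		/* rotated by the server */
struct authorizer { uint64_t secret_id; int builds; };

static void build(struct authorizer *au, const struct ticket *th)
{
	au->secret_id = th->secret_id;	/* record the generation used */
	au->builds++;
}

static void update(struct authorizer *au, const struct ticket *th)
{
	if (au->secret_id < th->secret_id)	/* stale: rebuild */
		build(au, th);
}

int main(void)
{
	struct ticket th = { .secret_id = 1 };
	struct authorizer au = { 0 };

	build(&au, &th);
	update(&au, &th);	/* still current, nothing to do */
	th.secret_id = 2;	/* secret rotation */
	update(&au, &th);	/* rebuilt exactly once */
	printf("builds = %d\n", au.builds);	/* prints 2 */
	return 0;
}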
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index e65e6e4be38b..34b11ee8124e 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -606,11 +606,17 @@ static int __init init_ceph_lib(void)
606 if (ret < 0) 606 if (ret < 0)
607 goto out_crypto; 607 goto out_crypto;
608 608
609 ret = ceph_osdc_setup();
610 if (ret < 0)
611 goto out_msgr;
612
609 pr_info("loaded (mon/osd proto %d/%d)\n", 613 pr_info("loaded (mon/osd proto %d/%d)\n",
610 CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL); 614 CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL);
611 615
612 return 0; 616 return 0;
613 617
618out_msgr:
619 ceph_msgr_exit();
614out_crypto: 620out_crypto:
615 ceph_crypto_shutdown(); 621 ceph_crypto_shutdown();
616out_debugfs: 622out_debugfs:
@@ -622,6 +628,7 @@ out:
622static void __exit exit_ceph_lib(void) 628static void __exit exit_ceph_lib(void)
623{ 629{
624 dout("exit_ceph_lib\n"); 630 dout("exit_ceph_lib\n");
631 ceph_osdc_cleanup();
625 ceph_msgr_exit(); 632 ceph_msgr_exit();
626 ceph_crypto_shutdown(); 633 ceph_crypto_shutdown();
627 ceph_debugfs_cleanup(); 634 ceph_debugfs_cleanup();
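The ceph_common.c hunk above inserts a new ceph_osdc_setup() step into module init and extends the error ladder with an out_msgr label, preserving the rule that each failure point unwinds exactly the steps that already succeeded, in reverse order. A standalone sketch of that ladder (the step/undo functions are stand-ins):

#include <stdio.h>

static int step_a(void) { return 0; }
static void undo_a(void) { puts("undo a"); }
static int step_b(void) { return 0; }
static void undo_b(void) { puts("undo b"); }
static int step_c(void) { return -1; }	/* pretend the new step fails */

static int init_all(void)
{
	int ret;

	ret = step_a();
	if (ret < 0)
		goto out;
	ret = step_b();
	if (ret < 0)
		goto out_a;
	ret = step_c();		/* the newly inserted step */
	if (ret < 0)
		goto out_b;	/* unwind b, then a, in reverse order */
	return 0;

out_b:
	undo_b();
out_a:
	undo_a();
out:
	return ret;
}

int main(void)
{
	printf("init_all = %d\n", init_all());
	return 0;
}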
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 00d051f4894e..83661cdc0766 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -123,8 +123,8 @@ static int osdc_show(struct seq_file *s, void *pp)
123 mutex_lock(&osdc->request_mutex); 123 mutex_lock(&osdc->request_mutex);
124 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { 124 for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
125 struct ceph_osd_request *req; 125 struct ceph_osd_request *req;
126 unsigned int i;
126 int opcode; 127 int opcode;
127 int i;
128 128
129 req = rb_entry(p, struct ceph_osd_request, r_node); 129 req = rb_entry(p, struct ceph_osd_request, r_node);
130 130
@@ -142,7 +142,7 @@ static int osdc_show(struct seq_file *s, void *pp)
142 seq_printf(s, "\t"); 142 seq_printf(s, "\t");
143 143
144 for (i = 0; i < req->r_num_ops; i++) { 144 for (i = 0; i < req->r_num_ops; i++) {
145 opcode = le16_to_cpu(req->r_request_ops[i].op); 145 opcode = req->r_ops[i].op;
146 seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); 146 seq_printf(s, "\t%s", ceph_osd_op_name(opcode));
147 } 147 }
148 148
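In the debugfs.c hunk above, osdc_show switches from decoding the wire-format r_request_ops array (little-endian, hence le16_to_cpu) to reading the native in-core r_ops array, so the byte-order conversion disappears. As a standalone reminder of what le16_to_cpu does, here is a portable stand-in (not the kernel's implementation):

#include <stdint.h>
#include <stdio.h>

static uint16_t le16_to_cpu(uint16_t v)
{
	const uint8_t *b = (const uint8_t *)&v;

	return (uint16_t)(b[0] | (b[1] << 8));	/* bytes are LE on the wire */
}

int main(void)
{
	uint16_t wire;

	((uint8_t *)&wire)[0] = 0x02;	/* LE encoding of opcode 0x0102 */
	((uint8_t *)&wire)[1] = 0x01;
	printf("opcode %#x\n", le16_to_cpu(wire));	/* 0x102 on any host */
	return 0;
}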
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 2c0669fb54e3..eb0a46a49bd4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -21,6 +21,9 @@
21#include <linux/ceph/pagelist.h> 21#include <linux/ceph/pagelist.h>
22#include <linux/export.h> 22#include <linux/export.h>
23 23
24#define list_entry_next(pos, member) \
25 list_entry(pos->member.next, typeof(*pos), member)
26
24/* 27/*
25 * Ceph uses the messenger to exchange ceph_msg messages with other 28 * Ceph uses the messenger to exchange ceph_msg messages with other
26 * hosts in the system. The messenger provides ordered and reliable 29 * hosts in the system. The messenger provides ordered and reliable
@@ -149,6 +152,11 @@ static bool con_flag_test_and_set(struct ceph_connection *con,
149 return test_and_set_bit(con_flag, &con->flags); 152 return test_and_set_bit(con_flag, &con->flags);
150} 153}
151 154
155/* Slab caches for frequently-allocated structures */
156
157static struct kmem_cache *ceph_msg_cache;
158static struct kmem_cache *ceph_msg_data_cache;
159
152/* static tag bytes (protocol control messages) */ 160/* static tag bytes (protocol control messages) */
153static char tag_msg = CEPH_MSGR_TAG_MSG; 161static char tag_msg = CEPH_MSGR_TAG_MSG;
154static char tag_ack = CEPH_MSGR_TAG_ACK; 162static char tag_ack = CEPH_MSGR_TAG_ACK;
@@ -223,6 +231,41 @@ static void encode_my_addr(struct ceph_messenger *msgr)
223 */ 231 */
224static struct workqueue_struct *ceph_msgr_wq; 232static struct workqueue_struct *ceph_msgr_wq;
225 233
234static int ceph_msgr_slab_init(void)
235{
236 BUG_ON(ceph_msg_cache);
237 ceph_msg_cache = kmem_cache_create("ceph_msg",
238 sizeof (struct ceph_msg),
239 __alignof__(struct ceph_msg), 0, NULL);
240
241 if (!ceph_msg_cache)
242 return -ENOMEM;
243
244 BUG_ON(ceph_msg_data_cache);
245 ceph_msg_data_cache = kmem_cache_create("ceph_msg_data",
246 sizeof (struct ceph_msg_data),
247 __alignof__(struct ceph_msg_data),
248 0, NULL);
249 if (ceph_msg_data_cache)
250 return 0;
251
252 kmem_cache_destroy(ceph_msg_cache);
253 ceph_msg_cache = NULL;
254
255 return -ENOMEM;
256}
257
258static void ceph_msgr_slab_exit(void)
259{
260 BUG_ON(!ceph_msg_data_cache);
261 kmem_cache_destroy(ceph_msg_data_cache);
262 ceph_msg_data_cache = NULL;
263
264 BUG_ON(!ceph_msg_cache);
265 kmem_cache_destroy(ceph_msg_cache);
266 ceph_msg_cache = NULL;
267}
268
226static void _ceph_msgr_exit(void) 269static void _ceph_msgr_exit(void)
227{ 270{
228 if (ceph_msgr_wq) { 271 if (ceph_msgr_wq) {
@@ -230,6 +273,8 @@ static void _ceph_msgr_exit(void)
230 ceph_msgr_wq = NULL; 273 ceph_msgr_wq = NULL;
231 } 274 }
232 275
276 ceph_msgr_slab_exit();
277
233 BUG_ON(zero_page == NULL); 278 BUG_ON(zero_page == NULL);
234 kunmap(zero_page); 279 kunmap(zero_page);
235 page_cache_release(zero_page); 280 page_cache_release(zero_page);
@@ -242,6 +287,9 @@ int ceph_msgr_init(void)
242 zero_page = ZERO_PAGE(0); 287 zero_page = ZERO_PAGE(0);
243 page_cache_get(zero_page); 288 page_cache_get(zero_page);
244 289
290 if (ceph_msgr_slab_init())
291 return -ENOMEM;
292
245 ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0); 293 ceph_msgr_wq = alloc_workqueue("ceph-msgr", WQ_NON_REENTRANT, 0);
246 if (ceph_msgr_wq) 294 if (ceph_msgr_wq)
247 return 0; 295 return 0;
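The messenger hunks above add dedicated slab caches for ceph_msg and ceph_msg_data and wire their setup into ceph_msgr_init(); note the partial-failure path, where creating the second cache fails and the first must be destroyed before returning -ENOMEM. The sketch below shows the same pairing with malloc standing in for kmem_cache_create (names illustrative):

#include <stdlib.h>

static void *msg_cache, *msg_data_cache;	/* stand-in caches */

static int slab_init(void)
{
	msg_cache = malloc(64);
	if (!msg_cache)
		return -1;

	msg_data_cache = malloc(64);
	if (msg_data_cache)
		return 0;

	free(msg_cache);	/* second step failed: unwind the first */
	msg_cache = NULL;
	return -1;
}

static void slab_exit(void)
{
	free(msg_data_cache);
	msg_data_cache = NULL;
	free(msg_cache);
	msg_cache = NULL;
}

int main(void)
{
	if (slab_init() == 0)
		slab_exit();
	return 0;
}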
@@ -471,6 +519,22 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
471 return r; 519 return r;
472} 520}
473 521
522static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
523 int page_offset, size_t length)
524{
525 void *kaddr;
526 int ret;
527
528 BUG_ON(page_offset + length > PAGE_SIZE);
529
530 kaddr = kmap(page);
531 BUG_ON(!kaddr);
532 ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length);
533 kunmap(page);
534
535 return ret;
536}
537
474/* 538/*
475 * write something. @more is true if caller will be sending more data 539 * write something. @more is true if caller will be sending more data
476 * shortly. 540 * shortly.
@@ -493,7 +557,7 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
493} 557}
494 558
495static int ceph_tcp_sendpage(struct socket *sock, struct page *page, 559static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
496 int offset, size_t size, int more) 560 int offset, size_t size, bool more)
497{ 561{
498 int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR); 562 int flags = MSG_DONTWAIT | MSG_NOSIGNAL | (more ? MSG_MORE : MSG_EOR);
499 int ret; 563 int ret;
@@ -697,50 +761,397 @@ static void con_out_kvec_add(struct ceph_connection *con,
697} 761}
698 762
699#ifdef CONFIG_BLOCK 763#ifdef CONFIG_BLOCK
700static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg) 764
765/*
766 * For a bio data item, a piece is whatever remains of the next
767 * entry in the current bio iovec, or the first entry in the next
768 * bio in the list.
769 */
770static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
771 size_t length)
701{ 772{
702 if (!bio) { 773 struct ceph_msg_data *data = cursor->data;
703 *iter = NULL; 774 struct bio *bio;
704 *seg = 0; 775
705 return; 776 BUG_ON(data->type != CEPH_MSG_DATA_BIO);
777
778 bio = data->bio;
779 BUG_ON(!bio);
780 BUG_ON(!bio->bi_vcnt);
781
782 cursor->resid = min(length, data->bio_length);
783 cursor->bio = bio;
784 cursor->vector_index = 0;
785 cursor->vector_offset = 0;
786 cursor->last_piece = length <= bio->bi_io_vec[0].bv_len;
787}
788
789static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
790 size_t *page_offset,
791 size_t *length)
792{
793 struct ceph_msg_data *data = cursor->data;
794 struct bio *bio;
795 struct bio_vec *bio_vec;
796 unsigned int index;
797
798 BUG_ON(data->type != CEPH_MSG_DATA_BIO);
799
800 bio = cursor->bio;
801 BUG_ON(!bio);
802
803 index = cursor->vector_index;
804 BUG_ON(index >= (unsigned int) bio->bi_vcnt);
805
806 bio_vec = &bio->bi_io_vec[index];
807 BUG_ON(cursor->vector_offset >= bio_vec->bv_len);
808 *page_offset = (size_t) (bio_vec->bv_offset + cursor->vector_offset);
809 BUG_ON(*page_offset >= PAGE_SIZE);
810 if (cursor->last_piece) /* pagelist offset is always 0 */
811 *length = cursor->resid;
812 else
813 *length = (size_t) (bio_vec->bv_len - cursor->vector_offset);
814 BUG_ON(*length > cursor->resid);
815 BUG_ON(*page_offset + *length > PAGE_SIZE);
816
817 return bio_vec->bv_page;
818}
819
820static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
821 size_t bytes)
822{
823 struct bio *bio;
824 struct bio_vec *bio_vec;
825 unsigned int index;
826
827 BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
828
829 bio = cursor->bio;
830 BUG_ON(!bio);
831
832 index = cursor->vector_index;
833 BUG_ON(index >= (unsigned int) bio->bi_vcnt);
834 bio_vec = &bio->bi_io_vec[index];
835
836 /* Advance the cursor offset */
837
838 BUG_ON(cursor->resid < bytes);
839 cursor->resid -= bytes;
840 cursor->vector_offset += bytes;
841 if (cursor->vector_offset < bio_vec->bv_len)
842 return false; /* more bytes to process in this segment */
843 BUG_ON(cursor->vector_offset != bio_vec->bv_len);
844
845 /* Move on to the next segment, and possibly the next bio */
846
847 if (++index == (unsigned int) bio->bi_vcnt) {
848 bio = bio->bi_next;
849 index = 0;
706 } 850 }
707 *iter = bio; 851 cursor->bio = bio;
708 *seg = bio->bi_idx; 852 cursor->vector_index = index;
853 cursor->vector_offset = 0;
854
855 if (!cursor->last_piece) {
856 BUG_ON(!cursor->resid);
857 BUG_ON(!bio);
858 /* A short read is OK, so use <= rather than == */
859 if (cursor->resid <= bio->bi_io_vec[index].bv_len)
860 cursor->last_piece = true;
861 }
862
863 return true;
709} 864}
865#endif /* CONFIG_BLOCK */
710 866
711static void iter_bio_next(struct bio **bio_iter, int *seg) 867/*
868 * For a page array, a piece comes from the first page in the array
869 * that has not already been fully consumed.
870 */
871static void ceph_msg_data_pages_cursor_init(struct ceph_msg_data_cursor *cursor,
872 size_t length)
712{ 873{
713 if (*bio_iter == NULL) 874 struct ceph_msg_data *data = cursor->data;
714 return; 875 int page_count;
876
877 BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
715 878
716 BUG_ON(*seg >= (*bio_iter)->bi_vcnt); 879 BUG_ON(!data->pages);
880 BUG_ON(!data->length);
717 881
718 (*seg)++; 882 cursor->resid = min(length, data->length);
719 if (*seg == (*bio_iter)->bi_vcnt) 883 page_count = calc_pages_for(data->alignment, (u64)data->length);
720 init_bio_iter((*bio_iter)->bi_next, bio_iter, seg); 884 cursor->page_offset = data->alignment & ~PAGE_MASK;
885 cursor->page_index = 0;
886 BUG_ON(page_count > (int)USHRT_MAX);
887 cursor->page_count = (unsigned short)page_count;
888 BUG_ON(length > SIZE_MAX - cursor->page_offset);
889 cursor->last_piece = (size_t)cursor->page_offset + length <= PAGE_SIZE;
721} 890}
722#endif
723 891
724static void prepare_write_message_data(struct ceph_connection *con) 892static struct page *
893ceph_msg_data_pages_next(struct ceph_msg_data_cursor *cursor,
894 size_t *page_offset, size_t *length)
725{ 895{
726 struct ceph_msg *msg = con->out_msg; 896 struct ceph_msg_data *data = cursor->data;
727 897
728 BUG_ON(!msg); 898 BUG_ON(data->type != CEPH_MSG_DATA_PAGES);
729 BUG_ON(!msg->hdr.data_len); 899
900 BUG_ON(cursor->page_index >= cursor->page_count);
901 BUG_ON(cursor->page_offset >= PAGE_SIZE);
902
903 *page_offset = cursor->page_offset;
904 if (cursor->last_piece)
905 *length = cursor->resid;
906 else
907 *length = PAGE_SIZE - *page_offset;
908
909 return data->pages[cursor->page_index];
910}
911
912static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
913 size_t bytes)
914{
915 BUG_ON(cursor->data->type != CEPH_MSG_DATA_PAGES);
916
917 BUG_ON(cursor->page_offset + bytes > PAGE_SIZE);
918
919 /* Advance the cursor page offset */
920
921 cursor->resid -= bytes;
922 cursor->page_offset = (cursor->page_offset + bytes) & ~PAGE_MASK;
923 if (!bytes || cursor->page_offset)
924 return false; /* more bytes to process in the current page */
925
926 /* Move on to the next page; offset is already at 0 */
927
928 BUG_ON(cursor->page_index >= cursor->page_count);
929 cursor->page_index++;
930 cursor->last_piece = cursor->resid <= PAGE_SIZE;
931
932 return true;
933}
934
935/*
936 * For a pagelist, a piece is whatever remains to be consumed in the
937 * first page in the list, or the front of the next page.
938 */
939static void
940ceph_msg_data_pagelist_cursor_init(struct ceph_msg_data_cursor *cursor,
941 size_t length)
942{
943 struct ceph_msg_data *data = cursor->data;
944 struct ceph_pagelist *pagelist;
945 struct page *page;
946
947 BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
948
949 pagelist = data->pagelist;
950 BUG_ON(!pagelist);
951
952 if (!length)
953 return; /* pagelist can be assigned but empty */
954
955 BUG_ON(list_empty(&pagelist->head));
956 page = list_first_entry(&pagelist->head, struct page, lru);
957
958 cursor->resid = min(length, pagelist->length);
959 cursor->page = page;
960 cursor->offset = 0;
961 cursor->last_piece = cursor->resid <= PAGE_SIZE;
962}
963
964static struct page *
965ceph_msg_data_pagelist_next(struct ceph_msg_data_cursor *cursor,
966 size_t *page_offset, size_t *length)
967{
968 struct ceph_msg_data *data = cursor->data;
969 struct ceph_pagelist *pagelist;
970
971 BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
730 972
731 /* initialize page iterator */ 973 pagelist = data->pagelist;
732 con->out_msg_pos.page = 0; 974 BUG_ON(!pagelist);
733 if (msg->pages) 975
734 con->out_msg_pos.page_pos = msg->page_alignment; 976 BUG_ON(!cursor->page);
977 BUG_ON(cursor->offset + cursor->resid != pagelist->length);
978
979 /* offset of first page in pagelist is always 0 */
980 *page_offset = cursor->offset & ~PAGE_MASK;
981 if (cursor->last_piece)
982 *length = cursor->resid;
735 else 983 else
736 con->out_msg_pos.page_pos = 0; 984 *length = PAGE_SIZE - *page_offset;
985
986 return cursor->page;
987}
988
989static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
990 size_t bytes)
991{
992 struct ceph_msg_data *data = cursor->data;
993 struct ceph_pagelist *pagelist;
994
995 BUG_ON(data->type != CEPH_MSG_DATA_PAGELIST);
996
997 pagelist = data->pagelist;
998 BUG_ON(!pagelist);
999
1000 BUG_ON(cursor->offset + cursor->resid != pagelist->length);
1001 BUG_ON((cursor->offset & ~PAGE_MASK) + bytes > PAGE_SIZE);
1002
1003 /* Advance the cursor offset */
1004
1005 cursor->resid -= bytes;
1006 cursor->offset += bytes;
1007 /* offset of first page in pagelist is always 0 */
1008 if (!bytes || cursor->offset & ~PAGE_MASK)
1009 return false; /* more bytes to process in the current page */
1010
1011 /* Move on to the next page */
1012
1013 BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
1014 cursor->page = list_entry_next(cursor->page, lru);
1015 cursor->last_piece = cursor->resid <= PAGE_SIZE;
1016
1017 return true;
1018}
1019
1020/*
1021 * Message data is handled (sent or received) in pieces, where each
1022 * piece resides on a single page. The network layer might not
1023 * consume an entire piece at once. A data item's cursor keeps
1024 * track of which piece is next to process and how much remains to
1025 * be processed in that piece. It also tracks whether the current
1026 * piece is the last one in the data item.
1027 */
1028static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
1029{
1030 size_t length = cursor->total_resid;
1031
1032 switch (cursor->data->type) {
1033 case CEPH_MSG_DATA_PAGELIST:
1034 ceph_msg_data_pagelist_cursor_init(cursor, length);
1035 break;
1036 case CEPH_MSG_DATA_PAGES:
1037 ceph_msg_data_pages_cursor_init(cursor, length);
1038 break;
737#ifdef CONFIG_BLOCK 1039#ifdef CONFIG_BLOCK
738 if (msg->bio) 1040 case CEPH_MSG_DATA_BIO:
739 init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg); 1041 ceph_msg_data_bio_cursor_init(cursor, length);
740#endif 1042 break;
741 con->out_msg_pos.data_pos = 0; 1043#endif /* CONFIG_BLOCK */
742 con->out_msg_pos.did_page_crc = false; 1044 case CEPH_MSG_DATA_NONE:
743 con->out_more = 1; /* data + footer will follow */ 1045 default:
1046 /* BUG(); */
1047 break;
1048 }
1049 cursor->need_crc = true;
1050}
1051
1052static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
1053{
1054 struct ceph_msg_data_cursor *cursor = &msg->cursor;
1055 struct ceph_msg_data *data;
1056
1057 BUG_ON(!length);
1058 BUG_ON(length > msg->data_length);
1059 BUG_ON(list_empty(&msg->data));
1060
1061 cursor->data_head = &msg->data;
1062 cursor->total_resid = length;
1063 data = list_first_entry(&msg->data, struct ceph_msg_data, links);
1064 cursor->data = data;
1065
1066 __ceph_msg_data_cursor_init(cursor);
1067}
1068
1069/*
1070 * Return the page containing the next piece to process for a given
1071 * data item, and supply the page offset and length of that piece.
1072 * Indicate whether this is the last piece in this data item.
1073 */
1074static struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
1075 size_t *page_offset, size_t *length,
1076 bool *last_piece)
1077{
1078 struct page *page;
1079
1080 switch (cursor->data->type) {
1081 case CEPH_MSG_DATA_PAGELIST:
1082 page = ceph_msg_data_pagelist_next(cursor, page_offset, length);
1083 break;
1084 case CEPH_MSG_DATA_PAGES:
1085 page = ceph_msg_data_pages_next(cursor, page_offset, length);
1086 break;
1087#ifdef CONFIG_BLOCK
1088 case CEPH_MSG_DATA_BIO:
1089 page = ceph_msg_data_bio_next(cursor, page_offset, length);
1090 break;
1091#endif /* CONFIG_BLOCK */
1092 case CEPH_MSG_DATA_NONE:
1093 default:
1094 page = NULL;
1095 break;
1096 }
1097 BUG_ON(!page);
1098 BUG_ON(*page_offset + *length > PAGE_SIZE);
1099 BUG_ON(!*length);
1100 if (last_piece)
1101 *last_piece = cursor->last_piece;
1102
1103 return page;
1104}
1105
1106/*
1107 * Returns true if the result moves the cursor on to the next piece
1108 * of the data item.
1109 */
1110static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
1111 size_t bytes)
1112{
1113 bool new_piece;
1114
1115 BUG_ON(bytes > cursor->resid);
1116 switch (cursor->data->type) {
1117 case CEPH_MSG_DATA_PAGELIST:
1118 new_piece = ceph_msg_data_pagelist_advance(cursor, bytes);
1119 break;
1120 case CEPH_MSG_DATA_PAGES:
1121 new_piece = ceph_msg_data_pages_advance(cursor, bytes);
1122 break;
1123#ifdef CONFIG_BLOCK
1124 case CEPH_MSG_DATA_BIO:
1125 new_piece = ceph_msg_data_bio_advance(cursor, bytes);
1126 break;
1127#endif /* CONFIG_BLOCK */
1128 case CEPH_MSG_DATA_NONE:
1129 default:
1130 BUG();
1131 break;
1132 }
1133 cursor->total_resid -= bytes;
1134
1135 if (!cursor->resid && cursor->total_resid) {
1136 WARN_ON(!cursor->last_piece);
1137 BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
1138 cursor->data = list_entry_next(cursor->data, links);
1139 __ceph_msg_data_cursor_init(cursor);
1140 new_piece = true;
1141 }
1142 cursor->need_crc = new_piece;
1143
1144 return new_piece;
1145}
1146
1147static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
1148{
1149 BUG_ON(!msg);
1150 BUG_ON(!data_len);
1151
1152 /* Initialize data cursor */
1153
1154 ceph_msg_data_cursor_init(msg, (size_t)data_len);
744} 1155}
745 1156
746/* 1157/*
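The large hunk above replaces the messenger's per-type page bookkeeping (out_msg_pos, bio_iter and friends) with a single ceph_msg_data_cursor that walks pages, pagelists, and bios through one next/advance interface: the consumer asks where the next piece is, sends as much as the socket accepts, and advances by what was actually taken. A self-contained sketch of that cursor shape over a flat buffer list (all names here are illustrative, and short sends are simulated):

#include <stdio.h>
#include <stddef.h>

struct piece { const char *buf; size_t len; };

struct cursor {
	const struct piece *pieces;
	size_t count;	/* number of pieces */
	size_t index;	/* current piece */
	size_t offset;	/* consumed within the current piece */
	size_t resid;	/* total bytes remaining */
};

static void cursor_init(struct cursor *c, const struct piece *p, size_t n)
{
	c->pieces = p;
	c->count = n;
	c->index = 0;
	c->offset = 0;
	c->resid = 0;
	for (size_t i = 0; i < n; i++)
		c->resid += p[i].len;
}

/* like ceph_msg_data_next(): locate the next piece and its length */
static const char *cursor_next(const struct cursor *c, size_t *len)
{
	*len = c->pieces[c->index].len - c->offset;
	return c->pieces[c->index].buf + c->offset;
}

/* like ceph_msg_data_advance(): nonzero when a new piece begins */
static int cursor_advance(struct cursor *c, size_t bytes)
{
	c->offset += bytes;
	c->resid -= bytes;
	if (c->offset < c->pieces[c->index].len)
		return 0;	/* short send: stay on this piece */
	c->index++;
	c->offset = 0;
	return 1;
}

int main(void)
{
	struct piece parts[] = { { "abc", 3 }, { "defg", 4 } };
	struct cursor cur;

	cursor_init(&cur, parts, 2);
	while (cur.resid) {
		size_t len;
		const char *p = cursor_next(&cur, &len);
		size_t sent = len > 2 ? 2 : len;	/* simulate a short send */

		fwrite(p, 1, sent, stdout);
		cursor_advance(&cur, sent);
	}
	putchar('\n');	/* prints abcdefg */
	return 0;
}

The last_piece and need_crc flags in the real cursor refine this: last_piece lets the sender choose MSG_MORE versus MSG_EOR, and need_crc avoids re-checksumming a piece after a partial send.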
@@ -803,16 +1214,12 @@ static void prepare_write_message(struct ceph_connection *con)
803 m->hdr.seq = cpu_to_le64(++con->out_seq); 1214 m->hdr.seq = cpu_to_le64(++con->out_seq);
804 m->needs_out_seq = false; 1215 m->needs_out_seq = false;
805 } 1216 }
806#ifdef CONFIG_BLOCK 1217 WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
807 else
808 m->bio_iter = NULL;
809#endif
810 1218
811 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", 1219 dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
812 m, con->out_seq, le16_to_cpu(m->hdr.type), 1220 m, con->out_seq, le16_to_cpu(m->hdr.type),
813 le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), 1221 le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
814 le32_to_cpu(m->hdr.data_len), 1222 m->data_length);
815 m->nr_pages);
816 BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); 1223 BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);
817 1224
818 /* tag + hdr + front + middle */ 1225 /* tag + hdr + front + middle */
@@ -843,11 +1250,13 @@ static void prepare_write_message(struct ceph_connection *con)
843 1250
844 /* is there a data payload? */ 1251 /* is there a data payload? */
845 con->out_msg->footer.data_crc = 0; 1252 con->out_msg->footer.data_crc = 0;
846 if (m->hdr.data_len) 1253 if (m->data_length) {
847 prepare_write_message_data(con); 1254 prepare_message_data(con->out_msg, m->data_length);
848 else 1255 con->out_more = 1; /* data + footer will follow */
1256 } else {
849 /* no, queue up footer too and be done */ 1257 /* no, queue up footer too and be done */
850 prepare_write_message_footer(con); 1258 prepare_write_message_footer(con);
1259 }
851 1260
852 con_flag_set(con, CON_FLAG_WRITE_PENDING); 1261 con_flag_set(con, CON_FLAG_WRITE_PENDING);
853} 1262}
@@ -874,6 +1283,24 @@ static void prepare_write_ack(struct ceph_connection *con)
874} 1283}
875 1284
876/* 1285/*
1286 * Prepare to share the seq during handshake
1287 */
1288static void prepare_write_seq(struct ceph_connection *con)
1289{
1290 dout("prepare_write_seq %p %llu -> %llu\n", con,
1291 con->in_seq_acked, con->in_seq);
1292 con->in_seq_acked = con->in_seq;
1293
1294 con_out_kvec_reset(con);
1295
1296 con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
1297 con_out_kvec_add(con, sizeof (con->out_temp_ack),
1298 &con->out_temp_ack);
1299
1300 con_flag_set(con, CON_FLAG_WRITE_PENDING);
1301}
1302
1303/*
877 * Prepare to write keepalive byte. 1304 * Prepare to write keepalive byte.
878 */ 1305 */
879static void prepare_write_keepalive(struct ceph_connection *con) 1306static void prepare_write_keepalive(struct ceph_connection *con)
@@ -1022,35 +1449,19 @@ out:
1022 return ret; /* done! */ 1449 return ret; /* done! */
1023} 1450}
1024 1451
1025static void out_msg_pos_next(struct ceph_connection *con, struct page *page, 1452static u32 ceph_crc32c_page(u32 crc, struct page *page,
1026 size_t len, size_t sent, bool in_trail) 1453 unsigned int page_offset,
1454 unsigned int length)
1027{ 1455{
1028 struct ceph_msg *msg = con->out_msg; 1456 char *kaddr;
1029 1457
1030 BUG_ON(!msg); 1458 kaddr = kmap(page);
1031 BUG_ON(!sent); 1459 BUG_ON(kaddr == NULL);
1032 1460 crc = crc32c(crc, kaddr + page_offset, length);
1033 con->out_msg_pos.data_pos += sent; 1461 kunmap(page);
1034 con->out_msg_pos.page_pos += sent;
1035 if (sent < len)
1036 return;
1037 1462
1038 BUG_ON(sent != len); 1463 return crc;
1039 con->out_msg_pos.page_pos = 0;
1040 con->out_msg_pos.page++;
1041 con->out_msg_pos.did_page_crc = false;
1042 if (in_trail)
1043 list_move_tail(&page->lru,
1044 &msg->trail->head);
1045 else if (msg->pagelist)
1046 list_move_tail(&page->lru,
1047 &msg->pagelist->head);
1048#ifdef CONFIG_BLOCK
1049 else if (msg->bio)
1050 iter_bio_next(&msg->bio_iter, &msg->bio_seg);
1051#endif
1052} 1464}
1053
1054/* 1465/*
1055 * Write as much message data payload as we can. If we finish, queue 1466 * Write as much message data payload as we can. If we finish, queue
1056 * up the footer. 1467 * up the footer.
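The hunk above folds the old per-page CRC bookkeeping into a small helper, ceph_crc32c_page(), which kmaps a page and threads a running CRC32C through it; checksumming composes piece by piece, so the payload never needs to be contiguous. A standalone illustration of that accumulation (the bitwise CRC32C below is a slow reference implementation, not the kernel's crc32c(), and seed-handling details may differ):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t crc32c_update(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	crc = ~crc;
	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return ~crc;
}

int main(void)
{
	const char part1[] = "hello ", part2[] = "world";
	uint32_t crc = 0;

	/* accumulate piece by piece, as the messenger does per page */
	crc = crc32c_update(crc, part1, strlen(part1));
	crc = crc32c_update(crc, part2, strlen(part2));
	printf("crc32c = %08x\n", crc);	/* equals one pass over "hello world" */
	return 0;
}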
@@ -1058,21 +1469,17 @@ static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
1058 * 0 -> socket full, but more to do 1469 * 0 -> socket full, but more to do
1059 * <0 -> error 1470 * <0 -> error
1060 */ 1471 */
1061static int write_partial_msg_pages(struct ceph_connection *con) 1472static int write_partial_message_data(struct ceph_connection *con)
1062{ 1473{
1063 struct ceph_msg *msg = con->out_msg; 1474 struct ceph_msg *msg = con->out_msg;
1064 unsigned int data_len = le32_to_cpu(msg->hdr.data_len); 1475 struct ceph_msg_data_cursor *cursor = &msg->cursor;
1065 size_t len;
1066 bool do_datacrc = !con->msgr->nocrc; 1476 bool do_datacrc = !con->msgr->nocrc;
1067 int ret; 1477 u32 crc;
1068 int total_max_write;
1069 bool in_trail = false;
1070 const size_t trail_len = (msg->trail ? msg->trail->length : 0);
1071 const size_t trail_off = data_len - trail_len;
1072 1478
1073 dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", 1479 dout("%s %p msg %p\n", __func__, con, msg);
1074 con, msg, con->out_msg_pos.page, msg->nr_pages, 1480
1075 con->out_msg_pos.page_pos); 1481 if (list_empty(&msg->data))
1482 return -EINVAL;
1076 1483
1077 /* 1484 /*
1078 * Iterate through each page that contains data to be 1485 * Iterate through each page that contains data to be
@@ -1082,72 +1489,41 @@ static int write_partial_msg_pages(struct ceph_connection *con)
1082 * need to map the page. If we have no pages, they have 1489 * need to map the page. If we have no pages, they have
1083 * been revoked, so use the zero page. 1490 * been revoked, so use the zero page.
1084 */ 1491 */
1085 while (data_len > con->out_msg_pos.data_pos) { 1492 crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
1086 struct page *page = NULL; 1493 while (cursor->resid) {
1087 int max_write = PAGE_SIZE; 1494 struct page *page;
1088 int bio_offset = 0; 1495 size_t page_offset;
1089 1496 size_t length;
1090 in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off; 1497 bool last_piece;
1091 if (!in_trail) 1498 bool need_crc;
1092 total_max_write = trail_off - con->out_msg_pos.data_pos; 1499 int ret;
1093
1094 if (in_trail) {
1095 total_max_write = data_len - con->out_msg_pos.data_pos;
1096
1097 page = list_first_entry(&msg->trail->head,
1098 struct page, lru);
1099 } else if (msg->pages) {
1100 page = msg->pages[con->out_msg_pos.page];
1101 } else if (msg->pagelist) {
1102 page = list_first_entry(&msg->pagelist->head,
1103 struct page, lru);
1104#ifdef CONFIG_BLOCK
1105 } else if (msg->bio) {
1106 struct bio_vec *bv;
1107 1500
1108 bv = bio_iovec_idx(msg->bio_iter, msg->bio_seg); 1501 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
1109 page = bv->bv_page; 1502 &last_piece);
1110 bio_offset = bv->bv_offset; 1503 ret = ceph_tcp_sendpage(con->sock, page, page_offset,
1111 max_write = bv->bv_len; 1504 length, last_piece);
1112#endif 1505 if (ret <= 0) {
1113 } else { 1506 if (do_datacrc)
1114 page = zero_page; 1507 msg->footer.data_crc = cpu_to_le32(crc);
1115 }
1116 len = min_t(int, max_write - con->out_msg_pos.page_pos,
1117 total_max_write);
1118
1119 if (do_datacrc && !con->out_msg_pos.did_page_crc) {
1120 void *base;
1121 u32 crc = le32_to_cpu(msg->footer.data_crc);
1122 char *kaddr;
1123
1124 kaddr = kmap(page);
1125 BUG_ON(kaddr == NULL);
1126 base = kaddr + con->out_msg_pos.page_pos + bio_offset;
1127 crc = crc32c(crc, base, len);
1128 kunmap(page);
1129 msg->footer.data_crc = cpu_to_le32(crc);
1130 con->out_msg_pos.did_page_crc = true;
1131 }
1132 ret = ceph_tcp_sendpage(con->sock, page,
1133 con->out_msg_pos.page_pos + bio_offset,
1134 len, 1);
1135 if (ret <= 0)
1136 goto out;
1137 1508
1138 out_msg_pos_next(con, page, len, (size_t) ret, in_trail); 1509 return ret;
1510 }
1511 if (do_datacrc && cursor->need_crc)
1512 crc = ceph_crc32c_page(crc, page, page_offset, length);
1513 need_crc = ceph_msg_data_advance(&msg->cursor, (size_t)ret);
1139 } 1514 }
1140 1515
1141 dout("write_partial_msg_pages %p msg %p done\n", con, msg); 1516 dout("%s %p msg %p done\n", __func__, con, msg);
1142 1517
1143 /* prepare and queue up footer, too */ 1518 /* prepare and queue up footer, too */
1144 if (!do_datacrc) 1519 if (do_datacrc)
1520 msg->footer.data_crc = cpu_to_le32(crc);
1521 else
1145 msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; 1522 msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
1146 con_out_kvec_reset(con); 1523 con_out_kvec_reset(con);
1147 prepare_write_message_footer(con); 1524 prepare_write_message_footer(con);
1148 ret = 1; 1525
1149out: 1526 return 1; /* must return > 0 to indicate success */
1150 return ret;
1151} 1527}
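
The 1/0/<0 return convention noted in the comment above this function is what try_write() (hunk at -2108 below) depends on; a compliant caller reduces to roughly:

        for (;;) {
                int ret = write_partial_message_data(con);

                if (ret == 1)
                        break;          /* payload done, footer queued */
                if (ret == 0)
                        return 0;       /* socket full, retry when writable */
                return ret;             /* ret < 0: hard error */
        }
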
1152 1528
1153/* 1529/*
@@ -1160,7 +1536,7 @@ static int write_partial_skip(struct ceph_connection *con)
1160 while (con->out_skip > 0) { 1536 while (con->out_skip > 0) {
1161 size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); 1537 size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
1162 1538
1163 ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, 1); 1539 ret = ceph_tcp_sendpage(con->sock, zero_page, 0, size, true);
1164 if (ret <= 0) 1540 if (ret <= 0)
1165 goto out; 1541 goto out;
1166 con->out_skip -= ret; 1542 con->out_skip -= ret;
@@ -1191,6 +1567,13 @@ static void prepare_read_ack(struct ceph_connection *con)
1191 con->in_base_pos = 0; 1567 con->in_base_pos = 0;
1192} 1568}
1193 1569
1570static void prepare_read_seq(struct ceph_connection *con)
1571{
1572 dout("prepare_read_seq %p\n", con);
1573 con->in_base_pos = 0;
1574 con->in_tag = CEPH_MSGR_TAG_SEQ;
1575}
1576
1194static void prepare_read_tag(struct ceph_connection *con) 1577static void prepare_read_tag(struct ceph_connection *con)
1195{ 1578{
1196 dout("prepare_read_tag %p\n", con); 1579 dout("prepare_read_tag %p\n", con);
@@ -1597,7 +1980,6 @@ static int process_connect(struct ceph_connection *con)
1597 con->error_msg = "connect authorization failure"; 1980 con->error_msg = "connect authorization failure";
1598 return -1; 1981 return -1;
1599 } 1982 }
1600 con->auth_retry = 1;
1601 con_out_kvec_reset(con); 1983 con_out_kvec_reset(con);
1602 ret = prepare_write_connect(con); 1984 ret = prepare_write_connect(con);
1603 if (ret < 0) 1985 if (ret < 0)
@@ -1668,6 +2050,7 @@ static int process_connect(struct ceph_connection *con)
1668 prepare_read_connect(con); 2050 prepare_read_connect(con);
1669 break; 2051 break;
1670 2052
2053 case CEPH_MSGR_TAG_SEQ:
1671 case CEPH_MSGR_TAG_READY: 2054 case CEPH_MSGR_TAG_READY:
1672 if (req_feat & ~server_feat) { 2055 if (req_feat & ~server_feat) {
1673 pr_err("%s%lld %s protocol feature mismatch," 2056 pr_err("%s%lld %s protocol feature mismatch,"
@@ -1682,7 +2065,7 @@ static int process_connect(struct ceph_connection *con)
1682 2065
1683 WARN_ON(con->state != CON_STATE_NEGOTIATING); 2066 WARN_ON(con->state != CON_STATE_NEGOTIATING);
1684 con->state = CON_STATE_OPEN; 2067 con->state = CON_STATE_OPEN;
1685 2068 con->auth_retry = 0; /* we authenticated; clear flag */
1686 con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq); 2069 con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
1687 con->connect_seq++; 2070 con->connect_seq++;
1688 con->peer_features = server_feat; 2071 con->peer_features = server_feat;
@@ -1698,7 +2081,12 @@ static int process_connect(struct ceph_connection *con)
1698 2081
1699 con->delay = 0; /* reset backoff memory */ 2082 con->delay = 0; /* reset backoff memory */
1700 2083
1701 prepare_read_tag(con); 2084 if (con->in_reply.tag == CEPH_MSGR_TAG_SEQ) {
2085 prepare_write_seq(con);
2086 prepare_read_seq(con);
2087 } else {
2088 prepare_read_tag(con);
2089 }
1702 break; 2090 break;
1703 2091
1704 case CEPH_MSGR_TAG_WAIT: 2092 case CEPH_MSGR_TAG_WAIT:
@@ -1732,7 +2120,6 @@ static int read_partial_ack(struct ceph_connection *con)
1732 return read_partial(con, end, size, &con->in_temp_ack); 2120 return read_partial(con, end, size, &con->in_temp_ack);
1733} 2121}
1734 2122
1735
1736/* 2123/*
1737 * We can finally discard anything that's been acked. 2124 * We can finally discard anything that's been acked.
1738 */ 2125 */
@@ -1757,8 +2144,6 @@ static void process_ack(struct ceph_connection *con)
1757} 2144}
1758 2145
1759 2146
1760
1761
1762static int read_partial_message_section(struct ceph_connection *con, 2147static int read_partial_message_section(struct ceph_connection *con,
1763 struct kvec *section, 2148 struct kvec *section,
1764 unsigned int sec_len, u32 *crc) 2149 unsigned int sec_len, u32 *crc)
@@ -1782,77 +2167,49 @@ static int read_partial_message_section(struct ceph_connection *con,
1782 return 1; 2167 return 1;
1783} 2168}
1784 2169
1785static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip); 2170static int read_partial_msg_data(struct ceph_connection *con)
1786
1787static int read_partial_message_pages(struct ceph_connection *con,
1788 struct page **pages,
1789 unsigned int data_len, bool do_datacrc)
1790{ 2171{
1791 void *p; 2172 struct ceph_msg *msg = con->in_msg;
2173 struct ceph_msg_data_cursor *cursor = &msg->cursor;
2174 const bool do_datacrc = !con->msgr->nocrc;
2175 struct page *page;
2176 size_t page_offset;
2177 size_t length;
2178 u32 crc = 0;
1792 int ret; 2179 int ret;
1793 int left;
1794 2180
1795 left = min((int)(data_len - con->in_msg_pos.data_pos), 2181 BUG_ON(!msg);
1796 (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); 2182 if (list_empty(&msg->data))
1797 /* (page) data */ 2183 return -EIO;
1798 BUG_ON(pages == NULL);
1799 p = kmap(pages[con->in_msg_pos.page]);
1800 ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos,
1801 left);
1802 if (ret > 0 && do_datacrc)
1803 con->in_data_crc =
1804 crc32c(con->in_data_crc,
1805 p + con->in_msg_pos.page_pos, ret);
1806 kunmap(pages[con->in_msg_pos.page]);
1807 if (ret <= 0)
1808 return ret;
1809 con->in_msg_pos.data_pos += ret;
1810 con->in_msg_pos.page_pos += ret;
1811 if (con->in_msg_pos.page_pos == PAGE_SIZE) {
1812 con->in_msg_pos.page_pos = 0;
1813 con->in_msg_pos.page++;
1814 }
1815
1816 return ret;
1817}
1818
1819#ifdef CONFIG_BLOCK
1820static int read_partial_message_bio(struct ceph_connection *con,
1821 struct bio **bio_iter, int *bio_seg,
1822 unsigned int data_len, bool do_datacrc)
1823{
1824 struct bio_vec *bv = bio_iovec_idx(*bio_iter, *bio_seg);
1825 void *p;
1826 int ret, left;
1827 2184
1828 left = min((int)(data_len - con->in_msg_pos.data_pos), 2185 if (do_datacrc)
1829 (int)(bv->bv_len - con->in_msg_pos.page_pos)); 2186 crc = con->in_data_crc;
2187 while (cursor->resid) {
2188 page = ceph_msg_data_next(&msg->cursor, &page_offset, &length,
2189 NULL);
2190 ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
2191 if (ret <= 0) {
2192 if (do_datacrc)
2193 con->in_data_crc = crc;
1830 2194
1831 p = kmap(bv->bv_page) + bv->bv_offset; 2195 return ret;
2196 }
1832 2197
1833 ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, 2198 if (do_datacrc)
1834 left); 2199 crc = ceph_crc32c_page(crc, page, page_offset, ret);
1835 if (ret > 0 && do_datacrc) 2200 (void) ceph_msg_data_advance(&msg->cursor, (size_t)ret);
1836 con->in_data_crc =
1837 crc32c(con->in_data_crc,
1838 p + con->in_msg_pos.page_pos, ret);
1839 kunmap(bv->bv_page);
1840 if (ret <= 0)
1841 return ret;
1842 con->in_msg_pos.data_pos += ret;
1843 con->in_msg_pos.page_pos += ret;
1844 if (con->in_msg_pos.page_pos == bv->bv_len) {
1845 con->in_msg_pos.page_pos = 0;
1846 iter_bio_next(bio_iter, bio_seg);
1847 } 2201 }
2202 if (do_datacrc)
2203 con->in_data_crc = crc;
1848 2204
1849 return ret; 2205 return 1; /* must return > 0 to indicate success */
1850} 2206}
1851#endif
1852 2207
1853/* 2208/*
1854 * read (part of) a message. 2209 * read (part of) a message.
1855 */ 2210 */
2211static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);
2212
1856static int read_partial_message(struct ceph_connection *con) 2213static int read_partial_message(struct ceph_connection *con)
1857{ 2214{
1858 struct ceph_msg *m = con->in_msg; 2215 struct ceph_msg *m = con->in_msg;
@@ -1885,7 +2242,7 @@ static int read_partial_message(struct ceph_connection *con)
1885 if (front_len > CEPH_MSG_MAX_FRONT_LEN) 2242 if (front_len > CEPH_MSG_MAX_FRONT_LEN)
1886 return -EIO; 2243 return -EIO;
1887 middle_len = le32_to_cpu(con->in_hdr.middle_len); 2244 middle_len = le32_to_cpu(con->in_hdr.middle_len);
1888 if (middle_len > CEPH_MSG_MAX_DATA_LEN) 2245 if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN)
1889 return -EIO; 2246 return -EIO;
1890 data_len = le32_to_cpu(con->in_hdr.data_len); 2247 data_len = le32_to_cpu(con->in_hdr.data_len);
1891 if (data_len > CEPH_MSG_MAX_DATA_LEN) 2248 if (data_len > CEPH_MSG_MAX_DATA_LEN)
@@ -1914,14 +2271,22 @@ static int read_partial_message(struct ceph_connection *con)
1914 int skip = 0; 2271 int skip = 0;
1915 2272
1916 dout("got hdr type %d front %d data %d\n", con->in_hdr.type, 2273 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
1917 con->in_hdr.front_len, con->in_hdr.data_len); 2274 front_len, data_len);
1918 ret = ceph_con_in_msg_alloc(con, &skip); 2275 ret = ceph_con_in_msg_alloc(con, &skip);
1919 if (ret < 0) 2276 if (ret < 0)
1920 return ret; 2277 return ret;
2278
2279 BUG_ON(!con->in_msg ^ skip);
2280 if (con->in_msg && data_len > con->in_msg->data_length) {
2281 pr_warning("%s skipping long message (%u > %zd)\n",
2282 __func__, data_len, con->in_msg->data_length);
2283 ceph_msg_put(con->in_msg);
2284 con->in_msg = NULL;
2285 skip = 1;
2286 }
1921 if (skip) { 2287 if (skip) {
1922 /* skip this message */ 2288 /* skip this message */
1923 dout("alloc_msg said skip message\n"); 2289 dout("alloc_msg said skip message\n");
1924 BUG_ON(con->in_msg);
1925 con->in_base_pos = -front_len - middle_len - data_len - 2290 con->in_base_pos = -front_len - middle_len - data_len -
1926 sizeof(m->footer); 2291 sizeof(m->footer);
1927 con->in_tag = CEPH_MSGR_TAG_READY; 2292 con->in_tag = CEPH_MSGR_TAG_READY;
@@ -1936,17 +2301,10 @@ static int read_partial_message(struct ceph_connection *con)
1936 if (m->middle) 2301 if (m->middle)
1937 m->middle->vec.iov_len = 0; 2302 m->middle->vec.iov_len = 0;
1938 2303
1939 con->in_msg_pos.page = 0; 2304 /* prepare for data payload, if any */
1940 if (m->pages)
1941 con->in_msg_pos.page_pos = m->page_alignment;
1942 else
1943 con->in_msg_pos.page_pos = 0;
1944 con->in_msg_pos.data_pos = 0;
1945 2305
1946#ifdef CONFIG_BLOCK 2306 if (data_len)
1947 if (m->bio) 2307 prepare_message_data(con->in_msg, data_len);
1948 init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
1949#endif
1950 } 2308 }
1951 2309
1952 /* front */ 2310 /* front */
@@ -1965,24 +2323,10 @@ static int read_partial_message(struct ceph_connection *con)
1965 } 2323 }
1966 2324
1967 /* (page) data */ 2325 /* (page) data */
1968 while (con->in_msg_pos.data_pos < data_len) { 2326 if (data_len) {
1969 if (m->pages) { 2327 ret = read_partial_msg_data(con);
1970 ret = read_partial_message_pages(con, m->pages, 2328 if (ret <= 0)
1971 data_len, do_datacrc); 2329 return ret;
1972 if (ret <= 0)
1973 return ret;
1974#ifdef CONFIG_BLOCK
1975 } else if (m->bio) {
1976 BUG_ON(!m->bio_iter);
1977 ret = read_partial_message_bio(con,
1978 &m->bio_iter, &m->bio_seg,
1979 data_len, do_datacrc);
1980 if (ret <= 0)
1981 return ret;
1982#endif
1983 } else {
1984 BUG_ON(1);
1985 }
1986 } 2330 }
1987 2331
1988 /* footer */ 2332 /* footer */
@@ -2108,13 +2452,13 @@ more_kvec:
2108 goto do_next; 2452 goto do_next;
2109 } 2453 }
2110 2454
2111 ret = write_partial_msg_pages(con); 2455 ret = write_partial_message_data(con);
2112 if (ret == 1) 2456 if (ret == 1)
2113 goto more_kvec; /* we need to send the footer, too! */ 2457 goto more_kvec; /* we need to send the footer, too! */
2114 if (ret == 0) 2458 if (ret == 0)
2115 goto out; 2459 goto out;
2116 if (ret < 0) { 2460 if (ret < 0) {
2117 dout("try_write write_partial_msg_pages err %d\n", 2461 dout("try_write write_partial_message_data err %d\n",
2118 ret); 2462 ret);
2119 goto out; 2463 goto out;
2120 } 2464 }
@@ -2266,7 +2610,12 @@ more:
2266 prepare_read_tag(con); 2610 prepare_read_tag(con);
2267 goto more; 2611 goto more;
2268 } 2612 }
2269 if (con->in_tag == CEPH_MSGR_TAG_ACK) { 2613 if (con->in_tag == CEPH_MSGR_TAG_ACK ||
2614 con->in_tag == CEPH_MSGR_TAG_SEQ) {
2615 /*
2616 * the final handshake seq exchange is semantically
2617 * equivalent to an ACK
2618 */
2270 ret = read_partial_ack(con); 2619 ret = read_partial_ack(con);
2271 if (ret <= 0) 2620 if (ret <= 0)
2272 goto out; 2621 goto out;
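
Reusing the ACK path for TAG_SEQ works because both tags are followed by exactly one little-endian u64 on the wire, which read_partial_ack() stores in con->in_temp_ack either way; a sketch of the shared meaning:

        u64 seq = le64_to_cpu(con->in_temp_ack);
        /* process_ack() then releases every sent message whose seq is
         * <= this value, which is exactly the ACK semantic. */
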
@@ -2672,6 +3021,88 @@ void ceph_con_keepalive(struct ceph_connection *con)
2672} 3021}
2673EXPORT_SYMBOL(ceph_con_keepalive); 3022EXPORT_SYMBOL(ceph_con_keepalive);
2674 3023
3024static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
3025{
3026 struct ceph_msg_data *data;
3027
3028 if (WARN_ON(!ceph_msg_data_type_valid(type)))
3029 return NULL;
3030
3031 data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
3032 if (data)
3033 data->type = type;
3034 INIT_LIST_HEAD(&data->links);
3035
3036 return data;
3037}
3038
3039static void ceph_msg_data_destroy(struct ceph_msg_data *data)
3040{
3041 if (!data)
3042 return;
3043
3044 WARN_ON(!list_empty(&data->links));
3045 if (data->type == CEPH_MSG_DATA_PAGELIST) {
3046 ceph_pagelist_release(data->pagelist);
3047 kfree(data->pagelist);
3048 }
3049 kmem_cache_free(ceph_msg_data_cache, data);
3050}
3051
3052void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
3053 size_t length, size_t alignment)
3054{
3055 struct ceph_msg_data *data;
3056
3057 BUG_ON(!pages);
3058 BUG_ON(!length);
3059
3060 data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
3061 BUG_ON(!data);
3062 data->pages = pages;
3063 data->length = length;
3064 data->alignment = alignment & ~PAGE_MASK;
3065
3066 list_add_tail(&data->links, &msg->data);
3067 msg->data_length += length;
3068}
3069EXPORT_SYMBOL(ceph_msg_data_add_pages);
3070
3071void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
3072 struct ceph_pagelist *pagelist)
3073{
3074 struct ceph_msg_data *data;
3075
3076 BUG_ON(!pagelist);
3077 BUG_ON(!pagelist->length);
3078
3079 data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
3080 BUG_ON(!data);
3081 data->pagelist = pagelist;
3082
3083 list_add_tail(&data->links, &msg->data);
3084 msg->data_length += pagelist->length;
3085}
3086EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
3087
3088#ifdef CONFIG_BLOCK
3089void ceph_msg_data_add_bio(struct ceph_msg *msg, struct bio *bio,
3090 size_t length)
3091{
3092 struct ceph_msg_data *data;
3093
3094 BUG_ON(!bio);
3095
3096 data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
3097 BUG_ON(!data);
3098 data->bio = bio;
3099 data->bio_length = length;
3100
3101 list_add_tail(&data->links, &msg->data);
3102 msg->data_length += length;
3103}
3104EXPORT_SYMBOL(ceph_msg_data_add_bio);
3105#endif /* CONFIG_BLOCK */
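
These add_* helpers are, after this patch, the only way payload sources get attached to a message; each appends one ceph_msg_data item and grows msg->data_length. A hedged usage sketch, assuming msg, pages, pagelist, bio and the lengths already exist in the caller:

        ceph_msg_data_add_pages(msg, pages, length, page_off);
        ceph_msg_data_add_pagelist(msg, pagelist);
#ifdef CONFIG_BLOCK
        ceph_msg_data_add_bio(msg, bio, bio_length);
#endif
        /* msg->data_length is now the sum of the attached item lengths */
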
2675 3106
2676/* 3107/*
2677 * construct a new message with given type, size 3108 * construct a new message with given type, size
@@ -2682,49 +3113,20 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
2682{ 3113{
2683 struct ceph_msg *m; 3114 struct ceph_msg *m;
2684 3115
2685 m = kmalloc(sizeof(*m), flags); 3116 m = kmem_cache_zalloc(ceph_msg_cache, flags);
2686 if (m == NULL) 3117 if (m == NULL)
2687 goto out; 3118 goto out;
2688 kref_init(&m->kref);
2689 3119
2690 m->con = NULL;
2691 INIT_LIST_HEAD(&m->list_head);
2692
2693 m->hdr.tid = 0;
2694 m->hdr.type = cpu_to_le16(type); 3120 m->hdr.type = cpu_to_le16(type);
2695 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); 3121 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
2696 m->hdr.version = 0;
2697 m->hdr.front_len = cpu_to_le32(front_len); 3122 m->hdr.front_len = cpu_to_le32(front_len);
2698 m->hdr.middle_len = 0;
2699 m->hdr.data_len = 0;
2700 m->hdr.data_off = 0;
2701 m->hdr.reserved = 0;
2702 m->footer.front_crc = 0;
2703 m->footer.middle_crc = 0;
2704 m->footer.data_crc = 0;
2705 m->footer.flags = 0;
2706 m->front_max = front_len;
2707 m->front_is_vmalloc = false;
2708 m->more_to_follow = false;
2709 m->ack_stamp = 0;
2710 m->pool = NULL;
2711
2712 /* middle */
2713 m->middle = NULL;
2714 3123
2715 /* data */ 3124 INIT_LIST_HEAD(&m->list_head);
2716 m->nr_pages = 0; 3125 kref_init(&m->kref);
2717 m->page_alignment = 0; 3126 INIT_LIST_HEAD(&m->data);
2718 m->pages = NULL;
2719 m->pagelist = NULL;
2720#ifdef CONFIG_BLOCK
2721 m->bio = NULL;
2722 m->bio_iter = NULL;
2723 m->bio_seg = 0;
2724#endif /* CONFIG_BLOCK */
2725 m->trail = NULL;
2726 3127
2727 /* front */ 3128 /* front */
3129 m->front_max = front_len;
2728 if (front_len) { 3130 if (front_len) {
2729 if (front_len > PAGE_CACHE_SIZE) { 3131 if (front_len > PAGE_CACHE_SIZE) {
2730 m->front.iov_base = __vmalloc(front_len, flags, 3132 m->front.iov_base = __vmalloc(front_len, flags,
@@ -2802,49 +3204,37 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
2802static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip) 3204static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
2803{ 3205{
2804 struct ceph_msg_header *hdr = &con->in_hdr; 3206 struct ceph_msg_header *hdr = &con->in_hdr;
2805 int type = le16_to_cpu(hdr->type);
2806 int front_len = le32_to_cpu(hdr->front_len);
2807 int middle_len = le32_to_cpu(hdr->middle_len); 3207 int middle_len = le32_to_cpu(hdr->middle_len);
3208 struct ceph_msg *msg;
2808 int ret = 0; 3209 int ret = 0;
2809 3210
2810 BUG_ON(con->in_msg != NULL); 3211 BUG_ON(con->in_msg != NULL);
3212 BUG_ON(!con->ops->alloc_msg);
2811 3213
2812 if (con->ops->alloc_msg) { 3214 mutex_unlock(&con->mutex);
2813 struct ceph_msg *msg; 3215 msg = con->ops->alloc_msg(con, hdr, skip);
2814 3216 mutex_lock(&con->mutex);
2815 mutex_unlock(&con->mutex); 3217 if (con->state != CON_STATE_OPEN) {
2816 msg = con->ops->alloc_msg(con, hdr, skip); 3218 if (msg)
2817 mutex_lock(&con->mutex); 3219 ceph_msg_put(msg);
2818 if (con->state != CON_STATE_OPEN) { 3220 return -EAGAIN;
2819 if (msg)
2820 ceph_msg_put(msg);
2821 return -EAGAIN;
2822 }
2823 con->in_msg = msg;
2824 if (con->in_msg) {
2825 con->in_msg->con = con->ops->get(con);
2826 BUG_ON(con->in_msg->con == NULL);
2827 }
2828 if (*skip) {
2829 con->in_msg = NULL;
2830 return 0;
2831 }
2832 if (!con->in_msg) {
2833 con->error_msg =
2834 "error allocating memory for incoming message";
2835 return -ENOMEM;
2836 }
2837 } 3221 }
2838 if (!con->in_msg) { 3222 if (msg) {
2839 con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false); 3223 BUG_ON(*skip);
2840 if (!con->in_msg) { 3224 con->in_msg = msg;
2841 pr_err("unable to allocate msg type %d len %d\n",
2842 type, front_len);
2843 return -ENOMEM;
2844 }
2845 con->in_msg->con = con->ops->get(con); 3225 con->in_msg->con = con->ops->get(con);
2846 BUG_ON(con->in_msg->con == NULL); 3226 BUG_ON(con->in_msg->con == NULL);
2847 con->in_msg->page_alignment = le16_to_cpu(hdr->data_off); 3227 } else {
3228 /*
3229 * Null message pointer means either we should skip
3230 * this message or we couldn't allocate memory. The
3231 * former is not an error.
3232 */
3233 if (*skip)
3234 return 0;
3235 con->error_msg = "error allocating memory for incoming message";
3236
3237 return -ENOMEM;
2848 } 3238 }
2849 memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); 3239 memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
2850 3240
@@ -2870,7 +3260,7 @@ void ceph_msg_kfree(struct ceph_msg *m)
2870 vfree(m->front.iov_base); 3260 vfree(m->front.iov_base);
2871 else 3261 else
2872 kfree(m->front.iov_base); 3262 kfree(m->front.iov_base);
2873 kfree(m); 3263 kmem_cache_free(ceph_msg_cache, m);
2874} 3264}
2875 3265
2876/* 3266/*
@@ -2879,6 +3269,9 @@ void ceph_msg_kfree(struct ceph_msg *m)
2879void ceph_msg_last_put(struct kref *kref) 3269void ceph_msg_last_put(struct kref *kref)
2880{ 3270{
2881 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); 3271 struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
3272 LIST_HEAD(data);
3273 struct list_head *links;
3274 struct list_head *next;
2882 3275
2883 dout("ceph_msg_put last one on %p\n", m); 3276 dout("ceph_msg_put last one on %p\n", m);
2884 WARN_ON(!list_empty(&m->list_head)); 3277 WARN_ON(!list_empty(&m->list_head));
@@ -2888,16 +3281,16 @@ void ceph_msg_last_put(struct kref *kref)
2888 ceph_buffer_put(m->middle); 3281 ceph_buffer_put(m->middle);
2889 m->middle = NULL; 3282 m->middle = NULL;
2890 } 3283 }
2891 m->nr_pages = 0;
2892 m->pages = NULL;
2893 3284
2894 if (m->pagelist) { 3285 list_splice_init(&m->data, &data);
2895 ceph_pagelist_release(m->pagelist); 3286 list_for_each_safe(links, next, &data) {
2896 kfree(m->pagelist); 3287 struct ceph_msg_data *data;
2897 m->pagelist = NULL;
2898 }
2899 3288
2900 m->trail = NULL; 3289 data = list_entry(links, struct ceph_msg_data, links);
3290 list_del_init(links);
3291 ceph_msg_data_destroy(data);
3292 }
3293 m->data_length = 0;
2901 3294
2902 if (m->pool) 3295 if (m->pool)
2903 ceph_msgpool_put(m->pool, m); 3296 ceph_msgpool_put(m->pool, m);
@@ -2908,8 +3301,8 @@ EXPORT_SYMBOL(ceph_msg_last_put);
2908 3301
2909void ceph_msg_dump(struct ceph_msg *msg) 3302void ceph_msg_dump(struct ceph_msg *msg)
2910{ 3303{
2911 pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg, 3304 pr_debug("msg_dump %p (front_max %d length %zd)\n", msg,
2912 msg->front_max, msg->nr_pages); 3305 msg->front_max, msg->data_length);
2913 print_hex_dump(KERN_DEBUG, "header: ", 3306 print_hex_dump(KERN_DEBUG, "header: ",
2914 DUMP_PREFIX_OFFSET, 16, 1, 3307 DUMP_PREFIX_OFFSET, 16, 1,
2915 &msg->hdr, sizeof(msg->hdr), true); 3308 &msg->hdr, sizeof(msg->hdr), true);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index aef5b1062bee..1fe25cd29d0e 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -737,7 +737,7 @@ static void delayed_work(struct work_struct *work)
737 737
738 __validate_auth(monc); 738 __validate_auth(monc);
739 739
740 if (monc->auth->ops->is_authenticated(monc->auth)) 740 if (ceph_auth_is_authenticated(monc->auth))
741 __send_subscribe(monc); 741 __send_subscribe(monc);
742 } 742 }
743 __schedule_delayed(monc); 743 __schedule_delayed(monc);
@@ -892,8 +892,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
892 892
893 mutex_lock(&monc->mutex); 893 mutex_lock(&monc->mutex);
894 had_debugfs_info = have_debugfs_info(monc); 894 had_debugfs_info = have_debugfs_info(monc);
895 if (monc->auth->ops) 895 was_auth = ceph_auth_is_authenticated(monc->auth);
896 was_auth = monc->auth->ops->is_authenticated(monc->auth);
897 monc->pending_auth = 0; 896 monc->pending_auth = 0;
898 ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, 897 ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
899 msg->front.iov_len, 898 msg->front.iov_len,
@@ -904,7 +903,7 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
904 wake_up_all(&monc->client->auth_wq); 903 wake_up_all(&monc->client->auth_wq);
905 } else if (ret > 0) { 904 } else if (ret > 0) {
906 __send_prepared_auth_request(monc, ret); 905 __send_prepared_auth_request(monc, ret);
907 } else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) { 906 } else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
908 dout("authenticated, starting session\n"); 907 dout("authenticated, starting session\n");
909 908
910 monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; 909 monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d730dd4d8eb2..a3395fdfbd4f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1,3 +1,4 @@
1
1#include <linux/ceph/ceph_debug.h> 2#include <linux/ceph/ceph_debug.h>
2 3
3#include <linux/module.h> 4#include <linux/module.h>
@@ -21,6 +22,8 @@
21#define OSD_OP_FRONT_LEN 4096 22#define OSD_OP_FRONT_LEN 4096
22#define OSD_OPREPLY_FRONT_LEN 512 23#define OSD_OPREPLY_FRONT_LEN 512
23 24
25static struct kmem_cache *ceph_osd_request_cache;
26
24static const struct ceph_connection_operations osd_con_ops; 27static const struct ceph_connection_operations osd_con_ops;
25 28
26static void __send_queued(struct ceph_osd_client *osdc); 29static void __send_queued(struct ceph_osd_client *osdc);
@@ -32,12 +35,6 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
32static void __send_request(struct ceph_osd_client *osdc, 35static void __send_request(struct ceph_osd_client *osdc,
33 struct ceph_osd_request *req); 36 struct ceph_osd_request *req);
34 37
35static int op_has_extent(int op)
36{
37 return (op == CEPH_OSD_OP_READ ||
38 op == CEPH_OSD_OP_WRITE);
39}
40
41/* 38/*
42 * Implement client access to distributed object storage cluster. 39 * Implement client access to distributed object storage cluster.
43 * 40 *
@@ -63,53 +60,238 @@ static int op_has_extent(int op)
63 * 60 *
64 * fill osd op in request message. 61 * fill osd op in request message.
65 */ 62 */
66static int calc_layout(struct ceph_vino vino, 63static int calc_layout(struct ceph_file_layout *layout, u64 off, u64 *plen,
67 struct ceph_file_layout *layout, 64 u64 *objnum, u64 *objoff, u64 *objlen)
68 u64 off, u64 *plen,
69 struct ceph_osd_request *req,
70 struct ceph_osd_req_op *op)
71{ 65{
72 u64 orig_len = *plen; 66 u64 orig_len = *plen;
73 u64 bno = 0;
74 u64 objoff = 0;
75 u64 objlen = 0;
76 int r; 67 int r;
77 68
78 /* object extent? */ 69 /* object extent? */
79 r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno, 70 r = ceph_calc_file_object_mapping(layout, off, orig_len, objnum,
80 &objoff, &objlen); 71 objoff, objlen);
81 if (r < 0) 72 if (r < 0)
82 return r; 73 return r;
83 if (objlen < orig_len) { 74 if (*objlen < orig_len) {
84 *plen = objlen; 75 *plen = *objlen;
85 dout(" skipping last %llu, final file extent %llu~%llu\n", 76 dout(" skipping last %llu, final file extent %llu~%llu\n",
86 orig_len - *plen, off, *plen); 77 orig_len - *plen, off, *plen);
87 } 78 }
88 79
89 if (op_has_extent(op->op)) { 80 dout("calc_layout objnum=%llx %llu~%llu\n", *objnum, *objoff, *objlen);
90 u32 osize = le32_to_cpu(layout->fl_object_size); 81
91 op->extent.offset = objoff; 82 return 0;
92 op->extent.length = objlen; 83}
93 if (op->extent.truncate_size <= off - objoff) { 84
94 op->extent.truncate_size = 0; 85static void ceph_osd_data_init(struct ceph_osd_data *osd_data)
95 } else { 86{
96 op->extent.truncate_size -= off - objoff; 87 memset(osd_data, 0, sizeof (*osd_data));
97 if (op->extent.truncate_size > osize) 88 osd_data->type = CEPH_OSD_DATA_TYPE_NONE;
98 op->extent.truncate_size = osize; 89}
99 } 90
91static void ceph_osd_data_pages_init(struct ceph_osd_data *osd_data,
92 struct page **pages, u64 length, u32 alignment,
93 bool pages_from_pool, bool own_pages)
94{
95 osd_data->type = CEPH_OSD_DATA_TYPE_PAGES;
96 osd_data->pages = pages;
97 osd_data->length = length;
98 osd_data->alignment = alignment;
99 osd_data->pages_from_pool = pages_from_pool;
100 osd_data->own_pages = own_pages;
101}
102
103static void ceph_osd_data_pagelist_init(struct ceph_osd_data *osd_data,
104 struct ceph_pagelist *pagelist)
105{
106 osd_data->type = CEPH_OSD_DATA_TYPE_PAGELIST;
107 osd_data->pagelist = pagelist;
108}
109
110#ifdef CONFIG_BLOCK
111static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data,
112 struct bio *bio, size_t bio_length)
113{
114 osd_data->type = CEPH_OSD_DATA_TYPE_BIO;
115 osd_data->bio = bio;
116 osd_data->bio_length = bio_length;
117}
118#endif /* CONFIG_BLOCK */
119
120#define osd_req_op_data(oreq, whch, typ, fld) \
121 ({ \
122 BUG_ON(whch >= (oreq)->r_num_ops); \
123 &(oreq)->r_ops[whch].typ.fld; \
124 })
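
osd_req_op_data() is a GCC statement expression, so it bounds-checks the op index and then yields a pointer; osd_req_op_data(req, 0, extent, osd_data), for instance, expands to roughly:

        ({
                BUG_ON(0 >= req->r_num_ops);
                &req->r_ops[0].extent.osd_data;   /* value of the expression */
        })
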
125
126static struct ceph_osd_data *
127osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
128{
129 BUG_ON(which >= osd_req->r_num_ops);
130
131 return &osd_req->r_ops[which].raw_data_in;
132}
133
134struct ceph_osd_data *
135osd_req_op_extent_osd_data(struct ceph_osd_request *osd_req,
136 unsigned int which)
137{
138 return osd_req_op_data(osd_req, which, extent, osd_data);
139}
140EXPORT_SYMBOL(osd_req_op_extent_osd_data);
141
142struct ceph_osd_data *
143osd_req_op_cls_response_data(struct ceph_osd_request *osd_req,
144 unsigned int which)
145{
146 return osd_req_op_data(osd_req, which, cls, response_data);
147}
148EXPORT_SYMBOL(osd_req_op_cls_response_data); /* ??? */
149
150void osd_req_op_raw_data_in_pages(struct ceph_osd_request *osd_req,
151 unsigned int which, struct page **pages,
152 u64 length, u32 alignment,
153 bool pages_from_pool, bool own_pages)
154{
155 struct ceph_osd_data *osd_data;
156
157 osd_data = osd_req_op_raw_data_in(osd_req, which);
158 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
159 pages_from_pool, own_pages);
160}
161EXPORT_SYMBOL(osd_req_op_raw_data_in_pages);
162
163void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *osd_req,
164 unsigned int which, struct page **pages,
165 u64 length, u32 alignment,
166 bool pages_from_pool, bool own_pages)
167{
168 struct ceph_osd_data *osd_data;
169
170 osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
171 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
172 pages_from_pool, own_pages);
173}
174EXPORT_SYMBOL(osd_req_op_extent_osd_data_pages);
175
176void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *osd_req,
177 unsigned int which, struct ceph_pagelist *pagelist)
178{
179 struct ceph_osd_data *osd_data;
180
181 osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
182 ceph_osd_data_pagelist_init(osd_data, pagelist);
183}
184EXPORT_SYMBOL(osd_req_op_extent_osd_data_pagelist);
185
186#ifdef CONFIG_BLOCK
187void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
188 unsigned int which, struct bio *bio, size_t bio_length)
189{
190 struct ceph_osd_data *osd_data;
191
192 osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
193 ceph_osd_data_bio_init(osd_data, bio, bio_length);
194}
195EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio);
196#endif /* CONFIG_BLOCK */
197
198static void osd_req_op_cls_request_info_pagelist(
199 struct ceph_osd_request *osd_req,
200 unsigned int which, struct ceph_pagelist *pagelist)
201{
202 struct ceph_osd_data *osd_data;
203
204 osd_data = osd_req_op_data(osd_req, which, cls, request_info);
205 ceph_osd_data_pagelist_init(osd_data, pagelist);
206}
207
208void osd_req_op_cls_request_data_pagelist(
209 struct ceph_osd_request *osd_req,
210 unsigned int which, struct ceph_pagelist *pagelist)
211{
212 struct ceph_osd_data *osd_data;
213
214 osd_data = osd_req_op_data(osd_req, which, cls, request_data);
215 ceph_osd_data_pagelist_init(osd_data, pagelist);
216}
217EXPORT_SYMBOL(osd_req_op_cls_request_data_pagelist);
218
219void osd_req_op_cls_request_data_pages(struct ceph_osd_request *osd_req,
220 unsigned int which, struct page **pages, u64 length,
221 u32 alignment, bool pages_from_pool, bool own_pages)
222{
223 struct ceph_osd_data *osd_data;
224
225 osd_data = osd_req_op_data(osd_req, which, cls, request_data);
226 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
227 pages_from_pool, own_pages);
228}
229EXPORT_SYMBOL(osd_req_op_cls_request_data_pages);
230
231void osd_req_op_cls_response_data_pages(struct ceph_osd_request *osd_req,
232 unsigned int which, struct page **pages, u64 length,
233 u32 alignment, bool pages_from_pool, bool own_pages)
234{
235 struct ceph_osd_data *osd_data;
236
237 osd_data = osd_req_op_data(osd_req, which, cls, response_data);
238 ceph_osd_data_pages_init(osd_data, pages, length, alignment,
239 pages_from_pool, own_pages);
240}
241EXPORT_SYMBOL(osd_req_op_cls_response_data_pages);
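
A typical pairing of an op-init call (defined later in this patch) with one of the data-binding helpers above, for a read whose reply should land in a page vector; objoff, objlen and pages are placeholders:

        osd_req_op_extent_init(req, 0, CEPH_OSD_OP_READ,
                               objoff, objlen, 0, 0);
        osd_req_op_extent_osd_data_pages(req, 0, pages, objlen,
                                         objoff & ~PAGE_MASK, false, false);
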
242
243static u64 ceph_osd_data_length(struct ceph_osd_data *osd_data)
244{
245 switch (osd_data->type) {
246 case CEPH_OSD_DATA_TYPE_NONE:
247 return 0;
248 case CEPH_OSD_DATA_TYPE_PAGES:
249 return osd_data->length;
250 case CEPH_OSD_DATA_TYPE_PAGELIST:
251 return (u64)osd_data->pagelist->length;
252#ifdef CONFIG_BLOCK
253 case CEPH_OSD_DATA_TYPE_BIO:
254 return (u64)osd_data->bio_length;
255#endif /* CONFIG_BLOCK */
256 default:
257 WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
258 return 0;
100 } 259 }
101 req->r_num_pages = calc_pages_for(off, *plen); 260}
102 req->r_page_alignment = off & ~PAGE_MASK;
103 if (op->op == CEPH_OSD_OP_WRITE)
104 op->payload_len = *plen;
105 261
106 dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", 262static void ceph_osd_data_release(struct ceph_osd_data *osd_data)
107 bno, objoff, objlen, req->r_num_pages); 263{
264 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES && osd_data->own_pages) {
265 int num_pages;
108 266
109 snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); 267 num_pages = calc_pages_for((u64)osd_data->alignment,
110 req->r_oid_len = strlen(req->r_oid); 268 (u64)osd_data->length);
269 ceph_release_page_vector(osd_data->pages, num_pages);
270 }
271 ceph_osd_data_init(osd_data);
272}
273
274static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
275 unsigned int which)
276{
277 struct ceph_osd_req_op *op;
278
279 BUG_ON(which >= osd_req->r_num_ops);
280 op = &osd_req->r_ops[which];
111 281
112 return r; 282 switch (op->op) {
283 case CEPH_OSD_OP_READ:
284 case CEPH_OSD_OP_WRITE:
285 ceph_osd_data_release(&op->extent.osd_data);
286 break;
287 case CEPH_OSD_OP_CALL:
288 ceph_osd_data_release(&op->cls.request_info);
289 ceph_osd_data_release(&op->cls.request_data);
290 ceph_osd_data_release(&op->cls.response_data);
291 break;
292 default:
293 break;
294 }
113} 295}
114 296
115/* 297/*
@@ -117,30 +299,26 @@ static int calc_layout(struct ceph_vino vino,
117 */ 299 */
118void ceph_osdc_release_request(struct kref *kref) 300void ceph_osdc_release_request(struct kref *kref)
119{ 301{
120 struct ceph_osd_request *req = container_of(kref, 302 struct ceph_osd_request *req;
121 struct ceph_osd_request, 303 unsigned int which;
122 r_kref);
123 304
305 req = container_of(kref, struct ceph_osd_request, r_kref);
124 if (req->r_request) 306 if (req->r_request)
125 ceph_msg_put(req->r_request); 307 ceph_msg_put(req->r_request);
126 if (req->r_con_filling_msg) { 308 if (req->r_reply) {
127 dout("%s revoking msg %p from con %p\n", __func__,
128 req->r_reply, req->r_con_filling_msg);
129 ceph_msg_revoke_incoming(req->r_reply); 309 ceph_msg_revoke_incoming(req->r_reply);
130 req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
131 req->r_con_filling_msg = NULL;
132 }
133 if (req->r_reply)
134 ceph_msg_put(req->r_reply); 310 ceph_msg_put(req->r_reply);
135 if (req->r_own_pages) 311 }
136 ceph_release_page_vector(req->r_pages, 312
137 req->r_num_pages); 313 for (which = 0; which < req->r_num_ops; which++)
314 osd_req_op_data_release(req, which);
315
138 ceph_put_snap_context(req->r_snapc); 316 ceph_put_snap_context(req->r_snapc);
139 ceph_pagelist_release(&req->r_trail);
140 if (req->r_mempool) 317 if (req->r_mempool)
141 mempool_free(req, req->r_osdc->req_mempool); 318 mempool_free(req, req->r_osdc->req_mempool);
142 else 319 else
143 kfree(req); 320 kmem_cache_free(ceph_osd_request_cache, req);
321
144} 322}
145EXPORT_SYMBOL(ceph_osdc_release_request); 323EXPORT_SYMBOL(ceph_osdc_release_request);
146 324
@@ -154,6 +332,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
154 struct ceph_msg *msg; 332 struct ceph_msg *msg;
155 size_t msg_size; 333 size_t msg_size;
156 334
335 BUILD_BUG_ON(CEPH_OSD_MAX_OP > U16_MAX);
336 BUG_ON(num_ops > CEPH_OSD_MAX_OP);
337
157 msg_size = 4 + 4 + 8 + 8 + 4+8; 338 msg_size = 4 + 4 + 8 + 8 + 4+8;
158 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ 339 msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */
159 msg_size += 1 + 8 + 4 + 4; /* pg_t */ 340 msg_size += 1 + 8 + 4 + 4; /* pg_t */
@@ -168,13 +349,14 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
168 req = mempool_alloc(osdc->req_mempool, gfp_flags); 349 req = mempool_alloc(osdc->req_mempool, gfp_flags);
169 memset(req, 0, sizeof(*req)); 350 memset(req, 0, sizeof(*req));
170 } else { 351 } else {
171 req = kzalloc(sizeof(*req), gfp_flags); 352 req = kmem_cache_zalloc(ceph_osd_request_cache, gfp_flags);
172 } 353 }
173 if (req == NULL) 354 if (req == NULL)
174 return NULL; 355 return NULL;
175 356
176 req->r_osdc = osdc; 357 req->r_osdc = osdc;
177 req->r_mempool = use_mempool; 358 req->r_mempool = use_mempool;
359 req->r_num_ops = num_ops;
178 360
179 kref_init(&req->r_kref); 361 kref_init(&req->r_kref);
180 init_completion(&req->r_completion); 362 init_completion(&req->r_completion);
@@ -198,8 +380,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
198 } 380 }
199 req->r_reply = msg; 381 req->r_reply = msg;
200 382
201 ceph_pagelist_init(&req->r_trail);
202
203 /* create request message; allow space for oid */ 383 /* create request message; allow space for oid */
204 if (use_mempool) 384 if (use_mempool)
205 msg = ceph_msgpool_get(&osdc->msgpool_op, 0); 385 msg = ceph_msgpool_get(&osdc->msgpool_op, 0);
@@ -218,60 +398,24 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
218} 398}
219EXPORT_SYMBOL(ceph_osdc_alloc_request); 399EXPORT_SYMBOL(ceph_osdc_alloc_request);
220 400
221static void osd_req_encode_op(struct ceph_osd_request *req, 401static bool osd_req_opcode_valid(u16 opcode)
222 struct ceph_osd_op *dst,
223 struct ceph_osd_req_op *src)
224{ 402{
225 dst->op = cpu_to_le16(src->op); 403 switch (opcode) {
226
227 switch (src->op) {
228 case CEPH_OSD_OP_STAT:
229 break;
230 case CEPH_OSD_OP_READ: 404 case CEPH_OSD_OP_READ:
231 case CEPH_OSD_OP_WRITE: 405 case CEPH_OSD_OP_STAT:
232 dst->extent.offset =
233 cpu_to_le64(src->extent.offset);
234 dst->extent.length =
235 cpu_to_le64(src->extent.length);
236 dst->extent.truncate_size =
237 cpu_to_le64(src->extent.truncate_size);
238 dst->extent.truncate_seq =
239 cpu_to_le32(src->extent.truncate_seq);
240 break;
241 case CEPH_OSD_OP_CALL:
242 dst->cls.class_len = src->cls.class_len;
243 dst->cls.method_len = src->cls.method_len;
244 dst->cls.indata_len = cpu_to_le32(src->cls.indata_len);
245
246 ceph_pagelist_append(&req->r_trail, src->cls.class_name,
247 src->cls.class_len);
248 ceph_pagelist_append(&req->r_trail, src->cls.method_name,
249 src->cls.method_len);
250 ceph_pagelist_append(&req->r_trail, src->cls.indata,
251 src->cls.indata_len);
252 break;
253 case CEPH_OSD_OP_STARTSYNC:
254 break;
255 case CEPH_OSD_OP_NOTIFY_ACK:
256 case CEPH_OSD_OP_WATCH:
257 dst->watch.cookie = cpu_to_le64(src->watch.cookie);
258 dst->watch.ver = cpu_to_le64(src->watch.ver);
259 dst->watch.flag = src->watch.flag;
260 break;
261 default:
262 pr_err("unrecognized osd opcode %d\n", dst->op);
263 WARN_ON(1);
264 break;
265 case CEPH_OSD_OP_MAPEXT: 406 case CEPH_OSD_OP_MAPEXT:
266 case CEPH_OSD_OP_MASKTRUNC: 407 case CEPH_OSD_OP_MASKTRUNC:
267 case CEPH_OSD_OP_SPARSE_READ: 408 case CEPH_OSD_OP_SPARSE_READ:
268 case CEPH_OSD_OP_NOTIFY: 409 case CEPH_OSD_OP_NOTIFY:
410 case CEPH_OSD_OP_NOTIFY_ACK:
269 case CEPH_OSD_OP_ASSERT_VER: 411 case CEPH_OSD_OP_ASSERT_VER:
412 case CEPH_OSD_OP_WRITE:
270 case CEPH_OSD_OP_WRITEFULL: 413 case CEPH_OSD_OP_WRITEFULL:
271 case CEPH_OSD_OP_TRUNCATE: 414 case CEPH_OSD_OP_TRUNCATE:
272 case CEPH_OSD_OP_ZERO: 415 case CEPH_OSD_OP_ZERO:
273 case CEPH_OSD_OP_DELETE: 416 case CEPH_OSD_OP_DELETE:
274 case CEPH_OSD_OP_APPEND: 417 case CEPH_OSD_OP_APPEND:
418 case CEPH_OSD_OP_STARTSYNC:
275 case CEPH_OSD_OP_SETTRUNC: 419 case CEPH_OSD_OP_SETTRUNC:
276 case CEPH_OSD_OP_TRIMTRUNC: 420 case CEPH_OSD_OP_TRIMTRUNC:
277 case CEPH_OSD_OP_TMAPUP: 421 case CEPH_OSD_OP_TMAPUP:
@@ -279,11 +423,11 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
279 case CEPH_OSD_OP_TMAPGET: 423 case CEPH_OSD_OP_TMAPGET:
280 case CEPH_OSD_OP_CREATE: 424 case CEPH_OSD_OP_CREATE:
281 case CEPH_OSD_OP_ROLLBACK: 425 case CEPH_OSD_OP_ROLLBACK:
426 case CEPH_OSD_OP_WATCH:
282 case CEPH_OSD_OP_OMAPGETKEYS: 427 case CEPH_OSD_OP_OMAPGETKEYS:
283 case CEPH_OSD_OP_OMAPGETVALS: 428 case CEPH_OSD_OP_OMAPGETVALS:
284 case CEPH_OSD_OP_OMAPGETHEADER: 429 case CEPH_OSD_OP_OMAPGETHEADER:
285 case CEPH_OSD_OP_OMAPGETVALSBYKEYS: 430 case CEPH_OSD_OP_OMAPGETVALSBYKEYS:
286 case CEPH_OSD_OP_MODE_RD:
287 case CEPH_OSD_OP_OMAPSETVALS: 431 case CEPH_OSD_OP_OMAPSETVALS:
288 case CEPH_OSD_OP_OMAPSETHEADER: 432 case CEPH_OSD_OP_OMAPSETHEADER:
289 case CEPH_OSD_OP_OMAPCLEAR: 433 case CEPH_OSD_OP_OMAPCLEAR:
@@ -314,113 +458,233 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
314 case CEPH_OSD_OP_RDUNLOCK: 458 case CEPH_OSD_OP_RDUNLOCK:
315 case CEPH_OSD_OP_UPLOCK: 459 case CEPH_OSD_OP_UPLOCK:
316 case CEPH_OSD_OP_DNLOCK: 460 case CEPH_OSD_OP_DNLOCK:
461 case CEPH_OSD_OP_CALL:
317 case CEPH_OSD_OP_PGLS: 462 case CEPH_OSD_OP_PGLS:
318 case CEPH_OSD_OP_PGLS_FILTER: 463 case CEPH_OSD_OP_PGLS_FILTER:
319 pr_err("unsupported osd opcode %s\n", 464 return true;
320 ceph_osd_op_name(dst->op)); 465 default:
321 WARN_ON(1); 466 return false;
322 break;
323 } 467 }
324 dst->payload_len = cpu_to_le32(src->payload_len);
325} 468}
326 469
327/* 470/*
328 * build new request AND message 471 * This is an osd op init function for opcodes that have no data or
329 * 472 * other information associated with them. It also serves as a
473 * common init routine for all the other init functions, below.
330 */ 474 */
331void ceph_osdc_build_request(struct ceph_osd_request *req, 475static struct ceph_osd_req_op *
332 u64 off, u64 len, unsigned int num_ops, 476_osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which,
333 struct ceph_osd_req_op *src_ops, 477 u16 opcode)
334 struct ceph_snap_context *snapc, u64 snap_id,
335 struct timespec *mtime)
336{ 478{
337 struct ceph_msg *msg = req->r_request; 479 struct ceph_osd_req_op *op;
338 struct ceph_osd_req_op *src_op;
339 void *p;
340 size_t msg_size;
341 int flags = req->r_flags;
342 u64 data_len;
343 int i;
344 480
345 req->r_num_ops = num_ops; 481 BUG_ON(which >= osd_req->r_num_ops);
346 req->r_snapid = snap_id; 482 BUG_ON(!osd_req_opcode_valid(opcode));
347 req->r_snapc = ceph_get_snap_context(snapc);
348 483
349 /* encode request */ 484 op = &osd_req->r_ops[which];
350 msg->hdr.version = cpu_to_le16(4); 485 memset(op, 0, sizeof (*op));
486 op->op = opcode;
351 487
352 p = msg->front.iov_base; 488 return op;
353 ceph_encode_32(&p, 1); /* client_inc is always 1 */ 489}
354 req->r_request_osdmap_epoch = p;
355 p += 4;
356 req->r_request_flags = p;
357 p += 4;
358 if (req->r_flags & CEPH_OSD_FLAG_WRITE)
359 ceph_encode_timespec(p, mtime);
360 p += sizeof(struct ceph_timespec);
361 req->r_request_reassert_version = p;
362 p += sizeof(struct ceph_eversion); /* will get filled in */
363 490
364 /* oloc */ 491void osd_req_op_init(struct ceph_osd_request *osd_req,
365 ceph_encode_8(&p, 4); 492 unsigned int which, u16 opcode)
366 ceph_encode_8(&p, 4); 493{
367 ceph_encode_32(&p, 8 + 4 + 4); 494 (void)_osd_req_op_init(osd_req, which, opcode);
368 req->r_request_pool = p; 495}
369 p += 8; 496EXPORT_SYMBOL(osd_req_op_init);
370 ceph_encode_32(&p, -1); /* preferred */
371 ceph_encode_32(&p, 0); /* key len */
372 497
373 ceph_encode_8(&p, 1); 498void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
374 req->r_request_pgid = p; 499 unsigned int which, u16 opcode,
375 p += 8 + 4; 500 u64 offset, u64 length,
376 ceph_encode_32(&p, -1); /* preferred */ 501 u64 truncate_size, u32 truncate_seq)
502{
503 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
504 size_t payload_len = 0;
377 505
378 /* oid */ 506 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
379 ceph_encode_32(&p, req->r_oid_len);
380 memcpy(p, req->r_oid, req->r_oid_len);
381 dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
382 p += req->r_oid_len;
383 507
384 /* ops */ 508 op->extent.offset = offset;
385 ceph_encode_16(&p, num_ops); 509 op->extent.length = length;
386 src_op = src_ops; 510 op->extent.truncate_size = truncate_size;
387 req->r_request_ops = p; 511 op->extent.truncate_seq = truncate_seq;
388 for (i = 0; i < num_ops; i++, src_op++) { 512 if (opcode == CEPH_OSD_OP_WRITE)
389 osd_req_encode_op(req, p, src_op); 513 payload_len += length;
390 p += sizeof(struct ceph_osd_op);
391 }
392 514
393 /* snaps */ 515 op->payload_len = payload_len;
394 ceph_encode_64(&p, req->r_snapid); 516}
395 ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); 517EXPORT_SYMBOL(osd_req_op_extent_init);
396 ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0); 518
397 if (req->r_snapc) { 519void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
398 for (i = 0; i < snapc->num_snaps; i++) { 520 unsigned int which, u64 length)
399 ceph_encode_64(&p, req->r_snapc->snaps[i]); 521{
400 } 522 struct ceph_osd_req_op *op;
523 u64 previous;
524
525 BUG_ON(which >= osd_req->r_num_ops);
526 op = &osd_req->r_ops[which];
527 previous = op->extent.length;
528
529 if (length == previous)
530 return; /* Nothing to do */
531 BUG_ON(length > previous);
532
533 op->extent.length = length;
534 op->payload_len -= previous - length;
535}
536EXPORT_SYMBOL(osd_req_op_extent_update);
537
538void osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
539 u16 opcode, const char *class, const char *method)
540{
541 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
542 struct ceph_pagelist *pagelist;
543 size_t payload_len = 0;
544 size_t size;
545
546 BUG_ON(opcode != CEPH_OSD_OP_CALL);
547
548 pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
549 BUG_ON(!pagelist);
550 ceph_pagelist_init(pagelist);
551
552 op->cls.class_name = class;
553 size = strlen(class);
554 BUG_ON(size > (size_t) U8_MAX);
555 op->cls.class_len = size;
556 ceph_pagelist_append(pagelist, class, size);
557 payload_len += size;
558
559 op->cls.method_name = method;
560 size = strlen(method);
561 BUG_ON(size > (size_t) U8_MAX);
562 op->cls.method_len = size;
563 ceph_pagelist_append(pagelist, method, size);
564 payload_len += size;
565
566 osd_req_op_cls_request_info_pagelist(osd_req, which, pagelist);
567
568 op->cls.argc = 0; /* currently unused */
569
570 op->payload_len = payload_len;
571}
572EXPORT_SYMBOL(osd_req_op_cls_init);
573
574void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
575 unsigned int which, u16 opcode,
576 u64 cookie, u64 version, int flag)
577{
578 struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which, opcode);
579
580 BUG_ON(opcode != CEPH_OSD_OP_NOTIFY_ACK && opcode != CEPH_OSD_OP_WATCH);
581
582 op->watch.cookie = cookie;
583 op->watch.ver = version;
584 if (opcode == CEPH_OSD_OP_WATCH && flag)
585 op->watch.flag = (u8)1;
586}
587EXPORT_SYMBOL(osd_req_op_watch_init);
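
Putting the class-op helpers together; the class and method names and the buffers below are illustrative only (osd_req_op_cls_init() builds the request_info pagelist itself):

        osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, "rbd", "get_id");
        osd_req_op_cls_request_data_pagelist(req, 0, indata_pagelist);
        osd_req_op_cls_response_data_pages(req, 0, reply_pages, reply_len,
                                           0, false, false);
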
588
589static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
590 struct ceph_osd_data *osd_data)
591{
592 u64 length = ceph_osd_data_length(osd_data);
593
594 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
595 BUG_ON(length > (u64) SIZE_MAX);
596 if (length)
597 ceph_msg_data_add_pages(msg, osd_data->pages,
598 length, osd_data->alignment);
599 } else if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGELIST) {
600 BUG_ON(!length);
601 ceph_msg_data_add_pagelist(msg, osd_data->pagelist);
602#ifdef CONFIG_BLOCK
603 } else if (osd_data->type == CEPH_OSD_DATA_TYPE_BIO) {
604 ceph_msg_data_add_bio(msg, osd_data->bio, length);
605#endif
606 } else {
607 BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
401 } 608 }
609}
402 610
403 req->r_request_attempts = p; 611static u64 osd_req_encode_op(struct ceph_osd_request *req,
404 p += 4; 612 struct ceph_osd_op *dst, unsigned int which)
613{
614 struct ceph_osd_req_op *src;
615 struct ceph_osd_data *osd_data;
616 u64 request_data_len = 0;
617 u64 data_length;
405 618
406 data_len = req->r_trail.length; 619 BUG_ON(which >= req->r_num_ops);
407 if (flags & CEPH_OSD_FLAG_WRITE) { 620 src = &req->r_ops[which];
408 req->r_request->hdr.data_off = cpu_to_le16(off); 621 if (WARN_ON(!osd_req_opcode_valid(src->op))) {
409 data_len += len; 622 pr_err("unrecognized osd opcode %d\n", src->op);
623
624 return 0;
410 } 625 }
411 req->r_request->hdr.data_len = cpu_to_le32(data_len);
412 req->r_request->page_alignment = req->r_page_alignment;
413 626
414 BUG_ON(p > msg->front.iov_base + msg->front.iov_len); 627 switch (src->op) {
415 msg_size = p - msg->front.iov_base; 628 case CEPH_OSD_OP_STAT:
416 msg->front.iov_len = msg_size; 629 osd_data = &src->raw_data_in;
417 msg->hdr.front_len = cpu_to_le32(msg_size); 630 ceph_osdc_msg_data_add(req->r_reply, osd_data);
631 break;
632 case CEPH_OSD_OP_READ:
633 case CEPH_OSD_OP_WRITE:
634 if (src->op == CEPH_OSD_OP_WRITE)
635 request_data_len = src->extent.length;
636 dst->extent.offset = cpu_to_le64(src->extent.offset);
637 dst->extent.length = cpu_to_le64(src->extent.length);
638 dst->extent.truncate_size =
639 cpu_to_le64(src->extent.truncate_size);
640 dst->extent.truncate_seq =
641 cpu_to_le32(src->extent.truncate_seq);
642 osd_data = &src->extent.osd_data;
643 if (src->op == CEPH_OSD_OP_WRITE)
644 ceph_osdc_msg_data_add(req->r_request, osd_data);
645 else
646 ceph_osdc_msg_data_add(req->r_reply, osd_data);
647 break;
648 case CEPH_OSD_OP_CALL:
649 dst->cls.class_len = src->cls.class_len;
650 dst->cls.method_len = src->cls.method_len;
651 osd_data = &src->cls.request_info;
652 ceph_osdc_msg_data_add(req->r_request, osd_data);
653 BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGELIST);
654 request_data_len = osd_data->pagelist->length;
655
656 osd_data = &src->cls.request_data;
657 data_length = ceph_osd_data_length(osd_data);
658 if (data_length) {
659 BUG_ON(osd_data->type == CEPH_OSD_DATA_TYPE_NONE);
660 dst->cls.indata_len = cpu_to_le32(data_length);
661 ceph_osdc_msg_data_add(req->r_request, osd_data);
662 src->payload_len += data_length;
663 request_data_len += data_length;
664 }
665 osd_data = &src->cls.response_data;
666 ceph_osdc_msg_data_add(req->r_reply, osd_data);
667 break;
668 case CEPH_OSD_OP_STARTSYNC:
669 break;
670 case CEPH_OSD_OP_NOTIFY_ACK:
671 case CEPH_OSD_OP_WATCH:
672 dst->watch.cookie = cpu_to_le64(src->watch.cookie);
673 dst->watch.ver = cpu_to_le64(src->watch.ver);
674 dst->watch.flag = src->watch.flag;
675 break;
676 default:
677 pr_err("unsupported osd opcode %s\n",
678 ceph_osd_op_name(src->op));
679 WARN_ON(1);
418 680
419 dout("build_request msg_size was %d num_ops %d\n", (int)msg_size, 681 return 0;
420 num_ops); 682 }
421 return; 683 dst->op = cpu_to_le16(src->op);
684 dst->payload_len = cpu_to_le32(src->payload_len);
685
686 return request_data_len;
422} 687}
423EXPORT_SYMBOL(ceph_osdc_build_request);
424 688
425/* 689/*
426 * build new request AND message, calculate layout, and adjust file 690 * build new request AND message, calculate layout, and adjust file
@@ -436,51 +700,63 @@ EXPORT_SYMBOL(ceph_osdc_build_request);
436struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, 700struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
437 struct ceph_file_layout *layout, 701 struct ceph_file_layout *layout,
438 struct ceph_vino vino, 702 struct ceph_vino vino,
439 u64 off, u64 *plen, 703 u64 off, u64 *plen, int num_ops,
440 int opcode, int flags, 704 int opcode, int flags,
441 struct ceph_snap_context *snapc, 705 struct ceph_snap_context *snapc,
442 int do_sync,
443 u32 truncate_seq, 706 u32 truncate_seq,
444 u64 truncate_size, 707 u64 truncate_size,
445 struct timespec *mtime, 708 bool use_mempool)
446 bool use_mempool,
447 int page_align)
448{ 709{
449 struct ceph_osd_req_op ops[2];
450 struct ceph_osd_request *req; 710 struct ceph_osd_request *req;
451 unsigned int num_op = 1; 711 u64 objnum = 0;
712 u64 objoff = 0;
713 u64 objlen = 0;
714 u32 object_size;
715 u64 object_base;
452 int r; 716 int r;
453 717
454 memset(&ops, 0, sizeof ops); 718 BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE);
455
456 ops[0].op = opcode;
457 ops[0].extent.truncate_seq = truncate_seq;
458 ops[0].extent.truncate_size = truncate_size;
459
460 if (do_sync) {
461 ops[1].op = CEPH_OSD_OP_STARTSYNC;
462 num_op++;
463 }
464 719
465 req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, 720 req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
466 GFP_NOFS); 721 GFP_NOFS);
467 if (!req) 722 if (!req)
468 return ERR_PTR(-ENOMEM); 723 return ERR_PTR(-ENOMEM);
724
469 req->r_flags = flags; 725 req->r_flags = flags;
470 726
471 /* calculate max write size */ 727 /* calculate max write size */
472 r = calc_layout(vino, layout, off, plen, req, ops); 728 r = calc_layout(layout, off, plen, &objnum, &objoff, &objlen);
473 if (r < 0) 729 if (r < 0) {
730 ceph_osdc_put_request(req);
474 return ERR_PTR(r); 731 return ERR_PTR(r);
475 req->r_file_layout = *layout; /* keep a copy */ 732 }
476 733
477 /* in case it differs from natural (file) alignment that 734 object_size = le32_to_cpu(layout->fl_object_size);
478 calc_layout filled in for us */ 735 object_base = off - objoff;
479 req->r_num_pages = calc_pages_for(page_align, *plen); 736 if (truncate_size <= object_base) {
480 req->r_page_alignment = page_align; 737 truncate_size = 0;
738 } else {
739 truncate_size -= object_base;
740 if (truncate_size > object_size)
741 truncate_size = object_size;
742 }
743
744 osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
745 truncate_size, truncate_seq);
746
747 /*
748 * A second op in the ops array means the caller wants to
749 * also include a 'startsync' command so that the
750 * osd will flush data quickly.
751 */
752 if (num_ops > 1)
753 osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
754
755 req->r_file_layout = *layout; /* keep a copy */
481 756
482 ceph_osdc_build_request(req, off, *plen, num_op, ops, 757 snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx",
483 snapc, vino.snap, mtime); 758 vino.ino, objnum);
759 req->r_oid_len = strlen(req->r_oid);
484 760
485 return req; 761 return req;
486} 762}
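
The truncate handling above rebases the caller's file-relative truncate_size onto the single object the request targets. A standalone sketch of that clamping, with hypothetical values (not kernel code):

/*
 * Illustrative sketch, not kernel code: rebase a file-relative
 * truncate_size onto one object, as ceph_osdc_new_request() does
 * above.  All values below are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t clamp_truncate(uint64_t off, uint64_t objoff,
			       uint64_t object_size, uint64_t truncate_size)
{
	uint64_t object_base = off - objoff;	/* file offset of object start */

	if (truncate_size <= object_base)
		return 0;		/* object is wholly beyond the truncation point */
	truncate_size -= object_base;	/* make it object-relative */
	if (truncate_size > object_size)
		truncate_size = object_size;	/* never past the object's end */
	return truncate_size;
}

int main(void)
{
	/* 4 MB objects, I/O at file offset 9 MB (1 MB into its object),
	 * file truncated at 10 MB: the object sees a 2 MB truncate_size */
	printf("%llu\n", (unsigned long long)
	       clamp_truncate(9ULL << 20, 1ULL << 20, 4ULL << 20, 10ULL << 20));
	return 0;
}
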
@@ -558,21 +834,46 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
558 struct ceph_osd *osd) 834 struct ceph_osd *osd)
559{ 835{
560 struct ceph_osd_request *req, *nreq; 836 struct ceph_osd_request *req, *nreq;
837 LIST_HEAD(resend);
561 int err; 838 int err;
562 839
563 dout("__kick_osd_requests osd%d\n", osd->o_osd); 840 dout("__kick_osd_requests osd%d\n", osd->o_osd);
564 err = __reset_osd(osdc, osd); 841 err = __reset_osd(osdc, osd);
565 if (err) 842 if (err)
566 return; 843 return;
567 844 /*
845 * Build up a list of requests to resend by traversing the
846 * osd's list of requests. Requests for a given object are
847 * sent in tid order, and that is also the order they're
848 * kept on this list. Therefore all requests that are in
849 * flight will be found first, followed by all requests that
850 * have not yet been sent. And to resend requests while
851 * preserving this order we will want to put any sent
852 * requests back on the front of the osd client's unsent
853 * list.
854 *
855 * So we build a separate ordered list of already-sent
856 * requests for the affected osd and splice it onto the
857 * front of the osd client's unsent list. Once we've seen a
858 * request that has not yet been sent we're done. Those
859 * requests are already sitting right where they belong.
860 */
568 list_for_each_entry(req, &osd->o_requests, r_osd_item) { 861 list_for_each_entry(req, &osd->o_requests, r_osd_item) {
569 list_move(&req->r_req_lru_item, &osdc->req_unsent); 862 if (!req->r_sent)
570 dout("requeued %p tid %llu osd%d\n", req, req->r_tid, 863 break;
864 list_move_tail(&req->r_req_lru_item, &resend);
865 dout("requeueing %p tid %llu osd%d\n", req, req->r_tid,
571 osd->o_osd); 866 osd->o_osd);
572 if (!req->r_linger) 867 if (!req->r_linger)
573 req->r_flags |= CEPH_OSD_FLAG_RETRY; 868 req->r_flags |= CEPH_OSD_FLAG_RETRY;
574 } 869 }
870 list_splice(&resend, &osdc->req_unsent);
575 871
872 /*
873 * Linger requests are re-registered before sending, which
874 * sets up a new tid for each. We add them to the unsent
875 * list at the end to keep things in tid order.
876 */
576 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests, 877 list_for_each_entry_safe(req, nreq, &osd->o_linger_requests,
577 r_linger_osd) { 878 r_linger_osd) {
578 /* 879 /*
@@ -581,8 +882,8 @@ static void __kick_osd_requests(struct ceph_osd_client *osdc,
581 */ 882 */
582 BUG_ON(!list_empty(&req->r_req_lru_item)); 883 BUG_ON(!list_empty(&req->r_req_lru_item));
583 __register_request(osdc, req); 884 __register_request(osdc, req);
584 list_add(&req->r_req_lru_item, &osdc->req_unsent); 885 list_add_tail(&req->r_req_lru_item, &osdc->req_unsent);
585 list_add(&req->r_osd_item, &req->r_osd->o_requests); 886 list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
586 __unregister_linger_request(osdc, req); 887 __unregister_linger_request(osdc, req);
587 dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid, 888 dout("requeued lingering %p tid %llu osd%d\n", req, req->r_tid,
588 osd->o_osd); 889 osd->o_osd);
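
The long comment above reduces to one list operation: already-sent requests are collected in tid order and spliced onto the front of the unsent list, ahead of requests that were never sent. A reduced sketch of the pattern, assuming <linux/list.h>; the real code walks o_requests through a second list_head, so it can use the plain iterator where this single-list version needs the _safe variant:

/* Reduced sketch of the resend-ordering pattern, not the osd client
 * itself.  The real code walks o_requests via r_osd_item while moving
 * r_req_lru_item, so it does not need the _safe iterator used here. */
struct item {
	struct list_head lru;	/* position on the unsent/resend lists */
	bool sent;		/* set once the request has been transmitted */
};

static void requeue_sent_first(struct list_head *osd_list,
			       struct list_head *unsent)
{
	LIST_HEAD(resend);
	struct item *it, *tmp;

	/* osd_list is in tid order, in-flight requests first */
	list_for_each_entry_safe(it, tmp, osd_list, lru) {
		if (!it->sent)
			break;		/* the rest were never sent; leave them */
		list_move_tail(&it->lru, &resend);
	}
	/* already-sent requests go to the front, ahead of the unsent ones */
	list_splice(&resend, unsent);
}
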
@@ -654,8 +955,7 @@ static void put_osd(struct ceph_osd *osd)
654 if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) { 955 if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
655 struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth; 956 struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;
656 957
657 if (ac->ops && ac->ops->destroy_authorizer) 958 ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer);
658 ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer);
659 kfree(osd); 959 kfree(osd);
660 } 960 }
661} 961}
@@ -820,14 +1120,6 @@ static void __register_request(struct ceph_osd_client *osdc,
820 } 1120 }
821} 1121}
822 1122
823static void register_request(struct ceph_osd_client *osdc,
824 struct ceph_osd_request *req)
825{
826 mutex_lock(&osdc->request_mutex);
827 __register_request(osdc, req);
828 mutex_unlock(&osdc->request_mutex);
829}
830
831/* 1123/*
832 * called under osdc->request_mutex 1124 * called under osdc->request_mutex
833 */ 1125 */
@@ -952,8 +1244,8 @@ static int __map_request(struct ceph_osd_client *osdc,
952 int err; 1244 int err;
953 1245
954 dout("map_request %p tid %lld\n", req, req->r_tid); 1246 dout("map_request %p tid %lld\n", req, req->r_tid);
955 err = ceph_calc_object_layout(&pgid, req->r_oid, 1247 err = ceph_calc_ceph_pg(&pgid, req->r_oid, osdc->osdmap,
956 &req->r_file_layout, osdc->osdmap); 1248 ceph_file_layout_pg_pool(req->r_file_layout));
957 if (err) { 1249 if (err) {
958 list_move(&req->r_req_lru_item, &osdc->req_notarget); 1250 list_move(&req->r_req_lru_item, &osdc->req_notarget);
959 return err; 1251 return err;
@@ -1007,10 +1299,10 @@ static int __map_request(struct ceph_osd_client *osdc,
1007 1299
1008 if (req->r_osd) { 1300 if (req->r_osd) {
1009 __remove_osd_from_lru(req->r_osd); 1301 __remove_osd_from_lru(req->r_osd);
1010 list_add(&req->r_osd_item, &req->r_osd->o_requests); 1302 list_add_tail(&req->r_osd_item, &req->r_osd->o_requests);
1011 list_move(&req->r_req_lru_item, &osdc->req_unsent); 1303 list_move_tail(&req->r_req_lru_item, &osdc->req_unsent);
1012 } else { 1304 } else {
1013 list_move(&req->r_req_lru_item, &osdc->req_notarget); 1305 list_move_tail(&req->r_req_lru_item, &osdc->req_notarget);
1014 } 1306 }
1015 err = 1; /* osd or pg changed */ 1307 err = 1; /* osd or pg changed */
1016 1308
@@ -1045,8 +1337,14 @@ static void __send_request(struct ceph_osd_client *osdc,
1045 list_move_tail(&req->r_req_lru_item, &osdc->req_lru); 1337 list_move_tail(&req->r_req_lru_item, &osdc->req_lru);
1046 1338
1047 ceph_msg_get(req->r_request); /* send consumes a ref */ 1339 ceph_msg_get(req->r_request); /* send consumes a ref */
1048 ceph_con_send(&req->r_osd->o_con, req->r_request); 1340
 1341 /* Mark the request unsafe if this is the first time it's being sent. */
1342
1343 if (!req->r_sent && req->r_unsafe_callback)
1344 req->r_unsafe_callback(req, true);
1049 req->r_sent = req->r_osd->o_incarnation; 1345 req->r_sent = req->r_osd->o_incarnation;
1346
1347 ceph_con_send(&req->r_osd->o_con, req->r_request);
1050} 1348}
1051 1349
1052/* 1350/*
@@ -1134,31 +1432,11 @@ static void handle_osds_timeout(struct work_struct *work)
1134 1432
1135static void complete_request(struct ceph_osd_request *req) 1433static void complete_request(struct ceph_osd_request *req)
1136{ 1434{
1137 if (req->r_safe_callback) 1435 if (req->r_unsafe_callback)
1138 req->r_safe_callback(req, NULL); 1436 req->r_unsafe_callback(req, false);
1139 complete_all(&req->r_safe_completion); /* fsync waiter */ 1437 complete_all(&req->r_safe_completion); /* fsync waiter */
1140} 1438}
1141 1439
1142static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid)
1143{
1144 __u8 v;
1145
1146 ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad);
1147 v = ceph_decode_8(p);
1148 if (v > 1) {
1149 pr_warning("do not understand pg encoding %d > 1", v);
1150 return -EINVAL;
1151 }
1152 pgid->pool = ceph_decode_64(p);
1153 pgid->seed = ceph_decode_32(p);
1154 *p += 4;
1155 return 0;
1156
1157bad:
1158 pr_warning("incomplete pg encoding");
1159 return -EINVAL;
1160}
1161
1162/* 1440/*
1163 * handle osd op reply. either call the callback if it is specified, 1441 * handle osd op reply. either call the callback if it is specified,
1164 * or do the completion to wake up the waiting thread. 1442 * or do the completion to wake up the waiting thread.
@@ -1170,7 +1448,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1170 struct ceph_osd_request *req; 1448 struct ceph_osd_request *req;
1171 u64 tid; 1449 u64 tid;
1172 int object_len; 1450 int object_len;
1173 int numops, payload_len, flags; 1451 unsigned int numops;
1452 int payload_len, flags;
1174 s32 result; 1453 s32 result;
1175 s32 retry_attempt; 1454 s32 retry_attempt;
1176 struct ceph_pg pg; 1455 struct ceph_pg pg;
@@ -1178,7 +1457,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1178 u32 reassert_epoch; 1457 u32 reassert_epoch;
1179 u64 reassert_version; 1458 u64 reassert_version;
1180 u32 osdmap_epoch; 1459 u32 osdmap_epoch;
1181 int i; 1460 int already_completed;
1461 u32 bytes;
1462 unsigned int i;
1182 1463
1183 tid = le64_to_cpu(msg->hdr.tid); 1464 tid = le64_to_cpu(msg->hdr.tid);
1184 dout("handle_reply %p tid %llu\n", msg, tid); 1465 dout("handle_reply %p tid %llu\n", msg, tid);
@@ -1191,7 +1472,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1191 ceph_decode_need(&p, end, object_len, bad); 1472 ceph_decode_need(&p, end, object_len, bad);
1192 p += object_len; 1473 p += object_len;
1193 1474
1194 err = __decode_pgid(&p, end, &pg); 1475 err = ceph_decode_pgid(&p, end, &pg);
1195 if (err) 1476 if (err)
1196 goto bad; 1477 goto bad;
1197 1478
@@ -1207,8 +1488,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1207 req = __lookup_request(osdc, tid); 1488 req = __lookup_request(osdc, tid);
1208 if (req == NULL) { 1489 if (req == NULL) {
1209 dout("handle_reply tid %llu dne\n", tid); 1490 dout("handle_reply tid %llu dne\n", tid);
1210 mutex_unlock(&osdc->request_mutex); 1491 goto bad_mutex;
1211 return;
1212 } 1492 }
1213 ceph_osdc_get_request(req); 1493 ceph_osdc_get_request(req);
1214 1494
@@ -1233,9 +1513,10 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1233 payload_len += len; 1513 payload_len += len;
1234 p += sizeof(*op); 1514 p += sizeof(*op);
1235 } 1515 }
1236 if (payload_len != le32_to_cpu(msg->hdr.data_len)) { 1516 bytes = le32_to_cpu(msg->hdr.data_len);
1517 if (payload_len != bytes) {
1237 pr_warning("sum of op payload lens %d != data_len %d", 1518 pr_warning("sum of op payload lens %d != data_len %d",
1238 payload_len, le32_to_cpu(msg->hdr.data_len)); 1519 payload_len, bytes);
1239 goto bad_put; 1520 goto bad_put;
1240 } 1521 }
1241 1522
@@ -1244,21 +1525,9 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1244 for (i = 0; i < numops; i++) 1525 for (i = 0; i < numops; i++)
1245 req->r_reply_op_result[i] = ceph_decode_32(&p); 1526 req->r_reply_op_result[i] = ceph_decode_32(&p);
1246 1527
1247 /*
1248 * if this connection filled our message, drop our reference now, to
1249 * avoid a (safe but slower) revoke later.
1250 */
1251 if (req->r_con_filling_msg == con && req->r_reply == msg) {
1252 dout(" dropping con_filling_msg ref %p\n", con);
1253 req->r_con_filling_msg = NULL;
1254 con->ops->put(con);
1255 }
1256
1257 if (!req->r_got_reply) { 1528 if (!req->r_got_reply) {
1258 unsigned int bytes;
1259 1529
1260 req->r_result = result; 1530 req->r_result = result;
1261 bytes = le32_to_cpu(msg->hdr.data_len);
1262 dout("handle_reply result %d bytes %d\n", req->r_result, 1531 dout("handle_reply result %d bytes %d\n", req->r_result,
1263 bytes); 1532 bytes);
1264 if (req->r_result == 0) 1533 if (req->r_result == 0)
@@ -1286,7 +1555,11 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
1286 ((flags & CEPH_OSD_FLAG_WRITE) == 0)) 1555 ((flags & CEPH_OSD_FLAG_WRITE) == 0))
1287 __unregister_request(osdc, req); 1556 __unregister_request(osdc, req);
1288 1557
1558 already_completed = req->r_completed;
1559 req->r_completed = 1;
1289 mutex_unlock(&osdc->request_mutex); 1560 mutex_unlock(&osdc->request_mutex);
1561 if (already_completed)
1562 goto done;
1290 1563
1291 if (req->r_callback) 1564 if (req->r_callback)
1292 req->r_callback(req, msg); 1565 req->r_callback(req, msg);
@@ -1303,6 +1576,8 @@ done:
1303 1576
1304bad_put: 1577bad_put:
1305 ceph_osdc_put_request(req); 1578 ceph_osdc_put_request(req);
1579bad_mutex:
1580 mutex_unlock(&osdc->request_mutex);
1306bad: 1581bad:
1307 pr_err("corrupt osd_op_reply got %d %d\n", 1582 pr_err("corrupt osd_op_reply got %d %d\n",
1308 (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); 1583 (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));
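
The already_completed/r_completed exchange above is a claim-under-lock pattern: the flag is tested and set while request_mutex is held, so exactly one path gets to run the callbacks, and it runs them with the lock dropped. A reduced sketch with a hypothetical request type:

/* Reduced sketch: run completion callbacks at most once, without
 * holding the lock across them.  Hypothetical request type. */
struct one_shot {
	struct mutex *lock;	/* protects @completed */
	bool completed;
	void (*callback)(struct one_shot *);
};

static void complete_once(struct one_shot *r)
{
	bool already;

	mutex_lock(r->lock);
	already = r->completed;	/* test ... */
	r->completed = true;	/* ... and claim, atomically */
	mutex_unlock(r->lock);
	if (already)
		return;		/* another path already completed it */

	if (r->callback)
		r->callback(r);	/* runs exactly once, lock not held */
}
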
@@ -1736,6 +2011,104 @@ bad:
1736} 2011}
1737 2012
1738/* 2013/*
2014 * build new request AND message
2015 *
2016 */
2017void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off,
2018 struct ceph_snap_context *snapc, u64 snap_id,
2019 struct timespec *mtime)
2020{
2021 struct ceph_msg *msg = req->r_request;
2022 void *p;
2023 size_t msg_size;
2024 int flags = req->r_flags;
2025 u64 data_len;
2026 unsigned int i;
2027
2028 req->r_snapid = snap_id;
2029 req->r_snapc = ceph_get_snap_context(snapc);
2030
2031 /* encode request */
2032 msg->hdr.version = cpu_to_le16(4);
2033
2034 p = msg->front.iov_base;
2035 ceph_encode_32(&p, 1); /* client_inc is always 1 */
2036 req->r_request_osdmap_epoch = p;
2037 p += 4;
2038 req->r_request_flags = p;
2039 p += 4;
2040 if (req->r_flags & CEPH_OSD_FLAG_WRITE)
2041 ceph_encode_timespec(p, mtime);
2042 p += sizeof(struct ceph_timespec);
2043 req->r_request_reassert_version = p;
2044 p += sizeof(struct ceph_eversion); /* will get filled in */
2045
2046 /* oloc */
2047 ceph_encode_8(&p, 4);
2048 ceph_encode_8(&p, 4);
2049 ceph_encode_32(&p, 8 + 4 + 4);
2050 req->r_request_pool = p;
2051 p += 8;
2052 ceph_encode_32(&p, -1); /* preferred */
2053 ceph_encode_32(&p, 0); /* key len */
2054
2055 ceph_encode_8(&p, 1);
2056 req->r_request_pgid = p;
2057 p += 8 + 4;
2058 ceph_encode_32(&p, -1); /* preferred */
2059
2060 /* oid */
2061 ceph_encode_32(&p, req->r_oid_len);
2062 memcpy(p, req->r_oid, req->r_oid_len);
2063 dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len);
2064 p += req->r_oid_len;
2065
2066 /* ops--can imply data */
2067 ceph_encode_16(&p, (u16)req->r_num_ops);
2068 data_len = 0;
2069 for (i = 0; i < req->r_num_ops; i++) {
2070 data_len += osd_req_encode_op(req, p, i);
2071 p += sizeof(struct ceph_osd_op);
2072 }
2073
2074 /* snaps */
2075 ceph_encode_64(&p, req->r_snapid);
2076 ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0);
2077 ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0);
2078 if (req->r_snapc) {
2079 for (i = 0; i < snapc->num_snaps; i++) {
2080 ceph_encode_64(&p, req->r_snapc->snaps[i]);
2081 }
2082 }
2083
2084 req->r_request_attempts = p;
2085 p += 4;
2086
2087 /* data */
2088 if (flags & CEPH_OSD_FLAG_WRITE) {
2089 u16 data_off;
2090
2091 /*
2092 * The header "data_off" is a hint to the receiver
2093 * allowing it to align received data into its
2094 * buffers such that there's no need to re-copy
2095 * it before writing it to disk (direct I/O).
2096 */
2097 data_off = (u16) (off & 0xffff);
2098 req->r_request->hdr.data_off = cpu_to_le16(data_off);
2099 }
2100 req->r_request->hdr.data_len = cpu_to_le32(data_len);
2101
2102 BUG_ON(p > msg->front.iov_base + msg->front.iov_len);
2103 msg_size = p - msg->front.iov_base;
2104 msg->front.iov_len = msg_size;
2105 msg->hdr.front_len = cpu_to_le32(msg_size);
2106
2107 dout("build_request msg_size was %d\n", (int)msg_size);
2108}
2109EXPORT_SYMBOL(ceph_osdc_build_request);
2110
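
ceph_osdc_build_request() above repeatedly records a pointer into the message front (r_request_osdmap_epoch, r_request_flags, and so on) and skips past the field, leaving the final value to be written just before the request goes out. A minimal sketch of that reserve-now/fill-later encoding pattern, with hypothetical field names and a little-endian host assumed:

/* Sketch of the reserve-now/fill-later encoding pattern used above:
 * remember where a field lives, skip past it, and let a later stage
 * write the final value just before sending.  Field names are
 * hypothetical; a little-endian host is assumed for the memcpy. */
#include <stdint.h>
#include <string.h>

struct enc {
	uint8_t *p;		/* write cursor */
	uint8_t *epoch_at;	/* reserved slot, filled in before send */
};

static void encode_header(struct enc *e, uint32_t flags)
{
	e->epoch_at = e->p;	/* reserve 4 bytes for the osdmap epoch */
	e->p += 4;
	memcpy(e->p, &flags, 4);
	e->p += 4;
}

static void fill_epoch(struct enc *e, uint32_t epoch)
{
	memcpy(e->epoch_at, &epoch, 4);	/* late fill-in */
}
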
2111/*
1739 * Register request, send initial attempt. 2112 * Register request, send initial attempt.
1740 */ 2113 */
1741int ceph_osdc_start_request(struct ceph_osd_client *osdc, 2114int ceph_osdc_start_request(struct ceph_osd_client *osdc,
@@ -1744,41 +2117,26 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
1744{ 2117{
1745 int rc = 0; 2118 int rc = 0;
1746 2119
1747 req->r_request->pages = req->r_pages;
1748 req->r_request->nr_pages = req->r_num_pages;
1749#ifdef CONFIG_BLOCK
1750 req->r_request->bio = req->r_bio;
1751#endif
1752 req->r_request->trail = &req->r_trail;
1753
1754 register_request(osdc, req);
1755
1756 down_read(&osdc->map_sem); 2120 down_read(&osdc->map_sem);
1757 mutex_lock(&osdc->request_mutex); 2121 mutex_lock(&osdc->request_mutex);
1758 /* 2122 __register_request(osdc, req);
1759 * a racing kick_requests() may have sent the message for us 2123 WARN_ON(req->r_sent);
1760 * while we dropped request_mutex above, so only send now if 2124 rc = __map_request(osdc, req, 0);
1761 * the request still han't been touched yet. 2125 if (rc < 0) {
1762 */ 2126 if (nofail) {
1763 if (req->r_sent == 0) { 2127 dout("osdc_start_request failed map, "
1764 rc = __map_request(osdc, req, 0); 2128 " will retry %lld\n", req->r_tid);
1765 if (rc < 0) { 2129 rc = 0;
1766 if (nofail) {
1767 dout("osdc_start_request failed map, "
1768 " will retry %lld\n", req->r_tid);
1769 rc = 0;
1770 }
1771 goto out_unlock;
1772 }
1773 if (req->r_osd == NULL) {
1774 dout("send_request %p no up osds in pg\n", req);
1775 ceph_monc_request_next_osdmap(&osdc->client->monc);
1776 } else {
1777 __send_request(osdc, req);
1778 } 2130 }
1779 rc = 0; 2131 goto out_unlock;
1780 } 2132 }
1781 2133 if (req->r_osd == NULL) {
2134 dout("send_request %p no up osds in pg\n", req);
2135 ceph_monc_request_next_osdmap(&osdc->client->monc);
2136 } else {
2137 __send_queued(osdc);
2138 }
2139 rc = 0;
1782out_unlock: 2140out_unlock:
1783 mutex_unlock(&osdc->request_mutex); 2141 mutex_unlock(&osdc->request_mutex);
1784 up_read(&osdc->map_sem); 2142 up_read(&osdc->map_sem);
@@ -1940,18 +2298,22 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
1940 2298
1941 dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino, 2299 dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
1942 vino.snap, off, *plen); 2300 vino.snap, off, *plen);
1943 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 2301 req = ceph_osdc_new_request(osdc, layout, vino, off, plen, 1,
1944 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, 2302 CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
1945 NULL, 0, truncate_seq, truncate_size, NULL, 2303 NULL, truncate_seq, truncate_size,
1946 false, page_align); 2304 false);
1947 if (IS_ERR(req)) 2305 if (IS_ERR(req))
1948 return PTR_ERR(req); 2306 return PTR_ERR(req);
1949 2307
1950 /* it may be a short read due to an object boundary */ 2308 /* it may be a short read due to an object boundary */
1951 req->r_pages = pages;
1952 2309
1953 dout("readpages final extent is %llu~%llu (%d pages align %d)\n", 2310 osd_req_op_extent_osd_data_pages(req, 0,
1954 off, *plen, req->r_num_pages, page_align); 2311 pages, *plen, page_align, false, false);
2312
2313 dout("readpages final extent is %llu~%llu (%llu bytes align %d)\n",
2314 off, *plen, *plen, page_align);
2315
2316 ceph_osdc_build_request(req, off, NULL, vino.snap, NULL);
1955 2317
1956 rc = ceph_osdc_start_request(osdc, req, false); 2318 rc = ceph_osdc_start_request(osdc, req, false);
1957 if (!rc) 2319 if (!rc)
@@ -1978,20 +2340,21 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
1978 int rc = 0; 2340 int rc = 0;
1979 int page_align = off & ~PAGE_MASK; 2341 int page_align = off & ~PAGE_MASK;
1980 2342
1981 BUG_ON(vino.snap != CEPH_NOSNAP); 2343 BUG_ON(vino.snap != CEPH_NOSNAP); /* snapshots aren't writeable */
1982 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 2344 req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 1,
1983 CEPH_OSD_OP_WRITE, 2345 CEPH_OSD_OP_WRITE,
1984 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, 2346 CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
1985 snapc, 0, 2347 snapc, truncate_seq, truncate_size,
1986 truncate_seq, truncate_size, mtime, 2348 true);
1987 true, page_align);
1988 if (IS_ERR(req)) 2349 if (IS_ERR(req))
1989 return PTR_ERR(req); 2350 return PTR_ERR(req);
1990 2351
1991 /* it may be a short write due to an object boundary */ 2352 /* it may be a short write due to an object boundary */
1992 req->r_pages = pages; 2353 osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
1993 dout("writepages %llu~%llu (%d pages)\n", off, len, 2354 false, false);
1994 req->r_num_pages); 2355 dout("writepages %llu~%llu (%llu bytes)\n", off, len, len);
2356
2357 ceph_osdc_build_request(req, off, snapc, CEPH_NOSNAP, mtime);
1995 2358
1996 rc = ceph_osdc_start_request(osdc, req, true); 2359 rc = ceph_osdc_start_request(osdc, req, true);
1997 if (!rc) 2360 if (!rc)
@@ -2005,6 +2368,26 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
2005} 2368}
2006EXPORT_SYMBOL(ceph_osdc_writepages); 2369EXPORT_SYMBOL(ceph_osdc_writepages);
2007 2370
2371int ceph_osdc_setup(void)
2372{
2373 BUG_ON(ceph_osd_request_cache);
2374 ceph_osd_request_cache = kmem_cache_create("ceph_osd_request",
2375 sizeof (struct ceph_osd_request),
2376 __alignof__(struct ceph_osd_request),
2377 0, NULL);
2378
2379 return ceph_osd_request_cache ? 0 : -ENOMEM;
2380}
2381EXPORT_SYMBOL(ceph_osdc_setup);
2382
2383void ceph_osdc_cleanup(void)
2384{
2385 BUG_ON(!ceph_osd_request_cache);
2386 kmem_cache_destroy(ceph_osd_request_cache);
2387 ceph_osd_request_cache = NULL;
2388}
2389EXPORT_SYMBOL(ceph_osdc_cleanup);
2390
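
ceph_osdc_setup() and ceph_osdc_cleanup() are the usual create-once/destroy-once pairing for a slab cache. A kernel-style sketch of how a caller would pair them at module init and exit; the error-handling structure is illustrative, not libceph's actual init path:

/* Kernel-style sketch: pair the cache setup/teardown at module
 * init/exit.  Illustrative only. */
static int __init example_init(void)
{
	int ret;

	ret = ceph_osdc_setup();	/* creates the ceph_osd_request cache */
	if (ret)
		return ret;
	/* ... register other subsystems; on any later failure, undo
	 * with ceph_osdc_cleanup() before returning the error ... */
	return 0;
}

static void __exit example_exit(void)
{
	ceph_osdc_cleanup();		/* destroys the cache */
}
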
2008/* 2391/*
2009 * handle incoming message 2392 * handle incoming message
2010 */ 2393 */
@@ -2064,13 +2447,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2064 goto out; 2447 goto out;
2065 } 2448 }
2066 2449
2067 if (req->r_con_filling_msg) { 2450 if (req->r_reply->con)
2068 dout("%s revoking msg %p from old con %p\n", __func__, 2451 dout("%s revoking msg %p from old con %p\n", __func__,
2069 req->r_reply, req->r_con_filling_msg); 2452 req->r_reply, req->r_reply->con);
2070 ceph_msg_revoke_incoming(req->r_reply); 2453 ceph_msg_revoke_incoming(req->r_reply);
2071 req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
2072 req->r_con_filling_msg = NULL;
2073 }
2074 2454
2075 if (front > req->r_reply->front.iov_len) { 2455 if (front > req->r_reply->front.iov_len) {
2076 pr_warning("get_reply front %d > preallocated %d\n", 2456 pr_warning("get_reply front %d > preallocated %d\n",
@@ -2084,26 +2464,29 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
2084 m = ceph_msg_get(req->r_reply); 2464 m = ceph_msg_get(req->r_reply);
2085 2465
2086 if (data_len > 0) { 2466 if (data_len > 0) {
2087 int want = calc_pages_for(req->r_page_alignment, data_len); 2467 struct ceph_osd_data *osd_data;
2088 2468
2089 if (req->r_pages && unlikely(req->r_num_pages < want)) { 2469 /*
2090 pr_warning("tid %lld reply has %d bytes %d pages, we" 2470 * XXX This is assuming there is only one op containing
2091 " had only %d pages ready\n", tid, data_len, 2471 * XXX page data. Probably OK for reads, but this
2092 want, req->r_num_pages); 2472 * XXX ought to be done more generally.
2093 *skip = 1; 2473 */
2094 ceph_msg_put(m); 2474 osd_data = osd_req_op_extent_osd_data(req, 0);
2095 m = NULL; 2475 if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
2096 goto out; 2476 if (osd_data->pages &&
2477 unlikely(osd_data->length < data_len)) {
2478
2479 pr_warning("tid %lld reply has %d bytes "
2480 "we had only %llu bytes ready\n",
2481 tid, data_len, osd_data->length);
2482 *skip = 1;
2483 ceph_msg_put(m);
2484 m = NULL;
2485 goto out;
2486 }
2097 } 2487 }
2098 m->pages = req->r_pages;
2099 m->nr_pages = req->r_num_pages;
2100 m->page_alignment = req->r_page_alignment;
2101#ifdef CONFIG_BLOCK
2102 m->bio = req->r_bio;
2103#endif
2104 } 2488 }
2105 *skip = 0; 2489 *skip = 0;
2106 req->r_con_filling_msg = con->ops->get(con);
2107 dout("get_reply tid %lld %p\n", tid, m); 2490 dout("get_reply tid %lld %p\n", tid, m);
2108 2491
2109out: 2492out:
@@ -2168,13 +2551,17 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
2168 struct ceph_auth_handshake *auth = &o->o_auth; 2551 struct ceph_auth_handshake *auth = &o->o_auth;
2169 2552
2170 if (force_new && auth->authorizer) { 2553 if (force_new && auth->authorizer) {
2171 if (ac->ops && ac->ops->destroy_authorizer) 2554 ceph_auth_destroy_authorizer(ac, auth->authorizer);
2172 ac->ops->destroy_authorizer(ac, auth->authorizer);
2173 auth->authorizer = NULL; 2555 auth->authorizer = NULL;
2174 } 2556 }
2175 if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) { 2557 if (!auth->authorizer) {
2176 int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD, 2558 int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
2177 auth); 2559 auth);
2560 if (ret)
2561 return ERR_PTR(ret);
2562 } else {
2563 int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
2564 auth);
2178 if (ret) 2565 if (ret)
2179 return ERR_PTR(ret); 2566 return ERR_PTR(ret);
2180 } 2567 }
@@ -2190,11 +2577,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
2190 struct ceph_osd_client *osdc = o->o_osdc; 2577 struct ceph_osd_client *osdc = o->o_osdc;
2191 struct ceph_auth_client *ac = osdc->client->monc.auth; 2578 struct ceph_auth_client *ac = osdc->client->monc.auth;
2192 2579
2193 /* 2580 return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
2194 * XXX If ac->ops or ac->ops->verify_authorizer_reply is null,
2195 * XXX which do we do: succeed or fail?
2196 */
2197 return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len);
2198} 2581}
2199 2582
2200static int invalidate_authorizer(struct ceph_connection *con) 2583static int invalidate_authorizer(struct ceph_connection *con)
@@ -2203,9 +2586,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
2203 struct ceph_osd_client *osdc = o->o_osdc; 2586 struct ceph_osd_client *osdc = o->o_osdc;
2204 struct ceph_auth_client *ac = osdc->client->monc.auth; 2587 struct ceph_auth_client *ac = osdc->client->monc.auth;
2205 2588
2206 if (ac->ops && ac->ops->invalidate_authorizer) 2589 ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
2207 ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);
2208
2209 return ceph_monc_validate_auth(&osdc->client->monc); 2590 return ceph_monc_validate_auth(&osdc->client->monc);
2210} 2591}
2211 2592
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 4543b9aba40c..603ddd92db19 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -654,24 +654,6 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
654 return 0; 654 return 0;
655} 655}
656 656
657static int __decode_pgid(void **p, void *end, struct ceph_pg *pg)
658{
659 u8 v;
660
661 ceph_decode_need(p, end, 1+8+4+4, bad);
662 v = ceph_decode_8(p);
663 if (v != 1)
664 goto bad;
665 pg->pool = ceph_decode_64(p);
666 pg->seed = ceph_decode_32(p);
667 *p += 4; /* skip preferred */
668 return 0;
669
670bad:
671 dout("error decoding pgid\n");
672 return -EINVAL;
673}
674
675/* 657/*
676 * decode a full map. 658 * decode a full map.
677 */ 659 */
@@ -765,7 +747,7 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
765 struct ceph_pg pgid; 747 struct ceph_pg pgid;
766 struct ceph_pg_mapping *pg; 748 struct ceph_pg_mapping *pg;
767 749
768 err = __decode_pgid(p, end, &pgid); 750 err = ceph_decode_pgid(p, end, &pgid);
769 if (err) 751 if (err)
770 goto bad; 752 goto bad;
771 ceph_decode_need(p, end, sizeof(u32), bad); 753 ceph_decode_need(p, end, sizeof(u32), bad);
@@ -983,7 +965,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
983 struct ceph_pg pgid; 965 struct ceph_pg pgid;
984 u32 pglen; 966 u32 pglen;
985 967
986 err = __decode_pgid(p, end, &pgid); 968 err = ceph_decode_pgid(p, end, &pgid);
987 if (err) 969 if (err)
988 goto bad; 970 goto bad;
989 ceph_decode_need(p, end, sizeof(u32), bad); 971 ceph_decode_need(p, end, sizeof(u32), bad);
@@ -1111,27 +1093,22 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping);
1111 * calculate an object layout (i.e. pgid) from an oid, 1093 * calculate an object layout (i.e. pgid) from an oid,
1112 * file_layout, and osdmap 1094 * file_layout, and osdmap
1113 */ 1095 */
1114int ceph_calc_object_layout(struct ceph_pg *pg, 1096int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
1115 const char *oid, 1097 struct ceph_osdmap *osdmap, uint64_t pool)
1116 struct ceph_file_layout *fl,
1117 struct ceph_osdmap *osdmap)
1118{ 1098{
1119 unsigned int num, num_mask; 1099 struct ceph_pg_pool_info *pool_info;
1120 struct ceph_pg_pool_info *pool;
1121 1100
1122 BUG_ON(!osdmap); 1101 BUG_ON(!osdmap);
1123 pg->pool = le32_to_cpu(fl->fl_pg_pool); 1102 pool_info = __lookup_pg_pool(&osdmap->pg_pools, pool);
1124 pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool); 1103 if (!pool_info)
1125 if (!pool)
1126 return -EIO; 1104 return -EIO;
1127 pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); 1105 pg->pool = pool;
1128 num = pool->pg_num; 1106 pg->seed = ceph_str_hash(pool_info->object_hash, oid, strlen(oid));
1129 num_mask = pool->pg_num_mask;
1130 1107
1131 dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed); 1108 dout("%s '%s' pgid %lld.%x\n", __func__, oid, pg->pool, pg->seed);
1132 return 0; 1109 return 0;
1133} 1110}
1134EXPORT_SYMBOL(ceph_calc_object_layout); 1111EXPORT_SYMBOL(ceph_calc_ceph_pg);
1135 1112
1136/* 1113/*
1137 * Calculate raw osd vector for the given pgid. Return pointer to osd 1114 * Calculate raw osd vector for the given pgid. Return pointer to osd
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
new file mode 100644
index 000000000000..154683f5f14c
--- /dev/null
+++ b/net/ceph/snapshot.c
@@ -0,0 +1,78 @@
1/*
2 * snapshot.c Ceph snapshot context utility routines (part of libceph)
3 *
4 * Copyright (C) 2013 Inktank Storage, Inc.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301, USA.
19 */
20
21#include <stddef.h>
22
23#include <linux/types.h>
24#include <linux/export.h>
25#include <linux/ceph/libceph.h>
26
27/*
28 * Ceph snapshot contexts are reference counted objects, and the
29 * returned structure holds a single reference. Acquire additional
30 * references with ceph_get_snap_context(), and release them with
31 * ceph_put_snap_context(). When the reference count reaches zero
32 * the entire structure is freed.
33 */
34
35/*
36 * Create a new ceph snapshot context large enough to hold the
37 * indicated number of snapshot ids (which can be 0). Caller has
38 * to fill in snapc->seq and snapc->snaps[0..snap_count-1].
39 *
40 * Returns a null pointer if an error occurs.
41 */
42struct ceph_snap_context *ceph_create_snap_context(u32 snap_count,
43 gfp_t gfp_flags)
44{
45 struct ceph_snap_context *snapc;
46 size_t size;
47
48 size = sizeof (struct ceph_snap_context);
49 size += snap_count * sizeof (snapc->snaps[0]);
50 snapc = kzalloc(size, gfp_flags);
51 if (!snapc)
52 return NULL;
53
54 atomic_set(&snapc->nref, 1);
55 snapc->num_snaps = snap_count;
56
57 return snapc;
58}
59EXPORT_SYMBOL(ceph_create_snap_context);
60
61struct ceph_snap_context *ceph_get_snap_context(struct ceph_snap_context *sc)
62{
63 if (sc)
64 atomic_inc(&sc->nref);
65 return sc;
66}
67EXPORT_SYMBOL(ceph_get_snap_context);
68
69void ceph_put_snap_context(struct ceph_snap_context *sc)
70{
71 if (!sc)
72 return;
73 if (atomic_dec_and_test(&sc->nref)) {
74 /*printk(" deleting snap_context %p\n", sc);*/
75 kfree(sc);
76 }
77}
78EXPORT_SYMBOL(ceph_put_snap_context);
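
The reference-counting contract documented above is the standard create/get/put pattern. A short sketch of a caller sharing one context between two owners (hypothetical caller, GFP_KERNEL assumed):

/* Sketch: a typical snap context lifetime in a hypothetical caller. */
static int snapc_demo(void)
{
	struct ceph_snap_context *snapc, *extra_ref;

	snapc = ceph_create_snap_context(2, GFP_KERNEL);  /* nref == 1 */
	if (!snapc)
		return -ENOMEM;
	snapc->seq = 1;			/* caller fills in seq ... */
	snapc->snaps[0] = 10;		/* ... and the snapshot ids */
	snapc->snaps[1] = 5;

	extra_ref = ceph_get_snap_context(snapc);	/* nref == 2 */
	ceph_put_snap_context(extra_ref);		/* nref == 1 */
	ceph_put_snap_context(snapc);			/* freed here */
	return 0;
}
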
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3f9dc6..b71423db7785 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn
78 return autoremove_wake_function(wait, mode, sync, key); 78 return autoremove_wake_function(wait, mode, sync, key);
79} 79}
80/* 80/*
81 * Wait for a packet.. 81 * Wait for the last received packet to be different from skb
82 */ 82 */
83static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) 83static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
84 const struct sk_buff *skb)
84{ 85{
85 int error; 86 int error;
86 DEFINE_WAIT_FUNC(wait, receiver_wake_function); 87 DEFINE_WAIT_FUNC(wait, receiver_wake_function);
@@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
92 if (error) 93 if (error)
93 goto out_err; 94 goto out_err;
94 95
95 if (!skb_queue_empty(&sk->sk_receive_queue)) 96 if (sk->sk_receive_queue.prev != skb)
96 goto out; 97 goto out;
97 98
98 /* Socket shut down? */ 99 /* Socket shut down? */
@@ -131,9 +132,9 @@ out_noerr:
131 * __skb_recv_datagram - Receive a datagram skbuff 132 * __skb_recv_datagram - Receive a datagram skbuff
132 * @sk: socket 133 * @sk: socket
133 * @flags: MSG_ flags 134 * @flags: MSG_ flags
135 * @peeked: returns non-zero if this packet has been seen before
134 * @off: an offset in bytes to peek skb from. Returns an offset 136 * @off: an offset in bytes to peek skb from. Returns an offset
135 * within an skb where data actually starts 137 * within an skb where data actually starts
136 * @peeked: returns non-zero if this packet has been seen before
137 * @err: error code returned 138 * @err: error code returned
138 * 139 *
139 * Get a datagram skbuff, understands the peeking, nonblocking wakeups 140 * Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -161,7 +162,7 @@ out_noerr:
161struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, 162struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
162 int *peeked, int *off, int *err) 163 int *peeked, int *off, int *err)
163{ 164{
164 struct sk_buff *skb; 165 struct sk_buff *skb, *last;
165 long timeo; 166 long timeo;
166 /* 167 /*
167 * Caller is allowed not to check sk->sk_err before skb_recv_datagram() 168 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
@@ -182,13 +183,17 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
182 */ 183 */
183 unsigned long cpu_flags; 184 unsigned long cpu_flags;
184 struct sk_buff_head *queue = &sk->sk_receive_queue; 185 struct sk_buff_head *queue = &sk->sk_receive_queue;
186 int _off = *off;
185 187
188 last = (struct sk_buff *)queue;
186 spin_lock_irqsave(&queue->lock, cpu_flags); 189 spin_lock_irqsave(&queue->lock, cpu_flags);
187 skb_queue_walk(queue, skb) { 190 skb_queue_walk(queue, skb) {
191 last = skb;
188 *peeked = skb->peeked; 192 *peeked = skb->peeked;
189 if (flags & MSG_PEEK) { 193 if (flags & MSG_PEEK) {
190 if (*off >= skb->len && skb->len) { 194 if (_off >= skb->len && (skb->len || _off ||
191 *off -= skb->len; 195 skb->peeked)) {
196 _off -= skb->len;
192 continue; 197 continue;
193 } 198 }
194 skb->peeked = 1; 199 skb->peeked = 1;
@@ -197,6 +202,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
197 __skb_unlink(skb, queue); 202 __skb_unlink(skb, queue);
198 203
199 spin_unlock_irqrestore(&queue->lock, cpu_flags); 204 spin_unlock_irqrestore(&queue->lock, cpu_flags);
205 *off = _off;
200 return skb; 206 return skb;
201 } 207 }
202 spin_unlock_irqrestore(&queue->lock, cpu_flags); 208 spin_unlock_irqrestore(&queue->lock, cpu_flags);
@@ -206,7 +212,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
206 if (!timeo) 212 if (!timeo)
207 goto no_packet; 213 goto no_packet;
208 214
209 } while (!wait_for_packet(sk, err, &timeo)); 215 } while (!wait_for_more_packets(sk, err, &timeo, last));
210 216
211 return NULL; 217 return NULL;
212 218
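
The rework above changes the wakeup predicate from "receive queue non-empty" to "the queue tail differs from the last skb we examined", so a MSG_PEEK loop no longer spins on packets it has already seen; `last` starts out pointing at the queue head itself, which also covers the empty case. A reduced sketch of the predicate:

/* Reduced sketch of the new wait predicate. */
static bool new_data_arrived(struct sock *sk, const struct sk_buff *last)
{
	/* The tail pointer moves only when a packet is queued; a bare
	 * "queue non-empty" test would wake the peeker forever on
	 * packets it has already looked at. */
	return sk->sk_receive_queue.prev != last;
}
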
@@ -749,7 +755,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
749 755
750 /* exceptional events? */ 756 /* exceptional events? */
751 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 757 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
752 mask |= POLLERR; 758 mask |= POLLERR |
759 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
760
753 if (sk->sk_shutdown & RCV_SHUTDOWN) 761 if (sk->sk_shutdown & RCV_SHUTDOWN)
754 mask |= POLLRDHUP | POLLIN | POLLRDNORM; 762 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
755 if (sk->sk_shutdown == SHUTDOWN_MASK) 763 if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/core/dev.c b/net/core/dev.c
index e7d68ed8aafe..fc1e289397f5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -200,7 +200,7 @@ static inline void rps_unlock(struct softnet_data *sd)
200} 200}
201 201
202/* Device list insertion */ 202/* Device list insertion */
203static int list_netdevice(struct net_device *dev) 203static void list_netdevice(struct net_device *dev)
204{ 204{
205 struct net *net = dev_net(dev); 205 struct net *net = dev_net(dev);
206 206
@@ -214,8 +214,6 @@ static int list_netdevice(struct net_device *dev)
214 write_unlock_bh(&dev_base_lock); 214 write_unlock_bh(&dev_base_lock);
215 215
216 dev_base_seq_inc(net); 216 dev_base_seq_inc(net);
217
218 return 0;
219} 217}
220 218
221/* Device list removal 219/* Device list removal
@@ -2148,6 +2146,9 @@ static void skb_warn_bad_offload(const struct sk_buff *skb)
2148 struct net_device *dev = skb->dev; 2146 struct net_device *dev = skb->dev;
2149 const char *driver = ""; 2147 const char *driver = "";
2150 2148
2149 if (!net_ratelimit())
2150 return;
2151
2151 if (dev && dev->dev.parent) 2152 if (dev && dev->dev.parent)
2152 driver = dev_driver_string(dev->dev.parent); 2153 driver = dev_driver_string(dev->dev.parent);
2153 2154
@@ -2207,30 +2208,51 @@ out:
2207} 2208}
2208EXPORT_SYMBOL(skb_checksum_help); 2209EXPORT_SYMBOL(skb_checksum_help);
2209 2210
2210/** 2211__be16 skb_network_protocol(struct sk_buff *skb)
2211 * skb_mac_gso_segment - mac layer segmentation handler.
2212 * @skb: buffer to segment
2213 * @features: features for the output path (see dev->features)
2214 */
2215struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2216 netdev_features_t features)
2217{ 2212{
2218 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2219 struct packet_offload *ptype;
2220 __be16 type = skb->protocol; 2213 __be16 type = skb->protocol;
2221 int vlan_depth = ETH_HLEN; 2214 int vlan_depth = ETH_HLEN;
2222 2215
2223 while (type == htons(ETH_P_8021Q)) { 2216 /* Tunnel gso handlers can set protocol to ethernet. */
2217 if (type == htons(ETH_P_TEB)) {
2218 struct ethhdr *eth;
2219
2220 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
2221 return 0;
2222
2223 eth = (struct ethhdr *)skb_mac_header(skb);
2224 type = eth->h_proto;
2225 }
2226
2227 while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
2224 struct vlan_hdr *vh; 2228 struct vlan_hdr *vh;
2225 2229
2226 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) 2230 if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
2227 return ERR_PTR(-EINVAL); 2231 return 0;
2228 2232
2229 vh = (struct vlan_hdr *)(skb->data + vlan_depth); 2233 vh = (struct vlan_hdr *)(skb->data + vlan_depth);
2230 type = vh->h_vlan_encapsulated_proto; 2234 type = vh->h_vlan_encapsulated_proto;
2231 vlan_depth += VLAN_HLEN; 2235 vlan_depth += VLAN_HLEN;
2232 } 2236 }
2233 2237
2238 return type;
2239}
2240
2241/**
2242 * skb_mac_gso_segment - mac layer segmentation handler.
2243 * @skb: buffer to segment
2244 * @features: features for the output path (see dev->features)
2245 */
2246struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2247 netdev_features_t features)
2248{
2249 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2250 struct packet_offload *ptype;
2251 __be16 type = skb_network_protocol(skb);
2252
2253 if (unlikely(!type))
2254 return ERR_PTR(-EINVAL);
2255
2234 __skb_pull(skb, skb->mac_len); 2256 __skb_pull(skb, skb->mac_len);
2235 2257
2236 rcu_read_lock(); 2258 rcu_read_lock();
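
skb_network_protocol() above walks any stack of 802.1Q/802.1ad tags (and a possible ETH_P_TEB inner ethernet header) to find the real network-layer protocol. A user-space sketch of the same tag-walking loop over a raw frame; the helper name is hypothetical:

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>
#include <stddef.h>

#define ETH_P_8021Q	0x8100
#define ETH_P_8021AD	0x88A8

/* Sketch: find a frame's inner ethertype, skipping stacked VLAN tags. */
static uint16_t inner_ethertype(const uint8_t *frame, size_t len)
{
	size_t off = 12;		/* skip destination + source MAC */
	uint16_t be;

	for (;;) {
		if (off + 2 > len)
			return 0;	/* truncated frame */
		memcpy(&be, frame + off, 2);
		switch (ntohs(be)) {
		case ETH_P_8021Q:
		case ETH_P_8021AD:
			off += 4;	/* skip TPID + TCI, read next type */
			break;
		default:
			return ntohs(be);
		}
	}
}
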
@@ -2397,24 +2419,12 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
2397 return 0; 2419 return 0;
2398} 2420}
2399 2421
2400static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
2401{
2402 return ((features & NETIF_F_GEN_CSUM) ||
2403 ((features & NETIF_F_V4_CSUM) &&
2404 protocol == htons(ETH_P_IP)) ||
2405 ((features & NETIF_F_V6_CSUM) &&
2406 protocol == htons(ETH_P_IPV6)) ||
2407 ((features & NETIF_F_FCOE_CRC) &&
2408 protocol == htons(ETH_P_FCOE)));
2409}
2410
2411static netdev_features_t harmonize_features(struct sk_buff *skb, 2422static netdev_features_t harmonize_features(struct sk_buff *skb,
2412 __be16 protocol, netdev_features_t features) 2423 __be16 protocol, netdev_features_t features)
2413{ 2424{
2414 if (skb->ip_summed != CHECKSUM_NONE && 2425 if (skb->ip_summed != CHECKSUM_NONE &&
2415 !can_checksum_protocol(features, protocol)) { 2426 !can_checksum_protocol(features, protocol)) {
2416 features &= ~NETIF_F_ALL_CSUM; 2427 features &= ~NETIF_F_ALL_CSUM;
2417 features &= ~NETIF_F_SG;
2418 } else if (illegal_highdma(skb->dev, skb)) { 2428 } else if (illegal_highdma(skb->dev, skb)) {
2419 features &= ~NETIF_F_SG; 2429 features &= ~NETIF_F_SG;
2420 } 2430 }
@@ -2430,20 +2440,22 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
2430 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs) 2440 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
2431 features &= ~NETIF_F_GSO_MASK; 2441 features &= ~NETIF_F_GSO_MASK;
2432 2442
2433 if (protocol == htons(ETH_P_8021Q)) { 2443 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
2434 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; 2444 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2435 protocol = veh->h_vlan_encapsulated_proto; 2445 protocol = veh->h_vlan_encapsulated_proto;
2436 } else if (!vlan_tx_tag_present(skb)) { 2446 } else if (!vlan_tx_tag_present(skb)) {
2437 return harmonize_features(skb, protocol, features); 2447 return harmonize_features(skb, protocol, features);
2438 } 2448 }
2439 2449
2440 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); 2450 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
2451 NETIF_F_HW_VLAN_STAG_TX);
2441 2452
2442 if (protocol != htons(ETH_P_8021Q)) { 2453 if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) {
2443 return harmonize_features(skb, protocol, features); 2454 return harmonize_features(skb, protocol, features);
2444 } else { 2455 } else {
2445 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | 2456 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
2446 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; 2457 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
2458 NETIF_F_HW_VLAN_STAG_TX;
2447 return harmonize_features(skb, protocol, features); 2459 return harmonize_features(skb, protocol, features);
2448 } 2460 }
2449} 2461}
@@ -2455,7 +2467,7 @@ EXPORT_SYMBOL(netif_skb_features);
2455 * 2. skb is fragmented and the device does not support SG. 2467 * 2. skb is fragmented and the device does not support SG.
2456 */ 2468 */
2457static inline int skb_needs_linearize(struct sk_buff *skb, 2469static inline int skb_needs_linearize(struct sk_buff *skb,
2458 int features) 2470 netdev_features_t features)
2459{ 2471{
2460 return skb_is_nonlinear(skb) && 2472 return skb_is_nonlinear(skb) &&
2461 ((skb_has_frag_list(skb) && 2473 ((skb_has_frag_list(skb) &&
@@ -2484,8 +2496,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2484 features = netif_skb_features(skb); 2496 features = netif_skb_features(skb);
2485 2497
2486 if (vlan_tx_tag_present(skb) && 2498 if (vlan_tx_tag_present(skb) &&
2487 !(features & NETIF_F_HW_VLAN_TX)) { 2499 !vlan_hw_offload_capable(features, skb->vlan_proto)) {
2488 skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); 2500 skb = __vlan_put_tag(skb, skb->vlan_proto,
2501 vlan_tx_tag_get(skb));
2489 if (unlikely(!skb)) 2502 if (unlikely(!skb))
2490 goto out; 2503 goto out;
2491 2504
@@ -2544,13 +2557,6 @@ gso:
2544 skb->next = nskb->next; 2557 skb->next = nskb->next;
2545 nskb->next = NULL; 2558 nskb->next = NULL;
2546 2559
2547 /*
2548 * If device doesn't need nskb->dst, release it right now while
2549 * its hot in this cpu cache
2550 */
2551 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2552 skb_dst_drop(nskb);
2553
2554 if (!list_empty(&ptype_all)) 2560 if (!list_empty(&ptype_all))
2555 dev_queue_xmit_nit(nskb, dev); 2561 dev_queue_xmit_nit(nskb, dev);
2556 2562
@@ -2570,8 +2576,11 @@ gso:
2570 } while (skb->next); 2576 } while (skb->next);
2571 2577
2572out_kfree_gso_skb: 2578out_kfree_gso_skb:
2573 if (likely(skb->next == NULL)) 2579 if (likely(skb->next == NULL)) {
2574 skb->destructor = DEV_GSO_CB(skb)->destructor; 2580 skb->destructor = DEV_GSO_CB(skb)->destructor;
2581 consume_skb(skb);
2582 return rc;
2583 }
2575out_kfree_skb: 2584out_kfree_skb:
2576 kfree_skb(skb); 2585 kfree_skb(skb);
2577out: 2586out:
@@ -2589,6 +2598,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
2589 */ 2598 */
2590 if (shinfo->gso_size) { 2599 if (shinfo->gso_size) {
2591 unsigned int hdr_len; 2600 unsigned int hdr_len;
2601 u16 gso_segs = shinfo->gso_segs;
2592 2602
2593 /* mac layer + network layer */ 2603 /* mac layer + network layer */
2594 hdr_len = skb_transport_header(skb) - skb_mac_header(skb); 2604 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
@@ -2598,7 +2608,12 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
2598 hdr_len += tcp_hdrlen(skb); 2608 hdr_len += tcp_hdrlen(skb);
2599 else 2609 else
2600 hdr_len += sizeof(struct udphdr); 2610 hdr_len += sizeof(struct udphdr);
2601 qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; 2611
2612 if (shinfo->gso_type & SKB_GSO_DODGY)
2613 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
2614 shinfo->gso_size);
2615
2616 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
2602 } 2617 }
2603} 2618}
2604 2619
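
The SKB_GSO_DODGY branch above recomputes gso_segs from the payload length rather than trusting a value set by an untrusted source, and the backlog estimate then charges one extra copy of the headers per additional segment. A worked example with hypothetical numbers:

/* Sketch of the qdisc_pkt_len_init() arithmetic, hypothetical values. */
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int skb_len = 65226;	/* total bytes in the gso skb */
	unsigned int hdr_len = 66;	/* mac + ip + tcp headers */
	unsigned int gso_size = 1448;	/* payload bytes per segment */

	/* an untrusted gso_segs is recomputed from the payload length */
	unsigned int gso_segs = DIV_ROUND_UP(skb_len - hdr_len, gso_size);

	/* every extra segment carries one more copy of the headers */
	unsigned int pkt_len = skb_len + (gso_segs - 1) * hdr_len;

	printf("segs=%u pkt_len=%u\n", gso_segs, pkt_len);
	/* prints: segs=45 pkt_len=68130 */
	return 0;
}
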
@@ -3326,7 +3341,7 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
3326 * netdev_rx_handler_unregister - unregister receive handler 3341 * netdev_rx_handler_unregister - unregister receive handler
3327 * @dev: device to unregister a handler from 3342 * @dev: device to unregister a handler from
3328 * 3343 *
3329 * Unregister a receive hander from a device. 3344 * Unregister a receive handler from a device.
3330 * 3345 *
3331 * The caller must hold the rtnl_mutex. 3346 * The caller must hold the rtnl_mutex.
3332 */ 3347 */
@@ -3355,6 +3370,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3355 case __constant_htons(ETH_P_IP): 3370 case __constant_htons(ETH_P_IP):
3356 case __constant_htons(ETH_P_IPV6): 3371 case __constant_htons(ETH_P_IPV6):
3357 case __constant_htons(ETH_P_8021Q): 3372 case __constant_htons(ETH_P_8021Q):
3373 case __constant_htons(ETH_P_8021AD):
3358 return true; 3374 return true;
3359 default: 3375 default:
3360 return false; 3376 return false;
@@ -3395,7 +3411,8 @@ another_round:
3395 3411
3396 __this_cpu_inc(softnet_data.processed); 3412 __this_cpu_inc(softnet_data.processed);
3397 3413
3398 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { 3414 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
3415 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
3399 skb = vlan_untag(skb); 3416 skb = vlan_untag(skb);
3400 if (unlikely(!skb)) 3417 if (unlikely(!skb))
3401 goto unlock; 3418 goto unlock;
@@ -4063,6 +4080,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4063 napi->gro_list = NULL; 4080 napi->gro_list = NULL;
4064 napi->skb = NULL; 4081 napi->skb = NULL;
4065 napi->poll = poll; 4082 napi->poll = poll;
4083 if (weight > NAPI_POLL_WEIGHT)
4084 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
4085 weight, dev->name);
4066 napi->weight = weight; 4086 napi->weight = weight;
4067 list_add(&napi->dev_list, &dev->napi_list); 4087 list_add(&napi->dev_list, &dev->napi_list);
4068 napi->dev = dev; 4088 napi->dev = dev;
@@ -4924,20 +4944,25 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
4924 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); 4944 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4925 } 4945 }
4926 4946
4927 /* Fix illegal SG+CSUM combinations. */
4928 if ((features & NETIF_F_SG) &&
4929 !(features & NETIF_F_ALL_CSUM)) {
4930 netdev_dbg(dev,
4931 "Dropping NETIF_F_SG since no checksum feature.\n");
4932 features &= ~NETIF_F_SG;
4933 }
4934
4935 /* TSO requires that SG is present as well. */ 4947 /* TSO requires that SG is present as well. */
4936 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { 4948 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
4937 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); 4949 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
4938 features &= ~NETIF_F_ALL_TSO; 4950 features &= ~NETIF_F_ALL_TSO;
4939 } 4951 }
4940 4952
4953 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
4954 !(features & NETIF_F_IP_CSUM)) {
4955 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
4956 features &= ~NETIF_F_TSO;
4957 features &= ~NETIF_F_TSO_ECN;
4958 }
4959
4960 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
4961 !(features & NETIF_F_IPV6_CSUM)) {
4962 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
4963 features &= ~NETIF_F_TSO6;
4964 }
4965
4941 /* TSO ECN requires that TSO is present as well. */ 4966 /* TSO ECN requires that TSO is present as well. */
4942 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) 4967 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
4943 features &= ~NETIF_F_TSO_ECN; 4968 features &= ~NETIF_F_TSO_ECN;
@@ -5168,7 +5193,8 @@ int register_netdevice(struct net_device *dev)
5168 } 5193 }
5169 } 5194 }
5170 5195
5171 if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && 5196 if (((dev->hw_features | dev->features) &
5197 NETIF_F_HW_VLAN_CTAG_FILTER) &&
5172 (!dev->netdev_ops->ndo_vlan_rx_add_vid || 5198 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
5173 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) { 5199 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
5174 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); 5200 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
@@ -5205,6 +5231,10 @@ int register_netdevice(struct net_device *dev)
5205 */ 5231 */
5206 dev->vlan_features |= NETIF_F_HIGHDMA; 5232 dev->vlan_features |= NETIF_F_HIGHDMA;
5207 5233
5234 /* Make NETIF_F_SG inheritable to tunnel devices.
5235 */
5236 dev->hw_enc_features |= NETIF_F_SG;
5237
5208 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5238 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5209 ret = notifier_to_errno(ret); 5239 ret = notifier_to_errno(ret);
5210 if (ret) 5240 if (ret)
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index abdc9e6ef33e..c013f38482a1 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -22,7 +22,8 @@
22 22
23static int __hw_addr_create_ex(struct netdev_hw_addr_list *list, 23static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
24 const unsigned char *addr, int addr_len, 24 const unsigned char *addr, int addr_len,
25 unsigned char addr_type, bool global) 25 unsigned char addr_type, bool global,
26 bool sync)
26{ 27{
27 struct netdev_hw_addr *ha; 28 struct netdev_hw_addr *ha;
28 int alloc_size; 29 int alloc_size;
@@ -37,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
37 ha->type = addr_type; 38 ha->type = addr_type;
38 ha->refcount = 1; 39 ha->refcount = 1;
39 ha->global_use = global; 40 ha->global_use = global;
40 ha->synced = 0; 41 ha->synced = sync;
41 list_add_tail_rcu(&ha->list, &list->list); 42 list_add_tail_rcu(&ha->list, &list->list);
42 list->count++; 43 list->count++;
43 44
@@ -46,7 +47,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,
46 47
47static int __hw_addr_add_ex(struct netdev_hw_addr_list *list, 48static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
48 const unsigned char *addr, int addr_len, 49 const unsigned char *addr, int addr_len,
49 unsigned char addr_type, bool global) 50 unsigned char addr_type, bool global, bool sync)
50{ 51{
51 struct netdev_hw_addr *ha; 52 struct netdev_hw_addr *ha;
52 53
@@ -63,43 +64,62 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
63 else 64 else
64 ha->global_use = true; 65 ha->global_use = true;
65 } 66 }
67 if (sync) {
68 if (ha->synced)
69 return 0;
70 else
71 ha->synced = true;
72 }
66 ha->refcount++; 73 ha->refcount++;
67 return 0; 74 return 0;
68 } 75 }
69 } 76 }
70 77
71 return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); 78 return __hw_addr_create_ex(list, addr, addr_len, addr_type, global,
79 sync);
72} 80}
73 81
74static int __hw_addr_add(struct netdev_hw_addr_list *list, 82static int __hw_addr_add(struct netdev_hw_addr_list *list,
75 const unsigned char *addr, int addr_len, 83 const unsigned char *addr, int addr_len,
76 unsigned char addr_type) 84 unsigned char addr_type)
77{ 85{
78 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); 86 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false);
87}
88
89static int __hw_addr_del_entry(struct netdev_hw_addr_list *list,
90 struct netdev_hw_addr *ha, bool global,
91 bool sync)
92{
93 if (global && !ha->global_use)
94 return -ENOENT;
95
96 if (sync && !ha->synced)
97 return -ENOENT;
98
99 if (global)
100 ha->global_use = false;
101
102 if (sync)
103 ha->synced = false;
104
105 if (--ha->refcount)
106 return 0;
107 list_del_rcu(&ha->list);
108 kfree_rcu(ha, rcu_head);
109 list->count--;
110 return 0;
79} 111}
80 112
81static int __hw_addr_del_ex(struct netdev_hw_addr_list *list, 113static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
82 const unsigned char *addr, int addr_len, 114 const unsigned char *addr, int addr_len,
83 unsigned char addr_type, bool global) 115 unsigned char addr_type, bool global, bool sync)
84{ 116{
85 struct netdev_hw_addr *ha; 117 struct netdev_hw_addr *ha;
86 118
87 list_for_each_entry(ha, &list->list, list) { 119 list_for_each_entry(ha, &list->list, list) {
88 if (!memcmp(ha->addr, addr, addr_len) && 120 if (!memcmp(ha->addr, addr, addr_len) &&
89 (ha->type == addr_type || !addr_type)) { 121 (ha->type == addr_type || !addr_type))
90 if (global) { 122 return __hw_addr_del_entry(list, ha, global, sync);
91 if (!ha->global_use)
92 break;
93 else
94 ha->global_use = false;
95 }
96 if (--ha->refcount)
97 return 0;
98 list_del_rcu(&ha->list);
99 kfree_rcu(ha, rcu_head);
100 list->count--;
101 return 0;
102 }
103 } 123 }
104 return -ENOENT; 124 return -ENOENT;
105} 125}
@@ -108,7 +128,57 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list,
108 const unsigned char *addr, int addr_len, 128 const unsigned char *addr, int addr_len,
109 unsigned char addr_type) 129 unsigned char addr_type)
110{ 130{
111 return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); 131 return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false);
132}
133
134static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list,
135 struct netdev_hw_addr *ha,
136 int addr_len)
137{
138 int err;
139
140 err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type,
141 false, true);
142 if (err)
143 return err;
144 ha->sync_cnt++;
145 ha->refcount++;
146
147 return 0;
148}
149
150static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list,
151 struct netdev_hw_addr_list *from_list,
152 struct netdev_hw_addr *ha,
153 int addr_len)
154{
155 int err;
156
157 err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type,
158 false, true);
159 if (err)
160 return;
161 ha->sync_cnt--;
162 __hw_addr_del_entry(from_list, ha, false, true);
163}
164
165static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list,
166 struct netdev_hw_addr_list *from_list,
167 int addr_len)
168{
169 int err = 0;
170 struct netdev_hw_addr *ha, *tmp;
171
172 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
173 if (ha->sync_cnt == ha->refcount) {
174 __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
175 } else {
176 err = __hw_addr_sync_one(to_list, ha, addr_len);
177 if (err)
178 break;
179 }
180 }
181 return err;
112} 182}
113 183
114int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, 184int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
@@ -152,6 +222,11 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
152} 222}
153EXPORT_SYMBOL(__hw_addr_del_multiple); 223EXPORT_SYMBOL(__hw_addr_del_multiple);
154 224
225/* This function only works where there is a strict 1-1 relationship
226 * between source and destination of the sync. If you ever need to
227 * sync addresses to more than 1 destination, you need to use
228 * __hw_addr_sync_multiple().
229 */
155int __hw_addr_sync(struct netdev_hw_addr_list *to_list, 230int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
156 struct netdev_hw_addr_list *from_list, 231 struct netdev_hw_addr_list *from_list,
157 int addr_len) 232 int addr_len)
@@ -160,17 +235,12 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
160 struct netdev_hw_addr *ha, *tmp; 235 struct netdev_hw_addr *ha, *tmp;
161 236
162 list_for_each_entry_safe(ha, tmp, &from_list->list, list) { 237 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
163 if (!ha->synced) { 238 if (!ha->sync_cnt) {
164 err = __hw_addr_add(to_list, ha->addr, 239 err = __hw_addr_sync_one(to_list, ha, addr_len);
165 addr_len, ha->type);
166 if (err) 240 if (err)
167 break; 241 break;
168 ha->synced++; 242 } else if (ha->refcount == 1)
169 ha->refcount++; 243 __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
170 } else if (ha->refcount == 1) {
171 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
172 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
173 }
174 } 244 }
175 return err; 245 return err;
176} 246}
@@ -183,13 +253,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
183 struct netdev_hw_addr *ha, *tmp; 253 struct netdev_hw_addr *ha, *tmp;
184 254
185 list_for_each_entry_safe(ha, tmp, &from_list->list, list) { 255 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
186 if (ha->synced) { 256 if (ha->sync_cnt)
187 __hw_addr_del(to_list, ha->addr, 257 __hw_addr_unsync_one(to_list, from_list, ha, addr_len);
188 addr_len, ha->type);
189 ha->synced--;
190 __hw_addr_del(from_list, ha->addr,
191 addr_len, ha->type);
192 }
193 } 258 }
194} 259}
195EXPORT_SYMBOL(__hw_addr_unsync); 260EXPORT_SYMBOL(__hw_addr_unsync);
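
The refcount/sync_cnt bookkeeping above is the subtle part of this hunk, so here is a minimal user-space sketch of it with the kernel list machinery stripped away; the entry type and helper names are simplified stand-ins, not the kernel's own.

    #include <stdio.h>

    /* Simplified stand-in for struct netdev_hw_addr. */
    struct entry {
            int refcount;  /* users of this entry on its own list */
            int sync_cnt;  /* destination lists holding a synced copy */
    };

    /* Mirrors __hw_addr_sync_one(): a sync adds one destination copy
     * and pins the source entry with an extra reference.
     */
    static void sync_one(struct entry *e)
    {
            e->sync_cnt++;
            e->refcount++;
    }

    /* Mirrors __hw_addr_unsync_one(): drop the destination copy plus
     * the reference that __hw_addr_del_entry() releases on the source.
     */
    static void unsync_one(struct entry *e)
    {
            e->sync_cnt--;
            e->refcount--;
    }

    int main(void)
    {
            struct entry e = { .refcount = 1, .sync_cnt = 0 };

            sync_one(&e);   /* refcount 2, sync_cnt 1 */
            /* __hw_addr_sync_multiple() unsyncs when sync_cnt == refcount:
             * at that point every remaining reference is a sync, i.e. the
             * local user already deleted the address from the source list.
             */
            unsync_one(&e); /* back to refcount 1, sync_cnt 0 */
            printf("refcount=%d sync_cnt=%d\n", e.refcount, e.sync_cnt);
            return 0;
    }
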
@@ -406,7 +471,7 @@ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)
406 } 471 }
407 } 472 }
408 err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len, 473 err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len,
409 NETDEV_HW_ADDR_T_UNICAST, true); 474 NETDEV_HW_ADDR_T_UNICAST, true, false);
410 if (!err) 475 if (!err)
411 __dev_set_rx_mode(dev); 476 __dev_set_rx_mode(dev);
412out: 477out:
@@ -469,7 +534,8 @@ EXPORT_SYMBOL(dev_uc_del);
469 * locked by netif_addr_lock_bh. 534 * locked by netif_addr_lock_bh.
470 * 535 *
471 * This function is intended to be called from the dev->set_rx_mode 536 * This function is intended to be called from the dev->set_rx_mode
472 * function of layered software devices. 537 * function of layered software devices. This function assumes that
 538 * addresses will only ever be synced to the @to device and no other.
473 */ 539 */
474int dev_uc_sync(struct net_device *to, struct net_device *from) 540int dev_uc_sync(struct net_device *to, struct net_device *from)
475{ 541{
@@ -488,6 +554,36 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
488EXPORT_SYMBOL(dev_uc_sync); 554EXPORT_SYMBOL(dev_uc_sync);
489 555
490/** 556/**
557 * dev_uc_sync_multiple - Synchronize device's unicast list to another
558 * device, but allow for multiple calls to sync to multiple devices.
559 * @to: destination device
560 * @from: source device
561 *
562 * Add newly added addresses to the destination device and release
563 * addresses that have been deleted from the source. The source device
564 * must be locked by netif_addr_lock_bh.
565 *
566 * This function is intended to be called from the dev->set_rx_mode
567 * function of layered software devices. It allows for a single source
568 * device to be synced to multiple destination devices.
569 */
570int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
571{
572 int err = 0;
573
574 if (to->addr_len != from->addr_len)
575 return -EINVAL;
576
577 netif_addr_lock_nested(to);
578 err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
579 if (!err)
580 __dev_set_rx_mode(to);
581 netif_addr_unlock(to);
582 return err;
583}
584EXPORT_SYMBOL(dev_uc_sync_multiple);
585
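
A sketch of the intended call site: a hypothetical aggregating driver (the shape bonding and team take) whose ndo_set_rx_mode propagates one upper device's lists to several lower devices. The agg_priv and agg_slave types are assumed for illustration; with more than one destination, the plain dev_uc_sync()/dev_mc_sync() pair would corrupt the bookkeeping.

    static void agg_set_rx_mode(struct net_device *agg_dev)
    {
            struct agg_priv *priv = netdev_priv(agg_dev); /* hypothetical */
            struct agg_slave *slave;

            /* One source, many destinations: only the _multiple
             * variants keep the per-entry sync_cnt consistent.
             */
            list_for_each_entry(slave, &priv->slaves, list) {
                    dev_uc_sync_multiple(slave->dev, agg_dev);
                    dev_mc_sync_multiple(slave->dev, agg_dev);
            }
    }
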
586/**
491 * dev_uc_unsync - Remove synchronized addresses from the destination device 587 * dev_uc_unsync - Remove synchronized addresses from the destination device
492 * @to: destination device 588 * @to: destination device
493 * @from: source device 589 * @from: source device
@@ -559,7 +655,7 @@ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)
559 } 655 }
560 } 656 }
561 err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len, 657 err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len,
562 NETDEV_HW_ADDR_T_MULTICAST, true); 658 NETDEV_HW_ADDR_T_MULTICAST, true, false);
563 if (!err) 659 if (!err)
564 __dev_set_rx_mode(dev); 660 __dev_set_rx_mode(dev);
565out: 661out:
@@ -575,7 +671,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,
575 671
576 netif_addr_lock_bh(dev); 672 netif_addr_lock_bh(dev);
577 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, 673 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
578 NETDEV_HW_ADDR_T_MULTICAST, global); 674 NETDEV_HW_ADDR_T_MULTICAST, global, false);
579 if (!err) 675 if (!err)
580 __dev_set_rx_mode(dev); 676 __dev_set_rx_mode(dev);
581 netif_addr_unlock_bh(dev); 677 netif_addr_unlock_bh(dev);
@@ -615,7 +711,7 @@ static int __dev_mc_del(struct net_device *dev, const unsigned char *addr,
615 711
616 netif_addr_lock_bh(dev); 712 netif_addr_lock_bh(dev);
617 err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, 713 err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
618 NETDEV_HW_ADDR_T_MULTICAST, global); 714 NETDEV_HW_ADDR_T_MULTICAST, global, false);
619 if (!err) 715 if (!err)
620 __dev_set_rx_mode(dev); 716 __dev_set_rx_mode(dev);
621 netif_addr_unlock_bh(dev); 717 netif_addr_unlock_bh(dev);
@@ -679,6 +775,36 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
679EXPORT_SYMBOL(dev_mc_sync); 775EXPORT_SYMBOL(dev_mc_sync);
680 776
681/** 777/**
 778 * dev_mc_sync_multiple - Synchronize device's multicast list to another
779 * device, but allow for multiple calls to sync to multiple devices.
780 * @to: destination device
781 * @from: source device
782 *
783 * Add newly added addresses to the destination device and release
784 * addresses that have no users left. The source device must be
785 * locked by netif_addr_lock_bh.
786 *
787 * This function is intended to be called from the ndo_set_rx_mode
788 * function of layered software devices. It allows for a single
789 * source device to be synced to multiple destination devices.
790 */
791int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
792{
793 int err = 0;
794
795 if (to->addr_len != from->addr_len)
796 return -EINVAL;
797
798 netif_addr_lock_nested(to);
 799	err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
800 if (!err)
801 __dev_set_rx_mode(to);
802 netif_addr_unlock(to);
803 return err;
804}
805EXPORT_SYMBOL(dev_mc_sync_multiple);
806
807/**
682 * dev_mc_unsync - Remove synchronized addresses from the destination device 808 * dev_mc_unsync - Remove synchronized addresses from the destination device
683 * @to: destination device 809 * @to: destination device
684 * @from: source device 810 * @from: source device
diff --git a/net/core/dst.c b/net/core/dst.c
index 35fd12f1a69c..df9cc810ec8e 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
320EXPORT_SYMBOL(__dst_destroy_metrics_generic); 320EXPORT_SYMBOL(__dst_destroy_metrics_generic);
321 321
322/** 322/**
323 * skb_dst_set_noref - sets skb dst, without a reference 323 * __skb_dst_set_noref - sets skb dst, without a reference
324 * @skb: buffer 324 * @skb: buffer
325 * @dst: dst entry 325 * @dst: dst entry
326 * @force: if force is set, use noref version even for DST_NOCACHE entries
326 * 327 *
327 * Sets skb dst, assuming a reference was not taken on dst 328 * Sets skb dst, assuming a reference was not taken on dst
328 * skb_dst_drop() should not dst_release() this dst 329 * skb_dst_drop() should not dst_release() this dst
329 */ 330 */
330void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) 331void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force)
331{ 332{
332 WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); 333 WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
333 /* If dst not in cache, we must take a reference, because 334 /* If dst not in cache, we must take a reference, because
334 * dst_release() will destroy dst as soon as its refcount becomes zero 335 * dst_release() will destroy dst as soon as its refcount becomes zero
335 */ 336 */
336 if (unlikely(dst->flags & DST_NOCACHE)) { 337 if (unlikely((dst->flags & DST_NOCACHE) && !force)) {
337 dst_hold(dst); 338 dst_hold(dst);
338 skb_dst_set(skb, dst); 339 skb_dst_set(skb, dst);
339 } else { 340 } else {
340 skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; 341 skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
341 } 342 }
342} 343}
343EXPORT_SYMBOL(skb_dst_set_noref); 344EXPORT_SYMBOL(__skb_dst_set_noref);
344 345
345/* Dirty hack. We did it in 2.2 (in __dst_free), 346/* Dirty hack. We did it in 2.2 (in __dst_free),
346 * we have _very_ good reasons not to repeat 347 * we have _very_ good reasons not to repeat
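
The header-side counterpart is outside this hunk; presumably the old entry point survives as a thin inline wrapper in include/net/dst.h, alongside a forced variant for callers that can guarantee the dst outlives the skb. A sketch, names assumed:

    static inline void skb_dst_set_noref(struct sk_buff *skb,
                                         struct dst_entry *dst)
    {
            __skb_dst_set_noref(skb, dst, false);
    }

    /* Caller promises the dst stays alive for the skb's lifetime, so
     * the noref fast path is safe even for DST_NOCACHE entries.
     */
    static inline void skb_dst_set_noref_force(struct sk_buff *skb,
                                               struct dst_entry *dst)
    {
            __skb_dst_set_noref(skb, dst, true);
    }
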
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3e9b2c3e30f0..22efdaa76ebf 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -60,10 +60,13 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
60 [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", 60 [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
61 [NETIF_F_HIGHDMA_BIT] = "highdma", 61 [NETIF_F_HIGHDMA_BIT] = "highdma",
62 [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", 62 [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
63 [NETIF_F_HW_VLAN_TX_BIT] = "tx-vlan-hw-insert", 63 [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-ctag-hw-insert",
64 64
65 [NETIF_F_HW_VLAN_RX_BIT] = "rx-vlan-hw-parse", 65 [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-ctag-hw-parse",
66 [NETIF_F_HW_VLAN_FILTER_BIT] = "rx-vlan-filter", 66 [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-ctag-filter",
67 [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert",
68 [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse",
69 [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
67 [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", 70 [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
68 [NETIF_F_GSO_BIT] = "tx-generic-segmentation", 71 [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
69 [NETIF_F_LLTX_BIT] = "tx-lockless", 72 [NETIF_F_LLTX_BIT] = "tx-lockless",
@@ -78,6 +81,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
78 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", 81 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
79 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", 82 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
80 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", 83 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
84 [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
81 85
82 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", 86 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
83 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", 87 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
@@ -266,18 +270,19 @@ static int ethtool_set_one_feature(struct net_device *dev,
266 270
267#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ 271#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \
268 ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) 272 ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH)
269#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \ 273#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \
270 NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH) 274 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \
275 NETIF_F_RXHASH)
271 276
272static u32 __ethtool_get_flags(struct net_device *dev) 277static u32 __ethtool_get_flags(struct net_device *dev)
273{ 278{
274 u32 flags = 0; 279 u32 flags = 0;
275 280
276 if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; 281 if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO;
277 if (dev->features & NETIF_F_HW_VLAN_RX) flags |= ETH_FLAG_RXVLAN; 282 if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN;
278 if (dev->features & NETIF_F_HW_VLAN_TX) flags |= ETH_FLAG_TXVLAN; 283 if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN;
279 if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; 284 if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE;
280 if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; 285 if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH;
281 286
282 return flags; 287 return flags;
283} 288}
@@ -290,8 +295,8 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
290 return -EINVAL; 295 return -EINVAL;
291 296
292 if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; 297 if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO;
293 if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_RX; 298 if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX;
294 if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_TX; 299 if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX;
295 if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; 300 if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE;
296 if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; 301 if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH;
297 302
@@ -1416,7 +1421,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1416 void __user *useraddr = ifr->ifr_data; 1421 void __user *useraddr = ifr->ifr_data;
1417 u32 ethcmd; 1422 u32 ethcmd;
1418 int rc; 1423 int rc;
1419 u32 old_features; 1424 netdev_features_t old_features;
1420 1425
1421 if (!dev || !netif_device_present(dev)) 1426 if (!dev || !netif_device_present(dev))
1422 return -ENODEV; 1427 return -ENODEV;
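
For driver authors the CTAG/STAG split is mostly a spelling change at feature-advertisement time; a hedged sketch of a hypothetical probe fragment under the new names, with the STAG line only for hardware that can also handle 802.1ad outer tags:

    /* Advertise 802.1Q (CTAG) offloads under the renamed bits. */
    dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX |
                        NETIF_F_HW_VLAN_CTAG_RX |
                        NETIF_F_HW_VLAN_CTAG_FILTER;
    /* Only if the hardware filters 802.1ad (STAG) outer tags too. */
    dev->hw_features |= NETIF_F_HW_VLAN_STAG_FILTER;
    dev->features |= dev->hw_features;
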
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 58a4ba27dfe3..d5a9f8ead0d8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -266,7 +266,7 @@ errout:
266 return err; 266 return err;
267} 267}
268 268
269static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 269static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
270{ 270{
271 struct net *net = sock_net(skb->sk); 271 struct net *net = sock_net(skb->sk);
272 struct fib_rule_hdr *frh = nlmsg_data(nlh); 272 struct fib_rule_hdr *frh = nlmsg_data(nlh);
@@ -415,7 +415,7 @@ errout:
415 return err; 415 return err;
416} 416}
417 417
418static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 418static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
419{ 419{
420 struct net *net = sock_net(skb->sk); 420 struct net *net = sock_net(skb->sk);
421 struct fib_rule_hdr *frh = nlmsg_data(nlh); 421 struct fib_rule_hdr *frh = nlmsg_data(nlh);
diff --git a/net/core/filter.c b/net/core/filter.c
index 2e20b55a7830..dad2a178f9f8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -348,6 +348,9 @@ load_b:
348 case BPF_S_ANC_VLAN_TAG_PRESENT: 348 case BPF_S_ANC_VLAN_TAG_PRESENT:
349 A = !!vlan_tx_tag_present(skb); 349 A = !!vlan_tx_tag_present(skb);
350 continue; 350 continue;
351 case BPF_S_ANC_PAY_OFFSET:
352 A = __skb_get_poff(skb);
353 continue;
351 case BPF_S_ANC_NLATTR: { 354 case BPF_S_ANC_NLATTR: {
352 struct nlattr *nla; 355 struct nlattr *nla;
353 356
@@ -612,6 +615,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
612 ANCILLARY(ALU_XOR_X); 615 ANCILLARY(ALU_XOR_X);
613 ANCILLARY(VLAN_TAG); 616 ANCILLARY(VLAN_TAG);
614 ANCILLARY(VLAN_TAG_PRESENT); 617 ANCILLARY(VLAN_TAG_PRESENT);
618 ANCILLARY(PAY_OFFSET);
615 } 619 }
616 620
617 /* ancillary operation unknown or unsupported */ 621 /* ancillary operation unknown or unsupported */
@@ -814,6 +818,7 @@ static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
814 [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, 818 [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
815 [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, 819 [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
816 [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, 820 [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
821 [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
817 [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, 822 [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
818 [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, 823 [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
819 [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, 824 [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
diff --git a/net/core/flow.c b/net/core/flow.c
index 2bfd081c59f7..7102f166482d 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -323,6 +323,24 @@ static void flow_cache_flush_tasklet(unsigned long data)
323 complete(&info->completion); 323 complete(&info->completion);
324} 324}
325 325
326/*
 327 * Return whether a cpu's cache is empty, i.e. it needs no flushing.
 328 * Conservatively, we assume the presence of any entry means the core
 329 * may require flushing, since the flow_cache_ops.check() function may
 330 * assume it's running on the same core as the per-cpu cache component.
331 */
332static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
333{
334 struct flow_cache_percpu *fcp;
335 int i;
336
337 fcp = per_cpu_ptr(fc->percpu, cpu);
338 for (i = 0; i < flow_cache_hash_size(fc); i++)
339 if (!hlist_empty(&fcp->hash_table[i]))
340 return 0;
341 return 1;
342}
343
326static void flow_cache_flush_per_cpu(void *data) 344static void flow_cache_flush_per_cpu(void *data)
327{ 345{
328 struct flow_flush_info *info = data; 346 struct flow_flush_info *info = data;
@@ -337,22 +355,40 @@ void flow_cache_flush(void)
337{ 355{
338 struct flow_flush_info info; 356 struct flow_flush_info info;
339 static DEFINE_MUTEX(flow_flush_sem); 357 static DEFINE_MUTEX(flow_flush_sem);
358 cpumask_var_t mask;
359 int i, self;
360
361 /* Track which cpus need flushing to avoid disturbing all cores. */
362 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
363 return;
364 cpumask_clear(mask);
340 365
341 /* Don't want cpus going down or up during this. */ 366 /* Don't want cpus going down or up during this. */
342 get_online_cpus(); 367 get_online_cpus();
343 mutex_lock(&flow_flush_sem); 368 mutex_lock(&flow_flush_sem);
344 info.cache = &flow_cache_global; 369 info.cache = &flow_cache_global;
345 atomic_set(&info.cpuleft, num_online_cpus()); 370 for_each_online_cpu(i)
371 if (!flow_cache_percpu_empty(info.cache, i))
372 cpumask_set_cpu(i, mask);
373 atomic_set(&info.cpuleft, cpumask_weight(mask));
374 if (atomic_read(&info.cpuleft) == 0)
375 goto done;
376
346 init_completion(&info.completion); 377 init_completion(&info.completion);
347 378
348 local_bh_disable(); 379 local_bh_disable();
349 smp_call_function(flow_cache_flush_per_cpu, &info, 0); 380 self = cpumask_test_and_clear_cpu(smp_processor_id(), mask);
350 flow_cache_flush_tasklet((unsigned long)&info); 381 on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0);
382 if (self)
383 flow_cache_flush_tasklet((unsigned long)&info);
351 local_bh_enable(); 384 local_bh_enable();
352 385
353 wait_for_completion(&info.completion); 386 wait_for_completion(&info.completion);
387
388done:
354 mutex_unlock(&flow_flush_sem); 389 mutex_unlock(&flow_flush_sem);
355 put_online_cpus(); 390 put_online_cpus();
391 free_cpumask_var(mask);
356} 392}
357 393
358static void flow_cache_flush_task(struct work_struct *work) 394static void flow_cache_flush_task(struct work_struct *work)
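
The point of the new mask is to skip IPIs to cores whose caches are empty. A self-contained toy model of the computation, with plain arrays instead of per-cpu data and an unsigned long instead of a cpumask_var_t:

    #include <stdio.h>
    #include <stdbool.h>

    #define NCPU   4
    #define NSLOTS 8

    /* Toy per-cpu flow cache: a nonzero slot is a cached entry. */
    static int cache[NCPU][NSLOTS];

    /* Mirrors flow_cache_percpu_empty(): any entry means "flush me". */
    static bool percpu_empty(int cpu)
    {
            for (int i = 0; i < NSLOTS; i++)
                    if (cache[cpu][i])
                            return false;
            return true;
    }

    int main(void)
    {
            unsigned long mask = 0;   /* stands in for the cpumask */

            cache[2][5] = 1;          /* only cpu 2 holds an entry */

            for (int cpu = 0; cpu < NCPU; cpu++)
                    if (!percpu_empty(cpu))
                            mask |= 1UL << cpu;

            if (!mask) {              /* the new early-exit path */
                    puts("nothing to flush");
                    return 0;
            }
            for (int cpu = 0; cpu < NCPU; cpu++)
                    if (mask & (1UL << cpu))
                            printf("flush cpu %d\n", cpu); /* on_each_cpu_mask() */
            return 0;
    }
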
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index e187bf06d673..00ee068efc1c 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -5,6 +5,10 @@
5#include <linux/if_vlan.h> 5#include <linux/if_vlan.h>
6#include <net/ip.h> 6#include <net/ip.h>
7#include <net/ipv6.h> 7#include <net/ipv6.h>
8#include <linux/igmp.h>
9#include <linux/icmp.h>
10#include <linux/sctp.h>
11#include <linux/dccp.h>
8#include <linux/if_tunnel.h> 12#include <linux/if_tunnel.h>
9#include <linux/if_pppox.h> 13#include <linux/if_pppox.h>
10#include <linux/ppp_defs.h> 14#include <linux/ppp_defs.h>
@@ -119,6 +123,17 @@ ipv6:
119 nhoff += 4; 123 nhoff += 4;
120 if (hdr->flags & GRE_SEQ) 124 if (hdr->flags & GRE_SEQ)
121 nhoff += 4; 125 nhoff += 4;
126 if (proto == htons(ETH_P_TEB)) {
127 const struct ethhdr *eth;
128 struct ethhdr _eth;
129
130 eth = skb_header_pointer(skb, nhoff,
131 sizeof(_eth), &_eth);
132 if (!eth)
133 return false;
134 proto = eth->h_proto;
135 nhoff += sizeof(*eth);
136 }
122 goto again; 137 goto again;
123 } 138 }
124 break; 139 break;
@@ -217,6 +232,59 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
217} 232}
218EXPORT_SYMBOL(__skb_tx_hash); 233EXPORT_SYMBOL(__skb_tx_hash);
219 234
235/* __skb_get_poff() returns the offset to the payload as far as it could
236 * be dissected. The main user is currently BPF, so that we can dynamically
 237 * truncate packets without needing to push the actual payload to
 238 * user space, analyzing headers only instead.
239 */
240u32 __skb_get_poff(const struct sk_buff *skb)
241{
242 struct flow_keys keys;
243 u32 poff = 0;
244
245 if (!skb_flow_dissect(skb, &keys))
246 return 0;
247
248 poff += keys.thoff;
249 switch (keys.ip_proto) {
250 case IPPROTO_TCP: {
251 const struct tcphdr *tcph;
252 struct tcphdr _tcph;
253
254 tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph);
255 if (!tcph)
256 return poff;
257
258 poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4);
259 break;
260 }
261 case IPPROTO_UDP:
262 case IPPROTO_UDPLITE:
263 poff += sizeof(struct udphdr);
264 break;
265 /* For the rest, we do not really care about header
266 * extensions at this point for now.
267 */
268 case IPPROTO_ICMP:
269 poff += sizeof(struct icmphdr);
270 break;
271 case IPPROTO_ICMPV6:
272 poff += sizeof(struct icmp6hdr);
273 break;
274 case IPPROTO_IGMP:
275 poff += sizeof(struct igmphdr);
276 break;
277 case IPPROTO_DCCP:
278 poff += sizeof(struct dccp_hdr);
279 break;
280 case IPPROTO_SCTP:
281 poff += sizeof(struct sctphdr);
282 break;
283 }
284
285 return poff;
286}
287
220static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) 288static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
221{ 289{
222 if (unlikely(queue_index >= dev->real_num_tx_queues)) { 290 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
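
What __skb_get_poff() returns is simply "the headers end here". A toy user-space version for the IPv4/TCP case shows the ihl/doff arithmetic; the kernel version goes through flow_keys and skb_header_pointer() and covers the other protocols listed above:

    #include <stdio.h>
    #include <stdint.h>

    /* Toy __skb_get_poff() for the IPv4/TCP case: payload offset in a
     * flat buffer. No skb_header_pointer(), no bounds checks - just
     * the ihl/doff arithmetic.
     */
    static uint32_t get_poff(const uint8_t *pkt)
    {
            uint32_t ihl  = (pkt[0] & 0x0f) * 4;               /* IPv4 hdr len */
            uint32_t doff = ((pkt[ihl + 12] >> 4) & 0x0f) * 4; /* TCP data off */

            /* max_t(u32, sizeof(struct tcphdr), doff) in the real code */
            return ihl + (doff < 20 ? 20 : doff);
    }

    int main(void)
    {
            uint8_t pkt[60] = { 0 };

            pkt[0] = 0x45;        /* IPv4, ihl = 5 words (20 bytes) */
            pkt[20 + 12] = 0x80;  /* TCP doff = 8 words (32 bytes)  */
            printf("payload starts at %u\n", (unsigned)get_poff(pkt)); /* 52 */
            return 0;
    }
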
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3863b8f639c5..5c56b217b999 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -39,21 +39,13 @@
39#include <linux/string.h> 39#include <linux/string.h>
40#include <linux/log2.h> 40#include <linux/log2.h>
41 41
42#define DEBUG
42#define NEIGH_DEBUG 1 43#define NEIGH_DEBUG 1
43 44#define neigh_dbg(level, fmt, ...) \
44#define NEIGH_PRINTK(x...) printk(x) 45do { \
45#define NEIGH_NOPRINTK(x...) do { ; } while(0) 46 if (level <= NEIGH_DEBUG) \
46#define NEIGH_PRINTK1 NEIGH_NOPRINTK 47 pr_debug(fmt, ##__VA_ARGS__); \
47#define NEIGH_PRINTK2 NEIGH_NOPRINTK 48} while (0)
48
49#if NEIGH_DEBUG >= 1
50#undef NEIGH_PRINTK1
51#define NEIGH_PRINTK1 NEIGH_PRINTK
52#endif
53#if NEIGH_DEBUG >= 2
54#undef NEIGH_PRINTK2
55#define NEIGH_PRINTK2 NEIGH_PRINTK
56#endif
57 49
58#define PNEIGH_HASHMASK 0xF 50#define PNEIGH_HASHMASK 0xF
59 51
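
The replacement is the usual level-filtered pr_debug() wrapper. A stand-alone sketch with printf() standing in for pr_debug(): with NEIGH_DEBUG at 1, the level-2 call's test is constant-false and the compiler discards it.

    #include <stdio.h>

    #define NEIGH_DEBUG 1
    #define neigh_dbg(level, fmt, ...)                \
    do {                                              \
            if (level <= NEIGH_DEBUG)                 \
                    printf(fmt, ##__VA_ARGS__);       \
    } while (0)

    int main(void)
    {
            neigh_dbg(1, "neigh %p is created\n", (void *)0x1234); /* printed */
            neigh_dbg(2, "never printed at this debug level\n");
            return 0;
    }
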
@@ -246,7 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
246 n->nud_state = NUD_NOARP; 238 n->nud_state = NUD_NOARP;
247 else 239 else
248 n->nud_state = NUD_NONE; 240 n->nud_state = NUD_NONE;
249 NEIGH_PRINTK2("neigh %p is stray.\n", n); 241 neigh_dbg(2, "neigh %p is stray\n", n);
250 } 242 }
251 write_unlock(&n->lock); 243 write_unlock(&n->lock);
252 neigh_cleanup_and_release(n); 244 neigh_cleanup_and_release(n);
@@ -542,7 +534,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
542 lockdep_is_held(&tbl->lock))); 534 lockdep_is_held(&tbl->lock)));
543 rcu_assign_pointer(nht->hash_buckets[hash_val], n); 535 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
544 write_unlock_bh(&tbl->lock); 536 write_unlock_bh(&tbl->lock);
545 NEIGH_PRINTK2("neigh %p is created.\n", n); 537 neigh_dbg(2, "neigh %p is created\n", n);
546 rc = n; 538 rc = n;
547out: 539out:
548 return rc; 540 return rc;
@@ -725,7 +717,7 @@ void neigh_destroy(struct neighbour *neigh)
725 dev_put(dev); 717 dev_put(dev);
726 neigh_parms_put(neigh->parms); 718 neigh_parms_put(neigh->parms);
727 719
728 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); 720 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
729 721
730 atomic_dec(&neigh->tbl->entries); 722 atomic_dec(&neigh->tbl->entries);
731 kfree_rcu(neigh, rcu); 723 kfree_rcu(neigh, rcu);
@@ -739,7 +731,7 @@ EXPORT_SYMBOL(neigh_destroy);
739 */ 731 */
740static void neigh_suspect(struct neighbour *neigh) 732static void neigh_suspect(struct neighbour *neigh)
741{ 733{
742 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); 734 neigh_dbg(2, "neigh %p is suspected\n", neigh);
743 735
744 neigh->output = neigh->ops->output; 736 neigh->output = neigh->ops->output;
745} 737}
@@ -751,7 +743,7 @@ static void neigh_suspect(struct neighbour *neigh)
751 */ 743 */
752static void neigh_connect(struct neighbour *neigh) 744static void neigh_connect(struct neighbour *neigh)
753{ 745{
754 NEIGH_PRINTK2("neigh %p is connected.\n", neigh); 746 neigh_dbg(2, "neigh %p is connected\n", neigh);
755 747
756 neigh->output = neigh->ops->connected_output; 748 neigh->output = neigh->ops->connected_output;
757} 749}
@@ -852,7 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh)
852 struct sk_buff *skb; 844 struct sk_buff *skb;
853 845
854 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); 846 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
855 NEIGH_PRINTK2("neigh %p is failed.\n", neigh); 847 neigh_dbg(2, "neigh %p is failed\n", neigh);
856 neigh->updated = jiffies; 848 neigh->updated = jiffies;
857 849
858 /* It is very thin place. report_unreachable is very complicated 850 /* It is very thin place. report_unreachable is very complicated
@@ -904,17 +896,17 @@ static void neigh_timer_handler(unsigned long arg)
904 if (state & NUD_REACHABLE) { 896 if (state & NUD_REACHABLE) {
905 if (time_before_eq(now, 897 if (time_before_eq(now,
906 neigh->confirmed + neigh->parms->reachable_time)) { 898 neigh->confirmed + neigh->parms->reachable_time)) {
907 NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); 899 neigh_dbg(2, "neigh %p is still alive\n", neigh);
908 next = neigh->confirmed + neigh->parms->reachable_time; 900 next = neigh->confirmed + neigh->parms->reachable_time;
909 } else if (time_before_eq(now, 901 } else if (time_before_eq(now,
910 neigh->used + neigh->parms->delay_probe_time)) { 902 neigh->used + neigh->parms->delay_probe_time)) {
911 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); 903 neigh_dbg(2, "neigh %p is delayed\n", neigh);
912 neigh->nud_state = NUD_DELAY; 904 neigh->nud_state = NUD_DELAY;
913 neigh->updated = jiffies; 905 neigh->updated = jiffies;
914 neigh_suspect(neigh); 906 neigh_suspect(neigh);
915 next = now + neigh->parms->delay_probe_time; 907 next = now + neigh->parms->delay_probe_time;
916 } else { 908 } else {
917 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); 909 neigh_dbg(2, "neigh %p is suspected\n", neigh);
918 neigh->nud_state = NUD_STALE; 910 neigh->nud_state = NUD_STALE;
919 neigh->updated = jiffies; 911 neigh->updated = jiffies;
920 neigh_suspect(neigh); 912 neigh_suspect(neigh);
@@ -923,14 +915,14 @@ static void neigh_timer_handler(unsigned long arg)
923 } else if (state & NUD_DELAY) { 915 } else if (state & NUD_DELAY) {
924 if (time_before_eq(now, 916 if (time_before_eq(now,
925 neigh->confirmed + neigh->parms->delay_probe_time)) { 917 neigh->confirmed + neigh->parms->delay_probe_time)) {
926 NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); 918 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
927 neigh->nud_state = NUD_REACHABLE; 919 neigh->nud_state = NUD_REACHABLE;
928 neigh->updated = jiffies; 920 neigh->updated = jiffies;
929 neigh_connect(neigh); 921 neigh_connect(neigh);
930 notify = 1; 922 notify = 1;
931 next = neigh->confirmed + neigh->parms->reachable_time; 923 next = neigh->confirmed + neigh->parms->reachable_time;
932 } else { 924 } else {
933 NEIGH_PRINTK2("neigh %p is probed.\n", neigh); 925 neigh_dbg(2, "neigh %p is probed\n", neigh);
934 neigh->nud_state = NUD_PROBE; 926 neigh->nud_state = NUD_PROBE;
935 neigh->updated = jiffies; 927 neigh->updated = jiffies;
936 atomic_set(&neigh->probes, 0); 928 atomic_set(&neigh->probes, 0);
@@ -997,7 +989,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
997 return 1; 989 return 1;
998 } 990 }
999 } else if (neigh->nud_state & NUD_STALE) { 991 } else if (neigh->nud_state & NUD_STALE) {
1000 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); 992 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1001 neigh->nud_state = NUD_DELAY; 993 neigh->nud_state = NUD_DELAY;
1002 neigh->updated = jiffies; 994 neigh->updated = jiffies;
1003 neigh_add_timer(neigh, 995 neigh_add_timer(neigh,
@@ -1320,8 +1312,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1320out: 1312out:
1321 return rc; 1313 return rc;
1322discard: 1314discard:
1323 NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", 1315 neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
1324 dst, neigh);
1325out_kfree_skb: 1316out_kfree_skb:
1326 rc = -EINVAL; 1317 rc = -EINVAL;
1327 kfree_skb(skb); 1318 kfree_skb(skb);
@@ -1498,7 +1489,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1498 } 1489 }
1499 } 1490 }
1500 write_unlock_bh(&tbl->lock); 1491 write_unlock_bh(&tbl->lock);
1501 NEIGH_PRINTK1("neigh_parms_release: not found\n"); 1492 neigh_dbg(1, "%s: not found\n", __func__);
1502} 1493}
1503EXPORT_SYMBOL(neigh_parms_release); 1494EXPORT_SYMBOL(neigh_parms_release);
1504 1495
@@ -1613,7 +1604,7 @@ int neigh_table_clear(struct neigh_table *tbl)
1613} 1604}
1614EXPORT_SYMBOL(neigh_table_clear); 1605EXPORT_SYMBOL(neigh_table_clear);
1615 1606
1616static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1607static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1617{ 1608{
1618 struct net *net = sock_net(skb->sk); 1609 struct net *net = sock_net(skb->sk);
1619 struct ndmsg *ndm; 1610 struct ndmsg *ndm;
@@ -1677,7 +1668,7 @@ out:
1677 return err; 1668 return err;
1678} 1669}
1679 1670
1680static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1671static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1681{ 1672{
1682 struct net *net = sock_net(skb->sk); 1673 struct net *net = sock_net(skb->sk);
1683 struct ndmsg *ndm; 1674 struct ndmsg *ndm;
@@ -1955,7 +1946,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1955 [NDTPA_LOCKTIME] = { .type = NLA_U64 }, 1946 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
1956}; 1947};
1957 1948
1958static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1949static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1959{ 1950{
1960 struct net *net = sock_net(skb->sk); 1951 struct net *net = sock_net(skb->sk);
1961 struct neigh_table *tbl; 1952 struct neigh_table *tbl;
@@ -2714,7 +2705,7 @@ static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2714 2705
2715 if (!ret) { 2706 if (!ret) {
2716 struct seq_file *sf = file->private_data; 2707 struct seq_file *sf = file->private_data;
2717 sf->private = PDE(inode)->data; 2708 sf->private = PDE_DATA(inode);
2718 } 2709 }
2719 return ret; 2710 return ret;
2720}; 2711};
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 3174f1998ee6..569d355fec3e 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -271,7 +271,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
271 else 271 else
272 seq_printf(seq, "%04x", ntohs(pt->type)); 272 seq_printf(seq, "%04x", ntohs(pt->type));
273 273
274 seq_printf(seq, " %-8s %pF\n", 274 seq_printf(seq, " %-8s %pf\n",
275 pt->dev ? pt->dev->name : "", pt->func); 275 pt->dev ? pt->dev->name : "", pt->func);
276 } 276 }
277 277
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7427ab5e27d8..981fed397d1d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -606,21 +606,11 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
606 return sprintf(buf, "%lu\n", val); 606 return sprintf(buf, "%lu\n", val);
607} 607}
608 608
609static void rps_dev_flow_table_release_work(struct work_struct *work)
610{
611 struct rps_dev_flow_table *table = container_of(work,
612 struct rps_dev_flow_table, free_work);
613
614 vfree(table);
615}
616
617static void rps_dev_flow_table_release(struct rcu_head *rcu) 609static void rps_dev_flow_table_release(struct rcu_head *rcu)
618{ 610{
619 struct rps_dev_flow_table *table = container_of(rcu, 611 struct rps_dev_flow_table *table = container_of(rcu,
620 struct rps_dev_flow_table, rcu); 612 struct rps_dev_flow_table, rcu);
621 613 vfree(table);
622 INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
623 schedule_work(&table->free_work);
624} 614}
625 615
626static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, 616static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 80e271d9e64b..f97652036754 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -10,7 +10,8 @@
10#include <linux/idr.h> 10#include <linux/idr.h>
11#include <linux/rculist.h> 11#include <linux/rculist.h>
12#include <linux/nsproxy.h> 12#include <linux/nsproxy.h>
13#include <linux/proc_fs.h> 13#include <linux/fs.h>
14#include <linux/proc_ns.h>
14#include <linux/file.h> 15#include <linux/file.h>
15#include <linux/export.h> 16#include <linux/export.h>
16#include <linux/user_namespace.h> 17#include <linux/user_namespace.h>
@@ -336,7 +337,7 @@ EXPORT_SYMBOL_GPL(__put_net);
336 337
337struct net *get_net_ns_by_fd(int fd) 338struct net *get_net_ns_by_fd(int fd)
338{ 339{
339 struct proc_inode *ei; 340 struct proc_ns *ei;
340 struct file *file; 341 struct file *file;
341 struct net *net; 342 struct net *net;
342 343
@@ -344,7 +345,7 @@ struct net *get_net_ns_by_fd(int fd)
344 if (IS_ERR(file)) 345 if (IS_ERR(file))
345 return ERR_CAST(file); 346 return ERR_CAST(file);
346 347
347 ei = PROC_I(file_inode(file)); 348 ei = get_proc_ns(file_inode(file));
348 if (ei->ns_ops == &netns_operations) 349 if (ei->ns_ops == &netns_operations)
349 net = get_net(ei->ns); 350 net = get_net(ei->ns);
350 else 351 else
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index fa32899006a2..cec074be8c43 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -47,7 +47,7 @@ static struct sk_buff_head skb_pool;
47 47
48static atomic_t trapped; 48static atomic_t trapped;
49 49
50static struct srcu_struct netpoll_srcu; 50DEFINE_STATIC_SRCU(netpoll_srcu);
51 51
52#define USEC_PER_POLL 50 52#define USEC_PER_POLL 50
53#define NETPOLL_RX_ENABLED 1 53#define NETPOLL_RX_ENABLED 1
@@ -206,17 +206,17 @@ static void netpoll_poll_dev(struct net_device *dev)
206 * the dev_open/close paths use this to block netpoll activity 206 * the dev_open/close paths use this to block netpoll activity
207 * while changing device state 207 * while changing device state
208 */ 208 */
209 if (!mutex_trylock(&ni->dev_lock)) 209 if (down_trylock(&ni->dev_lock))
210 return; 210 return;
211 211
212 if (!netif_running(dev)) { 212 if (!netif_running(dev)) {
213 mutex_unlock(&ni->dev_lock); 213 up(&ni->dev_lock);
214 return; 214 return;
215 } 215 }
216 216
217 ops = dev->netdev_ops; 217 ops = dev->netdev_ops;
218 if (!ops->ndo_poll_controller) { 218 if (!ops->ndo_poll_controller) {
219 mutex_unlock(&ni->dev_lock); 219 up(&ni->dev_lock);
220 return; 220 return;
221 } 221 }
222 222
@@ -225,7 +225,7 @@ static void netpoll_poll_dev(struct net_device *dev)
225 225
226 poll_napi(dev); 226 poll_napi(dev);
227 227
228 mutex_unlock(&ni->dev_lock); 228 up(&ni->dev_lock);
229 229
230 if (dev->flags & IFF_SLAVE) { 230 if (dev->flags & IFF_SLAVE) {
231 if (ni) { 231 if (ni) {
@@ -255,7 +255,7 @@ int netpoll_rx_disable(struct net_device *dev)
255 idx = srcu_read_lock(&netpoll_srcu); 255 idx = srcu_read_lock(&netpoll_srcu);
256 ni = srcu_dereference(dev->npinfo, &netpoll_srcu); 256 ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
257 if (ni) 257 if (ni)
258 mutex_lock(&ni->dev_lock); 258 down(&ni->dev_lock);
259 srcu_read_unlock(&netpoll_srcu, idx); 259 srcu_read_unlock(&netpoll_srcu, idx);
260 return 0; 260 return 0;
261} 261}
@@ -267,7 +267,7 @@ void netpoll_rx_enable(struct net_device *dev)
267 rcu_read_lock(); 267 rcu_read_lock();
268 ni = rcu_dereference(dev->npinfo); 268 ni = rcu_dereference(dev->npinfo);
269 if (ni) 269 if (ni)
270 mutex_unlock(&ni->dev_lock); 270 up(&ni->dev_lock);
271 rcu_read_unlock(); 271 rcu_read_unlock();
272} 272}
273EXPORT_SYMBOL(netpoll_rx_enable); 273EXPORT_SYMBOL(netpoll_rx_enable);
@@ -383,8 +383,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
383 if (__netif_tx_trylock(txq)) { 383 if (__netif_tx_trylock(txq)) {
384 if (!netif_xmit_stopped(txq)) { 384 if (!netif_xmit_stopped(txq)) {
385 if (vlan_tx_tag_present(skb) && 385 if (vlan_tx_tag_present(skb) &&
386 !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { 386 !vlan_hw_offload_capable(netif_skb_features(skb),
387 skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); 387 skb->vlan_proto)) {
388 skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
388 if (unlikely(!skb)) 389 if (unlikely(!skb))
389 break; 390 break;
390 skb->vlan_tci = 0; 391 skb->vlan_tci = 0;
@@ -1046,7 +1047,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
1046 INIT_LIST_HEAD(&npinfo->rx_np); 1047 INIT_LIST_HEAD(&npinfo->rx_np);
1047 1048
1048 spin_lock_init(&npinfo->rx_lock); 1049 spin_lock_init(&npinfo->rx_lock);
1049 mutex_init(&npinfo->dev_lock); 1050 sema_init(&npinfo->dev_lock, 1);
1050 skb_queue_head_init(&npinfo->neigh_tx); 1051 skb_queue_head_init(&npinfo->neigh_tx);
1051 skb_queue_head_init(&npinfo->txq); 1052 skb_queue_head_init(&npinfo->txq);
1052 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); 1053 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
@@ -1212,7 +1213,6 @@ EXPORT_SYMBOL(netpoll_setup);
1212static int __init netpoll_init(void) 1213static int __init netpoll_init(void)
1213{ 1214{
1214 skb_queue_head_init(&skb_pool); 1215 skb_queue_head_init(&skb_pool);
1215 init_srcu_struct(&netpoll_srcu);
1216 return 0; 1216 return 0;
1217} 1217}
1218core_initcall(netpoll_init); 1218core_initcall(netpoll_init);
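
The mutex-to-semaphore switch matters because netpoll_poll_dev() can run in contexts where sleeping-lock rules do not hold, while down_trylock()/up() remain safe there. The trylock-or-bail shape, modeled with POSIX semaphores (sem_trywait() standing in for down_trylock(); build with -pthread):

    #include <semaphore.h>
    #include <stdio.h>

    static sem_t dev_lock;

    /* Mirrors the netpoll pattern: try the lock, bail out if the
     * open/close path holds it, otherwise poll and release.
     */
    static void poll_dev(void)
    {
            if (sem_trywait(&dev_lock))  /* down_trylock() analogue */
                    return;              /* device state is changing */

            puts("polling controller");  /* ndo_poll_controller() here */
            sem_post(&dev_lock);         /* up() */
    }

    int main(void)
    {
            sem_init(&dev_lock, 0, 1);   /* sema_init(&lock, 1) */
            poll_dev();
            sem_destroy(&dev_lock);
            return 0;
    }
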
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6048fc1da1c2..11f2704c3810 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -508,7 +508,7 @@ out:
508 508
509static int pgctrl_open(struct inode *inode, struct file *file) 509static int pgctrl_open(struct inode *inode, struct file *file)
510{ 510{
511 return single_open(file, pgctrl_show, PDE(inode)->data); 511 return single_open(file, pgctrl_show, PDE_DATA(inode));
512} 512}
513 513
514static const struct file_operations pktgen_fops = { 514static const struct file_operations pktgen_fops = {
@@ -1685,7 +1685,7 @@ static ssize_t pktgen_if_write(struct file *file,
1685 1685
1686static int pktgen_if_open(struct inode *inode, struct file *file) 1686static int pktgen_if_open(struct inode *inode, struct file *file)
1687{ 1687{
1688 return single_open(file, pktgen_if_show, PDE(inode)->data); 1688 return single_open(file, pktgen_if_show, PDE_DATA(inode));
1689} 1689}
1690 1690
1691static const struct file_operations pktgen_if_fops = { 1691static const struct file_operations pktgen_if_fops = {
@@ -1823,7 +1823,7 @@ out:
1823 1823
1824static int pktgen_thread_open(struct inode *inode, struct file *file) 1824static int pktgen_thread_open(struct inode *inode, struct file *file)
1825{ 1825{
1826 return single_open(file, pktgen_thread_show, PDE(inode)->data); 1826 return single_open(file, pktgen_thread_show, PDE_DATA(inode));
1827} 1827}
1828 1828
1829static const struct file_operations pktgen_thread_fops = { 1829static const struct file_operations pktgen_thread_fops = {
@@ -1904,7 +1904,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
1904 if (pkt_dev->odev != dev) 1904 if (pkt_dev->odev != dev)
1905 continue; 1905 continue;
1906 1906
1907 remove_proc_entry(pkt_dev->entry->name, pn->proc_dir); 1907 proc_remove(pkt_dev->entry);
1908 1908
1909 pkt_dev->entry = proc_create_data(dev->name, 0600, 1909 pkt_dev->entry = proc_create_data(dev->name, 0600,
1910 pn->proc_dir, 1910 pn->proc_dir,
@@ -2198,7 +2198,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
2198 pkt_dev->curfl = 0; /*reset */ 2198 pkt_dev->curfl = 0; /*reset */
2199 } 2199 }
2200 } else { 2200 } else {
2201 flow = random32() % pkt_dev->cflows; 2201 flow = prandom_u32() % pkt_dev->cflows;
2202 pkt_dev->curfl = flow; 2202 pkt_dev->curfl = flow;
2203 2203
2204 if (pkt_dev->flows[flow].count > pkt_dev->lflow) { 2204 if (pkt_dev->flows[flow].count > pkt_dev->lflow) {
@@ -2246,7 +2246,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
2246 else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) { 2246 else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) {
2247 __u16 t; 2247 __u16 t;
2248 if (pkt_dev->flags & F_QUEUE_MAP_RND) { 2248 if (pkt_dev->flags & F_QUEUE_MAP_RND) {
2249 t = random32() % 2249 t = prandom_u32() %
2250 (pkt_dev->queue_map_max - 2250 (pkt_dev->queue_map_max -
2251 pkt_dev->queue_map_min + 1) 2251 pkt_dev->queue_map_min + 1)
2252 + pkt_dev->queue_map_min; 2252 + pkt_dev->queue_map_min;
@@ -2278,7 +2278,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2278 __u32 tmp; 2278 __u32 tmp;
2279 2279
2280 if (pkt_dev->flags & F_MACSRC_RND) 2280 if (pkt_dev->flags & F_MACSRC_RND)
2281 mc = random32() % pkt_dev->src_mac_count; 2281 mc = prandom_u32() % pkt_dev->src_mac_count;
2282 else { 2282 else {
2283 mc = pkt_dev->cur_src_mac_offset++; 2283 mc = pkt_dev->cur_src_mac_offset++;
2284 if (pkt_dev->cur_src_mac_offset >= 2284 if (pkt_dev->cur_src_mac_offset >=
@@ -2304,7 +2304,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2304 __u32 tmp; 2304 __u32 tmp;
2305 2305
2306 if (pkt_dev->flags & F_MACDST_RND) 2306 if (pkt_dev->flags & F_MACDST_RND)
2307 mc = random32() % pkt_dev->dst_mac_count; 2307 mc = prandom_u32() % pkt_dev->dst_mac_count;
2308 2308
2309 else { 2309 else {
2310 mc = pkt_dev->cur_dst_mac_offset++; 2310 mc = pkt_dev->cur_dst_mac_offset++;
@@ -2331,21 +2331,21 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2331 for (i = 0; i < pkt_dev->nr_labels; i++) 2331 for (i = 0; i < pkt_dev->nr_labels; i++)
2332 if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM) 2332 if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
2333 pkt_dev->labels[i] = MPLS_STACK_BOTTOM | 2333 pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
2334 ((__force __be32)random32() & 2334 ((__force __be32)prandom_u32() &
2335 htonl(0x000fffff)); 2335 htonl(0x000fffff));
2336 } 2336 }
2337 2337
2338 if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) { 2338 if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
2339 pkt_dev->vlan_id = random32() & (4096-1); 2339 pkt_dev->vlan_id = prandom_u32() & (4096 - 1);
2340 } 2340 }
2341 2341
2342 if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) { 2342 if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
2343 pkt_dev->svlan_id = random32() & (4096 - 1); 2343 pkt_dev->svlan_id = prandom_u32() & (4096 - 1);
2344 } 2344 }
2345 2345
2346 if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) { 2346 if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
2347 if (pkt_dev->flags & F_UDPSRC_RND) 2347 if (pkt_dev->flags & F_UDPSRC_RND)
2348 pkt_dev->cur_udp_src = random32() % 2348 pkt_dev->cur_udp_src = prandom_u32() %
2349 (pkt_dev->udp_src_max - pkt_dev->udp_src_min) 2349 (pkt_dev->udp_src_max - pkt_dev->udp_src_min)
2350 + pkt_dev->udp_src_min; 2350 + pkt_dev->udp_src_min;
2351 2351
@@ -2358,7 +2358,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2358 2358
2359 if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) { 2359 if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
2360 if (pkt_dev->flags & F_UDPDST_RND) { 2360 if (pkt_dev->flags & F_UDPDST_RND) {
2361 pkt_dev->cur_udp_dst = random32() % 2361 pkt_dev->cur_udp_dst = prandom_u32() %
2362 (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min) 2362 (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)
2363 + pkt_dev->udp_dst_min; 2363 + pkt_dev->udp_dst_min;
2364 } else { 2364 } else {
@@ -2375,7 +2375,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2375 if (imn < imx) { 2375 if (imn < imx) {
2376 __u32 t; 2376 __u32 t;
2377 if (pkt_dev->flags & F_IPSRC_RND) 2377 if (pkt_dev->flags & F_IPSRC_RND)
2378 t = random32() % (imx - imn) + imn; 2378 t = prandom_u32() % (imx - imn) + imn;
2379 else { 2379 else {
2380 t = ntohl(pkt_dev->cur_saddr); 2380 t = ntohl(pkt_dev->cur_saddr);
2381 t++; 2381 t++;
@@ -2396,17 +2396,15 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2396 __be32 s; 2396 __be32 s;
2397 if (pkt_dev->flags & F_IPDST_RND) { 2397 if (pkt_dev->flags & F_IPDST_RND) {
2398 2398
2399 t = random32() % (imx - imn) + imn; 2399 do {
2400 s = htonl(t); 2400 t = prandom_u32() %
2401 2401 (imx - imn) + imn;
2402 while (ipv4_is_loopback(s) ||
2403 ipv4_is_multicast(s) ||
2404 ipv4_is_lbcast(s) ||
2405 ipv4_is_zeronet(s) ||
2406 ipv4_is_local_multicast(s)) {
2407 t = random32() % (imx - imn) + imn;
2408 s = htonl(t); 2402 s = htonl(t);
2409 } 2403 } while (ipv4_is_loopback(s) ||
2404 ipv4_is_multicast(s) ||
2405 ipv4_is_lbcast(s) ||
2406 ipv4_is_zeronet(s) ||
2407 ipv4_is_local_multicast(s));
2410 pkt_dev->cur_daddr = s; 2408 pkt_dev->cur_daddr = s;
2411 } else { 2409 } else {
2412 t = ntohl(pkt_dev->cur_daddr); 2410 t = ntohl(pkt_dev->cur_daddr);
@@ -2437,7 +2435,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2437 2435
2438 for (i = 0; i < 4; i++) { 2436 for (i = 0; i < 4; i++) {
2439 pkt_dev->cur_in6_daddr.s6_addr32[i] = 2437 pkt_dev->cur_in6_daddr.s6_addr32[i] =
2440 (((__force __be32)random32() | 2438 (((__force __be32)prandom_u32() |
2441 pkt_dev->min_in6_daddr.s6_addr32[i]) & 2439 pkt_dev->min_in6_daddr.s6_addr32[i]) &
2442 pkt_dev->max_in6_daddr.s6_addr32[i]); 2440 pkt_dev->max_in6_daddr.s6_addr32[i]);
2443 } 2441 }
@@ -2447,7 +2445,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
2447 if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) { 2445 if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
2448 __u32 t; 2446 __u32 t;
2449 if (pkt_dev->flags & F_TXSIZE_RND) { 2447 if (pkt_dev->flags & F_TXSIZE_RND) {
2450 t = random32() % 2448 t = prandom_u32() %
2451 (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size) 2449 (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)
2452 + pkt_dev->min_pkt_size; 2450 + pkt_dev->min_pkt_size;
2453 } else { 2451 } else {
@@ -3576,8 +3574,6 @@ static void _rem_dev_from_if_list(struct pktgen_thread *t,
3576static int pktgen_remove_device(struct pktgen_thread *t, 3574static int pktgen_remove_device(struct pktgen_thread *t,
3577 struct pktgen_dev *pkt_dev) 3575 struct pktgen_dev *pkt_dev)
3578{ 3576{
3579 struct pktgen_net *pn = t->net;
3580
3581 pr_debug("remove_device pkt_dev=%p\n", pkt_dev); 3577 pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
3582 3578
3583 if (pkt_dev->running) { 3579 if (pkt_dev->running) {
@@ -3597,7 +3593,7 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3597 _rem_dev_from_if_list(t, pkt_dev); 3593 _rem_dev_from_if_list(t, pkt_dev);
3598 3594
3599 if (pkt_dev->entry) 3595 if (pkt_dev->entry)
3600 remove_proc_entry(pkt_dev->entry->name, pn->proc_dir); 3596 proc_remove(pkt_dev->entry);
3601 3597
3602#ifdef CONFIG_XFRM 3598#ifdef CONFIG_XFRM
3603 free_SAs(pkt_dev); 3599 free_SAs(pkt_dev);
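
The pktgen rework keeps the same pick-and-retry logic, just folded into a do/while. A small stand-alone model of the rejection loop, with rand() standing in for prandom_u32() and a toy validity test in place of the ipv4_is_*() battery:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            unsigned int imn = 0x0a000001;  /* 10.0.0.1       */
            unsigned int imx = 0x0afffffe;  /* 10.255.255.254 */
            unsigned int t;

            /* Same shape as the reworked loop: draw in [imn, imx) and
             * redraw until the value is usable. The real predicate is
             * the loopback/multicast/lbcast/zeronet battery; a toy
             * test stands in for it here.
             */
            do {
                    t = (unsigned int)rand() % (imx - imn) + imn;
            } while ((t & 0xff) == 0);

            printf("picked %u.%u.%u.%u\n", t >> 24, (t >> 16) & 0xff,
                   (t >> 8) & 0xff, t & 0xff);
            return 0;
    }
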
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b65441da74ab..a08bd2b7fe3f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -517,32 +517,6 @@ out:
517 return err; 517 return err;
518} 518}
519 519
520static const int rtm_min[RTM_NR_FAMILIES] =
521{
522 [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
523 [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
524 [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)),
525 [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)),
526 [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
527 [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
528 [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)),
529 [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)),
530 [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
531 [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)),
532};
533
534static const int rta_max[RTM_NR_FAMILIES] =
535{
536 [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX,
537 [RTM_FAM(RTM_NEWADDR)] = IFA_MAX,
538 [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX,
539 [RTM_FAM(RTM_NEWRULE)] = FRA_MAX,
540 [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX,
541 [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX,
542 [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX,
543 [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX,
544};
545
546int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) 520int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
547{ 521{
548 struct sock *rtnl = net->rtnl; 522 struct sock *rtnl = net->rtnl;
@@ -1072,7 +1046,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1072 rcu_read_lock(); 1046 rcu_read_lock();
1073 cb->seq = net->dev_base_seq; 1047 cb->seq = net->dev_base_seq;
1074 1048
1075 if (nlmsg_parse(cb->nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, 1049 if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
1076 ifla_policy) >= 0) { 1050 ifla_policy) >= 0) {
1077 1051
1078 if (tb[IFLA_EXT_MASK]) 1052 if (tb[IFLA_EXT_MASK])
@@ -1539,7 +1513,7 @@ errout:
1539 return err; 1513 return err;
1540} 1514}
1541 1515
1542static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1516static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
1543{ 1517{
1544 struct net *net = sock_net(skb->sk); 1518 struct net *net = sock_net(skb->sk);
1545 struct ifinfomsg *ifm; 1519 struct ifinfomsg *ifm;
@@ -1580,7 +1554,7 @@ errout:
1580 return err; 1554 return err;
1581} 1555}
1582 1556
1583static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1557static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
1584{ 1558{
1585 struct net *net = sock_net(skb->sk); 1559 struct net *net = sock_net(skb->sk);
1586 const struct rtnl_link_ops *ops; 1560 const struct rtnl_link_ops *ops;
@@ -1711,7 +1685,7 @@ static int rtnl_group_changelink(struct net *net, int group,
1711 return 0; 1685 return 0;
1712} 1686}
1713 1687
1714static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1688static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)
1715{ 1689{
1716 struct net *net = sock_net(skb->sk); 1690 struct net *net = sock_net(skb->sk);
1717 const struct rtnl_link_ops *ops; 1691 const struct rtnl_link_ops *ops;
@@ -1866,7 +1840,7 @@ out:
1866 } 1840 }
1867} 1841}
1868 1842
1869static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 1843static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh)
1870{ 1844{
1871 struct net *net = sock_net(skb->sk); 1845 struct net *net = sock_net(skb->sk);
1872 struct ifinfomsg *ifm; 1846 struct ifinfomsg *ifm;
@@ -1922,7 +1896,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
1922 u32 ext_filter_mask = 0; 1896 u32 ext_filter_mask = 0;
1923 u16 min_ifinfo_dump_size = 0; 1897 u16 min_ifinfo_dump_size = 0;
1924 1898
1925 if (nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, IFLA_MAX, 1899 if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
1926 ifla_policy) >= 0) { 1900 ifla_policy) >= 0) {
1927 if (tb[IFLA_EXT_MASK]) 1901 if (tb[IFLA_EXT_MASK])
1928 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); 1902 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1957,8 +1931,11 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
1957 if (rtnl_msg_handlers[idx] == NULL || 1931 if (rtnl_msg_handlers[idx] == NULL ||
1958 rtnl_msg_handlers[idx][type].dumpit == NULL) 1932 rtnl_msg_handlers[idx][type].dumpit == NULL)
1959 continue; 1933 continue;
1960 if (idx > s_idx) 1934 if (idx > s_idx) {
1961 memset(&cb->args[0], 0, sizeof(cb->args)); 1935 memset(&cb->args[0], 0, sizeof(cb->args));
1936 cb->prev_seq = 0;
1937 cb->seq = 0;
1938 }
1962 if (rtnl_msg_handlers[idx][type].dumpit(skb, cb)) 1939 if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))
1963 break; 1940 break;
1964 } 1941 }
@@ -2051,7 +2028,39 @@ errout:
2051 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); 2028 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2052} 2029}
2053 2030
2054static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 2031/**
2032 * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry
2033 */
2034int ndo_dflt_fdb_add(struct ndmsg *ndm,
2035 struct nlattr *tb[],
2036 struct net_device *dev,
2037 const unsigned char *addr,
2038 u16 flags)
2039{
2040 int err = -EINVAL;
2041
2042	/* If aging addresses are supported, the device will need to
2043 * implement its own handler for this.
2044 */
2045 if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
2046 pr_info("%s: FDB only supports static addresses\n", dev->name);
2047 return err;
2048 }
2049
2050 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
2051 err = dev_uc_add_excl(dev, addr);
2052 else if (is_multicast_ether_addr(addr))
2053 err = dev_mc_add_excl(dev, addr);
2054
2055 /* Only return duplicate errors if NLM_F_EXCL is set */
2056 if (err == -EEXIST && !(flags & NLM_F_EXCL))
2057 err = 0;
2058
2059 return err;
2060}
2061EXPORT_SYMBOL(ndo_dflt_fdb_add);
2062
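
Exporting the default also lets a driver keep its own ndo_fdb_add() as a thin policy layer and delegate the actual filter programming; a hypothetical hook, names assumed:

    static int mydrv_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
                             struct net_device *dev,
                             const unsigned char *addr, u16 flags)
    {
            /* Veto what the hardware cannot do, then reuse the generic
             * unicast/multicast filter handling exported above.
             */
            if (is_multicast_ether_addr(addr))
                    return -EOPNOTSUPP;

            return ndo_dflt_fdb_add(ndm, tb, dev, addr, flags);
    }
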
2063static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
2055{ 2064{
2056 struct net *net = sock_net(skb->sk); 2065 struct net *net = sock_net(skb->sk);
2057 struct ndmsg *ndm; 2066 struct ndmsg *ndm;
@@ -2082,7 +2091,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2082 } 2091 }
2083 2092
2084 addr = nla_data(tb[NDA_LLADDR]); 2093 addr = nla_data(tb[NDA_LLADDR]);
2085 if (!is_valid_ether_addr(addr)) { 2094 if (is_zero_ether_addr(addr)) {
2086 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); 2095 pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n");
2087 return -EINVAL; 2096 return -EINVAL;
2088 } 2097 }
@@ -2103,10 +2112,13 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2103 } 2112 }
2104 2113
2105 /* Embedded bridge, macvlan, and any other device support */ 2114 /* Embedded bridge, macvlan, and any other device support */
2106 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { 2115 if ((ndm->ndm_flags & NTF_SELF)) {
2107 err = dev->netdev_ops->ndo_fdb_add(ndm, tb, 2116 if (dev->netdev_ops->ndo_fdb_add)
2108 dev, addr, 2117 err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr,
2109 nlh->nlmsg_flags); 2118 nlh->nlmsg_flags);
2119 else
2120 err = ndo_dflt_fdb_add(ndm, tb, dev, addr,
2121 nlh->nlmsg_flags);
2110 2122
2111 if (!err) { 2123 if (!err) {
2112 rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); 2124 rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH);
@@ -2117,7 +2129,36 @@ out:
2117 return err; 2129 return err;
2118} 2130}
2119 2131
2120static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 2132/**
2133 * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry
2134 */
2135int ndo_dflt_fdb_del(struct ndmsg *ndm,
2136 struct nlattr *tb[],
2137 struct net_device *dev,
2138 const unsigned char *addr)
2139{
2140 int err = -EOPNOTSUPP;
2141
2142	/* If aging addresses are supported, the device will need to
2143 * implement its own handler for this.
2144 */
2145	if (!(ndm->ndm_state & NUD_PERMANENT)) {
2146 pr_info("%s: FDB only supports static addresses\n", dev->name);
2147 return -EINVAL;
2148 }
2149
2150 if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
2151 err = dev_uc_del(dev, addr);
2152 else if (is_multicast_ether_addr(addr))
2153 err = dev_mc_del(dev, addr);
2154 else
2155 err = -EINVAL;
2156
2157 return err;
2158}
2159EXPORT_SYMBOL(ndo_dflt_fdb_del);
2160
2161static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
2121{ 2162{
2122 struct net *net = sock_net(skb->sk); 2163 struct net *net = sock_net(skb->sk);
2123 struct ndmsg *ndm; 2164 struct ndmsg *ndm;
@@ -2151,7 +2192,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2151 } 2192 }
2152 2193
2153 addr = nla_data(tb[NDA_LLADDR]); 2194 addr = nla_data(tb[NDA_LLADDR]);
2154 if (!is_valid_ether_addr(addr)) { 2195 if (is_zero_ether_addr(addr)) {
2155 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); 2196 pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");
2156 return -EINVAL; 2197 return -EINVAL;
2157 } 2198 }
@@ -2174,8 +2215,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
2174 } 2215 }
2175 2216
2176 /* Embedded bridge, macvlan, and any other device support */ 2217 /* Embedded bridge, macvlan, and any other device support */
2177 if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { 2218 if (ndm->ndm_flags & NTF_SELF) {
2178 err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); 2219 if (dev->netdev_ops->ndo_fdb_del)
2220 err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr);
2221 else
2222 err = ndo_dflt_fdb_del(ndm, tb, dev, addr);
2179 2223
2180 if (!err) { 2224 if (!err) {
2181 rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); 2225 rtnl_fdb_notify(dev, addr, RTM_DELNEIGH);
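
With ndo_dflt_fdb_add() and ndo_dflt_fdb_del() exported, the NTF_SELF paths above no longer depend on a driver hook: a device that is satisfied with plain unicast/multicast address-list semantics can leave both ops unset and still service RTM_NEWNEIGH/RTM_DELNEIGH. A minimal sketch, assuming a hypothetical driver (example_xmit is illustrative):

	/* Omitting .ndo_fdb_add/.ndo_fdb_del means rtnl_fdb_add() and
	 * rtnl_fdb_del() above fall back to the ndo_dflt_fdb_* helpers. */
	static const struct net_device_ops example_netdev_ops = {
		.ndo_start_xmit	= example_xmit,
		/* no FDB ops: the rtnetlink software defaults apply */
	};
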
@@ -2220,7 +2264,7 @@ skip:
2220 * @dev: netdevice 2264 * @dev: netdevice
2221 * 2265 *
2222 * Default netdevice operation to dump the existing unicast address list. 2266 * Default netdevice operation to dump the existing unicast address list.
2223 * Returns zero on success. 2267 * Returns number of addresses from list put in skb.
2224 */ 2268 */
2225int ndo_dflt_fdb_dump(struct sk_buff *skb, 2269int ndo_dflt_fdb_dump(struct sk_buff *skb,
2226 struct netlink_callback *cb, 2270 struct netlink_callback *cb,
@@ -2260,6 +2304,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
2260 2304
2261 if (dev->netdev_ops->ndo_fdb_dump) 2305 if (dev->netdev_ops->ndo_fdb_dump)
2262 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); 2306 idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx);
2307 else
2308 idx = ndo_dflt_fdb_dump(skb, cb, dev, idx);
2263 } 2309 }
2264 rcu_read_unlock(); 2310 rcu_read_unlock();
2265 2311
@@ -2411,8 +2457,7 @@ errout:
2411 return err; 2457 return err;
2412} 2458}
2413 2459
2414static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, 2460static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)
2415 void *arg)
2416{ 2461{
2417 struct net *net = sock_net(skb->sk); 2462 struct net *net = sock_net(skb->sk);
2418 struct ifinfomsg *ifm; 2463 struct ifinfomsg *ifm;
@@ -2482,8 +2527,7 @@ out:
2482 return err; 2527 return err;
2483} 2528}
2484 2529
2485static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, 2530static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
2486 void *arg)
2487{ 2531{
2488 struct net *net = sock_net(skb->sk); 2532 struct net *net = sock_net(skb->sk);
2489 struct ifinfomsg *ifm; 2533 struct ifinfomsg *ifm;
@@ -2553,10 +2597,6 @@ out:
2553 return err; 2597 return err;
2554} 2598}
2555 2599
 2556/* Protected by RTNL semaphore. */
2557static struct rtattr **rta_buf;
2558static int rtattr_max;
2559
2560/* Process one rtnetlink message. */ 2600/* Process one rtnetlink message. */
2561 2601
2562static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) 2602static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -2564,7 +2604,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
2564 struct net *net = sock_net(skb->sk); 2604 struct net *net = sock_net(skb->sk);
2565 rtnl_doit_func doit; 2605 rtnl_doit_func doit;
2566 int sz_idx, kind; 2606 int sz_idx, kind;
2567 int min_len;
2568 int family; 2607 int family;
2569 int type; 2608 int type;
2570 int err; 2609 int err;
@@ -2576,10 +2615,10 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
2576 type -= RTM_BASE; 2615 type -= RTM_BASE;
2577 2616
2578 /* All the messages must have at least 1 byte length */ 2617 /* All the messages must have at least 1 byte length */
2579 if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) 2618 if (nlmsg_len(nlh) < sizeof(struct rtgenmsg))
2580 return 0; 2619 return 0;
2581 2620
2582 family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; 2621 family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
2583 sz_idx = type>>2; 2622 sz_idx = type>>2;
2584 kind = type&3; 2623 kind = type&3;
2585 2624
@@ -2612,32 +2651,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
2612 return err; 2651 return err;
2613 } 2652 }
2614 2653
2615 memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *)));
2616
2617 min_len = rtm_min[sz_idx];
2618 if (nlh->nlmsg_len < min_len)
2619 return -EINVAL;
2620
2621 if (nlh->nlmsg_len > min_len) {
2622 int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
2623 struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
2624
2625 while (RTA_OK(attr, attrlen)) {
2626 unsigned int flavor = attr->rta_type & NLA_TYPE_MASK;
2627 if (flavor) {
2628 if (flavor > rta_max[sz_idx])
2629 return -EINVAL;
2630 rta_buf[flavor-1] = attr;
2631 }
2632 attr = RTA_NEXT(attr, attrlen);
2633 }
2634 }
2635
2636 doit = rtnl_get_doit(family, type); 2654 doit = rtnl_get_doit(family, type);
2637 if (doit == NULL) 2655 if (doit == NULL)
2638 return -EOPNOTSUPP; 2656 return -EOPNOTSUPP;
2639 2657
2640 return doit(skb, nlh, (void *)&rta_buf[0]); 2658 return doit(skb, nlh);
2641} 2659}
2642 2660
2643static void rtnetlink_rcv(struct sk_buff *skb) 2661static void rtnetlink_rcv(struct sk_buff *skb)
@@ -2707,16 +2725,6 @@ static struct pernet_operations rtnetlink_net_ops = {
2707 2725
2708void __init rtnetlink_init(void) 2726void __init rtnetlink_init(void)
2709{ 2727{
2710 int i;
2711
2712 rtattr_max = 0;
2713 for (i = 0; i < ARRAY_SIZE(rta_max); i++)
2714 if (rta_max[i] > rtattr_max)
2715 rtattr_max = rta_max[i];
2716 rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL);
2717 if (!rta_buf)
2718 panic("rtnetlink_init: cannot allocate rta_buf\n");
2719
2720 if (register_pernet_subsys(&rtnetlink_net_ops)) 2728 if (register_pernet_subsys(&rtnetlink_net_ops))
2721 panic("rtnetlink_init: cannot initialize rtnetlink\n"); 2729 panic("rtnetlink_init: cannot initialize rtnetlink\n");
2722 2730
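
Dropping rta_buf changes the doit calling convention throughout this file and the per-protocol files below: rtnetlink no longer pre-parses attributes into a shared, RTNL-protected buffer, so every handler loses the trailing void *arg and validates its own attributes. A sketch of the resulting handler shape (example_* names are hypothetical):

	static int example_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
	{
		struct rtmsg *r = nlmsg_data(nlh);
		struct nlattr *tb[RTA_MAX + 1];
		int err;

		/* Each handler now parses and type-checks for itself. */
		err = nlmsg_parse(nlh, sizeof(*r), tb, RTA_MAX, example_policy);
		if (err < 0)
			return err;

		if (tb[RTA_DST])
			err = example_handle_dst(tb[RTA_DST]);
		return err;
	}
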
diff --git a/net/core/scm.c b/net/core/scm.c
index 2dc6cdaaae8a..03795d0147f2 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -187,22 +187,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
187 187
188 p->creds.uid = uid; 188 p->creds.uid = uid;
189 p->creds.gid = gid; 189 p->creds.gid = gid;
190
191 if (!p->cred ||
192 !uid_eq(p->cred->euid, uid) ||
193 !gid_eq(p->cred->egid, gid)) {
194 struct cred *cred;
195 err = -ENOMEM;
196 cred = prepare_creds();
197 if (!cred)
198 goto error;
199
200 cred->uid = cred->euid = uid;
201 cred->gid = cred->egid = gid;
202 if (p->cred)
203 put_cred(p->cred);
204 p->cred = cred;
205 }
206 break; 190 break;
207 } 191 }
208 default: 192 default:
@@ -306,8 +290,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
306 /* Bump the usage count and install the file. */ 290 /* Bump the usage count and install the file. */
307 sock = sock_from_file(fp[i], &err); 291 sock = sock_from_file(fp[i], &err);
308 if (sock) { 292 if (sock) {
309 sock_update_netprioidx(sock->sk, current); 293 sock_update_netprioidx(sock->sk);
310 sock_update_classid(sock->sk, current); 294 sock_update_classid(sock->sk);
311 } 295 }
312 fd_install(new_fd, get_file(fp[i])); 296 fd_install(new_fd, get_file(fp[i]));
313 } 297 }
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index e61a8bb7fce7..6a2f13cee86a 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -12,12 +12,10 @@
12 12
13static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; 13static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
14 14
15static int __init net_secret_init(void) 15void net_secret_init(void)
16{ 16{
17 get_random_bytes(net_secret, sizeof(net_secret)); 17 get_random_bytes(net_secret, sizeof(net_secret));
18 return 0;
19} 18}
20late_initcall(net_secret_init);
21 19
22#ifdef CONFIG_INET 20#ifdef CONFIG_INET
23static u32 seq_scale(u32 seq) 21static u32 seq_scale(u32 seq)
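
net_secret_init() becomes an ordinary exported function rather than a late_initcall, so the code that first hands out secure sequence numbers can seed the secret itself. A hedged sketch of a possible call site (the placement is an assumption, not part of this hunk):

	/* Hypothetical caller: seed net_secret[] before the first
	 * secure_tcp_sequence_number() consumer can run. */
	static int __init example_proto_init(void)
	{
		net_secret_init();
		return 0;
	}
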
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 33245ef54c3b..af9185d0be6a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -179,6 +179,33 @@ out:
179 * 179 *
180 */ 180 */
181 181
182struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node)
183{
184 struct sk_buff *skb;
185
186 /* Get the HEAD */
187 skb = kmem_cache_alloc_node(skbuff_head_cache,
188 gfp_mask & ~__GFP_DMA, node);
189 if (!skb)
190 goto out;
191
192 /*
193 * Only clear those fields we need to clear, not those that we will
194 * actually initialise below. Hence, don't put any more fields after
195 * the tail pointer in struct sk_buff!
196 */
197 memset(skb, 0, offsetof(struct sk_buff, tail));
198 skb->data = NULL;
199 skb->truesize = sizeof(struct sk_buff);
200 atomic_set(&skb->users, 1);
201
202#ifdef NET_SKBUFF_DATA_USES_OFFSET
203 skb->mac_header = ~0U;
204#endif
205out:
206 return skb;
207}
208
182/** 209/**
183 * __alloc_skb - allocate a network buffer 210 * __alloc_skb - allocate a network buffer
184 * @size: size to allocate 211 * @size: size to allocate
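
__alloc_skb_head() returns an skb with no data buffer: skb->data stays NULL and truesize covers only the struct itself, which is why skb_release_all() below learns to skip skb_release_data(). A minimal sketch of the alloc/free pairing (the caller shown is illustrative):

	static int example_use_head_only(void)
	{
		struct sk_buff *skb = __alloc_skb_head(GFP_KERNEL, NUMA_NO_NODE);

		if (!skb)
			return -ENOMEM;
		/* attach externally managed data here, then release: */
		kfree_skb(skb);	/* skb_release_all() sees skb->data == NULL */
		return 0;
	}
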
@@ -584,7 +611,8 @@ static void skb_release_head_state(struct sk_buff *skb)
584static void skb_release_all(struct sk_buff *skb) 611static void skb_release_all(struct sk_buff *skb)
585{ 612{
586 skb_release_head_state(skb); 613 skb_release_head_state(skb);
587 skb_release_data(skb); 614 if (likely(skb->data))
615 skb_release_data(skb);
588} 616}
589 617
590/** 618/**
@@ -673,6 +701,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
673 new->mac_header = old->mac_header; 701 new->mac_header = old->mac_header;
674 new->inner_transport_header = old->inner_transport_header; 702 new->inner_transport_header = old->inner_transport_header;
675 new->inner_network_header = old->inner_network_header; 703 new->inner_network_header = old->inner_network_header;
704 new->inner_mac_header = old->inner_mac_header;
676 skb_dst_copy(new, old); 705 skb_dst_copy(new, old);
677 new->rxhash = old->rxhash; 706 new->rxhash = old->rxhash;
678 new->ooo_okay = old->ooo_okay; 707 new->ooo_okay = old->ooo_okay;
@@ -706,6 +735,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
706 new->tc_verd = old->tc_verd; 735 new->tc_verd = old->tc_verd;
707#endif 736#endif
708#endif 737#endif
738 new->vlan_proto = old->vlan_proto;
709 new->vlan_tci = old->vlan_tci; 739 new->vlan_tci = old->vlan_tci;
710 740
711 skb_copy_secmark(new, old); 741 skb_copy_secmark(new, old);
@@ -867,6 +897,18 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
867} 897}
868EXPORT_SYMBOL(skb_clone); 898EXPORT_SYMBOL(skb_clone);
869 899
900static void skb_headers_offset_update(struct sk_buff *skb, int off)
901{
902 /* {transport,network,mac}_header and tail are relative to skb->head */
903 skb->transport_header += off;
904 skb->network_header += off;
905 if (skb_mac_header_was_set(skb))
906 skb->mac_header += off;
907 skb->inner_transport_header += off;
908 skb->inner_network_header += off;
909 skb->inner_mac_header += off;
910}
911
870static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 912static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
871{ 913{
872#ifndef NET_SKBUFF_DATA_USES_OFFSET 914#ifndef NET_SKBUFF_DATA_USES_OFFSET
@@ -879,13 +921,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
879 __copy_skb_header(new, old); 921 __copy_skb_header(new, old);
880 922
881#ifndef NET_SKBUFF_DATA_USES_OFFSET 923#ifndef NET_SKBUFF_DATA_USES_OFFSET
882 /* {transport,network,mac}_header are relative to skb->head */ 924 skb_headers_offset_update(new, offset);
883 new->transport_header += offset;
884 new->network_header += offset;
885 if (skb_mac_header_was_set(new))
886 new->mac_header += offset;
887 new->inner_transport_header += offset;
888 new->inner_network_header += offset;
889#endif 925#endif
890 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; 926 skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
891 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; 927 skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
@@ -1077,14 +1113,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
1077#else 1113#else
1078 skb->end = skb->head + size; 1114 skb->end = skb->head + size;
1079#endif 1115#endif
1080 /* {transport,network,mac}_header and tail are relative to skb->head */
1081 skb->tail += off; 1116 skb->tail += off;
1082 skb->transport_header += off; 1117 skb_headers_offset_update(skb, off);
1083 skb->network_header += off;
1084 if (skb_mac_header_was_set(skb))
1085 skb->mac_header += off;
1086 skb->inner_transport_header += off;
1087 skb->inner_network_header += off;
1088 /* Only adjust this if it actually is csum_start rather than csum */ 1118 /* Only adjust this if it actually is csum_start rather than csum */
1089 if (skb->ip_summed == CHECKSUM_PARTIAL) 1119 if (skb->ip_summed == CHECKSUM_PARTIAL)
1090 skb->csum_start += nhead; 1120 skb->csum_start += nhead;
@@ -1180,12 +1210,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
1180 if (n->ip_summed == CHECKSUM_PARTIAL) 1210 if (n->ip_summed == CHECKSUM_PARTIAL)
1181 n->csum_start += off; 1211 n->csum_start += off;
1182#ifdef NET_SKBUFF_DATA_USES_OFFSET 1212#ifdef NET_SKBUFF_DATA_USES_OFFSET
1183 n->transport_header += off; 1213 skb_headers_offset_update(n, off);
1184 n->network_header += off;
1185 if (skb_mac_header_was_set(skb))
1186 n->mac_header += off;
1187 n->inner_transport_header += off;
1188 n->inner_network_header += off;
1189#endif 1214#endif
1190 1215
1191 return n; 1216 return n;
@@ -2741,12 +2766,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
2741 unsigned int tnl_hlen = skb_tnl_header_len(skb); 2766 unsigned int tnl_hlen = skb_tnl_header_len(skb);
2742 unsigned int headroom; 2767 unsigned int headroom;
2743 unsigned int len; 2768 unsigned int len;
2769 __be16 proto;
2770 bool csum;
2744 int sg = !!(features & NETIF_F_SG); 2771 int sg = !!(features & NETIF_F_SG);
2745 int nfrags = skb_shinfo(skb)->nr_frags; 2772 int nfrags = skb_shinfo(skb)->nr_frags;
2746 int err = -ENOMEM; 2773 int err = -ENOMEM;
2747 int i = 0; 2774 int i = 0;
2748 int pos; 2775 int pos;
2749 2776
2777 proto = skb_network_protocol(skb);
2778 if (unlikely(!proto))
2779 return ERR_PTR(-EINVAL);
2780
2781 csum = !!can_checksum_protocol(features, proto);
2750 __skb_push(skb, doffset); 2782 __skb_push(skb, doffset);
2751 headroom = skb_headroom(skb); 2783 headroom = skb_headroom(skb);
2752 pos = skb_headlen(skb); 2784 pos = skb_headlen(skb);
@@ -2884,6 +2916,12 @@ skip_fraglist:
2884 nskb->data_len = len - hsize; 2916 nskb->data_len = len - hsize;
2885 nskb->len += nskb->data_len; 2917 nskb->len += nskb->data_len;
2886 nskb->truesize += nskb->data_len; 2918 nskb->truesize += nskb->data_len;
2919
2920 if (!csum) {
2921 nskb->csum = skb_checksum(nskb, doffset,
2922 nskb->len - doffset, 0);
2923 nskb->ip_summed = CHECKSUM_NONE;
2924 }
2887 } while ((offset += len) < skb->len); 2925 } while ((offset += len) < skb->len);
2888 2926
2889 return segs; 2927 return segs;
@@ -3289,12 +3327,8 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3289 if (!sk) 3327 if (!sk)
3290 return; 3328 return;
3291 3329
3292 skb = skb_clone(orig_skb, GFP_ATOMIC);
3293 if (!skb)
3294 return;
3295
3296 if (hwtstamps) { 3330 if (hwtstamps) {
3297 *skb_hwtstamps(skb) = 3331 *skb_hwtstamps(orig_skb) =
3298 *hwtstamps; 3332 *hwtstamps;
3299 } else { 3333 } else {
3300 /* 3334 /*
@@ -3302,9 +3336,13 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3302 * so keep the shared tx_flags and only 3336 * so keep the shared tx_flags and only
3303 * store software time stamp 3337 * store software time stamp
3304 */ 3338 */
3305 skb->tstamp = ktime_get_real(); 3339 orig_skb->tstamp = ktime_get_real();
3306 } 3340 }
3307 3341
3342 skb = skb_clone(orig_skb, GFP_ATOMIC);
3343 if (!skb)
3344 return;
3345
3308 serr = SKB_EXT_ERR(skb); 3346 serr = SKB_EXT_ERR(skb);
3309 memset(serr, 0, sizeof(*serr)); 3347 memset(serr, 0, sizeof(*serr));
3310 serr->ee.ee_errno = ENOMSG; 3348 serr->ee.ee_errno = ENOMSG;
@@ -3361,6 +3399,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
3361 skb->ip_summed = CHECKSUM_PARTIAL; 3399 skb->ip_summed = CHECKSUM_PARTIAL;
3362 skb->csum_start = skb_headroom(skb) + start; 3400 skb->csum_start = skb_headroom(skb) + start;
3363 skb->csum_offset = off; 3401 skb->csum_offset = off;
3402 skb_set_transport_header(skb, start);
3364 return true; 3403 return true;
3365} 3404}
3366EXPORT_SYMBOL_GPL(skb_partial_csum_set); 3405EXPORT_SYMBOL_GPL(skb_partial_csum_set);
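
skb_partial_csum_set() now also positions the transport header at the checksum start, so paths that accept untrusted CHECKSUM_PARTIAL metadata (virtio_net and tun use this helper) come away with a usable transport header. Sketch of the usual call pattern:

	static int example_set_csum(struct sk_buff *skb, u16 start, u16 off)
	{
		/* On success the transport header now points at 'start'. */
		if (!skb_partial_csum_set(skb, start, off))
			return -EINVAL;	/* offsets fall outside the packet */
		return 0;
	}
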
diff --git a/net/core/sock.c b/net/core/sock.c
index b261a7977746..d4f4cea726e7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -907,6 +907,10 @@ set_rcvbuf:
907 sock_valbool_flag(sk, SOCK_NOFCS, valbool); 907 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
908 break; 908 break;
909 909
910 case SO_SELECT_ERR_QUEUE:
911 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
912 break;
913
910 default: 914 default:
911 ret = -ENOPROTOOPT; 915 ret = -ENOPROTOOPT;
912 break; 916 break;
@@ -1160,6 +1164,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
1160 v.val = sock_flag(sk, SOCK_FILTER_LOCKED); 1164 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1161 break; 1165 break;
1162 1166
1167 case SO_SELECT_ERR_QUEUE:
1168 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1169 break;
1170
1163 default: 1171 default:
1164 return -ENOPROTOOPT; 1172 return -ENOPROTOOPT;
1165 } 1173 }
@@ -1298,13 +1306,12 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
1298 module_put(owner); 1306 module_put(owner);
1299} 1307}
1300 1308
1301#ifdef CONFIG_CGROUPS
1302#if IS_ENABLED(CONFIG_NET_CLS_CGROUP) 1309#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
1303void sock_update_classid(struct sock *sk, struct task_struct *task) 1310void sock_update_classid(struct sock *sk)
1304{ 1311{
1305 u32 classid; 1312 u32 classid;
1306 1313
1307 classid = task_cls_classid(task); 1314 classid = task_cls_classid(current);
1308 if (classid != sk->sk_classid) 1315 if (classid != sk->sk_classid)
1309 sk->sk_classid = classid; 1316 sk->sk_classid = classid;
1310} 1317}
@@ -1312,16 +1319,15 @@ EXPORT_SYMBOL(sock_update_classid);
1312#endif 1319#endif
1313 1320
1314#if IS_ENABLED(CONFIG_NETPRIO_CGROUP) 1321#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
1315void sock_update_netprioidx(struct sock *sk, struct task_struct *task) 1322void sock_update_netprioidx(struct sock *sk)
1316{ 1323{
1317 if (in_interrupt()) 1324 if (in_interrupt())
1318 return; 1325 return;
1319 1326
1320 sk->sk_cgrp_prioidx = task_netprioidx(task); 1327 sk->sk_cgrp_prioidx = task_netprioidx(current);
1321} 1328}
1322EXPORT_SYMBOL_GPL(sock_update_netprioidx); 1329EXPORT_SYMBOL_GPL(sock_update_netprioidx);
1323#endif 1330#endif
1324#endif
1325 1331
1326/** 1332/**
1327 * sk_alloc - All socket objects are allocated here 1333 * sk_alloc - All socket objects are allocated here
@@ -1347,8 +1353,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1347 sock_net_set(sk, get_net(net)); 1353 sock_net_set(sk, get_net(net));
1348 atomic_set(&sk->sk_wmem_alloc, 1); 1354 atomic_set(&sk->sk_wmem_alloc, 1);
1349 1355
1350 sock_update_classid(sk, current); 1356 sock_update_classid(sk);
1351 sock_update_netprioidx(sk, current); 1357 sock_update_netprioidx(sk);
1352 } 1358 }
1353 1359
1354 return sk; 1360 return sk;
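
SO_SELECT_ERR_QUEUE is a plain boolean option: with the flag set, a pending error-queue packet (a transmit timestamp, for instance) can wake select()/poll() on its own rather than only piggybacking on POLLERR. A hypothetical userspace use (not part of the patch):

	int one = 1;

	if (setsockopt(fd, SOL_SOCKET, SO_SELECT_ERR_QUEUE,
		       &one, sizeof(one)) < 0)
		perror("SO_SELECT_ERR_QUEUE");
	/* poll() can now flag the socket when the error queue fills. */
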
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a29e90cf36b7..d5bef0b0f639 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -49,6 +49,39 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
49} 49}
50EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); 50EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
51 51
52int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
53 struct sk_buff *skb, int attrtype)
54{
55 struct nlattr *attr;
56 struct sk_filter *filter;
57 unsigned int len;
58 int err = 0;
59
60 if (!ns_capable(user_ns, CAP_NET_ADMIN)) {
61 nla_reserve(skb, attrtype, 0);
62 return 0;
63 }
64
65 rcu_read_lock();
66
67 filter = rcu_dereference(sk->sk_filter);
68 len = filter ? filter->len * sizeof(struct sock_filter) : 0;
69
70 attr = nla_reserve(skb, attrtype, len);
71 if (attr == NULL) {
72 err = -EMSGSIZE;
73 goto out;
74 }
75
76 if (filter)
77 memcpy(nla_data(attr), filter->insns, len);
78
79out:
80 rcu_read_unlock();
81 return err;
82}
83EXPORT_SYMBOL(sock_diag_put_filterinfo);
84
52void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)) 85void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
53{ 86{
54 mutex_lock(&sock_diag_table_mutex); 87 mutex_lock(&sock_diag_table_mutex);
diff --git a/net/core/utils.c b/net/core/utils.c
index e3487e461939..3c7f5b51b979 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -17,6 +17,7 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/jiffies.h> 18#include <linux/jiffies.h>
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/ctype.h>
20#include <linux/inet.h> 21#include <linux/inet.h>
21#include <linux/mm.h> 22#include <linux/mm.h>
22#include <linux/net.h> 23#include <linux/net.h>
@@ -348,9 +349,7 @@ int mac_pton(const char *s, u8 *mac)
348 349
349 /* Don't dirty result unless string is valid MAC. */ 350 /* Don't dirty result unless string is valid MAC. */
350 for (i = 0; i < ETH_ALEN; i++) { 351 for (i = 0; i < ETH_ALEN; i++) {
351 if (!strchr("0123456789abcdefABCDEF", s[i * 3])) 352 if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1]))
352 return 0;
353 if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1]))
354 return 0; 353 return 0;
355 if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') 354 if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')
356 return 0; 355 return 0;
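
mac_pton() keeps its all-or-nothing contract (the output buffer is written only after the whole string validates), now using ctype's isxdigit() instead of strchr() over a hex-digit literal. Usage sketch:

	/* mac[] is left untouched if the string is not a valid MAC. */
	u8 mac[ETH_ALEN];

	if (!mac_pton("00:11:22:33:44:55", mac))
		pr_err("invalid MAC address\n");
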
diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c
index 1d9eb7c60a68..4f72fc40bf02 100644
--- a/net/dcb/dcbevent.c
+++ b/net/dcb/dcbevent.c
@@ -20,6 +20,7 @@
20#include <linux/rtnetlink.h> 20#include <linux/rtnetlink.h>
21#include <linux/notifier.h> 21#include <linux/notifier.h>
22#include <linux/export.h> 22#include <linux/export.h>
23#include <net/dcbevent.h>
23 24
24static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain); 25static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain);
25 26
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 21291f1abcd6..40d5829ed36a 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1658,7 +1658,7 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = {
1658 [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get }, 1658 [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get },
1659}; 1659};
1660 1660
1661static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 1661static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
1662{ 1662{
1663 struct net *net = sock_net(skb->sk); 1663 struct net *net = sock_net(skb->sk);
1664 struct net_device *netdev; 1664 struct net_device *netdev;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4f9f5eb478f1..ebc54fef85a5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
500 return &rt->dst; 500 return &rt->dst;
501} 501}
502 502
503static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, 503static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
504 struct request_values *rv_unused)
505{ 504{
506 int err = -1; 505 int err = -1;
507 struct sk_buff *skb; 506 struct sk_buff *skb;
@@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
658 dreq->dreq_gss = dreq->dreq_iss; 657 dreq->dreq_gss = dreq->dreq_iss;
659 dreq->dreq_service = service; 658 dreq->dreq_service = service;
660 659
661 if (dccp_v4_send_response(sk, req, NULL)) 660 if (dccp_v4_send_response(sk, req))
662 goto drop_and_free; 661 goto drop_and_free;
663 662
664 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 663 inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6e05981f271e..9c61f9c02fdb 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -213,8 +213,7 @@ out:
213} 213}
214 214
215 215
216static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, 216static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
217 struct request_values *rv_unused)
218{ 217{
219 struct inet6_request_sock *ireq6 = inet6_rsk(req); 218 struct inet6_request_sock *ireq6 = inet6_rsk(req);
220 struct ipv6_pinfo *np = inet6_sk(sk); 219 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
428 dreq->dreq_gss = dreq->dreq_iss; 427 dreq->dreq_gss = dreq->dreq_iss;
429 dreq->dreq_service = service; 428 dreq->dreq_service = service;
430 429
431 if (dccp_v6_send_response(sk, req, NULL)) 430 if (dccp_v6_send_response(sk, req))
432 goto drop_and_free; 431 goto drop_and_free;
433 432
434 inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 433 inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index c8da116d84a4..7d9197063ebb 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -563,7 +563,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
563 .len = IFNAMSIZ - 1 }, 563 .len = IFNAMSIZ - 1 },
564}; 564};
565 565
566static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 566static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
567{ 567{
568 struct net *net = sock_net(skb->sk); 568 struct net *net = sock_net(skb->sk);
569 struct nlattr *tb[IFA_MAX+1]; 569 struct nlattr *tb[IFA_MAX+1];
@@ -607,7 +607,7 @@ errout:
607 return err; 607 return err;
608} 608}
609 609
610static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 610static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
611{ 611{
612 struct net *net = sock_net(skb->sk); 612 struct net *net = sock_net(skb->sk);
613 struct nlattr *tb[IFA_MAX+1]; 613 struct nlattr *tb[IFA_MAX+1];
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index e36614eccc04..57dc159245ec 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -145,22 +145,10 @@ static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi
145 return NULL; 145 return NULL;
146} 146}
147 147
148__le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type) 148static int dn_fib_count_nhs(const struct nlattr *attr)
149{ 149{
150 while(RTA_OK(attr,attrlen)) { 150 struct rtnexthop *nhp = nla_data(attr);
151 if (attr->rta_type == type) 151 int nhs = 0, nhlen = nla_len(attr);
152 return *(__le16*)RTA_DATA(attr);
153 attr = RTA_NEXT(attr, attrlen);
154 }
155
156 return 0;
157}
158
159static int dn_fib_count_nhs(struct rtattr *rta)
160{
161 int nhs = 0;
162 struct rtnexthop *nhp = RTA_DATA(rta);
163 int nhlen = RTA_PAYLOAD(rta);
164 152
165 while(nhlen >= (int)sizeof(struct rtnexthop)) { 153 while(nhlen >= (int)sizeof(struct rtnexthop)) {
166 if ((nhlen -= nhp->rtnh_len) < 0) 154 if ((nhlen -= nhp->rtnh_len) < 0)
@@ -172,10 +160,11 @@ static int dn_fib_count_nhs(struct rtattr *rta)
172 return nhs; 160 return nhs;
173} 161}
174 162
175static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) 163static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
164 const struct rtmsg *r)
176{ 165{
177 struct rtnexthop *nhp = RTA_DATA(rta); 166 struct rtnexthop *nhp = nla_data(attr);
178 int nhlen = RTA_PAYLOAD(rta); 167 int nhlen = nla_len(attr);
179 168
180 change_nexthops(fi) { 169 change_nexthops(fi) {
181 int attrlen = nhlen - sizeof(struct rtnexthop); 170 int attrlen = nhlen - sizeof(struct rtnexthop);
@@ -187,7 +176,10 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, cons
187 nh->nh_weight = nhp->rtnh_hops + 1; 176 nh->nh_weight = nhp->rtnh_hops + 1;
188 177
189 if (attrlen) { 178 if (attrlen) {
190 nh->nh_gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 179 struct nlattr *gw_attr;
180
181 gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
182 nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
191 } 183 }
192 nhp = RTNH_NEXT(nhp); 184 nhp = RTNH_NEXT(nhp);
193 } endfor_nexthops(fi); 185 } endfor_nexthops(fi);
@@ -268,7 +260,8 @@ out:
268} 260}
269 261
270 262
271struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta *rta, const struct nlmsghdr *nlh, int *errp) 263struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[],
264 const struct nlmsghdr *nlh, int *errp)
272{ 265{
273 int err; 266 int err;
274 struct dn_fib_info *fi = NULL; 267 struct dn_fib_info *fi = NULL;
@@ -281,11 +274,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
281 if (dn_fib_props[r->rtm_type].scope > r->rtm_scope) 274 if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)
282 goto err_inval; 275 goto err_inval;
283 276
284 if (rta->rta_mp) { 277 if (attrs[RTA_MULTIPATH] &&
285 nhs = dn_fib_count_nhs(rta->rta_mp); 278 (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0)
286 if (nhs == 0) 279 goto err_inval;
287 goto err_inval;
288 }
289 280
290 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); 281 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);
291 err = -ENOBUFS; 282 err = -ENOBUFS;
@@ -295,53 +286,65 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
295 fi->fib_protocol = r->rtm_protocol; 286 fi->fib_protocol = r->rtm_protocol;
296 fi->fib_nhs = nhs; 287 fi->fib_nhs = nhs;
297 fi->fib_flags = r->rtm_flags; 288 fi->fib_flags = r->rtm_flags;
298 if (rta->rta_priority)
299 fi->fib_priority = *rta->rta_priority;
300 if (rta->rta_mx) {
301 int attrlen = RTA_PAYLOAD(rta->rta_mx);
302 struct rtattr *attr = RTA_DATA(rta->rta_mx);
303 289
304 while(RTA_OK(attr, attrlen)) { 290 if (attrs[RTA_PRIORITY])
305 unsigned int flavour = attr->rta_type; 291 fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]);
292
293 if (attrs[RTA_METRICS]) {
294 struct nlattr *attr;
295 int rem;
306 296
307 if (flavour) { 297 nla_for_each_nested(attr, attrs[RTA_METRICS], rem) {
308 if (flavour > RTAX_MAX) 298 int type = nla_type(attr);
299
300 if (type) {
301 if (type > RTAX_MAX || nla_len(attr) < 4)
309 goto err_inval; 302 goto err_inval;
310 fi->fib_metrics[flavour-1] = *(unsigned int *)RTA_DATA(attr); 303
304 fi->fib_metrics[type-1] = nla_get_u32(attr);
311 } 305 }
312 attr = RTA_NEXT(attr, attrlen);
313 } 306 }
314 } 307 }
315 if (rta->rta_prefsrc)
316 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 2);
317 308
318 if (rta->rta_mp) { 309 if (attrs[RTA_PREFSRC])
319 if ((err = dn_fib_get_nhs(fi, rta->rta_mp, r)) != 0) 310 fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]);
311
312 if (attrs[RTA_MULTIPATH]) {
313 if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0)
320 goto failure; 314 goto failure;
321 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) 315
316 if (attrs[RTA_OIF] &&
317 fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF]))
322 goto err_inval; 318 goto err_inval;
323 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 2)) 319
320 if (attrs[RTA_GATEWAY] &&
321 fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY]))
324 goto err_inval; 322 goto err_inval;
325 } else { 323 } else {
326 struct dn_fib_nh *nh = fi->fib_nh; 324 struct dn_fib_nh *nh = fi->fib_nh;
327 if (rta->rta_oif) 325
328 nh->nh_oif = *rta->rta_oif; 326 if (attrs[RTA_OIF])
329 if (rta->rta_gw) 327 nh->nh_oif = nla_get_u32(attrs[RTA_OIF]);
330 memcpy(&nh->nh_gw, rta->rta_gw, 2); 328
329 if (attrs[RTA_GATEWAY])
330 nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
331
331 nh->nh_flags = r->rtm_flags; 332 nh->nh_flags = r->rtm_flags;
332 nh->nh_weight = 1; 333 nh->nh_weight = 1;
333 } 334 }
334 335
335 if (r->rtm_type == RTN_NAT) { 336 if (r->rtm_type == RTN_NAT) {
336 if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) 337 if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF])
337 goto err_inval; 338 goto err_inval;
338 memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 2); 339
340 fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
339 goto link_it; 341 goto link_it;
340 } 342 }
341 343
342 if (dn_fib_props[r->rtm_type].error) { 344 if (dn_fib_props[r->rtm_type].error) {
343 if (rta->rta_gw || rta->rta_oif || rta->rta_mp) 345 if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH])
344 goto err_inval; 346 goto err_inval;
347
345 goto link_it; 348 goto link_it;
346 } 349 }
347 350
@@ -367,8 +370,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta
367 } 370 }
368 371
369 if (fi->fib_prefsrc) { 372 if (fi->fib_prefsrc) {
370 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || 373 if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] ||
371 memcmp(&fi->fib_prefsrc, rta->rta_dst, 2)) 374 fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST]))
372 if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) 375 if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
373 goto err_inval; 376 goto err_inval;
374 } 377 }
@@ -486,39 +489,21 @@ void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res)
486 spin_unlock_bh(&dn_fib_multipath_lock); 489 spin_unlock_bh(&dn_fib_multipath_lock);
487} 490}
488 491
489 492static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table)
490static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta)
491{
492 int i;
493
494 for(i = 1; i <= RTA_MAX; i++) {
495 struct rtattr *attr = rta[i-1];
496 if (attr) {
497 if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2)
498 return -EINVAL;
499 if (i != RTA_MULTIPATH && i != RTA_METRICS &&
500 i != RTA_TABLE)
501 rta[i-1] = (struct rtattr *)RTA_DATA(attr);
502 }
503 }
504
505 return 0;
506}
507
508static inline u32 rtm_get_table(struct rtattr **rta, u8 table)
509{ 493{
510 if (rta[RTA_TABLE - 1]) 494 if (attrs[RTA_TABLE])
511 table = nla_get_u32((struct nlattr *) rta[RTA_TABLE - 1]); 495 table = nla_get_u32(attrs[RTA_TABLE]);
512 496
513 return table; 497 return table;
514} 498}
515 499
516static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 500static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
517{ 501{
518 struct net *net = sock_net(skb->sk); 502 struct net *net = sock_net(skb->sk);
519 struct dn_fib_table *tb; 503 struct dn_fib_table *tb;
520 struct rtattr **rta = arg; 504 struct rtmsg *r = nlmsg_data(nlh);
521 struct rtmsg *r = NLMSG_DATA(nlh); 505 struct nlattr *attrs[RTA_MAX+1];
506 int err;
522 507
523 if (!capable(CAP_NET_ADMIN)) 508 if (!capable(CAP_NET_ADMIN))
524 return -EPERM; 509 return -EPERM;
@@ -526,22 +511,24 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
526 if (!net_eq(net, &init_net)) 511 if (!net_eq(net, &init_net))
527 return -EINVAL; 512 return -EINVAL;
528 513
529 if (dn_fib_check_attr(r, rta)) 514 err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy);
530 return -EINVAL; 515 if (err < 0)
516 return err;
531 517
532 tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); 518 tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0);
533 if (tb) 519 if (!tb)
534 return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); 520 return -ESRCH;
535 521
536 return -ESRCH; 522 return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb));
537} 523}
538 524
539static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 525static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
540{ 526{
541 struct net *net = sock_net(skb->sk); 527 struct net *net = sock_net(skb->sk);
542 struct dn_fib_table *tb; 528 struct dn_fib_table *tb;
543 struct rtattr **rta = arg; 529 struct rtmsg *r = nlmsg_data(nlh);
544 struct rtmsg *r = NLMSG_DATA(nlh); 530 struct nlattr *attrs[RTA_MAX+1];
531 int err;
545 532
546 if (!capable(CAP_NET_ADMIN)) 533 if (!capable(CAP_NET_ADMIN))
547 return -EPERM; 534 return -EPERM;
@@ -549,14 +536,15 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *
549 if (!net_eq(net, &init_net)) 536 if (!net_eq(net, &init_net))
550 return -EINVAL; 537 return -EINVAL;
551 538
552 if (dn_fib_check_attr(r, rta)) 539 err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy);
553 return -EINVAL; 540 if (err < 0)
541 return err;
554 542
555 tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); 543 tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1);
556 if (tb) 544 if (!tb)
557 return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); 545 return -ENOBUFS;
558 546
559 return -ENOBUFS; 547 return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb));
560} 548}
561 549
562static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) 550static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
@@ -566,10 +554,31 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad
566 struct nlmsghdr nlh; 554 struct nlmsghdr nlh;
567 struct rtmsg rtm; 555 struct rtmsg rtm;
568 } req; 556 } req;
569 struct dn_kern_rta rta; 557 struct {
558 struct nlattr hdr;
559 __le16 dst;
560 } dst_attr = {
561 .dst = dst,
562 };
563 struct {
564 struct nlattr hdr;
565 __le16 prefsrc;
566 } prefsrc_attr = {
567 .prefsrc = ifa->ifa_local,
568 };
569 struct {
570 struct nlattr hdr;
571 u32 oif;
572 } oif_attr = {
573 .oif = ifa->ifa_dev->dev->ifindex,
574 };
575 struct nlattr *attrs[RTA_MAX+1] = {
576 [RTA_DST] = (struct nlattr *) &dst_attr,
577 [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr,
578 [RTA_OIF] = (struct nlattr *) &oif_attr,
579 };
570 580
571 memset(&req.rtm, 0, sizeof(req.rtm)); 581 memset(&req.rtm, 0, sizeof(req.rtm));
572 memset(&rta, 0, sizeof(rta));
573 582
574 if (type == RTN_UNICAST) 583 if (type == RTN_UNICAST)
575 tb = dn_fib_get_table(RT_MIN_TABLE, 1); 584 tb = dn_fib_get_table(RT_MIN_TABLE, 1);
@@ -591,14 +600,10 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad
591 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); 600 req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
592 req.rtm.rtm_type = type; 601 req.rtm.rtm_type = type;
593 602
594 rta.rta_dst = &dst;
595 rta.rta_prefsrc = &ifa->ifa_local;
596 rta.rta_oif = &ifa->ifa_dev->dev->ifindex;
597
598 if (cmd == RTM_NEWROUTE) 603 if (cmd == RTM_NEWROUTE)
599 tb->insert(tb, &req.rtm, &rta, &req.nlh, NULL); 604 tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL);
600 else 605 else
601 tb->delete(tb, &req.rtm, &rta, &req.nlh, NULL); 606 tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL);
602} 607}
603 608
604static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa) 609static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 5ac0e153ef83..fe32388ea24f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1613,23 +1613,41 @@ errout:
1613 return -EMSGSIZE; 1613 return -EMSGSIZE;
1614} 1614}
1615 1615
1616const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = {
1617 [RTA_DST] = { .type = NLA_U16 },
1618 [RTA_SRC] = { .type = NLA_U16 },
1619 [RTA_IIF] = { .type = NLA_U32 },
1620 [RTA_OIF] = { .type = NLA_U32 },
1621 [RTA_GATEWAY] = { .type = NLA_U16 },
1622 [RTA_PRIORITY] = { .type = NLA_U32 },
1623 [RTA_PREFSRC] = { .type = NLA_U16 },
1624 [RTA_METRICS] = { .type = NLA_NESTED },
1625 [RTA_MULTIPATH] = { .type = NLA_NESTED },
1626 [RTA_TABLE] = { .type = NLA_U32 },
1627 [RTA_MARK] = { .type = NLA_U32 },
1628};
1629
1616/* 1630/*
1617 * This is called by both endnodes and routers now. 1631 * This is called by both endnodes and routers now.
1618 */ 1632 */
1619static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) 1633static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1620{ 1634{
1621 struct net *net = sock_net(in_skb->sk); 1635 struct net *net = sock_net(in_skb->sk);
1622 struct rtattr **rta = arg;
1623 struct rtmsg *rtm = nlmsg_data(nlh); 1636 struct rtmsg *rtm = nlmsg_data(nlh);
1624 struct dn_route *rt = NULL; 1637 struct dn_route *rt = NULL;
1625 struct dn_skb_cb *cb; 1638 struct dn_skb_cb *cb;
1626 int err; 1639 int err;
1627 struct sk_buff *skb; 1640 struct sk_buff *skb;
1628 struct flowidn fld; 1641 struct flowidn fld;
1642 struct nlattr *tb[RTA_MAX+1];
1629 1643
1630 if (!net_eq(net, &init_net)) 1644 if (!net_eq(net, &init_net))
1631 return -EINVAL; 1645 return -EINVAL;
1632 1646
1647 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy);
1648 if (err < 0)
1649 return err;
1650
1633 memset(&fld, 0, sizeof(fld)); 1651 memset(&fld, 0, sizeof(fld));
1634 fld.flowidn_proto = DNPROTO_NSP; 1652 fld.flowidn_proto = DNPROTO_NSP;
1635 1653
@@ -1639,12 +1657,14 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1639 skb_reset_mac_header(skb); 1657 skb_reset_mac_header(skb);
1640 cb = DN_SKB_CB(skb); 1658 cb = DN_SKB_CB(skb);
1641 1659
1642 if (rta[RTA_SRC-1]) 1660 if (tb[RTA_SRC])
1643 memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); 1661 fld.saddr = nla_get_le16(tb[RTA_SRC]);
1644 if (rta[RTA_DST-1]) 1662
1645 memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); 1663 if (tb[RTA_DST])
1646 if (rta[RTA_IIF-1]) 1664 fld.daddr = nla_get_le16(tb[RTA_DST]);
1647 memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); 1665
1666 if (tb[RTA_IIF])
1667 fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]);
1648 1668
1649 if (fld.flowidn_iif) { 1669 if (fld.flowidn_iif) {
1650 struct net_device *dev; 1670 struct net_device *dev;
@@ -1669,10 +1689,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1669 if (!err && -rt->dst.error) 1689 if (!err && -rt->dst.error)
1670 err = rt->dst.error; 1690 err = rt->dst.error;
1671 } else { 1691 } else {
1672 int oif = 0; 1692 if (tb[RTA_OIF])
1673 if (rta[RTA_OIF - 1]) 1693 fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]);
1674 memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); 1694
1675 fld.flowidn_oif = oif;
1676 err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); 1695 err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);
1677 } 1696 }
1678 1697
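
rtm_dn_policy restores, declaratively, what the removed dn_fib_check_attr() checked by hand: once nlmsg_parse() succeeds, every present attribute already has the declared type and length, so handlers can use typed accessors directly. The end-to-end pattern in brief:

	struct nlattr *tb[RTA_MAX + 1];
	__le16 gw;
	int err;

	err = nlmsg_parse(nlh, sizeof(struct rtmsg), tb, RTA_MAX, rtm_dn_policy);
	if (err < 0)
		return err;
	if (tb[RTA_GATEWAY])	/* validated as NLA_U16 by the policy */
		gw = nla_get_le16(tb[RTA_GATEWAY]);
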
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 6c2445bcaba1..86e3807052e9 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -19,7 +19,6 @@
19#include <linux/sockios.h> 19#include <linux/sockios.h>
20#include <linux/init.h> 20#include <linux/init.h>
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/netlink.h>
23#include <linux/rtnetlink.h> 22#include <linux/rtnetlink.h>
24#include <linux/proc_fs.h> 23#include <linux/proc_fs.h>
25#include <linux/netdevice.h> 24#include <linux/netdevice.h>
@@ -224,26 +223,27 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
224} 223}
225 224
226 225
227static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern_rta *rta, struct dn_fib_info *fi) 226static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi)
228{ 227{
229 struct rtnexthop *nhp; 228 struct rtnexthop *nhp;
230 int nhlen; 229 int nhlen;
231 230
232 if (rta->rta_priority && *rta->rta_priority != fi->fib_priority) 231 if (attrs[RTA_PRIORITY] &&
232 nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority)
233 return 1; 233 return 1;
234 234
235 if (rta->rta_oif || rta->rta_gw) { 235 if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) {
236 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && 236 if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) &&
237 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 2) == 0)) 237 (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw))
238 return 0; 238 return 0;
239 return 1; 239 return 1;
240 } 240 }
241 241
242 if (rta->rta_mp == NULL) 242 if (!attrs[RTA_MULTIPATH])
243 return 0; 243 return 0;
244 244
245 nhp = RTA_DATA(rta->rta_mp); 245 nhp = nla_data(attrs[RTA_MULTIPATH]);
246 nhlen = RTA_PAYLOAD(rta->rta_mp); 246 nhlen = nla_len(attrs[RTA_MULTIPATH]);
247 247
248 for_nexthops(fi) { 248 for_nexthops(fi) {
249 int attrlen = nhlen - sizeof(struct rtnexthop); 249 int attrlen = nhlen - sizeof(struct rtnexthop);
@@ -254,7 +254,10 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern
254 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) 254 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
255 return 1; 255 return 1;
256 if (attrlen) { 256 if (attrlen) {
257 gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); 257 struct nlattr *gw_attr;
258
259 gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
260 gw = gw_attr ? nla_get_le16(gw_attr) : 0;
258 261
259 if (gw && gw != nh->nh_gw) 262 if (gw && gw != nh->nh_gw)
260 return 1; 263 return 1;
@@ -488,7 +491,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
488 if (!net_eq(net, &init_net)) 491 if (!net_eq(net, &init_net))
489 return 0; 492 return 0;
490 493
491 if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && 494 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
492 ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED) 495 ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED)
493 return dn_cache_dump(skb, cb); 496 return dn_cache_dump(skb, cb);
494 497
@@ -517,7 +520,8 @@ out:
517 return skb->len; 520 return skb->len;
518} 521}
519 522
520static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) 523static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
524 struct nlmsghdr *n, struct netlink_skb_parms *req)
521{ 525{
522 struct dn_hash *table = (struct dn_hash *)tb->data; 526 struct dn_hash *table = (struct dn_hash *)tb->data;
523 struct dn_fib_node *new_f, *f, **fp, **del_fp; 527 struct dn_fib_node *new_f, *f, **fp, **del_fp;
@@ -536,15 +540,14 @@ static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct
536 return -ENOBUFS; 540 return -ENOBUFS;
537 541
538 dz_key_0(key); 542 dz_key_0(key);
539 if (rta->rta_dst) { 543 if (attrs[RTA_DST]) {
540 __le16 dst; 544 __le16 dst = nla_get_le16(attrs[RTA_DST]);
541 memcpy(&dst, rta->rta_dst, 2);
542 if (dst & ~DZ_MASK(dz)) 545 if (dst & ~DZ_MASK(dz))
543 return -EINVAL; 546 return -EINVAL;
544 key = dz_key(dst, dz); 547 key = dz_key(dst, dz);
545 } 548 }
546 549
547 if ((fi = dn_fib_create_info(r, rta, n, &err)) == NULL) 550 if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL)
548 return err; 551 return err;
549 552
550 if (dz->dz_nent > (dz->dz_divisor << 2) && 553 if (dz->dz_nent > (dz->dz_divisor << 2) &&
@@ -654,7 +657,8 @@ out:
654} 657}
655 658
656 659
657static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) 660static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
661 struct nlmsghdr *n, struct netlink_skb_parms *req)
658{ 662{
659 struct dn_hash *table = (struct dn_hash*)tb->data; 663 struct dn_hash *table = (struct dn_hash*)tb->data;
660 struct dn_fib_node **fp, **del_fp, *f; 664 struct dn_fib_node **fp, **del_fp, *f;
@@ -671,9 +675,8 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct
671 return -ESRCH; 675 return -ESRCH;
672 676
673 dz_key_0(key); 677 dz_key_0(key);
674 if (rta->rta_dst) { 678 if (attrs[RTA_DST]) {
675 __le16 dst; 679 __le16 dst = nla_get_le16(attrs[RTA_DST]);
676 memcpy(&dst, rta->rta_dst, 2);
677 if (dst & ~DZ_MASK(dz)) 680 if (dst & ~DZ_MASK(dz))
678 return -EINVAL; 681 return -EINVAL;
679 key = dz_key(dst, dz); 682 key = dz_key(dst, dz);
@@ -703,7 +706,7 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct
703 (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) && 706 (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
704 (!r->rtm_protocol || 707 (!r->rtm_protocol ||
705 fi->fib_protocol == r->rtm_protocol) && 708 fi->fib_protocol == r->rtm_protocol) &&
706 dn_fib_nh_match(r, n, rta, fi) == 0) 709 dn_fib_nh_match(r, n, attrs, fi) == 0)
707 del_fp = fp; 710 del_fp = fp;
708 } 711 }
709 712
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index dfe42012a044..2a7efe388344 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -19,7 +19,7 @@
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <linux/netfilter.h> 20#include <linux/netfilter.h>
21#include <linux/spinlock.h> 21#include <linux/spinlock.h>
22#include <linux/netlink.h> 22#include <net/netlink.h>
23#include <linux/netfilter_decnet.h> 23#include <linux/netfilter_decnet.h>
24 24
25#include <net/sock.h> 25#include <net/sock.h>
@@ -39,21 +39,21 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
39 unsigned char *ptr; 39 unsigned char *ptr;
40 struct nf_dn_rtmsg *rtm; 40 struct nf_dn_rtmsg *rtm;
41 41
42 size = NLMSG_SPACE(rt_skb->len); 42 size = NLMSG_ALIGN(rt_skb->len) +
43 size += NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); 43 NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg));
44 skb = alloc_skb(size, GFP_ATOMIC); 44 skb = nlmsg_new(size, GFP_ATOMIC);
45 if (!skb) { 45 if (!skb) {
46 *errp = -ENOMEM; 46 *errp = -ENOMEM;
47 return NULL; 47 return NULL;
48 } 48 }
49 old_tail = skb->tail; 49 old_tail = skb->tail;
50 nlh = nlmsg_put(skb, 0, 0, 0, size - sizeof(*nlh), 0); 50 nlh = nlmsg_put(skb, 0, 0, 0, size, 0);
51 if (!nlh) { 51 if (!nlh) {
52 kfree_skb(skb); 52 kfree_skb(skb);
53 *errp = -ENOMEM; 53 *errp = -ENOMEM;
54 return NULL; 54 return NULL;
55 } 55 }
56 rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh); 56 rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh);
57 rtm->nfdn_ifindex = rt_skb->dev->ifindex; 57 rtm->nfdn_ifindex = rt_skb->dev->ifindex;
58 ptr = NFDN_RTMSG(rtm); 58 ptr = NFDN_RTMSG(rtm);
59 skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len); 59 skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
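
The nlmsg_new()/nlmsg_put() pairing replaces hand-computed NLMSG_SPACE() sizing: nlmsg_new() takes only the payload length and reserves headroom for the netlink header itself. The convention in brief:

	struct sk_buff *skb;
	struct nlmsghdr *nlh;

	skb = nlmsg_new(payload, GFP_ATOMIC);	/* 'payload' excludes the header */
	if (!skb)
		return NULL;
	nlh = nlmsg_put(skb, 0, 0, 0, payload, 0); /* portid, seq, type, len, flags */
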
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 2bc62ea857c8..0eb5d5e76dfb 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * net/dsa/dsa.c - Hardware switch handling 2 * net/dsa/dsa.c - Hardware switch handling
3 * Copyright (c) 2008-2009 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -14,6 +15,9 @@
14#include <linux/slab.h> 15#include <linux/slab.h>
15#include <linux/module.h> 16#include <linux/module.h>
16#include <net/dsa.h> 17#include <net/dsa.h>
18#include <linux/of.h>
19#include <linux/of_mdio.h>
20#include <linux/of_platform.h>
17#include "dsa_priv.h" 21#include "dsa_priv.h"
18 22
19char dsa_driver_version[] = "0.1"; 23char dsa_driver_version[] = "0.1";
@@ -287,34 +291,239 @@ static struct net_device *dev_to_net_device(struct device *dev)
287 return NULL; 291 return NULL;
288} 292}
289 293
294#ifdef CONFIG_OF
295static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
296 struct dsa_chip_data *cd,
297 int chip_index,
298 struct device_node *link)
299{
300 int ret;
301 const __be32 *reg;
302 int link_port_addr;
303 int link_sw_addr;
304 struct device_node *parent_sw;
305 int len;
306
307 parent_sw = of_get_parent(link);
308 if (!parent_sw)
309 return -EINVAL;
310
311 reg = of_get_property(parent_sw, "reg", &len);
312 if (!reg || (len != sizeof(*reg) * 2))
313 return -EINVAL;
314
315 link_sw_addr = be32_to_cpup(reg + 1);
316
317 if (link_sw_addr >= pd->nr_chips)
318 return -EINVAL;
319
320 /* First time routing table allocation */
321 if (!cd->rtable) {
322 cd->rtable = kmalloc(pd->nr_chips * sizeof(s8), GFP_KERNEL);
323 if (!cd->rtable)
324 return -ENOMEM;
325
326 /* default to no valid uplink/downlink */
327 memset(cd->rtable, -1, pd->nr_chips * sizeof(s8));
328 }
329
330 reg = of_get_property(link, "reg", NULL);
331 if (!reg) {
332 ret = -EINVAL;
333 goto out;
334 }
335
336 link_port_addr = be32_to_cpup(reg);
337
338 cd->rtable[link_sw_addr] = link_port_addr;
339
340 return 0;
341out:
342 kfree(cd->rtable);
343 return ret;
344}
345
346static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
347{
348 int i;
349 int port_index;
350
351 for (i = 0; i < pd->nr_chips; i++) {
352 port_index = 0;
353 while (port_index < DSA_MAX_PORTS) {
354 if (pd->chip[i].port_names[port_index])
355 kfree(pd->chip[i].port_names[port_index]);
356 port_index++;
357 }
358 kfree(pd->chip[i].rtable);
359 }
360 kfree(pd->chip);
361}
362
363static int dsa_of_probe(struct platform_device *pdev)
364{
365 struct device_node *np = pdev->dev.of_node;
366 struct device_node *child, *mdio, *ethernet, *port, *link;
367 struct mii_bus *mdio_bus;
368 struct platform_device *ethernet_dev;
369 struct dsa_platform_data *pd;
370 struct dsa_chip_data *cd;
371 const char *port_name;
372 int chip_index, port_index;
373 const unsigned int *sw_addr, *port_reg;
374 int ret;
375
376 mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
377 if (!mdio)
378 return -EINVAL;
379
380 mdio_bus = of_mdio_find_bus(mdio);
381 if (!mdio_bus)
382 return -EINVAL;
383
384 ethernet = of_parse_phandle(np, "dsa,ethernet", 0);
385 if (!ethernet)
386 return -EINVAL;
387
388 ethernet_dev = of_find_device_by_node(ethernet);
389 if (!ethernet_dev)
390 return -ENODEV;
391
392 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
393 if (!pd)
394 return -ENOMEM;
395
396 pdev->dev.platform_data = pd;
397 pd->netdev = &ethernet_dev->dev;
398 pd->nr_chips = of_get_child_count(np);
399 if (pd->nr_chips > DSA_MAX_SWITCHES)
400 pd->nr_chips = DSA_MAX_SWITCHES;
401
402 pd->chip = kzalloc(pd->nr_chips * sizeof(struct dsa_chip_data),
403 GFP_KERNEL);
404 if (!pd->chip) {
405 ret = -ENOMEM;
406 goto out_free;
407 }
408
409 chip_index = 0;
410 for_each_available_child_of_node(np, child) {
411 cd = &pd->chip[chip_index];
412
413 cd->mii_bus = &mdio_bus->dev;
414
415 sw_addr = of_get_property(child, "reg", NULL);
416 if (!sw_addr)
417 continue;
418
419 cd->sw_addr = be32_to_cpup(sw_addr);
420 if (cd->sw_addr > PHY_MAX_ADDR)
421 continue;
422
423 for_each_available_child_of_node(child, port) {
424 port_reg = of_get_property(port, "reg", NULL);
425 if (!port_reg)
426 continue;
427
428 port_index = be32_to_cpup(port_reg);
429
430 port_name = of_get_property(port, "label", NULL);
431 if (!port_name)
432 continue;
433
434 cd->port_names[port_index] = kstrdup(port_name,
435 GFP_KERNEL);
436 if (!cd->port_names[port_index]) {
437 ret = -ENOMEM;
438 goto out_free_chip;
439 }
440
441 link = of_parse_phandle(port, "link", 0);
442
443 if (!strcmp(port_name, "dsa") && link &&
444 pd->nr_chips > 1) {
445 ret = dsa_of_setup_routing_table(pd, cd,
446 chip_index, link);
447 if (ret)
448 goto out_free_chip;
449 }
450
451 if (port_index == DSA_MAX_PORTS)
452 break;
453 }
454 }
455
456 return 0;
457
458out_free_chip:
459 dsa_of_free_platform_data(pd);
460out_free:
461 kfree(pd);
462 pdev->dev.platform_data = NULL;
463 return ret;
464}
465
466static void dsa_of_remove(struct platform_device *pdev)
467{
468 struct dsa_platform_data *pd = pdev->dev.platform_data;
469
470 if (!pdev->dev.of_node)
471 return;
472
473 dsa_of_free_platform_data(pd);
474 kfree(pd);
475}
476#else
477static inline int dsa_of_probe(struct platform_device *pdev)
478{
479 return 0;
480}
481
482static inline void dsa_of_remove(struct platform_device *pdev)
483{
484}
485#endif
486
290static int dsa_probe(struct platform_device *pdev) 487static int dsa_probe(struct platform_device *pdev)
291{ 488{
292 static int dsa_version_printed; 489 static int dsa_version_printed;
293 struct dsa_platform_data *pd = pdev->dev.platform_data; 490 struct dsa_platform_data *pd = pdev->dev.platform_data;
294 struct net_device *dev; 491 struct net_device *dev;
295 struct dsa_switch_tree *dst; 492 struct dsa_switch_tree *dst;
296 int i; 493 int i, ret;
297 494
298 if (!dsa_version_printed++) 495 if (!dsa_version_printed++)
299 printk(KERN_NOTICE "Distributed Switch Architecture " 496 printk(KERN_NOTICE "Distributed Switch Architecture "
300 "driver version %s\n", dsa_driver_version); 497 "driver version %s\n", dsa_driver_version);
301 498
499 if (pdev->dev.of_node) {
500 ret = dsa_of_probe(pdev);
501 if (ret)
502 return ret;
503
504 pd = pdev->dev.platform_data;
505 }
506
302 if (pd == NULL || pd->netdev == NULL) 507 if (pd == NULL || pd->netdev == NULL)
303 return -EINVAL; 508 return -EINVAL;
304 509
305 dev = dev_to_net_device(pd->netdev); 510 dev = dev_to_net_device(pd->netdev);
306 if (dev == NULL) 511 if (dev == NULL) {
307 return -EINVAL; 512 ret = -EINVAL;
513 goto out;
514 }
308 515
309 if (dev->dsa_ptr != NULL) { 516 if (dev->dsa_ptr != NULL) {
310 dev_put(dev); 517 dev_put(dev);
311 return -EEXIST; 518 ret = -EEXIST;
519 goto out;
312 } 520 }
313 521
314 dst = kzalloc(sizeof(*dst), GFP_KERNEL); 522 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
315 if (dst == NULL) { 523 if (dst == NULL) {
316 dev_put(dev); 524 dev_put(dev);
317 return -ENOMEM; 525 ret = -ENOMEM;
526 goto out;
318 } 527 }
319 528
320 platform_set_drvdata(pdev, dst); 529 platform_set_drvdata(pdev, dst);
@@ -366,6 +575,11 @@ static int dsa_probe(struct platform_device *pdev)
366 } 575 }
367 576
368 return 0; 577 return 0;
578
579out:
580 dsa_of_remove(pdev);
581
582 return ret;
369} 583}
370 584
371static int dsa_remove(struct platform_device *pdev) 585static int dsa_remove(struct platform_device *pdev)
@@ -385,6 +599,8 @@ static int dsa_remove(struct platform_device *pdev)
385 dsa_switch_destroy(ds); 599 dsa_switch_destroy(ds);
386 } 600 }
387 601
602 dsa_of_remove(pdev);
603
388 return 0; 604 return 0;
389} 605}
390 606
@@ -392,6 +608,12 @@ static void dsa_shutdown(struct platform_device *pdev)
392{ 608{
393} 609}
394 610
611static const struct of_device_id dsa_of_match_table[] = {
612 { .compatible = "marvell,dsa", },
613 {}
614};
615MODULE_DEVICE_TABLE(of, dsa_of_match_table);
616
395static struct platform_driver dsa_driver = { 617static struct platform_driver dsa_driver = {
396 .probe = dsa_probe, 618 .probe = dsa_probe,
397 .remove = dsa_remove, 619 .remove = dsa_remove,
@@ -399,6 +621,7 @@ static struct platform_driver dsa_driver = {
399 .driver = { 621 .driver = {
400 .name = "dsa", 622 .name = "dsa",
401 .owner = THIS_MODULE, 623 .owner = THIS_MODULE,
624 .of_match_table = dsa_of_match_table,
402 }, 625 },
403}; 626};
404 627
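
dsa_of_probe() above reads the "reg" and "label" properties of each child node; device-tree cells are stored big-endian, which is why every of_get_property() result goes through be32_to_cpup(). A minimal userspace sketch of that decoding, with a hypothetical helper name:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's be32_to_cpup(): device-tree
 * cells are big-endian, so the bytes 00 00 00 10 decode to 16. */
static uint32_t be32_decode(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | (uint32_t)p[3];
}

int main(void)
{
	const uint8_t reg[4] = { 0x00, 0x00, 0x00, 0x10 };

	/* Corresponds to cd->sw_addr = be32_to_cpup(sw_addr) above. */
	printf("sw_addr = %u\n", (unsigned)be32_decode(reg));
	return 0;
}
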
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index a36c85eab5b4..5359560926bc 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -195,7 +195,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
195 if (netdev_uses_trailer_tags(dev)) 195 if (netdev_uses_trailer_tags(dev))
196 return htons(ETH_P_TRAILER); 196 return htons(ETH_P_TRAILER);
197 197
198 if (ntohs(eth->h_proto) >= 1536) 198 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
199 return eth->h_proto; 199 return eth->h_proto;
200 200
201 /* 201 /*
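
The one-line eth.c change replaces the magic number 1536 with ETH_P_802_3_MIN. The rule it encodes: an h_proto of 0x0600 (1536) or more is an EtherType naming a protocol, while values up to 1500 are an 802.3 length field. A self-contained sketch (the constant's value is copied from the kernel header):

#include <stdint.h>
#include <stdio.h>

#define ETH_P_802_3_MIN 0x0600	/* value of the kernel constant */

static const char *classify(uint16_t h_proto)	/* host byte order */
{
	if (h_proto >= ETH_P_802_3_MIN)
		return "EtherType (protocol number)";
	if (h_proto <= 1500)
		return "802.3 length field";
	return "invalid (1501..1535 are reserved)";
}

int main(void)
{
	printf("0x0800 -> %s\n", classify(0x0800));	/* IPv4 */
	printf("  1400 -> %s\n", classify(1400));
	return 0;
}
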
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 43b95ca61114..55e1fd5b3e56 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -104,6 +104,7 @@ static const u8 lowpan_llprefix[] = {0xfe, 0x80};
104struct lowpan_dev_info { 104struct lowpan_dev_info {
105 struct net_device *real_dev; /* real WPAN device ptr */ 105 struct net_device *real_dev; /* real WPAN device ptr */
106 struct mutex dev_list_mtx; /* mutex for list ops */ 106 struct mutex dev_list_mtx; /* mutex for list ops */
107 unsigned short fragment_tag;
107}; 108};
108 109
109struct lowpan_dev_record { 110struct lowpan_dev_record {
@@ -120,7 +121,6 @@ struct lowpan_fragment {
120 struct list_head list; /* fragments list */ 121 struct list_head list; /* fragments list */
121}; 122};
122 123
123static unsigned short fragment_tag;
124static LIST_HEAD(lowpan_fragments); 124static LIST_HEAD(lowpan_fragments);
125static DEFINE_SPINLOCK(flist_lock); 125static DEFINE_SPINLOCK(flist_lock);
126 126
@@ -284,6 +284,9 @@ lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb)
284 /* checksum is always inline */ 284 /* checksum is always inline */
285 memcpy(*hc06_ptr, &uh->check, 2); 285 memcpy(*hc06_ptr, &uh->check, 2);
286 *hc06_ptr += 2; 286 *hc06_ptr += 2;
287
288 /* skip the UDP header */
289 skb_pull(skb, sizeof(struct udphdr));
287} 290}
288 291
289static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) 292static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val)
@@ -309,9 +312,8 @@ static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val)
309} 312}
310 313
311static int 314static int
312lowpan_uncompress_udp_header(struct sk_buff *skb) 315lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh)
313{ 316{
314 struct udphdr *uh = udp_hdr(skb);
315 u8 tmp; 317 u8 tmp;
316 318
317 if (!uh) 319 if (!uh)
@@ -358,6 +360,14 @@ lowpan_uncompress_udp_header(struct sk_buff *skb)
358 /* copy checksum */ 360 /* copy checksum */
359 memcpy(&uh->check, &skb->data[0], 2); 361 memcpy(&uh->check, &skb->data[0], 2);
360 skb_pull(skb, 2); 362 skb_pull(skb, 2);
363
364 /*
 365 * UDP length needs to be inferred from the lower layers;
 366 * here we obtain the hint from the remaining size of the
 367 * frame
368 */
369 uh->len = htons(skb->len + sizeof(struct udphdr));
370 pr_debug("uncompressed UDP length: src = %d", uh->len);
361 } else { 371 } else {
362 pr_debug("ERROR: unsupported NH format\n"); 372 pr_debug("ERROR: unsupported NH format\n");
363 goto err; 373 goto err;
@@ -572,17 +582,31 @@ static int lowpan_header_create(struct sk_buff *skb,
572 * this isn't implemented in mainline yet, so currently we assign 0xff 582 * this isn't implemented in mainline yet, so currently we assign 0xff
573 */ 583 */
574 { 584 {
585 mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
586 mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev);
587
575 /* prepare wpan address data */ 588 /* prepare wpan address data */
576 sa.addr_type = IEEE802154_ADDR_LONG; 589 sa.addr_type = IEEE802154_ADDR_LONG;
577 sa.pan_id = 0xff; 590 sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
578
579 da.addr_type = IEEE802154_ADDR_LONG;
580 da.pan_id = 0xff;
581 591
582 memcpy(&(da.hwaddr), daddr, 8);
583 memcpy(&(sa.hwaddr), saddr, 8); 592 memcpy(&(sa.hwaddr), saddr, 8);
593 /* intra-PAN communications */
594 da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev);
584 595
585 mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; 596 /*
597 * if the destination address is the broadcast address, use the
598 * corresponding short address
599 */
600 if (lowpan_is_addr_broadcast(daddr)) {
601 da.addr_type = IEEE802154_ADDR_SHORT;
602 da.short_addr = IEEE802154_ADDR_BROADCAST;
603 } else {
604 da.addr_type = IEEE802154_ADDR_LONG;
605 memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN);
606
607 /* request acknowledgment */
608 mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ;
609 }
586 610
587 return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, 611 return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
588 type, (void *)&da, (void *)&sa, skb->len); 612 type, (void *)&da, (void *)&sa, skb->len);
@@ -650,7 +674,7 @@ static void lowpan_fragment_timer_expired(unsigned long entry_addr)
650} 674}
651 675
652static struct lowpan_fragment * 676static struct lowpan_fragment *
653lowpan_alloc_new_frame(struct sk_buff *skb, u8 len, u16 tag) 677lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)
654{ 678{
655 struct lowpan_fragment *frame; 679 struct lowpan_fragment *frame;
656 680
@@ -720,7 +744,7 @@ lowpan_process_data(struct sk_buff *skb)
720 { 744 {
721 struct lowpan_fragment *frame; 745 struct lowpan_fragment *frame;
 722 /* slen stores the rightmost 8 bits of the 11-bit length */ 746 /* slen stores the rightmost 8 bits of the 11-bit length */
723 u8 slen, offset; 747 u8 slen, offset = 0;
724 u16 len, tag; 748 u16 len, tag;
725 bool found = false; 749 bool found = false;
726 750
@@ -731,6 +755,18 @@ lowpan_process_data(struct sk_buff *skb)
 731 /* add the 3 MSBs to the 8 LSBs to retrieve the 11-bit length */ 755 /* add the 3 MSBs to the 8 LSBs to retrieve the 11-bit length */
732 len = ((iphc0 & 7) << 8) | slen; 756 len = ((iphc0 & 7) << 8) | slen;
733 757
758 if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) {
759 pr_debug("%s received a FRAG1 packet (tag: %d, "
760 "size of the entire IP packet: %d)",
761 __func__, tag, len);
762 } else { /* FRAGN */
763 if (lowpan_fetch_skb_u8(skb, &offset))
764 goto unlock_and_drop;
765 pr_debug("%s received a FRAGN packet (tag: %d, "
766 "size of the entire IP packet: %d, "
767 "offset: %d)", __func__, tag, len, offset * 8);
768 }
769
734 /* 770 /*
735 * check if frame assembling with the same tag is 771 * check if frame assembling with the same tag is
736 * already in progress 772 * already in progress
@@ -745,17 +781,13 @@ lowpan_process_data(struct sk_buff *skb)
745 781
746 /* alloc new frame structure */ 782 /* alloc new frame structure */
747 if (!found) { 783 if (!found) {
784 pr_debug("%s first fragment received for tag %d, "
785 "begin packet reassembly", __func__, tag);
748 frame = lowpan_alloc_new_frame(skb, len, tag); 786 frame = lowpan_alloc_new_frame(skb, len, tag);
749 if (!frame) 787 if (!frame)
750 goto unlock_and_drop; 788 goto unlock_and_drop;
751 } 789 }
752 790
753 if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1)
754 goto unlock_and_drop;
755
756 if (lowpan_fetch_skb_u8(skb, &offset)) /* fetch offset */
757 goto unlock_and_drop;
758
759 /* if payload fits buffer, copy it */ 791 /* if payload fits buffer, copy it */
760 if (likely((offset * 8 + skb->len) <= frame->length)) 792 if (likely((offset * 8 + skb->len) <= frame->length))
761 skb_copy_to_linear_data_offset(frame->skb, offset * 8, 793 skb_copy_to_linear_data_offset(frame->skb, offset * 8,
@@ -773,6 +805,9 @@ lowpan_process_data(struct sk_buff *skb)
773 list_del(&frame->list); 805 list_del(&frame->list);
774 spin_unlock_bh(&flist_lock); 806 spin_unlock_bh(&flist_lock);
775 807
808 pr_debug("%s successfully reassembled fragment "
809 "(tag %d)", __func__, tag);
810
776 dev_kfree_skb(skb); 811 dev_kfree_skb(skb);
777 skb = frame->skb; 812 skb = frame->skb;
778 kfree(frame); 813 kfree(frame);
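
For reference while reading the reassembly hunks above and below: RFC 4944 defines a 4-byte FRAG1 header (a 5-bit dispatch pattern plus an 11-bit datagram size, then a 16-bit tag) and a 5-byte FRAGN header that appends the offset in 8-octet units, which is why the code multiplies offset by 8. A sketch of the FRAGN encoding (dispatch values match the RFC patterns; the helper name is hypothetical):

#include <stdint.h>
#include <stdio.h>

#define DISPATCH_FRAG1 0xc0	/* 11000xxx */
#define DISPATCH_FRAGN 0xe0	/* 11100xxx */

/* Hypothetical encoder for the 5-byte FRAGN header. */
static void put_fragn(uint8_t *b, uint16_t dgram_size,
		      uint16_t tag, uint8_t offset_units)
{
	b[0] = DISPATCH_FRAGN | ((dgram_size >> 8) & 0x07);
	b[1] = dgram_size & 0xff;
	b[2] = tag >> 8;
	b[3] = tag & 0xff;
	b[4] = offset_units;	/* byte offset = offset_units * 8 */
}

int main(void)
{
	uint8_t hdr[5];

	put_fragn(hdr, 1280, 42, 12);	/* fragment at byte offset 96 */
	printf("FRAGN: %02x %02x %02x %02x %02x\n",
	       hdr[0], hdr[1], hdr[2], hdr[3], hdr[4]);
	return 0;
}
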
@@ -918,10 +953,35 @@ lowpan_process_data(struct sk_buff *skb)
918 } 953 }
919 954
920 /* UDP data uncompression */ 955 /* UDP data uncompression */
921 if (iphc0 & LOWPAN_IPHC_NH_C) 956 if (iphc0 & LOWPAN_IPHC_NH_C) {
922 if (lowpan_uncompress_udp_header(skb)) 957 struct udphdr uh;
958 struct sk_buff *new;
959 if (lowpan_uncompress_udp_header(skb, &uh))
923 goto drop; 960 goto drop;
924 961
962 /*
963 * replace the compressed UDP head by the uncompressed UDP
964 * header
965 */
966 new = skb_copy_expand(skb, sizeof(struct udphdr),
967 skb_tailroom(skb), GFP_ATOMIC);
968 kfree_skb(skb);
969
970 if (!new)
971 return -ENOMEM;
972
973 skb = new;
974
975 skb_push(skb, sizeof(struct udphdr));
976 skb_reset_transport_header(skb);
977 skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr));
978
979 lowpan_raw_dump_table(__func__, "raw UDP header dump",
980 (u8 *)&uh, sizeof(uh));
981
982 hdr.nexthdr = UIP_PROTO_UDP;
983 }
984
 925 /* Not a fragmented packet */ 985 /* Not a fragmented packet */
926 hdr.payload_len = htons(skb->len); 986 hdr.payload_len = htons(skb->len);
927 987
@@ -969,13 +1029,13 @@ static int lowpan_get_mac_header_length(struct sk_buff *skb)
969 1029
970static int 1030static int
971lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, 1031lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
972 int mlen, int plen, int offset) 1032 int mlen, int plen, int offset, int type)
973{ 1033{
974 struct sk_buff *frag; 1034 struct sk_buff *frag;
975 int hlen, ret; 1035 int hlen, ret;
976 1036
977 /* if payload length is zero, therefore it's a first fragment */ 1037 hlen = (type == LOWPAN_DISPATCH_FRAG1) ?
978 hlen = (plen == 0 ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE); 1038 LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE;
979 1039
980 lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); 1040 lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen);
981 1041
@@ -1003,14 +1063,14 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,
1003} 1063}
1004 1064
1005static int 1065static int
1006lowpan_skb_fragmentation(struct sk_buff *skb) 1066lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev)
1007{ 1067{
1008 int err, header_length, payload_length, tag, offset = 0; 1068 int err, header_length, payload_length, tag, offset = 0;
1009 u8 head[5]; 1069 u8 head[5];
1010 1070
1011 header_length = lowpan_get_mac_header_length(skb); 1071 header_length = lowpan_get_mac_header_length(skb);
1012 payload_length = skb->len - header_length; 1072 payload_length = skb->len - header_length;
1013 tag = fragment_tag++; 1073 tag = lowpan_dev_info(dev)->fragment_tag++;
1014 1074
1015 /* first fragment header */ 1075 /* first fragment header */
1016 head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7); 1076 head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7);
@@ -1018,7 +1078,16 @@ lowpan_skb_fragmentation(struct sk_buff *skb)
1018 head[2] = tag >> 8; 1078 head[2] = tag >> 8;
1019 head[3] = tag & 0xff; 1079 head[3] = tag & 0xff;
1020 1080
1021 err = lowpan_fragment_xmit(skb, head, header_length, 0, 0); 1081 err = lowpan_fragment_xmit(skb, head, header_length, LOWPAN_FRAG_SIZE,
1082 0, LOWPAN_DISPATCH_FRAG1);
1083
1084 if (err) {
1085 pr_debug("%s unable to send FRAG1 packet (tag: %d)",
1086 __func__, tag);
1087 goto exit;
1088 }
1089
1090 offset = LOWPAN_FRAG_SIZE;
1022 1091
1023 /* next fragment header */ 1092 /* next fragment header */
1024 head[0] &= ~LOWPAN_DISPATCH_FRAG1; 1093 head[0] &= ~LOWPAN_DISPATCH_FRAG1;
@@ -1033,10 +1102,17 @@ lowpan_skb_fragmentation(struct sk_buff *skb)
1033 len = payload_length - offset; 1102 len = payload_length - offset;
1034 1103
1035 err = lowpan_fragment_xmit(skb, head, header_length, 1104 err = lowpan_fragment_xmit(skb, head, header_length,
1036 len, offset); 1105 len, offset, LOWPAN_DISPATCH_FRAGN);
1106 if (err) {
1107 pr_debug("%s unable to send a subsequent FRAGN packet "
 1108 "(tag: %d, offset: %d)", __func__, tag, offset);
1109 goto exit;
1110 }
1111
1037 offset += len; 1112 offset += len;
1038 } 1113 }
1039 1114
1115exit:
1040 return err; 1116 return err;
1041} 1117}
1042 1118
@@ -1059,14 +1135,14 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
1059 } 1135 }
1060 1136
1061 pr_debug("frame is too big, fragmentation is needed\n"); 1137 pr_debug("frame is too big, fragmentation is needed\n");
1062 err = lowpan_skb_fragmentation(skb); 1138 err = lowpan_skb_fragmentation(skb, dev);
1063error: 1139error:
1064 dev_kfree_skb(skb); 1140 dev_kfree_skb(skb);
1065out: 1141out:
1066 if (err < 0) 1142 if (err)
1067 pr_debug("ERROR: xmit failed\n"); 1143 pr_debug("ERROR: xmit failed\n");
1068 1144
1069 return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); 1145 return (err < 0) ? NET_XMIT_DROP : err;
1070} 1146}
1071 1147
1072static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) 1148static struct wpan_phy *lowpan_get_phy(const struct net_device *dev)
@@ -1087,6 +1163,12 @@ static u16 lowpan_get_short_addr(const struct net_device *dev)
1087 return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); 1163 return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev);
1088} 1164}
1089 1165
1166static u8 lowpan_get_dsn(const struct net_device *dev)
1167{
1168 struct net_device *real_dev = lowpan_dev_info(dev)->real_dev;
1169 return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev);
1170}
1171
1090static struct header_ops lowpan_header_ops = { 1172static struct header_ops lowpan_header_ops = {
1091 .create = lowpan_header_create, 1173 .create = lowpan_header_create,
1092}; 1174};
@@ -1100,6 +1182,7 @@ static struct ieee802154_mlme_ops lowpan_mlme = {
1100 .get_pan_id = lowpan_get_pan_id, 1182 .get_pan_id = lowpan_get_pan_id,
1101 .get_phy = lowpan_get_phy, 1183 .get_phy = lowpan_get_phy,
1102 .get_short_addr = lowpan_get_short_addr, 1184 .get_short_addr = lowpan_get_short_addr,
1185 .get_dsn = lowpan_get_dsn,
1103}; 1186};
1104 1187
1105static void lowpan_setup(struct net_device *dev) 1188static void lowpan_setup(struct net_device *dev)
@@ -1203,6 +1286,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
1203 return -ENODEV; 1286 return -ENODEV;
1204 1287
1205 lowpan_dev_info(dev)->real_dev = real_dev; 1288 lowpan_dev_info(dev)->real_dev = real_dev;
1289 lowpan_dev_info(dev)->fragment_tag = 0;
1206 mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); 1290 mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
1207 1291
1208 entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL); 1292 entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL);
diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h
index bba5f8336317..4b8f917658b5 100644
--- a/net/ieee802154/6lowpan.h
+++ b/net/ieee802154/6lowpan.h
@@ -92,9 +92,10 @@
92 */ 92 */
93#define lowpan_is_iid_16_bit_compressable(a) \ 93#define lowpan_is_iid_16_bit_compressable(a) \
94 ((((a)->s6_addr16[4]) == 0) && \ 94 ((((a)->s6_addr16[4]) == 0) && \
95 (((a)->s6_addr16[5]) == 0) && \ 95 (((a)->s6_addr[10]) == 0) && \
96 (((a)->s6_addr16[6]) == 0) && \ 96 (((a)->s6_addr[11]) == 0xff) && \
97 ((((a)->s6_addr[14]) & 0x80) == 0)) 97 (((a)->s6_addr[12]) == 0xfe) && \
98 (((a)->s6_addr[13]) == 0))
98 99
99/* multicast address */ 100/* multicast address */
100#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF) 101#define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF)
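
The corrected macro above tests for an IID of the form 0000:00ff:fe00:XXXX, the shape RFC 6282 requires before the IID may be compressed down to a 16-bit short address. An equivalent byte-level check as a userspace sketch:

#include <stdint.h>
#include <stdio.h>

/* Bytes 8..13 of the IPv6 address must be the fixed short-address
 * pattern; bytes 14..15 carry the 16-bit short address itself. */
static int iid_is_16bit_compressible(const uint8_t a[16])
{
	return a[8] == 0 && a[9] == 0 &&
	       a[10] == 0 && a[11] == 0xff &&
	       a[12] == 0xfe && a[13] == 0;
}

int main(void)
{
	uint8_t addr[16] = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0,
			     0, 0, 0, 0xff, 0xfe, 0, 0xab, 0xcd };

	printf("compressible: %d\n", iid_is_16bit_compressible(addr));
	return 0;
}
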
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index e0da175f8e5b..581a59504bd5 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -291,6 +291,9 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
291 size_t copied = 0; 291 size_t copied = 0;
292 int err = -EOPNOTSUPP; 292 int err = -EOPNOTSUPP;
293 struct sk_buff *skb; 293 struct sk_buff *skb;
294 struct sockaddr_ieee802154 *saddr;
295
296 saddr = (struct sockaddr_ieee802154 *)msg->msg_name;
294 297
295 skb = skb_recv_datagram(sk, flags, noblock, &err); 298 skb = skb_recv_datagram(sk, flags, noblock, &err);
296 if (!skb) 299 if (!skb)
@@ -309,6 +312,13 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
309 312
310 sock_recv_ts_and_drops(msg, sk, skb); 313 sock_recv_ts_and_drops(msg, sk, skb);
311 314
315 if (saddr) {
316 saddr->family = AF_IEEE802154;
317 saddr->addr = mac_cb(skb)->sa;
318 }
319 if (addr_len)
320 *addr_len = sizeof(*saddr);
321
312 if (flags & MSG_TRUNC) 322 if (flags & MSG_TRUNC)
313 copied = skb->len; 323 copied = skb->len;
314done: 324done:
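
The dgram.c hunk makes recvmsg() on AF_IEEE802154 sockets fill in msg_name and *addr_len, which is the contract recvfrom() relies on. A generic illustration of that contract using an ordinary UDP socket (port and addresses arbitrary):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	struct sockaddr_in me = {
		.sin_family = AF_INET,
		.sin_port = htons(9999),
		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
	};
	struct sockaddr_storage peer;
	socklen_t peer_len = sizeof(peer);	/* in: buffer size */
	char buf[16];

	bind(fd, (struct sockaddr *)&me, sizeof(me));
	sendto(fd, "hi", 2, 0, (struct sockaddr *)&me, sizeof(me));

	/* On return the kernel has filled 'peer' and peer_len, just as
	 * dgram_recvmsg() now fills msg_name for 802.15.4 sockets. */
	ssize_t n = recvfrom(fd, buf, sizeof(buf), 0,
			     (struct sockaddr *)&peer, &peer_len);
	printf("got %zd bytes, addr_len = %u\n", n, (unsigned)peer_len);
	return 0;
}
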
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 97351e1d07a4..7e49bbcc6967 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -64,8 +64,8 @@ struct sk_buff *ieee802154_nl_create(int flags, u8 req)
64 64
65int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) 65int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group)
66{ 66{
67 /* XXX: nlh is right at the start of msg */ 67 struct nlmsghdr *nlh = nlmsg_hdr(msg);
68 void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); 68 void *hdr = genlmsg_data(nlmsg_data(nlh));
69 69
70 if (genlmsg_end(msg, hdr) < 0) 70 if (genlmsg_end(msg, hdr) < 0)
71 goto out; 71 goto out;
@@ -97,8 +97,8 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info,
97 97
98int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) 98int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info)
99{ 99{
100 /* XXX: nlh is right at the start of msg */ 100 struct nlmsghdr *nlh = nlmsg_hdr(msg);
101 void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); 101 void *hdr = genlmsg_data(nlmsg_data(nlh));
102 102
103 if (genlmsg_end(msg, hdr) < 0) 103 if (genlmsg_end(msg, hdr) < 0)
104 goto out; 104 goto out;
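
The netlink.c fix replaces hand-rolled pointer math with the accessor form: the nlmsghdr sits at the start of the message, the payload begins NLMSG_HDRLEN bytes in, and the generic-netlink payload a further GENL_HDRLEN after that. A userspace sketch of the same layout using the uapi macros:

#include <stdio.h>
#include <linux/netlink.h>
#include <linux/genetlink.h>

int main(void)
{
	unsigned char msg[256] = { 0 };
	struct nlmsghdr *nlh = (struct nlmsghdr *)msg;

	void *payload = NLMSG_DATA(nlh);	/* msg + NLMSG_HDRLEN */
	void *genl_payload = (char *)payload + GENL_HDRLEN;

	printf("nlmsghdr: %zu bytes, genlmsghdr: %zu bytes\n",
	       (size_t)NLMSG_HDRLEN, (size_t)GENL_HDRLEN);
	printf("payload at +%td, genl payload at +%td\n",
	       (char *)payload - (char *)msg,
	       (char *)genl_payload - (char *)msg);
	return 0;
}
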
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 96bb08abece2..b0bdd8c51e9c 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -315,7 +315,7 @@ static int ieee802154_associate_req(struct sk_buff *skb,
315 struct net_device *dev; 315 struct net_device *dev;
316 struct ieee802154_addr addr; 316 struct ieee802154_addr addr;
317 u8 page; 317 u8 page;
318 int ret = -EINVAL; 318 int ret = -EOPNOTSUPP;
319 319
320 if (!info->attrs[IEEE802154_ATTR_CHANNEL] || 320 if (!info->attrs[IEEE802154_ATTR_CHANNEL] ||
321 !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || 321 !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||
@@ -327,6 +327,8 @@ static int ieee802154_associate_req(struct sk_buff *skb,
327 dev = ieee802154_nl_get_dev(info); 327 dev = ieee802154_nl_get_dev(info);
328 if (!dev) 328 if (!dev)
329 return -ENODEV; 329 return -ENODEV;
330 if (!ieee802154_mlme_ops(dev)->assoc_req)
331 goto out;
330 332
331 if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) { 333 if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) {
332 addr.addr_type = IEEE802154_ADDR_LONG; 334 addr.addr_type = IEEE802154_ADDR_LONG;
@@ -350,6 +352,7 @@ static int ieee802154_associate_req(struct sk_buff *skb,
350 page, 352 page,
351 nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); 353 nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY]));
352 354
355out:
353 dev_put(dev); 356 dev_put(dev);
354 return ret; 357 return ret;
355} 358}
@@ -359,7 +362,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
359{ 362{
360 struct net_device *dev; 363 struct net_device *dev;
361 struct ieee802154_addr addr; 364 struct ieee802154_addr addr;
362 int ret = -EINVAL; 365 int ret = -EOPNOTSUPP;
363 366
364 if (!info->attrs[IEEE802154_ATTR_STATUS] || 367 if (!info->attrs[IEEE802154_ATTR_STATUS] ||
365 !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || 368 !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] ||
@@ -369,6 +372,8 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
369 dev = ieee802154_nl_get_dev(info); 372 dev = ieee802154_nl_get_dev(info);
370 if (!dev) 373 if (!dev)
371 return -ENODEV; 374 return -ENODEV;
375 if (!ieee802154_mlme_ops(dev)->assoc_resp)
376 goto out;
372 377
373 addr.addr_type = IEEE802154_ADDR_LONG; 378 addr.addr_type = IEEE802154_ADDR_LONG;
374 nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], 379 nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR],
@@ -380,6 +385,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb,
380 nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]), 385 nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),
381 nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); 386 nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS]));
382 387
388out:
383 dev_put(dev); 389 dev_put(dev);
384 return ret; 390 return ret;
385} 391}
@@ -389,7 +395,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
389{ 395{
390 struct net_device *dev; 396 struct net_device *dev;
391 struct ieee802154_addr addr; 397 struct ieee802154_addr addr;
392 int ret = -EINVAL; 398 int ret = -EOPNOTSUPP;
393 399
394 if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] && 400 if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] &&
395 !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || 401 !info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) ||
@@ -399,6 +405,8 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
399 dev = ieee802154_nl_get_dev(info); 405 dev = ieee802154_nl_get_dev(info);
400 if (!dev) 406 if (!dev)
401 return -ENODEV; 407 return -ENODEV;
408 if (!ieee802154_mlme_ops(dev)->disassoc_req)
409 goto out;
402 410
403 if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) { 411 if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) {
404 addr.addr_type = IEEE802154_ADDR_LONG; 412 addr.addr_type = IEEE802154_ADDR_LONG;
@@ -415,6 +423,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,
415 ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr, 423 ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr,
416 nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); 424 nla_get_u8(info->attrs[IEEE802154_ATTR_REASON]));
417 425
426out:
418 dev_put(dev); 427 dev_put(dev);
419 return ret; 428 return ret;
420} 429}
@@ -432,7 +441,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
432 u8 channel, bcn_ord, sf_ord; 441 u8 channel, bcn_ord, sf_ord;
433 u8 page; 442 u8 page;
434 int pan_coord, blx, coord_realign; 443 int pan_coord, blx, coord_realign;
435 int ret; 444 int ret = -EOPNOTSUPP;
436 445
437 if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || 446 if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||
438 !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || 447 !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] ||
@@ -448,6 +457,8 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
448 dev = ieee802154_nl_get_dev(info); 457 dev = ieee802154_nl_get_dev(info);
449 if (!dev) 458 if (!dev)
450 return -ENODEV; 459 return -ENODEV;
460 if (!ieee802154_mlme_ops(dev)->start_req)
461 goto out;
451 462
452 addr.addr_type = IEEE802154_ADDR_SHORT; 463 addr.addr_type = IEEE802154_ADDR_SHORT;
453 addr.short_addr = nla_get_u16( 464 addr.short_addr = nla_get_u16(
@@ -476,6 +487,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
476 ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, 487 ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page,
477 bcn_ord, sf_ord, pan_coord, blx, coord_realign); 488 bcn_ord, sf_ord, pan_coord, blx, coord_realign);
478 489
490out:
479 dev_put(dev); 491 dev_put(dev);
480 return ret; 492 return ret;
481} 493}
@@ -483,7 +495,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)
483static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) 495static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
484{ 496{
485 struct net_device *dev; 497 struct net_device *dev;
486 int ret; 498 int ret = -EOPNOTSUPP;
487 u8 type; 499 u8 type;
488 u32 channels; 500 u32 channels;
489 u8 duration; 501 u8 duration;
@@ -497,6 +509,8 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
497 dev = ieee802154_nl_get_dev(info); 509 dev = ieee802154_nl_get_dev(info);
498 if (!dev) 510 if (!dev)
499 return -ENODEV; 511 return -ENODEV;
512 if (!ieee802154_mlme_ops(dev)->scan_req)
513 goto out;
500 514
501 type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]); 515 type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]);
502 channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); 516 channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]);
@@ -511,6 +525,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)
511 ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, 525 ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page,
512 duration); 526 duration);
513 527
528out:
514 dev_put(dev); 529 dev_put(dev);
515 return ret; 530 return ret;
516} 531}
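
Every nl-mac.c handler above gains the same guard: MLME operations are treated as optional function pointers, and a request against a driver that does not implement one fails with EOPNOTSUPP instead of dereferencing NULL. The pattern in isolation, as a self-contained sketch:

#include <errno.h>
#include <stdio.h>

struct mlme_ops {
	int (*assoc_req)(int channel);	/* may be NULL on some drivers */
};

static int do_assoc(const struct mlme_ops *ops, int channel)
{
	if (!ops->assoc_req)
		return -EOPNOTSUPP;	/* driver lacks the operation */
	return ops->assoc_req(channel);
}

int main(void)
{
	struct mlme_ops no_ops = { 0 };

	printf("ret = %d (expect -%d)\n",
	       do_assoc(&no_ops, 11), EOPNOTSUPP);
	return 0;
}
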
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7944df768454..8603ca827104 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -166,6 +166,7 @@ config IP_PNP_RARP
166config NET_IPIP 166config NET_IPIP
167 tristate "IP: tunneling" 167 tristate "IP: tunneling"
168 select INET_TUNNEL 168 select INET_TUNNEL
169 select NET_IP_TUNNEL
169 ---help--- 170 ---help---
170 Tunneling means encapsulating data of one protocol type within 171 Tunneling means encapsulating data of one protocol type within
171 another protocol and sending it over a channel that understands the 172 another protocol and sending it over a channel that understands the
@@ -186,9 +187,14 @@ config NET_IPGRE_DEMUX
186 This is helper module to demultiplex GRE packets on GRE version field criteria. 187 This is helper module to demultiplex GRE packets on GRE version field criteria.
187 Required by ip_gre and pptp modules. 188 Required by ip_gre and pptp modules.
188 189
190config NET_IP_TUNNEL
191 tristate
192 default n
193
189config NET_IPGRE 194config NET_IPGRE
190 tristate "IP: GRE tunnels over IP" 195 tristate "IP: GRE tunnels over IP"
191 depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX 196 depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX
197 select NET_IP_TUNNEL
192 help 198 help
193 Tunneling means encapsulating data of one protocol type within 199 Tunneling means encapsulating data of one protocol type within
194 another protocol and sending it over a channel that understands the 200 another protocol and sending it over a channel that understands the
@@ -313,6 +319,7 @@ config SYN_COOKIES
313config NET_IPVTI 319config NET_IPVTI
314 tristate "Virtual (secure) IP: tunneling" 320 tristate "Virtual (secure) IP: tunneling"
315 select INET_TUNNEL 321 select INET_TUNNEL
322 select NET_IP_TUNNEL
316 depends on INET_XFRM_MODE_TUNNEL 323 depends on INET_XFRM_MODE_TUNNEL
317 ---help--- 324 ---help---
318 Tunneling means encapsulating data of one protocol type within 325 Tunneling means encapsulating data of one protocol type within
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 15ca63ec604e..089cb9f36387 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \
13 fib_frontend.o fib_semantics.o fib_trie.o \ 13 fib_frontend.o fib_semantics.o fib_trie.o \
14 inet_fragment.o ping.o 14 inet_fragment.o ping.o
15 15
16obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
16obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o 17obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
17obj-$(CONFIG_PROC_FS) += proc.o 18obj-$(CONFIG_PROC_FS) += proc.o
18obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 19obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c929d9c1c4b6..d01be2a3ae53 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -111,10 +111,10 @@
111#include <net/sock.h> 111#include <net/sock.h>
112#include <net/raw.h> 112#include <net/raw.h>
113#include <net/icmp.h> 113#include <net/icmp.h>
114#include <net/ipip.h>
115#include <net/inet_common.h> 114#include <net/inet_common.h>
116#include <net/xfrm.h> 115#include <net/xfrm.h>
117#include <net/net_namespace.h> 116#include <net/net_namespace.h>
117#include <net/secure_seq.h>
118#ifdef CONFIG_IP_MROUTE 118#ifdef CONFIG_IP_MROUTE
119#include <linux/mroute.h> 119#include <linux/mroute.h>
120#endif 120#endif
@@ -263,8 +263,10 @@ void build_ehash_secret(void)
263 get_random_bytes(&rnd, sizeof(rnd)); 263 get_random_bytes(&rnd, sizeof(rnd));
264 } while (rnd == 0); 264 } while (rnd == 0);
265 265
266 if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) 266 if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
267 get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); 267 get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
268 net_secret_init();
269 }
268} 270}
269EXPORT_SYMBOL(build_ehash_secret); 271EXPORT_SYMBOL(build_ehash_secret);
270 272
@@ -1283,9 +1285,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1283 int ihl; 1285 int ihl;
1284 int id; 1286 int id;
1285 unsigned int offset = 0; 1287 unsigned int offset = 0;
1286 1288 bool tunnel;
1287 if (!(features & NETIF_F_V4_CSUM))
1288 features &= ~NETIF_F_SG;
1289 1289
1290 if (unlikely(skb_shinfo(skb)->gso_type & 1290 if (unlikely(skb_shinfo(skb)->gso_type &
1291 ~(SKB_GSO_TCPV4 | 1291 ~(SKB_GSO_TCPV4 |
@@ -1293,6 +1293,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1293 SKB_GSO_DODGY | 1293 SKB_GSO_DODGY |
1294 SKB_GSO_TCP_ECN | 1294 SKB_GSO_TCP_ECN |
1295 SKB_GSO_GRE | 1295 SKB_GSO_GRE |
1296 SKB_GSO_TCPV6 |
1297 SKB_GSO_UDP_TUNNEL |
1296 0))) 1298 0)))
1297 goto out; 1299 goto out;
1298 1300
@@ -1307,6 +1309,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1307 if (unlikely(!pskb_may_pull(skb, ihl))) 1309 if (unlikely(!pskb_may_pull(skb, ihl)))
1308 goto out; 1310 goto out;
1309 1311
1312 tunnel = !!skb->encapsulation;
1313
1310 __skb_pull(skb, ihl); 1314 __skb_pull(skb, ihl);
1311 skb_reset_transport_header(skb); 1315 skb_reset_transport_header(skb);
1312 iph = ip_hdr(skb); 1316 iph = ip_hdr(skb);
@@ -1326,7 +1330,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
1326 skb = segs; 1330 skb = segs;
1327 do { 1331 do {
1328 iph = ip_hdr(skb); 1332 iph = ip_hdr(skb);
1329 if (proto == IPPROTO_UDP) { 1333 if (!tunnel && proto == IPPROTO_UDP) {
1330 iph->id = htons(id); 1334 iph->id = htons(id);
1331 iph->frag_off = htons(offset >> 3); 1335 iph->frag_off = htons(offset >> 3);
1332 if (skb->next != NULL) 1336 if (skb->next != NULL)
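
The inet_gso_segment() hunk distinguishes two ID policies: UFO-style UDP segments keep one IP ID with an advancing fragment offset, TSO-style TCP segments take fresh IDs, and an encapsulated (tunnel) inner header must not have its IDs rewritten by the outer pass. A simplified sketch of that decision (function name and flags hypothetical, not the kernel code path):

#include <stdint.h>
#include <stdio.h>

/* seg_ids stands in for the iph->id of each GSO segment. */
static void fixup_ids(uint16_t *seg_ids, int nsegs,
		      int proto_udp, int tunnel, uint16_t id)
{
	for (int i = 0; i < nsegs; i++) {
		if (!tunnel && proto_udp)
			seg_ids[i] = id;	/* UFO: same ID, frag_off advances */
		else
			seg_ids[i] = id + i;	/* TSO: a fresh ID per segment */
	}
}

int main(void)
{
	uint16_t ids[3];

	fixup_ids(ids, 3, 0, 0, 100);
	printf("TCP segment IDs: %u %u %u\n", ids[0], ids[1], ids[2]);
	return 0;
}
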
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index fea4929f6200..247ec1951c35 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -654,11 +654,19 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
654 arp_ptr += dev->addr_len; 654 arp_ptr += dev->addr_len;
655 memcpy(arp_ptr, &src_ip, 4); 655 memcpy(arp_ptr, &src_ip, 4);
656 arp_ptr += 4; 656 arp_ptr += 4;
657 if (target_hw != NULL) 657
658 memcpy(arp_ptr, target_hw, dev->addr_len); 658 switch (dev->type) {
659 else 659#if IS_ENABLED(CONFIG_FIREWIRE_NET)
660 memset(arp_ptr, 0, dev->addr_len); 660 case ARPHRD_IEEE1394:
661 arp_ptr += dev->addr_len; 661 break;
662#endif
663 default:
664 if (target_hw != NULL)
665 memcpy(arp_ptr, target_hw, dev->addr_len);
666 else
667 memset(arp_ptr, 0, dev->addr_len);
668 arp_ptr += dev->addr_len;
669 }
662 memcpy(arp_ptr, &dest_ip, 4); 670 memcpy(arp_ptr, &dest_ip, 4);
663 671
664 return skb; 672 return skb;
@@ -781,7 +789,14 @@ static int arp_process(struct sk_buff *skb)
781 arp_ptr += dev->addr_len; 789 arp_ptr += dev->addr_len;
782 memcpy(&sip, arp_ptr, 4); 790 memcpy(&sip, arp_ptr, 4);
783 arp_ptr += 4; 791 arp_ptr += 4;
784 arp_ptr += dev->addr_len; 792 switch (dev_type) {
793#if IS_ENABLED(CONFIG_FIREWIRE_NET)
794 case ARPHRD_IEEE1394:
795 break;
796#endif
797 default:
798 arp_ptr += dev->addr_len;
799 }
785 memcpy(&tip, arp_ptr, 4); 800 memcpy(&tip, arp_ptr, 4);
786/* 801/*
787 * Check for bad requests for 127.x.x.x and requests for multicast 802 * Check for bad requests for 127.x.x.x and requests for multicast
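
Both arp.c hunks skip the target-hardware-address field for ARPHRD_IEEE1394, because the RFC 2734 ARP packet format does not carry one. A sketch of the variable-length ARP body being walked (addr_len values illustrative only):

#include <stdio.h>

/* Walk the ARP body that follows struct arphdr: sender-hw, sender-ip,
 * optionally target-hw, then target-ip. */
static void print_arp_offsets(int addr_len, int has_target_hw)
{
	int off = 0;

	printf("  sender hw @ %2d\n", off); off += addr_len;
	printf("  sender ip @ %2d\n", off); off += 4;
	if (has_target_hw) {
		printf("  target hw @ %2d\n", off);
		off += addr_len;
	}
	printf("  target ip @ %2d\n", off);
}

int main(void)
{
	printf("Ethernet:\n");
	print_arp_offsets(6, 1);
	printf("IEEE 1394 (no target hw field):\n");
	print_arp_offsets(8, 0);
	return 0;
}
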
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 96083b7a436b..dfc39d4d48b7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -536,7 +536,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
536 return NULL; 536 return NULL;
537} 537}
538 538
539static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 539static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
540{ 540{
541 struct net *net = sock_net(skb->sk); 541 struct net *net = sock_net(skb->sk);
542 struct nlattr *tb[IFA_MAX+1]; 542 struct nlattr *tb[IFA_MAX+1];
@@ -587,13 +587,16 @@ static void check_lifetime(struct work_struct *work)
587{ 587{
588 unsigned long now, next, next_sec, next_sched; 588 unsigned long now, next, next_sec, next_sched;
589 struct in_ifaddr *ifa; 589 struct in_ifaddr *ifa;
590 struct hlist_node *n;
590 int i; 591 int i;
591 592
592 now = jiffies; 593 now = jiffies;
593 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); 594 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
594 595
595 rcu_read_lock();
596 for (i = 0; i < IN4_ADDR_HSIZE; i++) { 596 for (i = 0; i < IN4_ADDR_HSIZE; i++) {
597 bool change_needed = false;
598
599 rcu_read_lock();
597 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { 600 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
598 unsigned long age; 601 unsigned long age;
599 602
@@ -606,16 +609,7 @@ static void check_lifetime(struct work_struct *work)
606 609
607 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && 610 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
608 age >= ifa->ifa_valid_lft) { 611 age >= ifa->ifa_valid_lft) {
609 struct in_ifaddr **ifap ; 612 change_needed = true;
610
611 rtnl_lock();
612 for (ifap = &ifa->ifa_dev->ifa_list;
613 *ifap != NULL; ifap = &ifa->ifa_next) {
614 if (*ifap == ifa)
615 inet_del_ifa(ifa->ifa_dev,
616 ifap, 1);
617 }
618 rtnl_unlock();
619 } else if (ifa->ifa_preferred_lft == 613 } else if (ifa->ifa_preferred_lft ==
620 INFINITY_LIFE_TIME) { 614 INFINITY_LIFE_TIME) {
621 continue; 615 continue;
@@ -625,10 +619,8 @@ static void check_lifetime(struct work_struct *work)
625 next = ifa->ifa_tstamp + 619 next = ifa->ifa_tstamp +
626 ifa->ifa_valid_lft * HZ; 620 ifa->ifa_valid_lft * HZ;
627 621
628 if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) { 622 if (!(ifa->ifa_flags & IFA_F_DEPRECATED))
629 ifa->ifa_flags |= IFA_F_DEPRECATED; 623 change_needed = true;
630 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
631 }
632 } else if (time_before(ifa->ifa_tstamp + 624 } else if (time_before(ifa->ifa_tstamp +
633 ifa->ifa_preferred_lft * HZ, 625 ifa->ifa_preferred_lft * HZ,
634 next)) { 626 next)) {
@@ -636,8 +628,42 @@ static void check_lifetime(struct work_struct *work)
636 ifa->ifa_preferred_lft * HZ; 628 ifa->ifa_preferred_lft * HZ;
637 } 629 }
638 } 630 }
631 rcu_read_unlock();
632 if (!change_needed)
633 continue;
634 rtnl_lock();
635 hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
636 unsigned long age;
637
638 if (ifa->ifa_flags & IFA_F_PERMANENT)
639 continue;
640
641 /* We try to batch several events at once. */
642 age = (now - ifa->ifa_tstamp +
643 ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
644
645 if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME &&
646 age >= ifa->ifa_valid_lft) {
647 struct in_ifaddr **ifap;
648
649 for (ifap = &ifa->ifa_dev->ifa_list;
650 *ifap != NULL; ifap = &(*ifap)->ifa_next) {
651 if (*ifap == ifa) {
652 inet_del_ifa(ifa->ifa_dev,
653 ifap, 1);
654 break;
655 }
656 }
657 } else if (ifa->ifa_preferred_lft !=
658 INFINITY_LIFE_TIME &&
659 age >= ifa->ifa_preferred_lft &&
660 !(ifa->ifa_flags & IFA_F_DEPRECATED)) {
661 ifa->ifa_flags |= IFA_F_DEPRECATED;
662 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
663 }
664 }
665 rtnl_unlock();
639 } 666 }
640 rcu_read_unlock();
641 667
642 next_sec = round_jiffies_up(next); 668 next_sec = round_jiffies_up(next);
643 next_sched = next; 669 next_sched = next;
@@ -775,7 +801,7 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)
775 return NULL; 801 return NULL;
776} 802}
777 803
778static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 804static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
779{ 805{
780 struct net *net = sock_net(skb->sk); 806 struct net *net = sock_net(skb->sk);
781 struct in_ifaddr *ifa; 807 struct in_ifaddr *ifa;
@@ -804,6 +830,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
804 return -EEXIST; 830 return -EEXIST;
805 ifa = ifa_existing; 831 ifa = ifa_existing;
806 set_ifa_lifetime(ifa, valid_lft, prefered_lft); 832 set_ifa_lifetime(ifa, valid_lft, prefered_lft);
833 cancel_delayed_work(&check_lifetime_work);
834 schedule_delayed_work(&check_lifetime_work, 0);
807 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); 835 rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
808 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); 836 blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
809 } 837 }
@@ -1501,6 +1529,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1501 idx = 0; 1529 idx = 0;
1502 head = &net->dev_index_head[h]; 1530 head = &net->dev_index_head[h];
1503 rcu_read_lock(); 1531 rcu_read_lock();
1532 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1533 net->dev_base_seq;
1504 hlist_for_each_entry_rcu(dev, head, index_hlist) { 1534 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1505 if (idx < s_idx) 1535 if (idx < s_idx)
1506 goto cont; 1536 goto cont;
@@ -1521,6 +1551,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1521 rcu_read_unlock(); 1551 rcu_read_unlock();
1522 goto done; 1552 goto done;
1523 } 1553 }
1554 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1524 } 1555 }
1525cont: 1556cont:
1526 idx++; 1557 idx++;
@@ -1732,8 +1763,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
1732}; 1763};
1733 1764
1734static int inet_netconf_get_devconf(struct sk_buff *in_skb, 1765static int inet_netconf_get_devconf(struct sk_buff *in_skb,
1735 struct nlmsghdr *nlh, 1766 struct nlmsghdr *nlh)
1736 void *arg)
1737{ 1767{
1738 struct net *net = sock_net(in_skb->sk); 1768 struct net *net = sock_net(in_skb->sk);
1739 struct nlattr *tb[NETCONFA_MAX+1]; 1769 struct nlattr *tb[NETCONFA_MAX+1];
@@ -1793,6 +1823,77 @@ errout:
1793 return err; 1823 return err;
1794} 1824}
1795 1825
1826static int inet_netconf_dump_devconf(struct sk_buff *skb,
1827 struct netlink_callback *cb)
1828{
1829 struct net *net = sock_net(skb->sk);
1830 int h, s_h;
1831 int idx, s_idx;
1832 struct net_device *dev;
1833 struct in_device *in_dev;
1834 struct hlist_head *head;
1835
1836 s_h = cb->args[0];
1837 s_idx = idx = cb->args[1];
1838
1839 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1840 idx = 0;
1841 head = &net->dev_index_head[h];
1842 rcu_read_lock();
1843 cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
1844 net->dev_base_seq;
1845 hlist_for_each_entry_rcu(dev, head, index_hlist) {
1846 if (idx < s_idx)
1847 goto cont;
1848 in_dev = __in_dev_get_rcu(dev);
1849 if (!in_dev)
1850 goto cont;
1851
1852 if (inet_netconf_fill_devconf(skb, dev->ifindex,
1853 &in_dev->cnf,
1854 NETLINK_CB(cb->skb).portid,
1855 cb->nlh->nlmsg_seq,
1856 RTM_NEWNETCONF,
1857 NLM_F_MULTI,
1858 -1) <= 0) {
1859 rcu_read_unlock();
1860 goto done;
1861 }
1862 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
1863cont:
1864 idx++;
1865 }
1866 rcu_read_unlock();
1867 }
1868 if (h == NETDEV_HASHENTRIES) {
1869 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
1870 net->ipv4.devconf_all,
1871 NETLINK_CB(cb->skb).portid,
1872 cb->nlh->nlmsg_seq,
1873 RTM_NEWNETCONF, NLM_F_MULTI,
1874 -1) <= 0)
1875 goto done;
1876 else
1877 h++;
1878 }
1879 if (h == NETDEV_HASHENTRIES + 1) {
1880 if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
1881 net->ipv4.devconf_dflt,
1882 NETLINK_CB(cb->skb).portid,
1883 cb->nlh->nlmsg_seq,
1884 RTM_NEWNETCONF, NLM_F_MULTI,
1885 -1) <= 0)
1886 goto done;
1887 else
1888 h++;
1889 }
1890done:
1891 cb->args[0] = h;
1892 cb->args[1] = idx;
1893
1894 return skb->len;
1895}
1896
1796#ifdef CONFIG_SYSCTL 1897#ifdef CONFIG_SYSCTL
1797 1898
1798static void devinet_copy_dflt_conf(struct net *net, int i) 1899static void devinet_copy_dflt_conf(struct net *net, int i)
@@ -2197,6 +2298,6 @@ void __init devinet_init(void)
2197 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); 2298 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
2198 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); 2299 rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
2199 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, 2300 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf,
2200 NULL, NULL); 2301 inet_netconf_dump_devconf, NULL);
2201} 2302}
2202 2303
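
The reworked check_lifetime() above scans under RCU first and takes the RTNL lock only when the cheap pass found something to change. A minimal sketch of that two-pass idea, with a pthread rwlock standing in for the RCU/RTNL pair:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
static int entries[4] = { 1, 0, 3, 0 };	/* 0 = expired */

static void expire_pass(void)
{
	bool change_needed = false;

	pthread_rwlock_rdlock(&table_lock);	/* cheap, concurrent */
	for (int i = 0; i < 4; i++)
		if (entries[i] == 0)
			change_needed = true;
	pthread_rwlock_unlock(&table_lock);

	if (!change_needed)
		return;			/* common case: no writer lock */

	pthread_rwlock_wrlock(&table_lock);	/* exclusive, rare */
	for (int i = 0; i < 4; i++)
		if (entries[i] == 0)
			entries[i] = -1;	/* "deleted" */
	pthread_rwlock_unlock(&table_lock);
}

int main(void)
{
	expire_pass();
	printf("entries: %d %d %d %d\n",
	       entries[0], entries[1], entries[2], entries[3]);
	return 0;
}
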
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 3b4f0cd2e63e..4cfe34d4cc96 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -139,8 +139,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
139 139
140 /* skb is pure payload to encrypt */ 140 /* skb is pure payload to encrypt */
141 141
142 err = -ENOMEM;
143
144 esp = x->data; 142 esp = x->data;
145 aead = esp->aead; 143 aead = esp->aead;
146 alen = crypto_aead_authsize(aead); 144 alen = crypto_aead_authsize(aead);
@@ -176,8 +174,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
176 } 174 }
177 175
178 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); 176 tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen);
179 if (!tmp) 177 if (!tmp) {
178 err = -ENOMEM;
180 goto error; 179 goto error;
180 }
181 181
182 seqhi = esp_tmp_seqhi(tmp); 182 seqhi = esp_tmp_seqhi(tmp);
183 iv = esp_tmp_iv(aead, tmp, seqhilen); 183 iv = esp_tmp_iv(aead, tmp, seqhilen);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index eb4bb12b3eb4..c7629a209f9d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -604,7 +604,7 @@ errout:
604 return err; 604 return err;
605} 605}
606 606
607static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 607static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
608{ 608{
609 struct net *net = sock_net(skb->sk); 609 struct net *net = sock_net(skb->sk);
610 struct fib_config cfg; 610 struct fib_config cfg;
@@ -626,7 +626,7 @@ errout:
626 return err; 626 return err;
627} 627}
628 628
629static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 629static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
630{ 630{
631 struct net *net = sock_net(skb->sk); 631 struct net *net = sock_net(skb->sk);
632 struct fib_config cfg; 632 struct fib_config cfg;
@@ -957,8 +957,8 @@ static void nl_fib_input(struct sk_buff *skb)
957 957
958 net = sock_net(skb->sk); 958 net = sock_net(skb->sk);
959 nlh = nlmsg_hdr(skb); 959 nlh = nlmsg_hdr(skb);
960 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 960 if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
961 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 961 nlmsg_len(nlh) < sizeof(*frn))
962 return; 962 return;
963 963
964 skb = skb_clone(skb, GFP_KERNEL); 964 skb = skb_clone(skb, GFP_KERNEL);
@@ -966,7 +966,7 @@ static void nl_fib_input(struct sk_buff *skb)
966 return; 966 return;
967 nlh = nlmsg_hdr(skb); 967 nlh = nlmsg_hdr(skb);
968 968
969 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 969 frn = (struct fib_result_nl *) nlmsg_data(nlh);
970 tb = fib_get_table(net, frn->tb_id_in); 970 tb = fib_get_table(net, frn->tb_id_in);
971 971
972 nl_fib_lookup(frn, tb); 972 nl_fib_lookup(frn, tb);
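
The nl_fib_input() validation above is the standard netlink sanity check: the buffer must hold a full header, the header's claimed length must fit in the buffer, and the payload must be large enough for the expected structure. A userspace sketch with the uapi macros (the fib_result_nl stand-in struct is hypothetical):

#include <stdbool.h>
#include <stdio.h>
#include <linux/netlink.h>

/* Hypothetical stand-in for struct fib_result_nl. */
struct fib_result_nl_stub { int tb_id_in; int pad[4]; };

static bool msg_ok(const void *buf, size_t buflen)
{
	const struct nlmsghdr *nlh = buf;

	if (buflen < (size_t)NLMSG_HDRLEN || buflen < nlh->nlmsg_len)
		return false;		/* truncated message */
	return NLMSG_PAYLOAD(nlh, 0) >= sizeof(struct fib_result_nl_stub);
}

int main(void)
{
	unsigned char buf[128] = { 0 };
	struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

	nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_result_nl_stub));
	printf("ok = %d\n", msg_ok(buf, sizeof(buf)));
	return 0;
}
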
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index ff06b7543d9f..49616fed9340 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -125,7 +125,6 @@ struct tnode {
125 unsigned int empty_children; /* KEYLENGTH bits needed */ 125 unsigned int empty_children; /* KEYLENGTH bits needed */
126 union { 126 union {
127 struct rcu_head rcu; 127 struct rcu_head rcu;
128 struct work_struct work;
129 struct tnode *tnode_free; 128 struct tnode *tnode_free;
130 }; 129 };
131 struct rt_trie_node __rcu *child[0]; 130 struct rt_trie_node __rcu *child[0];
@@ -383,12 +382,6 @@ static struct tnode *tnode_alloc(size_t size)
383 return vzalloc(size); 382 return vzalloc(size);
384} 383}
385 384
386static void __tnode_vfree(struct work_struct *arg)
387{
388 struct tnode *tn = container_of(arg, struct tnode, work);
389 vfree(tn);
390}
391
392static void __tnode_free_rcu(struct rcu_head *head) 385static void __tnode_free_rcu(struct rcu_head *head)
393{ 386{
394 struct tnode *tn = container_of(head, struct tnode, rcu); 387 struct tnode *tn = container_of(head, struct tnode, rcu);
@@ -397,10 +390,8 @@ static void __tnode_free_rcu(struct rcu_head *head)
397 390
398 if (size <= PAGE_SIZE) 391 if (size <= PAGE_SIZE)
399 kfree(tn); 392 kfree(tn);
400 else { 393 else
401 INIT_WORK(&tn->work, __tnode_vfree); 394 vfree(tn);
402 schedule_work(&tn->work);
403 }
404} 395}
405 396
406static inline void tnode_free(struct tnode *tn) 397static inline void tnode_free(struct tnode *tn)
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 7a4c710c4cdd..b2e805af9b87 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -27,11 +27,6 @@
27 27
28static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; 28static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
29static DEFINE_SPINLOCK(gre_proto_lock); 29static DEFINE_SPINLOCK(gre_proto_lock);
30struct gre_base_hdr {
31 __be16 flags;
32 __be16 protocol;
33};
34#define GRE_HEADER_SECTION 4
35 30
36int gre_add_protocol(const struct gre_protocol *proto, u8 version) 31int gre_add_protocol(const struct gre_protocol *proto, u8 version)
37{ 32{
@@ -126,6 +121,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
126 int ghl = GRE_HEADER_SECTION; 121 int ghl = GRE_HEADER_SECTION;
127 struct gre_base_hdr *greh; 122 struct gre_base_hdr *greh;
128 int mac_len = skb->mac_len; 123 int mac_len = skb->mac_len;
124 __be16 protocol = skb->protocol;
129 int tnl_hlen; 125 int tnl_hlen;
130 bool csum; 126 bool csum;
131 127
@@ -154,13 +150,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
154 csum = false; 150 csum = false;
155 151
156 /* setup inner skb. */ 152 /* setup inner skb. */
157 if (greh->protocol == htons(ETH_P_TEB)) { 153 skb->protocol = greh->protocol;
158 struct ethhdr *eth = eth_hdr(skb);
159 skb->protocol = eth->h_proto;
160 } else {
161 skb->protocol = greh->protocol;
162 }
163
164 skb->encapsulation = 0; 154 skb->encapsulation = 0;
165 155
166 if (unlikely(!pskb_may_pull(skb, ghl))) 156 if (unlikely(!pskb_may_pull(skb, ghl)))
@@ -204,6 +194,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
204 skb_reset_mac_header(skb); 194 skb_reset_mac_header(skb);
205 skb_set_network_header(skb, mac_len); 195 skb_set_network_header(skb, mac_len);
206 skb->mac_len = mac_len; 196 skb->mac_len = mac_len;
197 skb->protocol = protocol;
207 } while ((skb = skb->next)); 198 } while ((skb = skb->next));
208out: 199out:
209 return segs; 200 return segs;
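
The struct removed from gre.c above was moved to a shared header. For reference, the base GRE header is 16 bits of flags/version followed by a 16-bit EtherType-style protocol, 4 bytes in total (GRE_HEADER_SECTION). A sketch of the layout:

#include <stdint.h>
#include <stdio.h>

struct gre_base_hdr_sketch {
	uint16_t flags;		/* C/K/S bits + version, network order */
	uint16_t protocol;	/* e.g. 0x0800 for IPv4, 0x6558 for TEB */
};

int main(void)
{
	printf("base GRE header: %zu bytes\n",
	       sizeof(struct gre_base_hdr_sketch));
	return 0;
}
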
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3ac5dff79627..76e10b47e053 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -881,7 +881,7 @@ int icmp_rcv(struct sk_buff *skb)
881 case CHECKSUM_NONE: 881 case CHECKSUM_NONE:
882 skb->csum = 0; 882 skb->csum = 0;
883 if (__skb_checksum_complete(skb)) 883 if (__skb_checksum_complete(skb))
884 goto error; 884 goto csum_error;
885 } 885 }
886 886
887 if (!pskb_pull(skb, sizeof(*icmph))) 887 if (!pskb_pull(skb, sizeof(*icmph)))
@@ -929,6 +929,8 @@ int icmp_rcv(struct sk_buff *skb)
929drop: 929drop:
930 kfree_skb(skb); 930 kfree_skb(skb);
931 return 0; 931 return 0;
932csum_error:
933 ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS);
932error: 934error:
933 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); 935 ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
934 goto drop; 936 goto drop;
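
The new csum_error label above counts packets whose 16-bit ones'-complement checksum fails. The RFC 1071 reference computation, self-contained; summing a message that includes a correct checksum field yields zero:

#include <stdint.h>
#include <stdio.h>

static uint16_t csum(const uint8_t *data, size_t len)
{
	uint32_t sum = 0;

	while (len > 1) {
		sum += (data[0] << 8) | data[1];
		data += 2;
		len -= 2;
	}
	if (len)
		sum += data[0] << 8;	/* odd trailing byte */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t echo[8] = { 8, 0, 0, 0, 0x12, 0x34, 0, 1 };
	uint16_t c = csum(echo, sizeof(echo));

	echo[2] = c >> 8;	/* write the checksum field */
	echo[3] = c & 0xff;
	printf("checksum over message incl. field: %u (0 = valid)\n",
	       csum(echo, sizeof(echo)));
	return 0;
}
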
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 786d97aee751..6acb541c9091 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -559,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
559 559
560int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) 560int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
561{ 561{
562 int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); 562 int err = req->rsk_ops->rtx_syn_ack(parent, req);
563 563
564 if (!err) 564 if (!err)
565 req->num_retrans++; 565 req->num_retrans++;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7afa2c3c788f..5f648751fce2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
158 158
159#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) 159#define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ)
160 160
161 if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 161 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
162 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
163 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
162 r->idiag_timer = 1; 164 r->idiag_timer = 1;
163 r->idiag_retrans = icsk->icsk_retransmits; 165 r->idiag_retrans = icsk->icsk_retransmits;
164 r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); 166 r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
@@ -322,7 +324,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
322 } 324 }
323 325
324 err = sk_diag_fill(sk, rep, req, 326 err = sk_diag_fill(sk, rep, req,
325 sk_user_ns(NETLINK_CB(in_skb).ssk), 327 sk_user_ns(NETLINK_CB(in_skb).sk),
326 NETLINK_CB(in_skb).portid, 328 NETLINK_CB(in_skb).portid,
327 nlh->nlmsg_seq, 0, nlh); 329 nlh->nlmsg_seq, 0, nlh);
328 if (err < 0) { 330 if (err < 0) {
@@ -628,7 +630,7 @@ static int inet_csk_diag_dump(struct sock *sk,
628 return 0; 630 return 0;
629 631
630 return inet_csk_diag_fill(sk, skb, r, 632 return inet_csk_diag_fill(sk, skb, r,
631 sk_user_ns(NETLINK_CB(cb->skb).ssk), 633 sk_user_ns(NETLINK_CB(cb->skb).sk),
632 NETLINK_CB(cb->skb).portid, 634 NETLINK_CB(cb->skb).portid,
633 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 635 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
634} 636}
@@ -803,7 +805,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
803 } 805 }
804 806
805 err = inet_diag_fill_req(skb, sk, req, 807 err = inet_diag_fill_req(skb, sk, req,
806 sk_user_ns(NETLINK_CB(cb->skb).ssk), 808 sk_user_ns(NETLINK_CB(cb->skb).sk),
807 NETLINK_CB(cb->skb).portid, 809 NETLINK_CB(cb->skb).portid,
808 cb->nlh->nlmsg_seq, cb->nlh); 810 cb->nlh->nlmsg_seq, cb->nlh);
809 if (err < 0) { 811 if (err < 0) {
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index f4fd23de9b13..7e06641e36ae 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -23,6 +23,28 @@
23 23
24#include <net/sock.h> 24#include <net/sock.h>
25#include <net/inet_frag.h> 25#include <net/inet_frag.h>
26#include <net/inet_ecn.h>
27
 28/* Given the OR of all fragments' ECN values, apply the RFC 3168 5.3 requirements.
 29 * Value: 0xff if the frame should be dropped,
 30 * else 0 or INET_ECN_CE, to be ORed into the final iph->tos field.
31 */
32const u8 ip_frag_ecn_table[16] = {
33 /* at least one fragment had CE, and others ECT_0 or ECT_1 */
34 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
35 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
36 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
37
38 /* invalid combinations : drop frame */
39 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
40 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
41 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
42 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
43 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
44 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
45 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
46};
47EXPORT_SYMBOL(ip_frag_ecn_table);
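
How the table above is consumed: each fragment's ECN codepoint sets one bit in a 4-bit accumulator, and the OR over all fragments indexes the table, yielding 0 (pass), INET_ECN_CE (mark), or 0xff (drop). A self-contained sketch with local flag names mirroring the IPFRAG_ECN_* values:

#include <stdint.h>
#include <stdio.h>

#define ECN_NOT_ECT 0x01	/* mirrors IPFRAG_ECN_NOT_ECT */
#define ECN_ECT_1   0x02
#define ECN_ECT_0   0x04
#define ECN_CE      0x08

static const uint8_t ecn_table[16] = {
	[ECN_CE | ECN_ECT_0]		 = 3,	/* INET_ECN_CE */
	[ECN_CE | ECN_ECT_1]		 = 3,
	[ECN_CE | ECN_ECT_0 | ECN_ECT_1] = 3,
	[ECN_NOT_ECT | ECN_CE]		 = 0xff, /* invalid mix: drop */
	/* remaining Not-ECT mixes are likewise 0xff in the full table */
};

int main(void)
{
	uint8_t key = 0;

	key |= ECN_ECT_0;	/* first fragment arrived as ECT(0) */
	key |= ECN_CE;		/* a later fragment was CE-marked */
	printf("verdict = %u (3: set CE on the reassembled packet)\n",
	       ecn_table[key]);
	return 0;
}
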
26 48
27static void inet_frag_secret_rebuild(unsigned long dummy) 49static void inet_frag_secret_rebuild(unsigned long dummy)
28{ 50{
@@ -30,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
30 unsigned long now = jiffies; 52 unsigned long now = jiffies;
31 int i; 53 int i;
32 54
55 /* Per bucket lock NOT needed here, due to write lock protection */
33 write_lock(&f->lock); 56 write_lock(&f->lock);
57
34 get_random_bytes(&f->rnd, sizeof(u32)); 58 get_random_bytes(&f->rnd, sizeof(u32));
35 for (i = 0; i < INETFRAGS_HASHSZ; i++) { 59 for (i = 0; i < INETFRAGS_HASHSZ; i++) {
60 struct inet_frag_bucket *hb;
36 struct inet_frag_queue *q; 61 struct inet_frag_queue *q;
37 struct hlist_node *n; 62 struct hlist_node *n;
38 63
39 hlist_for_each_entry_safe(q, n, &f->hash[i], list) { 64 hb = &f->hash[i];
65 hlist_for_each_entry_safe(q, n, &hb->chain, list) {
40 unsigned int hval = f->hashfn(q); 66 unsigned int hval = f->hashfn(q);
41 67
42 if (hval != i) { 68 if (hval != i) {
69 struct inet_frag_bucket *hb_dest;
70
43 hlist_del(&q->list); 71 hlist_del(&q->list);
44 72
45 /* Relink to new hash chain. */ 73 /* Relink to new hash chain. */
46 hlist_add_head(&q->list, &f->hash[hval]); 74 hb_dest = &f->hash[hval];
75 hlist_add_head(&q->list, &hb_dest->chain);
47 } 76 }
48 } 77 }
49 } 78 }
@@ -56,9 +85,12 @@ void inet_frags_init(struct inet_frags *f)
56{ 85{
57 int i; 86 int i;
58 87
59 for (i = 0; i < INETFRAGS_HASHSZ; i++) 88 for (i = 0; i < INETFRAGS_HASHSZ; i++) {
60 INIT_HLIST_HEAD(&f->hash[i]); 89 struct inet_frag_bucket *hb = &f->hash[i];
61 90
91 spin_lock_init(&hb->chain_lock);
92 INIT_HLIST_HEAD(&hb->chain);
93 }
62 rwlock_init(&f->lock); 94 rwlock_init(&f->lock);
63 95
64 f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ 96 f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
@@ -100,10 +132,18 @@ EXPORT_SYMBOL(inet_frags_exit_net);
100 132
101static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) 133static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
102{ 134{
103 write_lock(&f->lock); 135 struct inet_frag_bucket *hb;
136 unsigned int hash;
137
138 read_lock(&f->lock);
139 hash = f->hashfn(fq);
140 hb = &f->hash[hash];
141
142 spin_lock(&hb->chain_lock);
104 hlist_del(&fq->list); 143 hlist_del(&fq->list);
105 fq->net->nqueues--; 144 spin_unlock(&hb->chain_lock);
106 write_unlock(&f->lock); 145
146 read_unlock(&f->lock);
107 inet_frag_lru_del(fq); 147 inet_frag_lru_del(fq);
108} 148}
109 149
@@ -182,6 +222,9 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
182 q = list_first_entry(&nf->lru_list, 222 q = list_first_entry(&nf->lru_list,
183 struct inet_frag_queue, lru_list); 223 struct inet_frag_queue, lru_list);
184 atomic_inc(&q->refcnt); 224 atomic_inc(&q->refcnt);
225 /* Remove q from list to avoid several CPUs grabbing it */
226 list_del_init(&q->lru_list);
227
185 spin_unlock(&nf->lru_lock); 228 spin_unlock(&nf->lru_lock);
186 229
187 spin_lock(&q->lock); 230 spin_lock(&q->lock);
@@ -202,27 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
202 struct inet_frag_queue *qp_in, struct inet_frags *f, 245 struct inet_frag_queue *qp_in, struct inet_frags *f,
203 void *arg) 246 void *arg)
204{ 247{
248 struct inet_frag_bucket *hb;
205 struct inet_frag_queue *qp; 249 struct inet_frag_queue *qp;
206#ifdef CONFIG_SMP 250#ifdef CONFIG_SMP
207#endif 251#endif
208 unsigned int hash; 252 unsigned int hash;
209 253
210 write_lock(&f->lock); 254 read_lock(&f->lock); /* Protects against hash rebuild */
211 /* 255 /*
212 * While we stayed w/o the lock other CPU could update 256 * While we stayed w/o the lock other CPU could update
213 * the rnd seed, so we need to re-calculate the hash 257 * the rnd seed, so we need to re-calculate the hash
214 * chain. Fortunately the qp_in can be used to get one. 258
215 */ 259 */
216 hash = f->hashfn(qp_in); 260 hash = f->hashfn(qp_in);
261 hb = &f->hash[hash];
262 spin_lock(&hb->chain_lock);
263
217#ifdef CONFIG_SMP 264#ifdef CONFIG_SMP
218 /* With SMP race we have to recheck hash table, because 265 /* With SMP race we have to recheck hash table, because
219 * such entry could be created on other cpu, while we 266 * such entry could be created on other cpu, while we
220 * promoted read lock to write lock. 267 * released the hash bucket lock.
221 */ 268 */
222 hlist_for_each_entry(qp, &f->hash[hash], list) { 269 hlist_for_each_entry(qp, &hb->chain, list) {
223 if (qp->net == nf && f->match(qp, arg)) { 270 if (qp->net == nf && f->match(qp, arg)) {
224 atomic_inc(&qp->refcnt); 271 atomic_inc(&qp->refcnt);
225 write_unlock(&f->lock); 272 spin_unlock(&hb->chain_lock);
273 read_unlock(&f->lock);
226 qp_in->last_in |= INET_FRAG_COMPLETE; 274 qp_in->last_in |= INET_FRAG_COMPLETE;
227 inet_frag_put(qp_in, f); 275 inet_frag_put(qp_in, f);
228 return qp; 276 return qp;
@@ -234,9 +282,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
234 atomic_inc(&qp->refcnt); 282 atomic_inc(&qp->refcnt);
235 283
236 atomic_inc(&qp->refcnt); 284 atomic_inc(&qp->refcnt);
237 hlist_add_head(&qp->list, &f->hash[hash]); 285 hlist_add_head(&qp->list, &hb->chain);
238 nf->nqueues++; 286 spin_unlock(&hb->chain_lock);
239 write_unlock(&f->lock); 287 read_unlock(&f->lock);
240 inet_frag_lru_add(nf, qp); 288 inet_frag_lru_add(nf, qp);
241 return qp; 289 return qp;
242} 290}
@@ -257,6 +305,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
257 setup_timer(&q->timer, f->frag_expire, (unsigned long)q); 305 setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
258 spin_lock_init(&q->lock); 306 spin_lock_init(&q->lock);
259 atomic_set(&q->refcnt, 1); 307 atomic_set(&q->refcnt, 1);
308 INIT_LIST_HEAD(&q->lru_list);
260 309
261 return q; 310 return q;
262} 311}
@@ -277,17 +326,23 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
277 struct inet_frags *f, void *key, unsigned int hash) 326 struct inet_frags *f, void *key, unsigned int hash)
278 __releases(&f->lock) 327 __releases(&f->lock)
279{ 328{
329 struct inet_frag_bucket *hb;
280 struct inet_frag_queue *q; 330 struct inet_frag_queue *q;
281 int depth = 0; 331 int depth = 0;
282 332
283 hlist_for_each_entry(q, &f->hash[hash], list) { 333 hb = &f->hash[hash];
334
335 spin_lock(&hb->chain_lock);
336 hlist_for_each_entry(q, &hb->chain, list) {
284 if (q->net == nf && f->match(q, key)) { 337 if (q->net == nf && f->match(q, key)) {
285 atomic_inc(&q->refcnt); 338 atomic_inc(&q->refcnt);
339 spin_unlock(&hb->chain_lock);
286 read_unlock(&f->lock); 340 read_unlock(&f->lock);
287 return q; 341 return q;
288 } 342 }
289 depth++; 343 depth++;
290 } 344 }
345 spin_unlock(&hb->chain_lock);
291 read_unlock(&f->lock); 346 read_unlock(&f->lock);
292 347
293 if (depth <= INETFRAGS_MAXDEPTH) 348 if (depth <= INETFRAGS_MAXDEPTH)
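
Taken together, the inet_fragment.c hunks replace one table-wide write lock with a two-level scheme: f->lock is write-held only while the secret rebuild rehashes everything (hence the "Per bucket lock NOT needed here" comment), and every other path takes it for read plus the spinlock of the single bucket it touches, so lookups and inserts in different buckets no longer serialize against each other. A self-contained sketch of the pattern, with pthread primitives standing in for the kernel's rwlock_t/spinlock_t and illustrative names:

    #include <pthread.h>

    #define HASHSZ 64

    struct node { struct node *next; unsigned int key; };

    struct bucket {
        pthread_spinlock_t chain_lock;   /* protects .head only */
        struct node *head;
    };

    struct table {
        pthread_rwlock_t lock;    /* write-held only while reseeding */
        unsigned int seed;        /* stable while the read lock is held */
        struct bucket hash[HASHSZ];
    };

    static void table_insert(struct table *t, struct node *n)
    {
        struct bucket *b;

        /* Shared lock: the seed (and thus the bucket choice) cannot
         * change underneath us; only this bucket's chain is locked. */
        pthread_rwlock_rdlock(&t->lock);
        b = &t->hash[(n->key ^ t->seed) % HASHSZ];
        pthread_spin_lock(&b->chain_lock);
        n->next = b->head;
        b->head = n;
        pthread_spin_unlock(&b->chain_lock);
        pthread_rwlock_unlock(&t->lock);
    }

The reseeding side takes the lock for write, which excludes every reader and therefore every chain user at once, so it can relink all buckets without touching the per-bucket spinlocks.
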
diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c
index cc280a3f4f96..1975f52933c5 100644
--- a/net/ipv4/inet_lro.c
+++ b/net/ipv4/inet_lro.c
@@ -29,6 +29,7 @@
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/if_vlan.h> 30#include <linux/if_vlan.h>
31#include <linux/inet_lro.h> 31#include <linux/inet_lro.h>
32#include <net/checksum.h>
32 33
33MODULE_LICENSE("GPL"); 34MODULE_LICENSE("GPL");
34MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); 35MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
@@ -114,11 +115,9 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
114 *(p+2) = lro_desc->tcp_rcv_tsecr; 115 *(p+2) = lro_desc->tcp_rcv_tsecr;
115 } 116 }
116 117
118 csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
117 iph->tot_len = htons(lro_desc->ip_tot_len); 119 iph->tot_len = htons(lro_desc->ip_tot_len);
118 120
119 iph->check = 0;
120 iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl);
121
122 tcph->check = 0; 121 tcph->check = 0;
123 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); 122 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
124 lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); 123 lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
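
The LRO hunk trades a full ip_fast_csum() recomputation for an incremental update: csum_replace2() folds the difference between the old and new 16-bit tot_len directly into the stored header checksum, per RFC 1624's HC' = ~(~HC + ~m + m'), so the checksum field no longer has to be zeroed and re-summed over the whole header. A standalone sketch of that arithmetic (a hypothetical helper, not the kernel's implementation; values must be in network byte order, like the stored checksum):

    #include <stdint.h>

    /* Fold a 32-bit ones-complement accumulator into 16 bits. */
    static uint16_t csum_fold32(uint32_t sum)
    {
        sum = (sum & 0xffff) + (sum >> 16);
        sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
    }

    /* RFC 1624, eqn. 3: HC' = ~(~HC + ~m + m')
     * check   - checksum as stored in the header (complemented)
     * old_val - 16-bit field value before the change
     * new_val - 16-bit field value after the change */
    static uint16_t checksum_replace2(uint16_t check,
                                      uint16_t old_val, uint16_t new_val)
    {
        uint32_t sum = (uint16_t)~check;

        sum += (uint16_t)~old_val;
        sum += new_val;
        return (uint16_t)~csum_fold32(sum);
    }
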
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a6445b843ef4..b66910aaef4d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -79,40 +79,11 @@ struct ipq {
79 struct inet_peer *peer; 79 struct inet_peer *peer;
80}; 80};
81 81
82/* RFC 3168 support :
83 * We want to check ECN values of all fragments, to detect invalid combinations.
84 * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
85 */
86#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */
87#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */
88#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */
89#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */
90
91static inline u8 ip4_frag_ecn(u8 tos) 82static inline u8 ip4_frag_ecn(u8 tos)
92{ 83{
93 return 1 << (tos & INET_ECN_MASK); 84 return 1 << (tos & INET_ECN_MASK);
94} 85}
95 86
96/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
97 * Value : 0xff if frame should be dropped.
98 * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
99 */
100static const u8 ip4_frag_ecn_table[16] = {
101 /* at least one fragment had CE, and others ECT_0 or ECT_1 */
102 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
103 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
104 [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
105
106 /* invalid combinations : drop frame */
107 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
108 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
109 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
110 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
111 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
112 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
113 [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
114};
115
116static struct inet_frags ip4_frags; 87static struct inet_frags ip4_frags;
117 88
118int ip_frag_nqueues(struct net *net) 89int ip_frag_nqueues(struct net *net)
@@ -248,8 +219,7 @@ static void ip_expire(unsigned long arg)
248 if (!head->dev) 219 if (!head->dev)
249 goto out_rcu_unlock; 220 goto out_rcu_unlock;
250 221
251 /* skb dst is stale, drop it, and perform route lookup again */ 222 /* skb has no dst, perform route lookup again */
252 skb_dst_drop(head);
253 iph = ip_hdr(head); 223 iph = ip_hdr(head);
254 err = ip_route_input_noref(head, iph->daddr, iph->saddr, 224 err = ip_route_input_noref(head, iph->daddr, iph->saddr,
255 iph->tos, head->dev); 225 iph->tos, head->dev);
@@ -523,9 +493,16 @@ found:
523 qp->q.max_size = skb->len + ihl; 493 qp->q.max_size = skb->len + ihl;
524 494
525 if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && 495 if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
526 qp->q.meat == qp->q.len) 496 qp->q.meat == qp->q.len) {
527 return ip_frag_reasm(qp, prev, dev); 497 unsigned long orefdst = skb->_skb_refdst;
498
499 skb->_skb_refdst = 0UL;
500 err = ip_frag_reasm(qp, prev, dev);
501 skb->_skb_refdst = orefdst;
502 return err;
503 }
528 504
505 skb_dst_drop(skb);
529 inet_frag_lru_move(&qp->q); 506 inet_frag_lru_move(&qp->q);
530 return -EINPROGRESS; 507 return -EINPROGRESS;
531 508
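
The found: hunk keeps the triggering skb's route alive across reassembly by stashing the raw _skb_refdst word, clearing it so that any skb_dst_drop() inside ip_frag_reasm() has nothing to release, and restoring it afterwards; fragments that merely queue up (-EINPROGRESS) drop their dst instead. Schematically, with a toy stand-in type rather than the real sk_buff layout:

    #include <stdint.h>

    struct pkt { uintptr_t refdst; /* pointer plus flag bits */ };

    /* Run fn(p) with p->refdst hidden: whatever reference-dropping
     * fn does to that field cannot touch the caller's reference,
     * which reappears intact once fn returns. */
    static int with_refdst_stashed(struct pkt *p, int (*fn)(struct pkt *))
    {
        uintptr_t saved = p->refdst;
        int ret;

        p->refdst = 0;
        ret = fn(p);
        p->refdst = saved;
        return ret;
    }
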
@@ -551,7 +528,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
551 528
552 ipq_kill(qp); 529 ipq_kill(qp);
553 530
554 ecn = ip4_frag_ecn_table[qp->ecn]; 531 ecn = ip_frag_ecn_table[qp->ecn];
555 if (unlikely(ecn == 0xff)) { 532 if (unlikely(ecn == 0xff)) {
556 err = -EINVAL; 533 err = -EINVAL;
557 goto out_fail; 534 goto out_fail;
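
ip_fragment.c can drop its private ECN machinery because the table now lives in inet_fragment.c as the exported ip_frag_ecn_table (see the top of this diff), where IPv6 reassembly can share it. The logic is unchanged: each fragment's two ECN bits select one of four accumulator bits, the bits are OR-ed across all fragments, and the 16-entry table yields either bits to OR into the reassembled TOS or 0xff for an invalid RFC 3168 combination. In compact standalone form, with the same bit assignments:

    #include <stdint.h>

    #define ECN_NOT_ECT 0x01   /* some fragment was Not-ECT */
    #define ECN_ECT_1   0x02   /* some fragment was ECT(1)  */
    #define ECN_ECT_0   0x04   /* some fragment was ECT(0)  */
    #define ECN_CE      0x08   /* some fragment was CE      */

    /* The two low TOS bits are the ECN codepoint (INET_ECN_MASK). */
    static uint8_t frag_ecn_bit(uint8_t tos)
    {
        return (uint8_t)(1 << (tos & 3));
    }

    /* OR the per-fragment bits, then consult the RFC 3168 5.3 table;
     * 0xff marks an invalid mix, and the reassembled frame is dropped. */
    static int reassembled_ecn(const uint8_t *frag_tos, int nfrags,
                               const uint8_t table[16])
    {
        uint8_t acc = 0;
        int i;

        for (i = 0; i < nfrags; i++)
            acc |= frag_ecn_bit(frag_tos[i]);
        return table[acc] == 0xff ? -1 : table[acc];
    }
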
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 91d66dbde9c0..c625e4dad4b0 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -37,7 +37,7 @@
37#include <net/ip.h> 37#include <net/ip.h>
38#include <net/icmp.h> 38#include <net/icmp.h>
39#include <net/protocol.h> 39#include <net/protocol.h>
40#include <net/ipip.h> 40#include <net/ip_tunnels.h>
41#include <net/arp.h> 41#include <net/arp.h>
42#include <net/checksum.h> 42#include <net/checksum.h>
43#include <net/dsfield.h> 43#include <net/dsfield.h>
@@ -108,15 +108,6 @@
108 fatal route to network, even if it were you who configured 108 fatal route to network, even if it were you who configured
109 fatal static route: you are innocent. :-) 109 fatal static route: you are innocent. :-)
110 110
111
112
113 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
114 practically identical code. It would be good to glue them
115 together, but it is not very evident, how to make them modular.
116 sit is integral part of IPv6, ipip and gre are naturally modular.
117 We could extract common parts (hash table, ioctl etc)
118 to a separate module (ip_tunnel.c).
119
120 Alexey Kuznetsov. 111 Alexey Kuznetsov.
121 */ 112 */
122 113
@@ -126,400 +117,137 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126 117
127static struct rtnl_link_ops ipgre_link_ops __read_mostly; 118static struct rtnl_link_ops ipgre_link_ops __read_mostly;
128static int ipgre_tunnel_init(struct net_device *dev); 119static int ipgre_tunnel_init(struct net_device *dev);
129static void ipgre_tunnel_setup(struct net_device *dev);
130static int ipgre_tunnel_bind_dev(struct net_device *dev);
131
132/* Fallback tunnel: no source, no destination, no key, no options */
133
134#define HASH_SIZE 16
135 120
136static int ipgre_net_id __read_mostly; 121static int ipgre_net_id __read_mostly;
137struct ipgre_net { 122static int gre_tap_net_id __read_mostly;
138 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
139
140 struct net_device *fb_tunnel_dev;
141};
142
143/* Tunnel hash table */
144
145/*
146 4 hash tables:
147
148 3: (remote,local)
149 2: (remote,*)
150 1: (*,local)
151 0: (*,*)
152 123
153 We require exact key match i.e. if a key is present in packet 124static __sum16 check_checksum(struct sk_buff *skb)
154 it will match only tunnel with the same key; if it is not present, 125{
155 it will match only keyless tunnel. 126 __sum16 csum = 0;
156
157 All keyless packets, if not matching configured keyless tunnels
158 will match fallback tunnel.
159 */
160 127
161#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 128 switch (skb->ip_summed) {
129 case CHECKSUM_COMPLETE:
130 csum = csum_fold(skb->csum);
162 131
163#define tunnels_r_l tunnels[3] 132 if (!csum)
164#define tunnels_r tunnels[2] 133 break;
165#define tunnels_l tunnels[1] 134 /* Fall through. */
166#define tunnels_wc tunnels[0]
167 135
168static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, 136 case CHECKSUM_NONE:
169 struct rtnl_link_stats64 *tot) 137 skb->csum = 0;
170{ 138 csum = __skb_checksum_complete(skb);
171 int i; 139 skb->ip_summed = CHECKSUM_COMPLETE;
172 140 break;
173 for_each_possible_cpu(i) {
174 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
175 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
176 unsigned int start;
177
178 do {
179 start = u64_stats_fetch_begin_bh(&tstats->syncp);
180 rx_packets = tstats->rx_packets;
181 tx_packets = tstats->tx_packets;
182 rx_bytes = tstats->rx_bytes;
183 tx_bytes = tstats->tx_bytes;
184 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
185
186 tot->rx_packets += rx_packets;
187 tot->tx_packets += tx_packets;
188 tot->rx_bytes += rx_bytes;
189 tot->tx_bytes += tx_bytes;
190 } 141 }
191 142
192 tot->multicast = dev->stats.multicast; 143 return csum;
193 tot->rx_crc_errors = dev->stats.rx_crc_errors;
194 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
195 tot->rx_length_errors = dev->stats.rx_length_errors;
196 tot->rx_frame_errors = dev->stats.rx_frame_errors;
197 tot->rx_errors = dev->stats.rx_errors;
198
199 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
200 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
201 tot->tx_dropped = dev->stats.tx_dropped;
202 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
203 tot->tx_errors = dev->stats.tx_errors;
204
205 return tot;
206} 144}
207 145
208/* Does key in tunnel parameters match packet */ 146static int ip_gre_calc_hlen(__be16 o_flags)
209static bool ipgre_key_match(const struct ip_tunnel_parm *p,
210 __be16 flags, __be32 key)
211{ 147{
212 if (p->i_flags & GRE_KEY) { 148 int addend = 4;
213 if (flags & GRE_KEY)
214 return key == p->i_key;
215 else
216 return false; /* key expected, none present */
217 } else
218 return !(flags & GRE_KEY);
219}
220 149
221/* Given src, dst and key, find appropriate for input tunnel. */ 150 if (o_flags&TUNNEL_CSUM)
151 addend += 4;
152 if (o_flags&TUNNEL_KEY)
153 addend += 4;
154 if (o_flags&TUNNEL_SEQ)
155 addend += 4;
156 return addend;
157}
222 158
223static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, 159static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
224 __be32 remote, __be32 local, 160 bool *csum_err, int *hdr_len)
225 __be16 flags, __be32 key,
226 __be16 gre_proto)
227{ 161{
228 struct net *net = dev_net(dev); 162 unsigned int ip_hlen = ip_hdrlen(skb);
229 int link = dev->ifindex; 163 const struct gre_base_hdr *greh;
230 unsigned int h0 = HASH(remote); 164 __be32 *options;
231 unsigned int h1 = HASH(key);
232 struct ip_tunnel *t, *cand = NULL;
233 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
234 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
235 ARPHRD_ETHER : ARPHRD_IPGRE;
236 int score, cand_score = 4;
237
238 for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
239 if (local != t->parms.iph.saddr ||
240 remote != t->parms.iph.daddr ||
241 !(t->dev->flags & IFF_UP))
242 continue;
243
244 if (!ipgre_key_match(&t->parms, flags, key))
245 continue;
246
247 if (t->dev->type != ARPHRD_IPGRE &&
248 t->dev->type != dev_type)
249 continue;
250
251 score = 0;
252 if (t->parms.link != link)
253 score |= 1;
254 if (t->dev->type != dev_type)
255 score |= 2;
256 if (score == 0)
257 return t;
258
259 if (score < cand_score) {
260 cand = t;
261 cand_score = score;
262 }
263 }
264 165
265 for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { 166 if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
266 if (remote != t->parms.iph.daddr || 167 return -EINVAL;
267 !(t->dev->flags & IFF_UP))
268 continue;
269
270 if (!ipgre_key_match(&t->parms, flags, key))
271 continue;
272
273 if (t->dev->type != ARPHRD_IPGRE &&
274 t->dev->type != dev_type)
275 continue;
276
277 score = 0;
278 if (t->parms.link != link)
279 score |= 1;
280 if (t->dev->type != dev_type)
281 score |= 2;
282 if (score == 0)
283 return t;
284
285 if (score < cand_score) {
286 cand = t;
287 cand_score = score;
288 }
289 }
290 168
291 for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { 169 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
292 if ((local != t->parms.iph.saddr && 170 if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
293 (local != t->parms.iph.daddr || 171 return -EINVAL;
294 !ipv4_is_multicast(local))) ||
295 !(t->dev->flags & IFF_UP))
296 continue;
297
298 if (!ipgre_key_match(&t->parms, flags, key))
299 continue;
300
301 if (t->dev->type != ARPHRD_IPGRE &&
302 t->dev->type != dev_type)
303 continue;
304
305 score = 0;
306 if (t->parms.link != link)
307 score |= 1;
308 if (t->dev->type != dev_type)
309 score |= 2;
310 if (score == 0)
311 return t;
312
313 if (score < cand_score) {
314 cand = t;
315 cand_score = score;
316 }
317 }
318 172
319 for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { 173 tpi->flags = gre_flags_to_tnl_flags(greh->flags);
320 if (t->parms.i_key != key || 174 *hdr_len = ip_gre_calc_hlen(tpi->flags);
321 !(t->dev->flags & IFF_UP))
322 continue;
323
324 if (t->dev->type != ARPHRD_IPGRE &&
325 t->dev->type != dev_type)
326 continue;
327
328 score = 0;
329 if (t->parms.link != link)
330 score |= 1;
331 if (t->dev->type != dev_type)
332 score |= 2;
333 if (score == 0)
334 return t;
335
336 if (score < cand_score) {
337 cand = t;
338 cand_score = score;
339 }
340 }
341 175
342 if (cand != NULL) 176 if (!pskb_may_pull(skb, *hdr_len))
343 return cand; 177 return -EINVAL;
344 178
345 dev = ign->fb_tunnel_dev; 179 greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
346 if (dev->flags & IFF_UP)
347 return netdev_priv(dev);
348 180
349 return NULL; 181 tpi->proto = greh->protocol;
350}
351 182
352static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign, 183 options = (__be32 *)(greh + 1);
353 struct ip_tunnel_parm *parms) 184 if (greh->flags & GRE_CSUM) {
354{ 185 if (check_checksum(skb)) {
355 __be32 remote = parms->iph.daddr; 186 *csum_err = true;
356 __be32 local = parms->iph.saddr; 187 return -EINVAL;
357 __be32 key = parms->i_key; 188 }
358 unsigned int h = HASH(key); 189 options++;
359 int prio = 0;
360
361 if (local)
362 prio |= 1;
363 if (remote && !ipv4_is_multicast(remote)) {
364 prio |= 2;
365 h ^= HASH(remote);
366 } 190 }
367 191
368 return &ign->tunnels[prio][h]; 192 if (greh->flags & GRE_KEY) {
369} 193 tpi->key = *options;
370 194 options++;
371static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign, 195 } else
372 struct ip_tunnel *t) 196 tpi->key = 0;
373{
374 return __ipgre_bucket(ign, &t->parms);
375}
376
377static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
378{
379 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
380 197
381 rcu_assign_pointer(t->next, rtnl_dereference(*tp)); 198 if (unlikely(greh->flags & GRE_SEQ)) {
382 rcu_assign_pointer(*tp, t); 199 tpi->seq = *options;
383} 200 options++;
201 } else
202 tpi->seq = 0;
384 203
385static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 204 /* WCCP version 1 and 2 protocol decoding.
386{ 205 * - Change protocol to IP
387 struct ip_tunnel __rcu **tp; 206 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
388 struct ip_tunnel *iter; 207 */
389 208 if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
390 for (tp = ipgre_bucket(ign, t); 209 tpi->proto = htons(ETH_P_IP);
391 (iter = rtnl_dereference(*tp)) != NULL; 210 if ((*(u8 *)options & 0xF0) != 0x40) {
392 tp = &iter->next) { 211 *hdr_len += 4;
393 if (t == iter) { 212 if (!pskb_may_pull(skb, *hdr_len))
394 rcu_assign_pointer(*tp, t->next); 213 return -EINVAL;
395 break;
396 } 214 }
397 } 215 }
398}
399
400static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
401 struct ip_tunnel_parm *parms,
402 int type)
403{
404 __be32 remote = parms->iph.daddr;
405 __be32 local = parms->iph.saddr;
406 __be32 key = parms->i_key;
407 int link = parms->link;
408 struct ip_tunnel *t;
409 struct ip_tunnel __rcu **tp;
410 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
411
412 for (tp = __ipgre_bucket(ign, parms);
413 (t = rtnl_dereference(*tp)) != NULL;
414 tp = &t->next)
415 if (local == t->parms.iph.saddr &&
416 remote == t->parms.iph.daddr &&
417 key == t->parms.i_key &&
418 link == t->parms.link &&
419 type == t->dev->type)
420 break;
421
422 return t;
423}
424
425static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
426 struct ip_tunnel_parm *parms, int create)
427{
428 struct ip_tunnel *t, *nt;
429 struct net_device *dev;
430 char name[IFNAMSIZ];
431 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
432
433 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
434 if (t || !create)
435 return t;
436
437 if (parms->name[0])
438 strlcpy(name, parms->name, IFNAMSIZ);
439 else
440 strcpy(name, "gre%d");
441
442 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
443 if (!dev)
444 return NULL;
445
446 dev_net_set(dev, net);
447
448 nt = netdev_priv(dev);
449 nt->parms = *parms;
450 dev->rtnl_link_ops = &ipgre_link_ops;
451
452 dev->mtu = ipgre_tunnel_bind_dev(dev);
453 216
454 if (register_netdevice(dev) < 0) 217 return 0;
455 goto failed_free;
456
457 /* Can use a lockless transmit, unless we generate output sequences */
458 if (!(nt->parms.o_flags & GRE_SEQ))
459 dev->features |= NETIF_F_LLTX;
460
461 dev_hold(dev);
462 ipgre_tunnel_link(ign, nt);
463 return nt;
464
465failed_free:
466 free_netdev(dev);
467 return NULL;
468}
469
470static void ipgre_tunnel_uninit(struct net_device *dev)
471{
472 struct net *net = dev_net(dev);
473 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
474
475 ipgre_tunnel_unlink(ign, netdev_priv(dev));
476 dev_put(dev);
477} 218}
478 219
479
480static void ipgre_err(struct sk_buff *skb, u32 info) 220static void ipgre_err(struct sk_buff *skb, u32 info)
481{ 221{
482 222
483/* All the routers (except for Linux) return only 223 /* All the routers (except for Linux) return only
484 8 bytes of packet payload. It means, that precise relaying of 224 8 bytes of packet payload. It means, that precise relaying of
485 ICMP in the real Internet is absolutely infeasible. 225 ICMP in the real Internet is absolutely infeasible.
486 226
487 Moreover, Cisco "wise men" put GRE key to the third word 227 Moreover, Cisco "wise men" put GRE key to the third word
488 in GRE header. It makes impossible maintaining even soft state for keyed 228 in GRE header. It makes impossible maintaining even soft
489 GRE tunnels with enabled checksum. Tell them "thank you". 229 state for keyed GRE tunnels with enabled checksum. Tell
490 230 them "thank you".
491 Well, I wonder, rfc1812 was written by Cisco employee,
492 what the hell these idiots break standards established
493 by themselves???
494 */
495 231
232 Well, I wonder, rfc1812 was written by Cisco employee,
233 what the hell these idiots break standards established
234 by themselves???
235 */
236 struct net *net = dev_net(skb->dev);
237 struct ip_tunnel_net *itn;
496 const struct iphdr *iph = (const struct iphdr *)skb->data; 238 const struct iphdr *iph = (const struct iphdr *)skb->data;
497 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
498 int grehlen = (iph->ihl<<2) + 4;
499 const int type = icmp_hdr(skb)->type; 239 const int type = icmp_hdr(skb)->type;
500 const int code = icmp_hdr(skb)->code; 240 const int code = icmp_hdr(skb)->code;
501 struct ip_tunnel *t; 241 struct ip_tunnel *t;
502 __be16 flags; 242 struct tnl_ptk_info tpi;
503 __be32 key = 0; 243 int hdr_len;
244 bool csum_err = false;
504 245
505 flags = p[0]; 246 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) {
506 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { 247 if (!csum_err) /* ignore csum errors. */
507 if (flags&(GRE_VERSION|GRE_ROUTING))
508 return; 248 return;
509 if (flags&GRE_KEY) {
510 grehlen += 4;
511 if (flags&GRE_CSUM)
512 grehlen += 4;
513 }
514 } 249 }
515 250
516 /* If only 8 bytes returned, keyed message will be dropped here */
517 if (skb_headlen(skb) < grehlen)
518 return;
519
520 if (flags & GRE_KEY)
521 key = *(((__be32 *)p) + (grehlen / 4) - 1);
522
523 switch (type) { 251 switch (type) {
524 default: 252 default:
525 case ICMP_PARAMETERPROB: 253 case ICMP_PARAMETERPROB:
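
parse_gre_header() and ip_gre_calc_hlen() above both rely on the fixed wire layout of GRE (RFC 2784/2890): a 4-byte base header of flags plus protocol, followed by an optional checksum word, key word, and sequence word, in exactly that order, each present only when its flag is set; the header length is therefore simply 4 plus 4 per set flag. A simplified, hedged walk over that layout (host-order flag constants for brevity; the real bits are the network-order GRE_CSUM/GRE_KEY/GRE_SEQ):

    #include <stdint.h>
    #include <string.h>

    #define F_CSUM 0x8000
    #define F_KEY  0x2000
    #define F_SEQ  0x1000

    struct gre_base { uint16_t flags; uint16_t proto; }; /* 4 bytes */

    /* Walk the optional words in wire order.  Returns the total GRE
     * header length, or -1 if the buffer is too short. */
    static int gre_parse(const uint8_t *p, int len,
                         uint32_t *key, uint32_t *seq)
    {
        struct gre_base h;
        int off = (int)sizeof(h);

        if (len < off)
            return -1;
        memcpy(&h, p, sizeof(h));

        if (h.flags & F_CSUM)
            off += 4;                    /* checksum + reserved word */
        if (h.flags & F_KEY) {
            if (len < off + 4)
                return -1;
            memcpy(key, p + off, 4);     /* key word follows csum */
            off += 4;
        }
        if (h.flags & F_SEQ) {
            if (len < off + 4)
                return -1;
            memcpy(seq, p + off, 4);     /* sequence word comes last */
            off += 4;
        }
        return len < off ? -1 : off;
    }
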
@@ -548,8 +276,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
548 break; 276 break;
549 } 277 }
550 278
551 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, 279 if (tpi.proto == htons(ETH_P_TEB))
552 flags, key, p[1]); 280 itn = net_generic(net, gre_tap_net_id);
281 else
282 itn = net_generic(net, ipgre_net_id);
283
284 t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
285 iph->daddr, iph->saddr, tpi.key);
553 286
554 if (t == NULL) 287 if (t == NULL)
555 return; 288 return;
@@ -578,158 +311,33 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
578 t->err_time = jiffies; 311 t->err_time = jiffies;
579} 312}
580 313
581static inline u8
582ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
583{
584 u8 inner = 0;
585 if (skb->protocol == htons(ETH_P_IP))
586 inner = old_iph->tos;
587 else if (skb->protocol == htons(ETH_P_IPV6))
588 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
589 return INET_ECN_encapsulate(tos, inner);
590}
591
592static int ipgre_rcv(struct sk_buff *skb) 314static int ipgre_rcv(struct sk_buff *skb)
593{ 315{
316 struct net *net = dev_net(skb->dev);
317 struct ip_tunnel_net *itn;
594 const struct iphdr *iph; 318 const struct iphdr *iph;
595 u8 *h;
596 __be16 flags;
597 __sum16 csum = 0;
598 __be32 key = 0;
599 u32 seqno = 0;
600 struct ip_tunnel *tunnel; 319 struct ip_tunnel *tunnel;
601 int offset = 4; 320 struct tnl_ptk_info tpi;
602 __be16 gre_proto; 321 int hdr_len;
603 int err; 322 bool csum_err = false;
604 323
605 if (!pskb_may_pull(skb, 16)) 324 if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)
606 goto drop; 325 goto drop;
607 326
608 iph = ip_hdr(skb); 327 if (tpi.proto == htons(ETH_P_TEB))
609 h = skb->data; 328 itn = net_generic(net, gre_tap_net_id);
610 flags = *(__be16 *)h; 329 else
611 330 itn = net_generic(net, ipgre_net_id);
612 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
613 /* - Version must be 0.
614 - We do not support routing headers.
615 */
616 if (flags&(GRE_VERSION|GRE_ROUTING))
617 goto drop;
618
619 if (flags&GRE_CSUM) {
620 switch (skb->ip_summed) {
621 case CHECKSUM_COMPLETE:
622 csum = csum_fold(skb->csum);
623 if (!csum)
624 break;
625 /* fall through */
626 case CHECKSUM_NONE:
627 skb->csum = 0;
628 csum = __skb_checksum_complete(skb);
629 skb->ip_summed = CHECKSUM_COMPLETE;
630 }
631 offset += 4;
632 }
633 if (flags&GRE_KEY) {
634 key = *(__be32 *)(h + offset);
635 offset += 4;
636 }
637 if (flags&GRE_SEQ) {
638 seqno = ntohl(*(__be32 *)(h + offset));
639 offset += 4;
640 }
641 }
642 331
643 gre_proto = *(__be16 *)(h + 2); 332 iph = ip_hdr(skb);
333 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags,
334 iph->saddr, iph->daddr, tpi.key);
644 335
645 tunnel = ipgre_tunnel_lookup(skb->dev,
646 iph->saddr, iph->daddr, flags, key,
647 gre_proto);
648 if (tunnel) { 336 if (tunnel) {
649 struct pcpu_tstats *tstats; 337 ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
650
651 secpath_reset(skb);
652
653 skb->protocol = gre_proto;
654 /* WCCP version 1 and 2 protocol decoding.
655 * - Change protocol to IP
656 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
657 */
658 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
659 skb->protocol = htons(ETH_P_IP);
660 if ((*(h + offset) & 0xF0) != 0x40)
661 offset += 4;
662 }
663
664 skb->mac_header = skb->network_header;
665 __pskb_pull(skb, offset);
666 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
667 skb->pkt_type = PACKET_HOST;
668#ifdef CONFIG_NET_IPGRE_BROADCAST
669 if (ipv4_is_multicast(iph->daddr)) {
670 /* Looped back packet, drop it! */
671 if (rt_is_output_route(skb_rtable(skb)))
672 goto drop;
673 tunnel->dev->stats.multicast++;
674 skb->pkt_type = PACKET_BROADCAST;
675 }
676#endif
677
678 if (((flags&GRE_CSUM) && csum) ||
679 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
680 tunnel->dev->stats.rx_crc_errors++;
681 tunnel->dev->stats.rx_errors++;
682 goto drop;
683 }
684 if (tunnel->parms.i_flags&GRE_SEQ) {
685 if (!(flags&GRE_SEQ) ||
686 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
687 tunnel->dev->stats.rx_fifo_errors++;
688 tunnel->dev->stats.rx_errors++;
689 goto drop;
690 }
691 tunnel->i_seqno = seqno + 1;
692 }
693
694 /* Warning: All skb pointers will be invalidated! */
695 if (tunnel->dev->type == ARPHRD_ETHER) {
696 if (!pskb_may_pull(skb, ETH_HLEN)) {
697 tunnel->dev->stats.rx_length_errors++;
698 tunnel->dev->stats.rx_errors++;
699 goto drop;
700 }
701
702 iph = ip_hdr(skb);
703 skb->protocol = eth_type_trans(skb, tunnel->dev);
704 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
705 }
706
707 __skb_tunnel_rx(skb, tunnel->dev);
708
709 skb_reset_network_header(skb);
710 err = IP_ECN_decapsulate(iph, skb);
711 if (unlikely(err)) {
712 if (log_ecn_error)
713 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
714 &iph->saddr, iph->tos);
715 if (err > 1) {
716 ++tunnel->dev->stats.rx_frame_errors;
717 ++tunnel->dev->stats.rx_errors;
718 goto drop;
719 }
720 }
721
722 tstats = this_cpu_ptr(tunnel->dev->tstats);
723 u64_stats_update_begin(&tstats->syncp);
724 tstats->rx_packets++;
725 tstats->rx_bytes += skb->len;
726 u64_stats_update_end(&tstats->syncp);
727
728 gro_cells_receive(&tunnel->gro_cells, skb);
729 return 0; 338 return 0;
730 } 339 }
731 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 340 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
732
733drop: 341drop:
734 kfree_skb(skb); 342 kfree_skb(skb);
735 return 0; 343 return 0;
@@ -746,7 +354,7 @@ static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff
746 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; 354 skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;
747 return skb; 355 return skb;
748 } else if (skb->ip_summed == CHECKSUM_PARTIAL && 356 } else if (skb->ip_summed == CHECKSUM_PARTIAL &&
749 tunnel->parms.o_flags&GRE_CSUM) { 357 tunnel->parms.o_flags&TUNNEL_CSUM) {
750 err = skb_checksum_help(skb); 358 err = skb_checksum_help(skb);
751 if (unlikely(err)) 359 if (unlikely(err))
752 goto error; 360 goto error;
@@ -760,494 +368,157 @@ error:
760 return ERR_PTR(err); 368 return ERR_PTR(err);
761} 369}
762 370
763static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 371static struct sk_buff *gre_build_header(struct sk_buff *skb,
372 const struct tnl_ptk_info *tpi,
373 int hdr_len)
764{ 374{
765 struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); 375 struct gre_base_hdr *greh;
766 struct ip_tunnel *tunnel = netdev_priv(dev);
767 const struct iphdr *old_iph;
768 const struct iphdr *tiph;
769 struct flowi4 fl4;
770 u8 tos;
771 __be16 df;
772 struct rtable *rt; /* Route to the other host */
773 struct net_device *tdev; /* Device to other host */
774 struct iphdr *iph; /* Our new IP header */
775 unsigned int max_headroom; /* The extra header space needed */
776 int gre_hlen;
777 __be32 dst;
778 int mtu;
779 u8 ttl;
780 int err;
781 int pkt_len;
782
783 skb = handle_offloads(tunnel, skb);
784 if (IS_ERR(skb)) {
785 dev->stats.tx_dropped++;
786 return NETDEV_TX_OK;
787 }
788 376
789 if (!skb->encapsulation) { 377 skb_push(skb, hdr_len);
790 skb_reset_inner_headers(skb);
791 skb->encapsulation = 1;
792 }
793 378
794 old_iph = ip_hdr(skb); 379 greh = (struct gre_base_hdr *)skb->data;
380 greh->flags = tnl_flags_to_gre_flags(tpi->flags);
381 greh->protocol = tpi->proto;
795 382
796 if (dev->type == ARPHRD_ETHER) 383 if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
797 IPCB(skb)->flags = 0; 384 __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
798 385
799 if (dev->header_ops && dev->type == ARPHRD_IPGRE) { 386 if (tpi->flags&TUNNEL_SEQ) {
800 gre_hlen = 0; 387 *ptr = tpi->seq;
801 tiph = (const struct iphdr *)skb->data; 388 ptr--;
802 } else {
803 gre_hlen = tunnel->hlen;
804 tiph = &tunnel->parms.iph;
805 }
806
807 if ((dst = tiph->daddr) == 0) {
808 /* NBMA tunnel */
809
810 if (skb_dst(skb) == NULL) {
811 dev->stats.tx_fifo_errors++;
812 goto tx_error;
813 } 389 }
814 390 if (tpi->flags&TUNNEL_KEY) {
815 if (skb->protocol == htons(ETH_P_IP)) { 391 *ptr = tpi->key;
816 rt = skb_rtable(skb); 392 ptr--;
817 dst = rt_nexthop(rt, old_iph->daddr);
818 } 393 }
819#if IS_ENABLED(CONFIG_IPV6) 394 if (tpi->flags&TUNNEL_CSUM &&
820 else if (skb->protocol == htons(ETH_P_IPV6)) { 395 !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
821 const struct in6_addr *addr6; 396 *(__sum16 *)ptr = 0;
822 struct neighbour *neigh; 397 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
823 bool do_tx_error_icmp; 398 skb->len, 0));
824 int addr_type;
825
826 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
827 if (neigh == NULL)
828 goto tx_error;
829
830 addr6 = (const struct in6_addr *)&neigh->primary_key;
831 addr_type = ipv6_addr_type(addr6);
832
833 if (addr_type == IPV6_ADDR_ANY) {
834 addr6 = &ipv6_hdr(skb)->daddr;
835 addr_type = ipv6_addr_type(addr6);
836 }
837
838 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
839 do_tx_error_icmp = true;
840 else {
841 do_tx_error_icmp = false;
842 dst = addr6->s6_addr32[3];
843 }
844 neigh_release(neigh);
845 if (do_tx_error_icmp)
846 goto tx_error_icmp;
847 } 399 }
848#endif
849 else
850 goto tx_error;
851 } 400 }
852 401
853 ttl = tiph->ttl; 402 return skb;
854 tos = tiph->tos; 403}
855 if (tos & 0x1) {
856 tos &= ~0x1;
857 if (skb->protocol == htons(ETH_P_IP))
858 tos = old_iph->tos;
859 else if (skb->protocol == htons(ETH_P_IPV6))
860 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
861 }
862 404
863 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, 405static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
864 tunnel->parms.o_key, RT_TOS(tos), 406 const struct iphdr *tnl_params,
865 tunnel->parms.link); 407 __be16 proto)
866 if (IS_ERR(rt)) { 408{
867 dev->stats.tx_carrier_errors++; 409 struct ip_tunnel *tunnel = netdev_priv(dev);
868 goto tx_error; 410 struct tnl_ptk_info tpi;
869 }
870 tdev = rt->dst.dev;
871 411
872 if (tdev == dev) { 412 if (likely(!skb->encapsulation)) {
873 ip_rt_put(rt); 413 skb_reset_inner_headers(skb);
874 dev->stats.collisions++; 414 skb->encapsulation = 1;
875 goto tx_error;
876 } 415 }
877 416
878 df = tiph->frag_off; 417 tpi.flags = tunnel->parms.o_flags;
879 if (df) 418 tpi.proto = proto;
880 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; 419 tpi.key = tunnel->parms.o_key;
881 else 420 if (tunnel->parms.o_flags & TUNNEL_SEQ)
882 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; 421 tunnel->o_seqno++;
883 422 tpi.seq = htonl(tunnel->o_seqno);
884 if (skb_dst(skb))
885 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
886
887 if (skb->protocol == htons(ETH_P_IP)) {
888 df |= (old_iph->frag_off&htons(IP_DF));
889 423
890 if (!skb_is_gso(skb) && 424 /* Push GRE header. */
891 (old_iph->frag_off&htons(IP_DF)) && 425 skb = gre_build_header(skb, &tpi, tunnel->hlen);
892 mtu < ntohs(old_iph->tot_len)) { 426 if (unlikely(!skb)) {
893 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 427 dev->stats.tx_dropped++;
894 ip_rt_put(rt); 428 return;
895 goto tx_error;
896 }
897 } 429 }
898#if IS_ENABLED(CONFIG_IPV6)
899 else if (skb->protocol == htons(ETH_P_IPV6)) {
900 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
901
902 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
903 if ((tunnel->parms.iph.daddr &&
904 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
905 rt6->rt6i_dst.plen == 128) {
906 rt6->rt6i_flags |= RTF_MODIFIED;
907 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
908 }
909 }
910 430
911 if (!skb_is_gso(skb) && 431 ip_tunnel_xmit(skb, dev, tnl_params);
912 mtu >= IPV6_MIN_MTU && 432}
913 mtu < skb->len - tunnel->hlen + gre_hlen) {
914 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
915 ip_rt_put(rt);
916 goto tx_error;
917 }
918 }
919#endif
920 433
921 if (tunnel->err_count > 0) { 434static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
922 if (time_before(jiffies, 435 struct net_device *dev)
923 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 436{
924 tunnel->err_count--; 437 struct ip_tunnel *tunnel = netdev_priv(dev);
438 const struct iphdr *tnl_params;
925 439
926 dst_link_failure(skb); 440 skb = handle_offloads(tunnel, skb);
927 } else 441 if (IS_ERR(skb))
928 tunnel->err_count = 0; 442 goto out;
929 }
930 443
931 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; 444 if (dev->header_ops) {
932 445 /* Need space for new headers */
933 if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| 446 if (skb_cow_head(skb, dev->needed_headroom -
934 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { 447 (tunnel->hlen + sizeof(struct iphdr))))
935 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 448 goto free_skb;
936 if (max_headroom > dev->needed_headroom)
937 dev->needed_headroom = max_headroom;
938 if (!new_skb) {
939 ip_rt_put(rt);
940 dev->stats.tx_dropped++;
941 dev_kfree_skb(skb);
942 return NETDEV_TX_OK;
943 }
944 if (skb->sk)
945 skb_set_owner_w(new_skb, skb->sk);
946 dev_kfree_skb(skb);
947 skb = new_skb;
948 old_iph = ip_hdr(skb);
949 /* Warning : tiph value might point to freed memory */
950 }
951 449
952 skb_push(skb, gre_hlen); 450 tnl_params = (const struct iphdr *)skb->data;
953 skb_reset_network_header(skb);
954 skb_set_transport_header(skb, sizeof(*iph));
955 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
956 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
957 IPSKB_REROUTED);
958 skb_dst_drop(skb);
959 skb_dst_set(skb, &rt->dst);
960
961 /*
962 * Push down and install the IPIP header.
963 */
964 451
965 iph = ip_hdr(skb); 452 /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
966 iph->version = 4; 453 * to gre header.
967 iph->ihl = sizeof(struct iphdr) >> 2; 454 */
968 iph->frag_off = df; 455 skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
969 iph->protocol = IPPROTO_GRE; 456 } else {
970 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); 457 if (skb_cow_head(skb, dev->needed_headroom))
971 iph->daddr = fl4.daddr; 458 goto free_skb;
972 iph->saddr = fl4.saddr;
973 iph->ttl = ttl;
974
975 tunnel_ip_select_ident(skb, old_iph, &rt->dst);
976
977 if (ttl == 0) {
978 if (skb->protocol == htons(ETH_P_IP))
979 iph->ttl = old_iph->ttl;
980#if IS_ENABLED(CONFIG_IPV6)
981 else if (skb->protocol == htons(ETH_P_IPV6))
982 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
983#endif
984 else
985 iph->ttl = ip4_dst_hoplimit(&rt->dst);
986 }
987
988 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
989 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
990 htons(ETH_P_TEB) : skb->protocol;
991
992 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
993 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
994 459
995 if (tunnel->parms.o_flags&GRE_SEQ) { 460 tnl_params = &tunnel->parms.iph;
996 ++tunnel->o_seqno;
997 *ptr = htonl(tunnel->o_seqno);
998 ptr--;
999 }
1000 if (tunnel->parms.o_flags&GRE_KEY) {
1001 *ptr = tunnel->parms.o_key;
1002 ptr--;
1003 }
1004 /* Skip GRE checksum if skb is getting offloaded. */
1005 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) &&
1006 (tunnel->parms.o_flags&GRE_CSUM)) {
1007 int offset = skb_transport_offset(skb);
1008
1009 if (skb_has_shared_frag(skb)) {
1010 err = __skb_linearize(skb);
1011 if (err)
1012 goto tx_error;
1013 }
1014
1015 *ptr = 0;
1016 *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset,
1017 skb->len - offset,
1018 0));
1019 }
1020 } 461 }
1021 462
1022 nf_reset(skb); 463 __gre_xmit(skb, dev, tnl_params, skb->protocol);
1023 464
1024 pkt_len = skb->len - skb_transport_offset(skb);
1025 err = ip_local_out(skb);
1026 if (likely(net_xmit_eval(err) == 0)) {
1027 u64_stats_update_begin(&tstats->syncp);
1028 tstats->tx_bytes += pkt_len;
1029 tstats->tx_packets++;
1030 u64_stats_update_end(&tstats->syncp);
1031 } else {
1032 dev->stats.tx_errors++;
1033 dev->stats.tx_aborted_errors++;
1034 }
1035 return NETDEV_TX_OK; 465 return NETDEV_TX_OK;
1036 466
1037#if IS_ENABLED(CONFIG_IPV6) 467free_skb:
1038tx_error_icmp:
1039 dst_link_failure(skb);
1040#endif
1041tx_error:
1042 dev->stats.tx_errors++;
1043 dev_kfree_skb(skb); 468 dev_kfree_skb(skb);
469out:
470 dev->stats.tx_dropped++;
1044 return NETDEV_TX_OK; 471 return NETDEV_TX_OK;
1045} 472}
1046 473
1047static int ipgre_tunnel_bind_dev(struct net_device *dev) 474static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
475 struct net_device *dev)
1048{ 476{
1049 struct net_device *tdev = NULL; 477 struct ip_tunnel *tunnel = netdev_priv(dev);
1050 struct ip_tunnel *tunnel;
1051 const struct iphdr *iph;
1052 int hlen = LL_MAX_HEADER;
1053 int mtu = ETH_DATA_LEN;
1054 int addend = sizeof(struct iphdr) + 4;
1055
1056 tunnel = netdev_priv(dev);
1057 iph = &tunnel->parms.iph;
1058
1059 /* Guess output device to choose reasonable mtu and needed_headroom */
1060
1061 if (iph->daddr) {
1062 struct flowi4 fl4;
1063 struct rtable *rt;
1064
1065 rt = ip_route_output_gre(dev_net(dev), &fl4,
1066 iph->daddr, iph->saddr,
1067 tunnel->parms.o_key,
1068 RT_TOS(iph->tos),
1069 tunnel->parms.link);
1070 if (!IS_ERR(rt)) {
1071 tdev = rt->dst.dev;
1072 ip_rt_put(rt);
1073 }
1074
1075 if (dev->type != ARPHRD_ETHER)
1076 dev->flags |= IFF_POINTOPOINT;
1077 }
1078 478
1079 if (!tdev && tunnel->parms.link) 479 skb = handle_offloads(tunnel, skb);
1080 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 480 if (IS_ERR(skb))
481 goto out;
1081 482
1082 if (tdev) { 483 if (skb_cow_head(skb, dev->needed_headroom))
1083 hlen = tdev->hard_header_len + tdev->needed_headroom; 484 goto free_skb;
1084 mtu = tdev->mtu;
1085 }
1086 dev->iflink = tunnel->parms.link;
1087
1088 /* Precalculate GRE options length */
1089 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1090 if (tunnel->parms.o_flags&GRE_CSUM)
1091 addend += 4;
1092 if (tunnel->parms.o_flags&GRE_KEY)
1093 addend += 4;
1094 if (tunnel->parms.o_flags&GRE_SEQ)
1095 addend += 4;
1096 }
1097 dev->needed_headroom = addend + hlen;
1098 mtu -= dev->hard_header_len + addend;
1099 485
1100 if (mtu < 68) 486 __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
1101 mtu = 68;
1102 487
1103 tunnel->hlen = addend; 488 return NETDEV_TX_OK;
1104 /* TCP offload with GRE SEQ is not supported. */
1105 if (!(tunnel->parms.o_flags & GRE_SEQ)) {
1106 dev->features |= NETIF_F_GSO_SOFTWARE;
1107 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1108 }
1109 489
1110 return mtu; 490free_skb:
491 dev_kfree_skb(skb);
492out:
493 dev->stats.tx_dropped++;
494 return NETDEV_TX_OK;
1111} 495}
1112 496
1113static int 497static int ipgre_tunnel_ioctl(struct net_device *dev,
1114ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 498 struct ifreq *ifr, int cmd)
1115{ 499{
1116 int err = 0; 500 int err = 0;
1117 struct ip_tunnel_parm p; 501 struct ip_tunnel_parm p;
1118 struct ip_tunnel *t;
1119 struct net *net = dev_net(dev);
1120 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1121
1122 switch (cmd) {
1123 case SIOCGETTUNNEL:
1124 t = NULL;
1125 if (dev == ign->fb_tunnel_dev) {
1126 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1127 err = -EFAULT;
1128 break;
1129 }
1130 t = ipgre_tunnel_locate(net, &p, 0);
1131 }
1132 if (t == NULL)
1133 t = netdev_priv(dev);
1134 memcpy(&p, &t->parms, sizeof(p));
1135 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1136 err = -EFAULT;
1137 break;
1138
1139 case SIOCADDTUNNEL:
1140 case SIOCCHGTUNNEL:
1141 err = -EPERM;
1142 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1143 goto done;
1144
1145 err = -EFAULT;
1146 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1147 goto done;
1148
1149 err = -EINVAL;
1150 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1151 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1152 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1153 goto done;
1154 if (p.iph.ttl)
1155 p.iph.frag_off |= htons(IP_DF);
1156
1157 if (!(p.i_flags&GRE_KEY))
1158 p.i_key = 0;
1159 if (!(p.o_flags&GRE_KEY))
1160 p.o_key = 0;
1161
1162 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1163
1164 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1165 if (t != NULL) {
1166 if (t->dev != dev) {
1167 err = -EEXIST;
1168 break;
1169 }
1170 } else {
1171 unsigned int nflags = 0;
1172
1173 t = netdev_priv(dev);
1174
1175 if (ipv4_is_multicast(p.iph.daddr))
1176 nflags = IFF_BROADCAST;
1177 else if (p.iph.daddr)
1178 nflags = IFF_POINTOPOINT;
1179
1180 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1181 err = -EINVAL;
1182 break;
1183 }
1184 ipgre_tunnel_unlink(ign, t);
1185 synchronize_net();
1186 t->parms.iph.saddr = p.iph.saddr;
1187 t->parms.iph.daddr = p.iph.daddr;
1188 t->parms.i_key = p.i_key;
1189 t->parms.o_key = p.o_key;
1190 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1191 memcpy(dev->broadcast, &p.iph.daddr, 4);
1192 ipgre_tunnel_link(ign, t);
1193 netdev_state_change(dev);
1194 }
1195 }
1196
1197 if (t) {
1198 err = 0;
1199 if (cmd == SIOCCHGTUNNEL) {
1200 t->parms.iph.ttl = p.iph.ttl;
1201 t->parms.iph.tos = p.iph.tos;
1202 t->parms.iph.frag_off = p.iph.frag_off;
1203 if (t->parms.link != p.link) {
1204 t->parms.link = p.link;
1205 dev->mtu = ipgre_tunnel_bind_dev(dev);
1206 netdev_state_change(dev);
1207 }
1208 }
1209 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1210 err = -EFAULT;
1211 } else
1212 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1213 break;
1214
1215 case SIOCDELTUNNEL:
1216 err = -EPERM;
1217 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1218 goto done;
1219
1220 if (dev == ign->fb_tunnel_dev) {
1221 err = -EFAULT;
1222 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1223 goto done;
1224 err = -ENOENT;
1225 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1226 goto done;
1227 err = -EPERM;
1228 if (t == netdev_priv(ign->fb_tunnel_dev))
1229 goto done;
1230 dev = t->dev;
1231 }
1232 unregister_netdevice(dev);
1233 err = 0;
1234 break;
1235 502
1236 default: 503 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1237 err = -EINVAL; 504 return -EFAULT;
505 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
506 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
507 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) {
508 return -EINVAL;
1238 } 509 }
510 p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
511 p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
1239 512
1240done: 513 err = ip_tunnel_ioctl(dev, &p, cmd);
1241 return err; 514 if (err)
1242} 515 return err;
1243 516
1244static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) 517 p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
1245{ 518 p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
1246 struct ip_tunnel *tunnel = netdev_priv(dev); 519
1247 if (new_mtu < 68 || 520 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1248 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) 521 return -EFAULT;
1249 return -EINVAL;
1250 dev->mtu = new_mtu;
1251 return 0; 522 return 0;
1252} 523}
1253 524
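
What is left of ipgre_tunnel_ioctl() is validation plus translation: user space still passes wire-format GRE_* flag bits in ip_tunnel_parm, while the consolidated ip_tunnel_ioctl() core works on internal TUNNEL_* bits, so the flags are mapped inbound and mapped back before copy_to_user(). The real conversion helpers (gre_flags_to_tnl_flags/tnl_flags_to_gre_flags) live in the new shared GRE code; a toy bidirectional mapping of the same shape (all constants illustrative):

    #include <stdint.h>

    #define GREBIT_CSUM 0x8000   /* wire-format bits (illustrative)   */
    #define GREBIT_KEY  0x2000
    #define GREBIT_SEQ  0x1000

    #define TNL_CSUM    0x01     /* internal flag bits (illustrative) */
    #define TNL_KEY     0x04
    #define TNL_SEQ     0x08

    static uint16_t gre_to_tnl(uint16_t gre)
    {
        uint16_t tnl = 0;

        if (gre & GREBIT_CSUM) tnl |= TNL_CSUM;
        if (gre & GREBIT_KEY)  tnl |= TNL_KEY;
        if (gre & GREBIT_SEQ)  tnl |= TNL_SEQ;
        return tnl;
    }

    static uint16_t tnl_to_gre(uint16_t tnl)
    {
        uint16_t gre = 0;

        if (tnl & TNL_CSUM) gre |= GREBIT_CSUM;
        if (tnl & TNL_KEY)  gre |= GREBIT_KEY;
        if (tnl & TNL_SEQ)  gre |= GREBIT_SEQ;
        return gre;
    }

Mapping back before the copy-out keeps the ioctl ABI stable even though the in-kernel flag representation changed.
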
@@ -1277,25 +548,23 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1277 ... 548 ...
1278 ftp fec0:6666:6666::193.233.7.65 549 ftp fec0:6666:6666::193.233.7.65
1279 ... 550 ...
1280
1281 */ 551 */
1282
1283static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 552static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1284 unsigned short type, 553 unsigned short type,
1285 const void *daddr, const void *saddr, unsigned int len) 554 const void *daddr, const void *saddr, unsigned int len)
1286{ 555{
1287 struct ip_tunnel *t = netdev_priv(dev); 556 struct ip_tunnel *t = netdev_priv(dev);
1288 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 557 struct iphdr *iph;
1289 __be16 *p = (__be16 *)(iph+1); 558 struct gre_base_hdr *greh;
1290 559
1291 memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); 560 iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
1292 p[0] = t->parms.o_flags; 561 greh = (struct gre_base_hdr *)(iph+1);
1293 p[1] = htons(type); 562 greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
563 greh->protocol = htons(type);
1294 564
1295 /* 565 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1296 * Set the source hardware address.
1297 */
1298 566
567 /* Set the source hardware address. */
1299 if (saddr) 568 if (saddr)
1300 memcpy(&iph->saddr, saddr, 4); 569 memcpy(&iph->saddr, saddr, 4);
1301 if (daddr) 570 if (daddr)
@@ -1303,7 +572,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1303 if (iph->daddr) 572 if (iph->daddr)
1304 return t->hlen; 573 return t->hlen;
1305 574
1306 return -t->hlen; 575 return -(t->hlen + sizeof(*iph));
1307} 576}
1308 577
1309static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) 578static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
@@ -1357,31 +626,21 @@ static int ipgre_close(struct net_device *dev)
1357 } 626 }
1358 return 0; 627 return 0;
1359} 628}
1360
1361#endif 629#endif
1362 630
1363static const struct net_device_ops ipgre_netdev_ops = { 631static const struct net_device_ops ipgre_netdev_ops = {
1364 .ndo_init = ipgre_tunnel_init, 632 .ndo_init = ipgre_tunnel_init,
1365 .ndo_uninit = ipgre_tunnel_uninit, 633 .ndo_uninit = ip_tunnel_uninit,
1366#ifdef CONFIG_NET_IPGRE_BROADCAST 634#ifdef CONFIG_NET_IPGRE_BROADCAST
1367 .ndo_open = ipgre_open, 635 .ndo_open = ipgre_open,
1368 .ndo_stop = ipgre_close, 636 .ndo_stop = ipgre_close,
1369#endif 637#endif
1370 .ndo_start_xmit = ipgre_tunnel_xmit, 638 .ndo_start_xmit = ipgre_xmit,
1371 .ndo_do_ioctl = ipgre_tunnel_ioctl, 639 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1372 .ndo_change_mtu = ipgre_tunnel_change_mtu, 640 .ndo_change_mtu = ip_tunnel_change_mtu,
1373 .ndo_get_stats64 = ipgre_get_stats64, 641 .ndo_get_stats64 = ip_tunnel_get_stats64,
1374}; 642};
1375 643
1376static void ipgre_dev_free(struct net_device *dev)
1377{
1378 struct ip_tunnel *tunnel = netdev_priv(dev);
1379
1380 gro_cells_destroy(&tunnel->gro_cells);
1381 free_percpu(dev->tstats);
1382 free_netdev(dev);
1383}
1384
1385#define GRE_FEATURES (NETIF_F_SG | \ 644#define GRE_FEATURES (NETIF_F_SG | \
1386 NETIF_F_FRAGLIST | \ 645 NETIF_F_FRAGLIST | \
1387 NETIF_F_HIGHDMA | \ 646 NETIF_F_HIGHDMA | \
@@ -1390,35 +649,48 @@ static void ipgre_dev_free(struct net_device *dev)
1390static void ipgre_tunnel_setup(struct net_device *dev) 649static void ipgre_tunnel_setup(struct net_device *dev)
1391{ 650{
1392 dev->netdev_ops = &ipgre_netdev_ops; 651 dev->netdev_ops = &ipgre_netdev_ops;
1393 dev->destructor = ipgre_dev_free; 652 ip_tunnel_setup(dev, ipgre_net_id);
653}
1394 654
1395 dev->type = ARPHRD_IPGRE; 655static void __gre_tunnel_init(struct net_device *dev)
1396 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 656{
657 struct ip_tunnel *tunnel;
658
659 tunnel = netdev_priv(dev);
660 tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
661 tunnel->parms.iph.protocol = IPPROTO_GRE;
662
663 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1397 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; 664 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1398 dev->flags = IFF_NOARP;
1399 dev->iflink = 0;
1400 dev->addr_len = 4;
1401 dev->features |= NETIF_F_NETNS_LOCAL;
1402 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
1403 665
1404 dev->features |= GRE_FEATURES; 666 dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
1405 dev->hw_features |= GRE_FEATURES; 667 dev->hw_features |= GRE_FEATURES;
668
669 if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
670 /* TCP offload with GRE SEQ is not supported. */
671 dev->features |= NETIF_F_GSO_SOFTWARE;
672 dev->hw_features |= NETIF_F_GSO_SOFTWARE;
673 /* Can use a lockless transmit, unless we generate
674 * output sequences
675 */
676 dev->features |= NETIF_F_LLTX;
677 }
1406} 678}
1407 679
1408static int ipgre_tunnel_init(struct net_device *dev) 680static int ipgre_tunnel_init(struct net_device *dev)
1409{ 681{
1410 struct ip_tunnel *tunnel; 682 struct ip_tunnel *tunnel = netdev_priv(dev);
1411 struct iphdr *iph; 683 struct iphdr *iph = &tunnel->parms.iph;
1412 int err;
1413 684
1414 tunnel = netdev_priv(dev); 685 __gre_tunnel_init(dev);
1415 iph = &tunnel->parms.iph;
1416 686
1417 tunnel->dev = dev; 687 memcpy(dev->dev_addr, &iph->saddr, 4);
1418 strcpy(tunnel->parms.name, dev->name); 688 memcpy(dev->broadcast, &iph->daddr, 4);
1419 689
1420 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 690 dev->type = ARPHRD_IPGRE;
1421 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 691 dev->flags = IFF_NOARP;
692 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
693 dev->addr_len = 4;
1422 694
1423 if (iph->daddr) { 695 if (iph->daddr) {
1424#ifdef CONFIG_NET_IPGRE_BROADCAST 696#ifdef CONFIG_NET_IPGRE_BROADCAST
@@ -1432,106 +704,30 @@ static int ipgre_tunnel_init(struct net_device *dev)
1432 } else 704 } else
1433 dev->header_ops = &ipgre_header_ops; 705 dev->header_ops = &ipgre_header_ops;
1434 706
1435 dev->tstats = alloc_percpu(struct pcpu_tstats); 707 return ip_tunnel_init(dev);
1436 if (!dev->tstats)
1437 return -ENOMEM;
1438
1439 err = gro_cells_init(&tunnel->gro_cells, dev);
1440 if (err) {
1441 free_percpu(dev->tstats);
1442 return err;
1443 }
1444
1445 return 0;
1446}
1447
1448static void ipgre_fb_tunnel_init(struct net_device *dev)
1449{
1450 struct ip_tunnel *tunnel = netdev_priv(dev);
1451 struct iphdr *iph = &tunnel->parms.iph;
1452
1453 tunnel->dev = dev;
1454 strcpy(tunnel->parms.name, dev->name);
1455
1456 iph->version = 4;
1457 iph->protocol = IPPROTO_GRE;
1458 iph->ihl = 5;
1459 tunnel->hlen = sizeof(struct iphdr) + 4;
1460
1461 dev_hold(dev);
1462} 708}
1463 709
1464
1465static const struct gre_protocol ipgre_protocol = { 710static const struct gre_protocol ipgre_protocol = {
1466 .handler = ipgre_rcv, 711 .handler = ipgre_rcv,
1467 .err_handler = ipgre_err, 712 .err_handler = ipgre_err,
1468}; 713};
1469 714
1470static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1471{
1472 int prio;
1473
1474 for (prio = 0; prio < 4; prio++) {
1475 int h;
1476 for (h = 0; h < HASH_SIZE; h++) {
1477 struct ip_tunnel *t;
1478
1479 t = rtnl_dereference(ign->tunnels[prio][h]);
1480
1481 while (t != NULL) {
1482 unregister_netdevice_queue(t->dev, head);
1483 t = rtnl_dereference(t->next);
1484 }
1485 }
1486 }
1487}
1488
1489static int __net_init ipgre_init_net(struct net *net) 715static int __net_init ipgre_init_net(struct net *net)
1490{ 716{
1491 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 717 return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1492 int err;
1493
1494 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1495 ipgre_tunnel_setup);
1496 if (!ign->fb_tunnel_dev) {
1497 err = -ENOMEM;
1498 goto err_alloc_dev;
1499 }
1500 dev_net_set(ign->fb_tunnel_dev, net);
1501
1502 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1503 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1504
1505 if ((err = register_netdev(ign->fb_tunnel_dev)))
1506 goto err_reg_dev;
1507
1508 rcu_assign_pointer(ign->tunnels_wc[0],
1509 netdev_priv(ign->fb_tunnel_dev));
1510 return 0;
1511
1512err_reg_dev:
1513 ipgre_dev_free(ign->fb_tunnel_dev);
1514err_alloc_dev:
1515 return err;
1516} 718}
1517 719
1518static void __net_exit ipgre_exit_net(struct net *net) 720static void __net_exit ipgre_exit_net(struct net *net)
1519{ 721{
1520 struct ipgre_net *ign; 722 struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
1521 LIST_HEAD(list); 723 ip_tunnel_delete_net(itn);
1522
1523 ign = net_generic(net, ipgre_net_id);
1524 rtnl_lock();
1525 ipgre_destroy_tunnels(ign, &list);
1526 unregister_netdevice_many(&list);
1527 rtnl_unlock();
1528} 724}
1529 725
1530static struct pernet_operations ipgre_net_ops = { 726static struct pernet_operations ipgre_net_ops = {
1531 .init = ipgre_init_net, 727 .init = ipgre_init_net,
1532 .exit = ipgre_exit_net, 728 .exit = ipgre_exit_net,
1533 .id = &ipgre_net_id, 729 .id = &ipgre_net_id,
1534 .size = sizeof(struct ipgre_net), 730 .size = sizeof(struct ip_tunnel_net),
1535}; 731};
1536 732
1537static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) 733static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1576,8 +772,8 @@ out:
1576 return ipgre_tunnel_validate(tb, data); 772 return ipgre_tunnel_validate(tb, data);
1577} 773}
1578 774
1579static void ipgre_netlink_parms(struct nlattr *data[], 775static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
1580 struct ip_tunnel_parm *parms) 776 struct ip_tunnel_parm *parms)
1581{ 777{
1582 memset(parms, 0, sizeof(*parms)); 778 memset(parms, 0, sizeof(*parms));
1583 779
@@ -1590,10 +786,10 @@ static void ipgre_netlink_parms(struct nlattr *data[],
1590 parms->link = nla_get_u32(data[IFLA_GRE_LINK]); 786 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1591 787
1592 if (data[IFLA_GRE_IFLAGS]) 788 if (data[IFLA_GRE_IFLAGS])
1593 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); 789 parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1594 790
1595 if (data[IFLA_GRE_OFLAGS]) 791 if (data[IFLA_GRE_OFLAGS])
1596 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); 792 parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1597 793
1598 if (data[IFLA_GRE_IKEY]) 794 if (data[IFLA_GRE_IKEY])
1599 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); 795 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -1617,148 +813,46 @@ static void ipgre_netlink_parms(struct nlattr *data[],
1617 parms->iph.frag_off = htons(IP_DF); 813 parms->iph.frag_off = htons(IP_DF);
1618} 814}
1619 815
1620static int ipgre_tap_init(struct net_device *dev) 816static int gre_tap_init(struct net_device *dev)
1621{ 817{
1622 struct ip_tunnel *tunnel; 818 __gre_tunnel_init(dev);
1623
1624 tunnel = netdev_priv(dev);
1625
1626 tunnel->dev = dev;
1627 strcpy(tunnel->parms.name, dev->name);
1628 819
1629 ipgre_tunnel_bind_dev(dev); 820 return ip_tunnel_init(dev);
1630
1631 dev->tstats = alloc_percpu(struct pcpu_tstats);
1632 if (!dev->tstats)
1633 return -ENOMEM;
1634
1635 return 0;
1636} 821}
1637 822
1638static const struct net_device_ops ipgre_tap_netdev_ops = { 823static const struct net_device_ops gre_tap_netdev_ops = {
1639 .ndo_init = ipgre_tap_init, 824 .ndo_init = gre_tap_init,
1640 .ndo_uninit = ipgre_tunnel_uninit, 825 .ndo_uninit = ip_tunnel_uninit,
1641 .ndo_start_xmit = ipgre_tunnel_xmit, 826 .ndo_start_xmit = gre_tap_xmit,
1642 .ndo_set_mac_address = eth_mac_addr, 827 .ndo_set_mac_address = eth_mac_addr,
1643 .ndo_validate_addr = eth_validate_addr, 828 .ndo_validate_addr = eth_validate_addr,
1644 .ndo_change_mtu = ipgre_tunnel_change_mtu, 829 .ndo_change_mtu = ip_tunnel_change_mtu,
1645 .ndo_get_stats64 = ipgre_get_stats64, 830 .ndo_get_stats64 = ip_tunnel_get_stats64,
1646}; 831};
1647 832
1648static void ipgre_tap_setup(struct net_device *dev) 833static void ipgre_tap_setup(struct net_device *dev)
1649{ 834{
1650
1651 ether_setup(dev); 835 ether_setup(dev);
1652 836 dev->netdev_ops = &gre_tap_netdev_ops;
1653 dev->netdev_ops = &ipgre_tap_netdev_ops; 837 ip_tunnel_setup(dev, gre_tap_net_id);
1654 dev->destructor = ipgre_dev_free;
1655
1656 dev->iflink = 0;
1657 dev->features |= NETIF_F_NETNS_LOCAL;
1658
1659 dev->features |= GRE_FEATURES;
1660 dev->hw_features |= GRE_FEATURES;
1661} 838}
1662 839
1663static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], 840static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1664 struct nlattr *data[]) 841 struct nlattr *tb[], struct nlattr *data[])
1665{ 842{
1666 struct ip_tunnel *nt; 843 struct ip_tunnel_parm p;
1667 struct net *net = dev_net(dev);
1668 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1669 int mtu;
1670 int err;
1671
1672 nt = netdev_priv(dev);
1673 ipgre_netlink_parms(data, &nt->parms);
1674
1675 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1676 return -EEXIST;
1677
1678 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1679 eth_hw_addr_random(dev);
1680
1681 mtu = ipgre_tunnel_bind_dev(dev);
1682 if (!tb[IFLA_MTU])
1683 dev->mtu = mtu;
1684
1685 /* Can use a lockless transmit, unless we generate output sequences */
1686 if (!(nt->parms.o_flags & GRE_SEQ))
1687 dev->features |= NETIF_F_LLTX;
1688
1689 err = register_netdevice(dev);
1690 if (err)
1691 goto out;
1692
1693 dev_hold(dev);
1694 ipgre_tunnel_link(ign, nt);
1695 844
1696out: 845 ipgre_netlink_parms(data, tb, &p);
1697 return err; 846 return ip_tunnel_newlink(dev, tb, &p);
1698} 847}
1699 848
1700static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], 849static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1701 struct nlattr *data[]) 850 struct nlattr *data[])
1702{ 851{
1703 struct ip_tunnel *t, *nt;
1704 struct net *net = dev_net(dev);
1705 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1706 struct ip_tunnel_parm p; 852 struct ip_tunnel_parm p;
1707 int mtu;
1708
1709 if (dev == ign->fb_tunnel_dev)
1710 return -EINVAL;
1711
1712 nt = netdev_priv(dev);
1713 ipgre_netlink_parms(data, &p);
1714
1715 t = ipgre_tunnel_locate(net, &p, 0);
1716
1717 if (t) {
1718 if (t->dev != dev)
1719 return -EEXIST;
1720 } else {
1721 t = nt;
1722
1723 if (dev->type != ARPHRD_ETHER) {
1724 unsigned int nflags = 0;
1725
1726 if (ipv4_is_multicast(p.iph.daddr))
1727 nflags = IFF_BROADCAST;
1728 else if (p.iph.daddr)
1729 nflags = IFF_POINTOPOINT;
1730
1731 if ((dev->flags ^ nflags) &
1732 (IFF_POINTOPOINT | IFF_BROADCAST))
1733 return -EINVAL;
1734 }
1735 853
1736 ipgre_tunnel_unlink(ign, t); 854 ipgre_netlink_parms(data, tb, &p);
1737 t->parms.iph.saddr = p.iph.saddr; 855 return ip_tunnel_changelink(dev, tb, &p);
1738 t->parms.iph.daddr = p.iph.daddr;
1739 t->parms.i_key = p.i_key;
1740 if (dev->type != ARPHRD_ETHER) {
1741 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1742 memcpy(dev->broadcast, &p.iph.daddr, 4);
1743 }
1744 ipgre_tunnel_link(ign, t);
1745 netdev_state_change(dev);
1746 }
1747
1748 t->parms.o_key = p.o_key;
1749 t->parms.iph.ttl = p.iph.ttl;
1750 t->parms.iph.tos = p.iph.tos;
1751 t->parms.iph.frag_off = p.iph.frag_off;
1752
1753 if (t->parms.link != p.link) {
1754 t->parms.link = p.link;
1755 mtu = ipgre_tunnel_bind_dev(dev);
1756 if (!tb[IFLA_MTU])
1757 dev->mtu = mtu;
1758 netdev_state_change(dev);
1759 }
1760
1761 return 0;
1762} 856}
1763 857
1764static size_t ipgre_get_size(const struct net_device *dev) 858static size_t ipgre_get_size(const struct net_device *dev)
@@ -1793,8 +887,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1793 struct ip_tunnel_parm *p = &t->parms; 887 struct ip_tunnel_parm *p = &t->parms;
1794 888
1795 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || 889 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1796 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || 890 nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
1797 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || 891 nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
1798 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || 892 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1799 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || 893 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1800 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || 894 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
@@ -1832,6 +926,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1832 .validate = ipgre_tunnel_validate, 926 .validate = ipgre_tunnel_validate,
1833 .newlink = ipgre_newlink, 927 .newlink = ipgre_newlink,
1834 .changelink = ipgre_changelink, 928 .changelink = ipgre_changelink,
929 .dellink = ip_tunnel_dellink,
1835 .get_size = ipgre_get_size, 930 .get_size = ipgre_get_size,
1836 .fill_info = ipgre_fill_info, 931 .fill_info = ipgre_fill_info,
1837}; 932};
@@ -1845,13 +940,28 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1845 .validate = ipgre_tap_validate, 940 .validate = ipgre_tap_validate,
1846 .newlink = ipgre_newlink, 941 .newlink = ipgre_newlink,
1847 .changelink = ipgre_changelink, 942 .changelink = ipgre_changelink,
943 .dellink = ip_tunnel_dellink,
1848 .get_size = ipgre_get_size, 944 .get_size = ipgre_get_size,
1849 .fill_info = ipgre_fill_info, 945 .fill_info = ipgre_fill_info,
1850}; 946};
1851 947
1852/* 948static int __net_init ipgre_tap_init_net(struct net *net)
1853 * And now the modules code and kernel interface. 949{
1854 */ 950 return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
951}
952
953static void __net_exit ipgre_tap_exit_net(struct net *net)
954{
955 struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
956 ip_tunnel_delete_net(itn);
957}
958
959static struct pernet_operations ipgre_tap_net_ops = {
960 .init = ipgre_tap_init_net,
961 .exit = ipgre_tap_exit_net,
962 .id = &gre_tap_net_id,
963 .size = sizeof(struct ip_tunnel_net),
964};
1855 965
1856static int __init ipgre_init(void) 966static int __init ipgre_init(void)
1857{ 967{
@@ -1863,6 +973,10 @@ static int __init ipgre_init(void)
1863 if (err < 0) 973 if (err < 0)
1864 return err; 974 return err;
1865 975
976 err = register_pernet_device(&ipgre_tap_net_ops);
977 if (err < 0)
 978 goto pnet_tap_failed;
979
1866 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); 980 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1867 if (err < 0) { 981 if (err < 0) {
1868 pr_info("%s: can't add protocol\n", __func__); 982 pr_info("%s: can't add protocol\n", __func__);
@@ -1877,16 +991,17 @@ static int __init ipgre_init(void)
1877 if (err < 0) 991 if (err < 0)
1878 goto tap_ops_failed; 992 goto tap_ops_failed;
1879 993
1880out: 994 return 0;
1881 return err;
1882 995
1883tap_ops_failed: 996tap_ops_failed:
1884 rtnl_link_unregister(&ipgre_link_ops); 997 rtnl_link_unregister(&ipgre_link_ops);
1885rtnl_link_failed: 998rtnl_link_failed:
1886 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); 999 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1887add_proto_failed: 1000add_proto_failed:
1001 unregister_pernet_device(&ipgre_tap_net_ops);
1002pnet_tap_failed:
1888 unregister_pernet_device(&ipgre_net_ops); 1003 unregister_pernet_device(&ipgre_net_ops);
1889 goto out; 1004 return err;
1890} 1005}
1891 1006
1892static void __exit ipgre_fini(void) 1007static void __exit ipgre_fini(void)
@@ -1895,6 +1010,7 @@ static void __exit ipgre_fini(void)
1895 rtnl_link_unregister(&ipgre_link_ops); 1010 rtnl_link_unregister(&ipgre_link_ops);
1896 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) 1011 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1897 pr_info("%s: can't remove protocol\n", __func__); 1012 pr_info("%s: can't remove protocol\n", __func__);
1013 unregister_pernet_device(&ipgre_tap_net_ops);
1898 unregister_pernet_device(&ipgre_net_ops); 1014 unregister_pernet_device(&ipgre_net_ops);
1899} 1015}
1900 1016
@@ -1904,3 +1020,4 @@ MODULE_LICENSE("GPL");
1904MODULE_ALIAS_RTNL_LINK("gre"); 1020MODULE_ALIAS_RTNL_LINK("gre");
1905MODULE_ALIAS_RTNL_LINK("gretap"); 1021MODULE_ALIAS_RTNL_LINK("gretap");
1906MODULE_ALIAS_NETDEV("gre0"); 1022MODULE_ALIAS_NETDEV("gre0");
1023MODULE_ALIAS_NETDEV("gretap0");
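
The gretap conversion above gives each rtnl_link_ops its own per-netns state: a second net id and a second ip_tunnel_net hash table, registered through a near-identical pernet_operations pair. A minimal sketch of that boilerplate for a hypothetical "footnl" link type (the footnl names are illustrative; only the ip_tunnel_* helpers and the pernet fields come from this patch):

	static int footnl_net_id __read_mostly;
	static struct rtnl_link_ops footnl_link_ops;	/* .kind, .setup etc. elided */

	static int __net_init footnl_init_net(struct net *net)
	{
		/* NULL devname: the fallback device is named from ops->kind + "%d" */
		return ip_tunnel_init_net(net, footnl_net_id, &footnl_link_ops, NULL);
	}

	static void __net_exit footnl_exit_net(struct net *net)
	{
		struct ip_tunnel_net *itn = net_generic(net, footnl_net_id);

		ip_tunnel_delete_net(itn);
	}

	static struct pernet_operations footnl_net_ops = {
		.init = footnl_init_net,
		.exit = footnl_exit_net,
		.id   = &footnl_net_id,
		.size = sizeof(struct ip_tunnel_net),
	};

Registration then mirrors ipgre_init(): register_pernet_device() before the rtnl ops, and unregister in reverse order on failure, exactly as the pnet_tap_failed unwinding above does.
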
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 2bdf802e28e2..3da817b89e9b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -419,7 +419,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
419 iph = ip_hdr(skb); 419 iph = ip_hdr(skb);
420 420
421 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 421 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
422 goto inhdr_error; 422 goto csum_error;
423 423
424 len = ntohs(iph->tot_len); 424 len = ntohs(iph->tot_len);
425 if (skb->len < len) { 425 if (skb->len < len) {
@@ -446,6 +446,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
446 return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, 446 return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL,
447 ip_rcv_finish); 447 ip_rcv_finish);
448 448
449csum_error:
450 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS);
449inhdr_error: 451inhdr_error:
450 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); 452 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
451drop: 453drop:
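
The new csum_error label sits immediately above inhdr_error on purpose: a failed ip_fast_csum() bumps the new IPSTATS_MIB_CSUMERRORS counter and then falls through, so a corrupt checksum is still accounted as a header error as well before the skb is freed. Reduced to its skeleton (a sketch of the error tail only, not the full ip_rcv()):

	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
		goto csum_error;
	/* ... remaining header validation ... */

	csum_error:
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS);
		/* deliberate fall-through: also counted as a header error */
	inhdr_error:
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
	drop:
		kfree_skb(skb);
		return NET_RX_DROP;
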
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5e12dca7b3dd..147abf5275aa 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -430,8 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
430 to->tc_index = from->tc_index; 430 to->tc_index = from->tc_index;
431#endif 431#endif
432 nf_copy(to, from); 432 nf_copy(to, from);
433#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 433#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
434 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
435 to->nf_trace = from->nf_trace; 434 to->nf_trace = from->nf_trace;
436#endif 435#endif
437#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) 436#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
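
IS_ENABLED(CONFIG_FOO) is true when the option is built in or built as a module, which is exactly what the replaced two-line #if spelled out by hand; the IP_VS test just above keeps the open-coded form. Side by side:

	/* before: built-in and module symbols checked explicitly */
	#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
	    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
		to->nf_trace = from->nf_trace;
	#endif

	/* after: IS_ENABLED() covers =y and =m in one test */
	#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
		to->nf_trace = from->nf_trace;
	#endif
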
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
new file mode 100644
index 000000000000..e4147ec1665a
--- /dev/null
+++ b/net/ipv4/ip_tunnel.c
@@ -0,0 +1,1035 @@
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
57
58#if IS_ENABLED(CONFIG_IPV6)
59#include <net/ipv6.h>
60#include <net/ip6_fib.h>
61#include <net/ip6_route.h>
62#endif
63
64static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65 __be32 key, __be32 remote)
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
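/* Illustrative note, not part of this patch: the hash folds the GRE key
 * and the remote address into one 32-bit value and keeps the top
 * IP_TNL_HASH_BITS bits of the multiplicative hash, so keyed and keyless
 * tunnels to the same peer usually land in different buckets.  Assuming
 * IP_TNL_HASH_BITS is 7 (the value in ip_tunnels.h), the table has 128
 * heads and a bucket lookup reduces to:
 *
 *	head = &itn->tunnels[ip_tunnel_hash(itn, key, remote)];
 */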
70
 71/* Often-modified stats are per-cpu; others are shared (netdev->stats) */
72struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
73 struct rtnl_link_stats64 *tot)
74{
75 int i;
76
77 for_each_possible_cpu(i) {
78 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
79 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
80 unsigned int start;
81
82 do {
83 start = u64_stats_fetch_begin_bh(&tstats->syncp);
84 rx_packets = tstats->rx_packets;
85 tx_packets = tstats->tx_packets;
86 rx_bytes = tstats->rx_bytes;
87 tx_bytes = tstats->tx_bytes;
88 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
89
90 tot->rx_packets += rx_packets;
91 tot->tx_packets += tx_packets;
92 tot->rx_bytes += rx_bytes;
93 tot->tx_bytes += tx_bytes;
94 }
95
96 tot->multicast = dev->stats.multicast;
97
98 tot->rx_crc_errors = dev->stats.rx_crc_errors;
99 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
100 tot->rx_length_errors = dev->stats.rx_length_errors;
101 tot->rx_frame_errors = dev->stats.rx_frame_errors;
102 tot->rx_errors = dev->stats.rx_errors;
103
104 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
105 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
106 tot->tx_dropped = dev->stats.tx_dropped;
107 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
108 tot->tx_errors = dev->stats.tx_errors;
109
110 tot->collisions = dev->stats.collisions;
111
112 return tot;
113}
114EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117 __be16 flags, __be32 key)
118{
119 if (p->i_flags & TUNNEL_KEY) {
120 if (flags & TUNNEL_KEY)
121 return key == p->i_key;
122 else
123 /* key expected, none present */
124 return false;
125 } else
126 return !(flags & TUNNEL_KEY);
127}
128
129/* Fallback tunnel: no source, no destination, no key, no options
130
131 Tunnel hash table:
 132 We require an exact key match, i.e. if a key is present in the packet
 133 it will match only a tunnel with the same key; if no key is present,
 134 it will match only keyless tunnels.
 135
 136 All keyless packets that do not match a configured keyless tunnel
 137 will match the fallback tunnel.
 138 Given src, dst and key, find the appropriate input tunnel.
139*/
140struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
141 int link, __be16 flags,
142 __be32 remote, __be32 local,
143 __be32 key)
144{
145 unsigned int hash;
146 struct ip_tunnel *t, *cand = NULL;
147 struct hlist_head *head;
148
149 hash = ip_tunnel_hash(itn, key, remote);
150 head = &itn->tunnels[hash];
151
152 hlist_for_each_entry_rcu(t, head, hash_node) {
153 if (local != t->parms.iph.saddr ||
154 remote != t->parms.iph.daddr ||
155 !(t->dev->flags & IFF_UP))
156 continue;
157
158 if (!ip_tunnel_key_match(&t->parms, flags, key))
159 continue;
160
161 if (t->parms.link == link)
162 return t;
163 else
164 cand = t;
165 }
166
167 hlist_for_each_entry_rcu(t, head, hash_node) {
168 if (remote != t->parms.iph.daddr ||
169 !(t->dev->flags & IFF_UP))
170 continue;
171
172 if (!ip_tunnel_key_match(&t->parms, flags, key))
173 continue;
174
175 if (t->parms.link == link)
176 return t;
177 else if (!cand)
178 cand = t;
179 }
180
181 hash = ip_tunnel_hash(itn, key, 0);
182 head = &itn->tunnels[hash];
183
184 hlist_for_each_entry_rcu(t, head, hash_node) {
185 if ((local != t->parms.iph.saddr &&
186 (local != t->parms.iph.daddr ||
187 !ipv4_is_multicast(local))) ||
188 !(t->dev->flags & IFF_UP))
189 continue;
190
191 if (!ip_tunnel_key_match(&t->parms, flags, key))
192 continue;
193
194 if (t->parms.link == link)
195 return t;
196 else if (!cand)
197 cand = t;
198 }
199
200 if (flags & TUNNEL_NO_KEY)
201 goto skip_key_lookup;
202
203 hlist_for_each_entry_rcu(t, head, hash_node) {
204 if (t->parms.i_key != key ||
205 !(t->dev->flags & IFF_UP))
206 continue;
207
208 if (t->parms.link == link)
209 return t;
210 else if (!cand)
211 cand = t;
212 }
213
214skip_key_lookup:
215 if (cand)
216 return cand;
217
218 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
219 return netdev_priv(itn->fb_tunnel_dev);
220
221
222 return NULL;
223}
224EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
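/* Illustrative note, not part of this patch: the four scans above run in
 * strictly decreasing specificity, returning immediately on an exact
 * parms.link match and otherwise remembering the first weaker candidate:
 *
 *	1. saddr + daddr (+ key) match in the keyed bucket;
 *	2. daddr (+ key) match, i.e. wildcard local address;
 *	3. local unicast/multicast match in the remote==0 bucket;
 *	4. key-only match, skipped entirely for TUNNEL_NO_KEY packets;
 *
 * falling back to the per-netns fallback device if it exists and is up. */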
225
226static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227 struct ip_tunnel_parm *parms)
228{
229 unsigned int h;
230 __be32 remote;
231
232 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233 remote = parms->iph.daddr;
234 else
235 remote = 0;
236
237 h = ip_tunnel_hash(itn, parms->i_key, remote);
238 return &itn->tunnels[h];
239}
240
241static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
242{
243 struct hlist_head *head = ip_bucket(itn, &t->parms);
244
245 hlist_add_head_rcu(&t->hash_node, head);
246}
247
248static void ip_tunnel_del(struct ip_tunnel *t)
249{
250 hlist_del_init_rcu(&t->hash_node);
251}
252
253static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254 struct ip_tunnel_parm *parms,
255 int type)
256{
257 __be32 remote = parms->iph.daddr;
258 __be32 local = parms->iph.saddr;
259 __be32 key = parms->i_key;
260 int link = parms->link;
261 struct ip_tunnel *t = NULL;
262 struct hlist_head *head = ip_bucket(itn, parms);
263
264 hlist_for_each_entry_rcu(t, head, hash_node) {
265 if (local == t->parms.iph.saddr &&
266 remote == t->parms.iph.daddr &&
267 key == t->parms.i_key &&
268 link == t->parms.link &&
269 type == t->dev->type)
270 break;
271 }
272 return t;
273}
274
275static struct net_device *__ip_tunnel_create(struct net *net,
276 const struct rtnl_link_ops *ops,
277 struct ip_tunnel_parm *parms)
278{
279 int err;
280 struct ip_tunnel *tunnel;
281 struct net_device *dev;
282 char name[IFNAMSIZ];
283
284 if (parms->name[0])
285 strlcpy(name, parms->name, IFNAMSIZ);
286 else {
287 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
288 err = -E2BIG;
289 goto failed;
290 }
291 strlcpy(name, ops->kind, IFNAMSIZ);
292 strncat(name, "%d", 2);
293 }
294
295 ASSERT_RTNL();
296 dev = alloc_netdev(ops->priv_size, name, ops->setup);
297 if (!dev) {
298 err = -ENOMEM;
299 goto failed;
300 }
301 dev_net_set(dev, net);
302
303 dev->rtnl_link_ops = ops;
304
305 tunnel = netdev_priv(dev);
306 tunnel->parms = *parms;
307
308 err = register_netdevice(dev);
309 if (err)
310 goto failed_free;
311
312 return dev;
313
314failed_free:
315 free_netdev(dev);
316failed:
317 return ERR_PTR(err);
318}
319
320static inline struct rtable *ip_route_output_tunnel(struct net *net,
321 struct flowi4 *fl4,
322 int proto,
323 __be32 daddr, __be32 saddr,
324 __be32 key, __u8 tos, int oif)
325{
326 memset(fl4, 0, sizeof(*fl4));
327 fl4->flowi4_oif = oif;
328 fl4->daddr = daddr;
329 fl4->saddr = saddr;
330 fl4->flowi4_tos = tos;
331 fl4->flowi4_proto = proto;
332 fl4->fl4_gre_key = key;
333 return ip_route_output_key(net, fl4);
334}
335
336static int ip_tunnel_bind_dev(struct net_device *dev)
337{
338 struct net_device *tdev = NULL;
339 struct ip_tunnel *tunnel = netdev_priv(dev);
340 const struct iphdr *iph;
341 int hlen = LL_MAX_HEADER;
342 int mtu = ETH_DATA_LEN;
343 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
344
345 iph = &tunnel->parms.iph;
346
347 /* Guess output device to choose reasonable mtu and needed_headroom */
348 if (iph->daddr) {
349 struct flowi4 fl4;
350 struct rtable *rt;
351
352 rt = ip_route_output_tunnel(dev_net(dev), &fl4,
353 tunnel->parms.iph.protocol,
354 iph->daddr, iph->saddr,
355 tunnel->parms.o_key,
356 RT_TOS(iph->tos),
357 tunnel->parms.link);
358 if (!IS_ERR(rt)) {
359 tdev = rt->dst.dev;
360 ip_rt_put(rt);
361 }
362 if (dev->type != ARPHRD_ETHER)
363 dev->flags |= IFF_POINTOPOINT;
364 }
365
366 if (!tdev && tunnel->parms.link)
367 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
368
369 if (tdev) {
370 hlen = tdev->hard_header_len + tdev->needed_headroom;
371 mtu = tdev->mtu;
372 }
373 dev->iflink = tunnel->parms.link;
374
375 dev->needed_headroom = t_hlen + hlen;
376 mtu -= (dev->hard_header_len + t_hlen);
377
378 if (mtu < 68)
379 mtu = 68;
380
381 return mtu;
382}
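/* Illustrative note, not part of this patch: a worked example of the MTU
 * arithmetic above, for an assumed GRE tunnel carrying both TUNNEL_CSUM
 * and TUNNEL_KEY over a 1500-byte Ethernet underlay.  tunnel->hlen is
 * then 12 (4-byte base GRE header + 4-byte checksum + 4-byte key), so
 * t_hlen = 12 + 20 = 32 and, with hard_header_len 0 for this l3 device,
 *
 *	mtu = 1500 - (0 + 32) = 1468
 *
 * clamped to a floor of 68, the minimum IPv4 MTU. */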
383
384static struct ip_tunnel *ip_tunnel_create(struct net *net,
385 struct ip_tunnel_net *itn,
386 struct ip_tunnel_parm *parms)
387{
388 struct ip_tunnel *nt, *fbt;
389 struct net_device *dev;
390
391 BUG_ON(!itn->fb_tunnel_dev);
392 fbt = netdev_priv(itn->fb_tunnel_dev);
393 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
394 if (IS_ERR(dev))
395 return NULL;
396
397 dev->mtu = ip_tunnel_bind_dev(dev);
398
399 nt = netdev_priv(dev);
400 ip_tunnel_add(itn, nt);
401 return nt;
402}
403
404int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
405 const struct tnl_ptk_info *tpi, bool log_ecn_error)
406{
407 struct pcpu_tstats *tstats;
408 const struct iphdr *iph = ip_hdr(skb);
409 int err;
410
411 secpath_reset(skb);
412
413 skb->protocol = tpi->proto;
414
415 skb->mac_header = skb->network_header;
416 __pskb_pull(skb, tunnel->hlen);
417 skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
418#ifdef CONFIG_NET_IPGRE_BROADCAST
419 if (ipv4_is_multicast(iph->daddr)) {
420 /* Looped back packet, drop it! */
421 if (rt_is_output_route(skb_rtable(skb)))
422 goto drop;
423 tunnel->dev->stats.multicast++;
424 skb->pkt_type = PACKET_BROADCAST;
425 }
426#endif
427
428 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
429 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
430 tunnel->dev->stats.rx_crc_errors++;
431 tunnel->dev->stats.rx_errors++;
432 goto drop;
433 }
434
435 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
436 if (!(tpi->flags&TUNNEL_SEQ) ||
437 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
438 tunnel->dev->stats.rx_fifo_errors++;
439 tunnel->dev->stats.rx_errors++;
440 goto drop;
441 }
442 tunnel->i_seqno = ntohl(tpi->seq) + 1;
443 }
444
445 /* Warning: All skb pointers will be invalidated! */
446 if (tunnel->dev->type == ARPHRD_ETHER) {
447 if (!pskb_may_pull(skb, ETH_HLEN)) {
448 tunnel->dev->stats.rx_length_errors++;
449 tunnel->dev->stats.rx_errors++;
450 goto drop;
451 }
452
453 iph = ip_hdr(skb);
454 skb->protocol = eth_type_trans(skb, tunnel->dev);
455 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
456 }
457
458 skb->pkt_type = PACKET_HOST;
459 __skb_tunnel_rx(skb, tunnel->dev);
460
461 skb_reset_network_header(skb);
462 err = IP_ECN_decapsulate(iph, skb);
463 if (unlikely(err)) {
464 if (log_ecn_error)
465 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
466 &iph->saddr, iph->tos);
467 if (err > 1) {
468 ++tunnel->dev->stats.rx_frame_errors;
469 ++tunnel->dev->stats.rx_errors;
470 goto drop;
471 }
472 }
473
474 tstats = this_cpu_ptr(tunnel->dev->tstats);
475 u64_stats_update_begin(&tstats->syncp);
476 tstats->rx_packets++;
477 tstats->rx_bytes += skb->len;
478 u64_stats_update_end(&tstats->syncp);
479
480 gro_cells_receive(&tunnel->gro_cells, skb);
481 return 0;
482
483drop:
484 kfree_skb(skb);
485 return 0;
486}
487EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
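/* Illustrative note, not part of this patch: the TUNNEL_SEQ test above,
 * (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0, uses wraparound-safe
 * serial arithmetic rather than a plain compare.  With i_seqno ==
 * 0xfffffffe, an incoming seq of 0x00000001 gives 0x00000001 - 0xfffffffe
 * == 3 as an s32, so the packet is accepted across the 32-bit wrap, while
 * a stale seq of 0xfffffff0 gives -14 and is dropped as an rx_fifo_error. */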
488
489void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
490 const struct iphdr *tnl_params)
491{
492 struct ip_tunnel *tunnel = netdev_priv(dev);
493 const struct iphdr *inner_iph;
494 struct iphdr *iph;
495 struct flowi4 fl4;
496 u8 tos, ttl;
497 __be16 df;
498 struct rtable *rt; /* Route to the other host */
499 struct net_device *tdev; /* Device to other host */
500 unsigned int max_headroom; /* The extra header space needed */
501 __be32 dst;
502 int mtu;
503
504 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
505
506 dst = tnl_params->daddr;
507 if (dst == 0) {
508 /* NBMA tunnel */
509
510 if (skb_dst(skb) == NULL) {
511 dev->stats.tx_fifo_errors++;
512 goto tx_error;
513 }
514
515 if (skb->protocol == htons(ETH_P_IP)) {
516 rt = skb_rtable(skb);
517 dst = rt_nexthop(rt, inner_iph->daddr);
518 }
519#if IS_ENABLED(CONFIG_IPV6)
520 else if (skb->protocol == htons(ETH_P_IPV6)) {
521 const struct in6_addr *addr6;
522 struct neighbour *neigh;
523 bool do_tx_error_icmp;
524 int addr_type;
525
526 neigh = dst_neigh_lookup(skb_dst(skb),
527 &ipv6_hdr(skb)->daddr);
528 if (neigh == NULL)
529 goto tx_error;
530
531 addr6 = (const struct in6_addr *)&neigh->primary_key;
532 addr_type = ipv6_addr_type(addr6);
533
534 if (addr_type == IPV6_ADDR_ANY) {
535 addr6 = &ipv6_hdr(skb)->daddr;
536 addr_type = ipv6_addr_type(addr6);
537 }
538
539 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
540 do_tx_error_icmp = true;
541 else {
542 do_tx_error_icmp = false;
543 dst = addr6->s6_addr32[3];
544 }
545 neigh_release(neigh);
546 if (do_tx_error_icmp)
547 goto tx_error_icmp;
548 }
549#endif
550 else
551 goto tx_error;
552 }
553
554 tos = tnl_params->tos;
555 if (tos & 0x1) {
556 tos &= ~0x1;
557 if (skb->protocol == htons(ETH_P_IP))
558 tos = inner_iph->tos;
559 else if (skb->protocol == htons(ETH_P_IPV6))
560 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
561 }
562
563 rt = ip_route_output_tunnel(dev_net(dev), &fl4,
564 tunnel->parms.iph.protocol,
565 dst, tnl_params->saddr,
566 tunnel->parms.o_key,
567 RT_TOS(tos),
568 tunnel->parms.link);
569 if (IS_ERR(rt)) {
570 dev->stats.tx_carrier_errors++;
571 goto tx_error;
572 }
573 tdev = rt->dst.dev;
574
575 if (tdev == dev) {
576 ip_rt_put(rt);
577 dev->stats.collisions++;
578 goto tx_error;
579 }
580
581 df = tnl_params->frag_off;
582
583 if (df)
584 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
585 - sizeof(struct iphdr);
586 else
587 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
588
589 if (skb_dst(skb))
590 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
591
592 if (skb->protocol == htons(ETH_P_IP)) {
593 df |= (inner_iph->frag_off&htons(IP_DF));
594
595 if (!skb_is_gso(skb) &&
596 (inner_iph->frag_off&htons(IP_DF)) &&
597 mtu < ntohs(inner_iph->tot_len)) {
598 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
599 ip_rt_put(rt);
600 goto tx_error;
601 }
602 }
603#if IS_ENABLED(CONFIG_IPV6)
604 else if (skb->protocol == htons(ETH_P_IPV6)) {
605 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
606
607 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
608 mtu >= IPV6_MIN_MTU) {
609 if ((tunnel->parms.iph.daddr &&
610 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
611 rt6->rt6i_dst.plen == 128) {
612 rt6->rt6i_flags |= RTF_MODIFIED;
613 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
614 }
615 }
616
617 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
618 mtu < skb->len) {
619 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
620 ip_rt_put(rt);
621 goto tx_error;
622 }
623 }
624#endif
625
626 if (tunnel->err_count > 0) {
627 if (time_before(jiffies,
628 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
629 tunnel->err_count--;
630
631 dst_link_failure(skb);
632 } else
633 tunnel->err_count = 0;
634 }
635
636 ttl = tnl_params->ttl;
637 if (ttl == 0) {
638 if (skb->protocol == htons(ETH_P_IP))
639 ttl = inner_iph->ttl;
640#if IS_ENABLED(CONFIG_IPV6)
641 else if (skb->protocol == htons(ETH_P_IPV6))
642 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
643#endif
644 else
645 ttl = ip4_dst_hoplimit(&rt->dst);
646 }
647
648 max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
649 + rt->dst.header_len;
650 if (max_headroom > dev->needed_headroom) {
651 dev->needed_headroom = max_headroom;
652 if (skb_cow_head(skb, dev->needed_headroom)) {
653 dev->stats.tx_dropped++;
654 dev_kfree_skb(skb);
655 return;
656 }
657 }
658
659 skb_dst_drop(skb);
660 skb_dst_set(skb, &rt->dst);
661 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
662
663 /* Push down and install the IP header. */
664 skb_push(skb, sizeof(struct iphdr));
665 skb_reset_network_header(skb);
666
667 iph = ip_hdr(skb);
668 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
669
670 iph->version = 4;
671 iph->ihl = sizeof(struct iphdr) >> 2;
672 iph->frag_off = df;
673 iph->protocol = tnl_params->protocol;
674 iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
675 iph->daddr = fl4.daddr;
676 iph->saddr = fl4.saddr;
677 iph->ttl = ttl;
678 tunnel_ip_select_ident(skb, inner_iph, &rt->dst);
679
680 iptunnel_xmit(skb, dev);
681 return;
682
683#if IS_ENABLED(CONFIG_IPV6)
684tx_error_icmp:
685 dst_link_failure(skb);
686#endif
687tx_error:
688 dev->stats.tx_errors++;
689 dev_kfree_skb(skb);
690}
691EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
692
693static void ip_tunnel_update(struct ip_tunnel_net *itn,
694 struct ip_tunnel *t,
695 struct net_device *dev,
696 struct ip_tunnel_parm *p,
697 bool set_mtu)
698{
699 ip_tunnel_del(t);
700 t->parms.iph.saddr = p->iph.saddr;
701 t->parms.iph.daddr = p->iph.daddr;
702 t->parms.i_key = p->i_key;
703 t->parms.o_key = p->o_key;
704 if (dev->type != ARPHRD_ETHER) {
705 memcpy(dev->dev_addr, &p->iph.saddr, 4);
706 memcpy(dev->broadcast, &p->iph.daddr, 4);
707 }
708 ip_tunnel_add(itn, t);
709
710 t->parms.iph.ttl = p->iph.ttl;
711 t->parms.iph.tos = p->iph.tos;
712 t->parms.iph.frag_off = p->iph.frag_off;
713
714 if (t->parms.link != p->link) {
715 int mtu;
716
717 t->parms.link = p->link;
718 mtu = ip_tunnel_bind_dev(dev);
719 if (set_mtu)
720 dev->mtu = mtu;
721 }
722 netdev_state_change(dev);
723}
724
725int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
726{
727 int err = 0;
728 struct ip_tunnel *t;
729 struct net *net = dev_net(dev);
730 struct ip_tunnel *tunnel = netdev_priv(dev);
731 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
732
733 BUG_ON(!itn->fb_tunnel_dev);
734 switch (cmd) {
735 case SIOCGETTUNNEL:
736 t = NULL;
737 if (dev == itn->fb_tunnel_dev)
738 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
739 if (t == NULL)
740 t = netdev_priv(dev);
741 memcpy(p, &t->parms, sizeof(*p));
742 break;
743
744 case SIOCADDTUNNEL:
745 case SIOCCHGTUNNEL:
746 err = -EPERM;
747 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
748 goto done;
749 if (p->iph.ttl)
750 p->iph.frag_off |= htons(IP_DF);
751 if (!(p->i_flags&TUNNEL_KEY))
752 p->i_key = 0;
753 if (!(p->o_flags&TUNNEL_KEY))
754 p->o_key = 0;
755
756 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
757
758 if (!t && (cmd == SIOCADDTUNNEL))
759 t = ip_tunnel_create(net, itn, p);
760
761 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
762 if (t != NULL) {
763 if (t->dev != dev) {
764 err = -EEXIST;
765 break;
766 }
767 } else {
768 unsigned int nflags = 0;
769
770 if (ipv4_is_multicast(p->iph.daddr))
771 nflags = IFF_BROADCAST;
772 else if (p->iph.daddr)
773 nflags = IFF_POINTOPOINT;
774
775 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
776 err = -EINVAL;
777 break;
778 }
779
780 t = netdev_priv(dev);
781 }
782 }
783
784 if (t) {
785 err = 0;
786 ip_tunnel_update(itn, t, dev, p, true);
787 } else
788 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
789 break;
790
791 case SIOCDELTUNNEL:
792 err = -EPERM;
793 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
794 goto done;
795
796 if (dev == itn->fb_tunnel_dev) {
797 err = -ENOENT;
798 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
799 if (t == NULL)
800 goto done;
801 err = -EPERM;
802 if (t == netdev_priv(itn->fb_tunnel_dev))
803 goto done;
804 dev = t->dev;
805 }
806 unregister_netdevice(dev);
807 err = 0;
808 break;
809
810 default:
811 err = -EINVAL;
812 }
813
814done:
815 return err;
816}
817EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
818
819int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
820{
821 struct ip_tunnel *tunnel = netdev_priv(dev);
822 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
823
824 if (new_mtu < 68 ||
825 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
826 return -EINVAL;
827 dev->mtu = new_mtu;
828 return 0;
829}
830EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
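/* Illustrative note, not part of this patch: 0xFFF8 is 65528, the largest
 * length below 64K that is a multiple of 8 (fragment offsets count in
 * 8-byte units), so the bound keeps a maximally sized tunnelled packet
 * representable in the 16-bit IPv4 total-length field.  For a plain GRE
 * device (hlen 4, so t_hlen 24, hard_header_len 0 assumed) the accepted
 * range works out to [68, 65504]. */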
831
832static void ip_tunnel_dev_free(struct net_device *dev)
833{
834 struct ip_tunnel *tunnel = netdev_priv(dev);
835
836 gro_cells_destroy(&tunnel->gro_cells);
837 free_percpu(dev->tstats);
838 free_netdev(dev);
839}
840
841void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
842{
843 struct net *net = dev_net(dev);
844 struct ip_tunnel *tunnel = netdev_priv(dev);
845 struct ip_tunnel_net *itn;
846
847 itn = net_generic(net, tunnel->ip_tnl_net_id);
848
849 if (itn->fb_tunnel_dev != dev) {
850 ip_tunnel_del(netdev_priv(dev));
851 unregister_netdevice_queue(dev, head);
852 }
853}
854EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
855
856int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
857 struct rtnl_link_ops *ops, char *devname)
858{
859 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
860 struct ip_tunnel_parm parms;
861
862 itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
863 if (!itn->tunnels)
864 return -ENOMEM;
865
866 if (!ops) {
867 itn->fb_tunnel_dev = NULL;
868 return 0;
869 }
870 memset(&parms, 0, sizeof(parms));
871 if (devname)
872 strlcpy(parms.name, devname, IFNAMSIZ);
873
874 rtnl_lock();
875 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
876 rtnl_unlock();
877 if (IS_ERR(itn->fb_tunnel_dev)) {
878 kfree(itn->tunnels);
879 return PTR_ERR(itn->fb_tunnel_dev);
880 }
881
882 return 0;
883}
884EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
885
886static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
887{
888 int h;
889
890 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
891 struct ip_tunnel *t;
892 struct hlist_node *n;
893 struct hlist_head *thead = &itn->tunnels[h];
894
895 hlist_for_each_entry_safe(t, n, thead, hash_node)
896 unregister_netdevice_queue(t->dev, head);
897 }
898 if (itn->fb_tunnel_dev)
899 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
900}
901
902void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn)
903{
904 LIST_HEAD(list);
905
906 rtnl_lock();
907 ip_tunnel_destroy(itn, &list);
908 unregister_netdevice_many(&list);
909 rtnl_unlock();
910 kfree(itn->tunnels);
911}
912EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
913
914int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
915 struct ip_tunnel_parm *p)
916{
917 struct ip_tunnel *nt;
918 struct net *net = dev_net(dev);
919 struct ip_tunnel_net *itn;
920 int mtu;
921 int err;
922
923 nt = netdev_priv(dev);
924 itn = net_generic(net, nt->ip_tnl_net_id);
925
926 if (ip_tunnel_find(itn, p, dev->type))
927 return -EEXIST;
928
929 nt->parms = *p;
930 err = register_netdevice(dev);
931 if (err)
932 goto out;
933
934 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
935 eth_hw_addr_random(dev);
936
937 mtu = ip_tunnel_bind_dev(dev);
938 if (!tb[IFLA_MTU])
939 dev->mtu = mtu;
940
941 ip_tunnel_add(itn, nt);
942
943out:
944 return err;
945}
946EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
947
948int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
949 struct ip_tunnel_parm *p)
950{
951 struct ip_tunnel *t, *nt;
952 struct net *net = dev_net(dev);
953 struct ip_tunnel *tunnel = netdev_priv(dev);
954 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
955
956 if (dev == itn->fb_tunnel_dev)
957 return -EINVAL;
958
959 nt = netdev_priv(dev);
960
961 t = ip_tunnel_find(itn, p, dev->type);
962
963 if (t) {
964 if (t->dev != dev)
965 return -EEXIST;
966 } else {
967 t = nt;
968
969 if (dev->type != ARPHRD_ETHER) {
970 unsigned int nflags = 0;
971
972 if (ipv4_is_multicast(p->iph.daddr))
973 nflags = IFF_BROADCAST;
974 else if (p->iph.daddr)
975 nflags = IFF_POINTOPOINT;
976
977 if ((dev->flags ^ nflags) &
978 (IFF_POINTOPOINT | IFF_BROADCAST))
979 return -EINVAL;
980 }
981 }
982
983 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
984 return 0;
985}
986EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
987
988int ip_tunnel_init(struct net_device *dev)
989{
990 struct ip_tunnel *tunnel = netdev_priv(dev);
991 struct iphdr *iph = &tunnel->parms.iph;
992 int err;
993
994 dev->destructor = ip_tunnel_dev_free;
995 dev->tstats = alloc_percpu(struct pcpu_tstats);
996 if (!dev->tstats)
997 return -ENOMEM;
998
999 err = gro_cells_init(&tunnel->gro_cells, dev);
1000 if (err) {
1001 free_percpu(dev->tstats);
1002 return err;
1003 }
1004
1005 tunnel->dev = dev;
1006 strcpy(tunnel->parms.name, dev->name);
1007 iph->version = 4;
1008 iph->ihl = 5;
1009
1010 return 0;
1011}
1012EXPORT_SYMBOL_GPL(ip_tunnel_init);
1013
1014void ip_tunnel_uninit(struct net_device *dev)
1015{
1016 struct net *net = dev_net(dev);
1017 struct ip_tunnel *tunnel = netdev_priv(dev);
1018 struct ip_tunnel_net *itn;
1019
1020 itn = net_generic(net, tunnel->ip_tnl_net_id);
1021 /* fb_tunnel_dev will be unregistered in the net exit call. */
1022 if (itn->fb_tunnel_dev != dev)
1023 ip_tunnel_del(netdev_priv(dev));
1024}
1025EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1026
1027/* Do the least required initialization; the rest is done in the tunnel_init call */
1028void ip_tunnel_setup(struct net_device *dev, int net_id)
1029{
1030 struct ip_tunnel *tunnel = netdev_priv(dev);
1031 tunnel->ip_tnl_net_id = net_id;
1032}
1033EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1034
1035MODULE_LICENSE("GPL");
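
With the shared core in place a tunnel driver reduces to glue, as the gre_tap_netdev_ops conversion earlier in this commit shows: every generic callback points straight at an ip_tunnel_* helper and only the transmit path stays protocol specific. A condensed sketch for a hypothetical driver (the footnl names are illustrative; the ip_tunnel_* symbols are the ones exported above):

	static netdev_tx_t footnl_xmit(struct sk_buff *skb, struct net_device *dev)
	{
		struct ip_tunnel *tunnel = netdev_priv(dev);

		/* build and validate the protocol header here, then hand off */
		ip_tunnel_xmit(skb, dev, &tunnel->parms.iph);
		return NETDEV_TX_OK;
	}

	static const struct net_device_ops footnl_netdev_ops = {
		.ndo_init	 = ip_tunnel_init,
		.ndo_uninit	 = ip_tunnel_uninit,
		.ndo_start_xmit	 = footnl_xmit,
		.ndo_change_mtu	 = ip_tunnel_change_mtu,
		.ndo_get_stats64 = ip_tunnel_get_stats64,
	};
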
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index c3a4233c0ac2..9d2bdb2c1d3f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -38,7 +38,7 @@
38#include <net/sock.h> 38#include <net/sock.h>
39#include <net/ip.h> 39#include <net/ip.h>
40#include <net/icmp.h> 40#include <net/icmp.h>
41#include <net/ipip.h> 41#include <net/ip_tunnels.h>
42#include <net/inet_ecn.h> 42#include <net/inet_ecn.h>
43#include <net/xfrm.h> 43#include <net/xfrm.h>
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
@@ -82,44 +82,6 @@ static int vti_tunnel_bind_dev(struct net_device *dev);
82} while (0) 82} while (0)
83 83
84 84
85static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev,
86 struct rtnl_link_stats64 *tot)
87{
88 int i;
89
90 for_each_possible_cpu(i) {
91 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
92 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
93 unsigned int start;
94
95 do {
96 start = u64_stats_fetch_begin_bh(&tstats->syncp);
97 rx_packets = tstats->rx_packets;
98 tx_packets = tstats->tx_packets;
99 rx_bytes = tstats->rx_bytes;
100 tx_bytes = tstats->tx_bytes;
101 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
102
103 tot->rx_packets += rx_packets;
104 tot->tx_packets += tx_packets;
105 tot->rx_bytes += rx_bytes;
106 tot->tx_bytes += tx_bytes;
107 }
108
109 tot->multicast = dev->stats.multicast;
110 tot->rx_crc_errors = dev->stats.rx_crc_errors;
111 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
112 tot->rx_length_errors = dev->stats.rx_length_errors;
113 tot->rx_errors = dev->stats.rx_errors;
114 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
115 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
116 tot->tx_dropped = dev->stats.tx_dropped;
117 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
118 tot->tx_errors = dev->stats.tx_errors;
119
120 return tot;
121}
122
123static struct ip_tunnel *vti_tunnel_lookup(struct net *net, 85static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
124 __be32 remote, __be32 local) 86 __be32 remote, __be32 local)
125{ 87{
@@ -597,7 +559,7 @@ static const struct net_device_ops vti_netdev_ops = {
597 .ndo_start_xmit = vti_tunnel_xmit, 559 .ndo_start_xmit = vti_tunnel_xmit,
598 .ndo_do_ioctl = vti_tunnel_ioctl, 560 .ndo_do_ioctl = vti_tunnel_ioctl,
599 .ndo_change_mtu = vti_tunnel_change_mtu, 561 .ndo_change_mtu = vti_tunnel_change_mtu,
600 .ndo_get_stats64 = vti_get_stats64, 562 .ndo_get_stats64 = ip_tunnel_get_stats64,
601}; 563};
602 564
603static void vti_dev_free(struct net_device *dev) 565static void vti_dev_free(struct net_device *dev)
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index f01d1b1aff7f..59cb8c769056 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
75 t->props.mode = x->props.mode; 75 t->props.mode = x->props.mode;
76 t->props.saddr.a4 = x->props.saddr.a4; 76 t->props.saddr.a4 = x->props.saddr.a4;
77 t->props.flags = x->props.flags; 77 t->props.flags = x->props.flags;
78 t->props.extra_flags = x->props.extra_flags;
78 memcpy(&t->mark, &x->mark, sizeof(t->mark)); 79 memcpy(&t->mark, &x->mark, sizeof(t->mark));
79 80
80 if (xfrm_init_state(t)) 81 if (xfrm_init_state(t))
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index bf6c5cf31aed..efa1138fa523 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -206,7 +206,7 @@ static int __init ic_open_devs(void)
206 struct ic_device *d, **last; 206 struct ic_device *d, **last;
207 struct net_device *dev; 207 struct net_device *dev;
208 unsigned short oflags; 208 unsigned short oflags;
209 unsigned long start; 209 unsigned long start, next_msg;
210 210
211 last = &ic_first_dev; 211 last = &ic_first_dev;
212 rtnl_lock(); 212 rtnl_lock();
@@ -263,12 +263,23 @@ static int __init ic_open_devs(void)
263 263
264 /* wait for a carrier on at least one device */ 264 /* wait for a carrier on at least one device */
265 start = jiffies; 265 start = jiffies;
266 next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
266 while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { 267 while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) {
268 int wait, elapsed;
269
267 for_each_netdev(&init_net, dev) 270 for_each_netdev(&init_net, dev)
268 if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) 271 if (ic_is_init_dev(dev) && netif_carrier_ok(dev))
269 goto have_carrier; 272 goto have_carrier;
270 273
271 msleep(1); 274 msleep(1);
275
 276 if (time_before(jiffies, next_msg))
277 continue;
278
279 elapsed = jiffies_to_msecs(jiffies - start);
280 wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000;
281 pr_info("Waiting up to %d more seconds for network.\n", wait);
282 next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);
272 } 283 }
273have_carrier: 284have_carrier:
274 rtnl_unlock(); 285 rtnl_unlock();
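
The carrier-wait loop still polls every millisecond; the change only adds a progress message roughly every CONF_CARRIER_TIMEOUT/12. Assuming the usual CONF_CARRIER_TIMEOUT of 120000 ms, that is one line every 10 seconds, and wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000 rounds the remaining time to the nearest whole second: 35 seconds in, elapsed == 35000 and the message reports (120000 - 35000 + 500)/1000 == 85.
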
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 8f024d41eefa..77bfcce64fe5 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -111,227 +111,21 @@
111#include <net/sock.h> 111#include <net/sock.h>
112#include <net/ip.h> 112#include <net/ip.h>
113#include <net/icmp.h> 113#include <net/icmp.h>
114#include <net/ipip.h> 114#include <net/ip_tunnels.h>
115#include <net/inet_ecn.h> 115#include <net/inet_ecn.h>
116#include <net/xfrm.h> 116#include <net/xfrm.h>
117#include <net/net_namespace.h> 117#include <net/net_namespace.h>
118#include <net/netns/generic.h> 118#include <net/netns/generic.h>
119 119
120#define HASH_SIZE 16
121#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
122
123static bool log_ecn_error = true; 120static bool log_ecn_error = true;
124module_param(log_ecn_error, bool, 0644); 121module_param(log_ecn_error, bool, 0644);
125MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 122MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126 123
127static int ipip_net_id __read_mostly; 124static int ipip_net_id __read_mostly;
128struct ipip_net {
129 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
130 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
131 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
132 struct ip_tunnel __rcu *tunnels_wc[1];
133 struct ip_tunnel __rcu **tunnels[4];
134
135 struct net_device *fb_tunnel_dev;
136};
137 125
138static int ipip_tunnel_init(struct net_device *dev); 126static int ipip_tunnel_init(struct net_device *dev);
139static void ipip_tunnel_setup(struct net_device *dev);
140static void ipip_dev_free(struct net_device *dev);
141static struct rtnl_link_ops ipip_link_ops __read_mostly; 127static struct rtnl_link_ops ipip_link_ops __read_mostly;
142 128
143static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
144 struct rtnl_link_stats64 *tot)
145{
146 int i;
147
148 for_each_possible_cpu(i) {
149 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
150 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
151 unsigned int start;
152
153 do {
154 start = u64_stats_fetch_begin_bh(&tstats->syncp);
155 rx_packets = tstats->rx_packets;
156 tx_packets = tstats->tx_packets;
157 rx_bytes = tstats->rx_bytes;
158 tx_bytes = tstats->tx_bytes;
159 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
160
161 tot->rx_packets += rx_packets;
162 tot->tx_packets += tx_packets;
163 tot->rx_bytes += rx_bytes;
164 tot->tx_bytes += tx_bytes;
165 }
166
167 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169 tot->tx_dropped = dev->stats.tx_dropped;
170 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
171 tot->tx_errors = dev->stats.tx_errors;
172 tot->collisions = dev->stats.collisions;
173
174 return tot;
175}
176
177static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
178 __be32 remote, __be32 local)
179{
180 unsigned int h0 = HASH(remote);
181 unsigned int h1 = HASH(local);
182 struct ip_tunnel *t;
183 struct ipip_net *ipn = net_generic(net, ipip_net_id);
184
185 for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
186 if (local == t->parms.iph.saddr &&
187 remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188 return t;
189
190 for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
191 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
192 return t;
193
194 for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
195 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
196 return t;
197
198 t = rcu_dereference(ipn->tunnels_wc[0]);
199 if (t && (t->dev->flags&IFF_UP))
200 return t;
201 return NULL;
202}
203
204static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
205 struct ip_tunnel_parm *parms)
206{
207 __be32 remote = parms->iph.daddr;
208 __be32 local = parms->iph.saddr;
209 unsigned int h = 0;
210 int prio = 0;
211
212 if (remote) {
213 prio |= 2;
214 h ^= HASH(remote);
215 }
216 if (local) {
217 prio |= 1;
218 h ^= HASH(local);
219 }
220 return &ipn->tunnels[prio][h];
221}
222
223static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
224 struct ip_tunnel *t)
225{
226 return __ipip_bucket(ipn, &t->parms);
227}
228
229static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
230{
231 struct ip_tunnel __rcu **tp;
232 struct ip_tunnel *iter;
233
234 for (tp = ipip_bucket(ipn, t);
235 (iter = rtnl_dereference(*tp)) != NULL;
236 tp = &iter->next) {
237 if (t == iter) {
238 rcu_assign_pointer(*tp, t->next);
239 break;
240 }
241 }
242}
243
244static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
245{
246 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
247
248 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
249 rcu_assign_pointer(*tp, t);
250}
251
252static int ipip_tunnel_create(struct net_device *dev)
253{
254 struct ip_tunnel *t = netdev_priv(dev);
255 struct net *net = dev_net(dev);
256 struct ipip_net *ipn = net_generic(net, ipip_net_id);
257 int err;
258
259 err = ipip_tunnel_init(dev);
260 if (err < 0)
261 goto out;
262
263 err = register_netdevice(dev);
264 if (err < 0)
265 goto out;
266
267 strcpy(t->parms.name, dev->name);
268 dev->rtnl_link_ops = &ipip_link_ops;
269
270 dev_hold(dev);
271 ipip_tunnel_link(ipn, t);
272 return 0;
273
274out:
275 return err;
276}
277
278static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
279 struct ip_tunnel_parm *parms, int create)
280{
281 __be32 remote = parms->iph.daddr;
282 __be32 local = parms->iph.saddr;
283 struct ip_tunnel *t, *nt;
284 struct ip_tunnel __rcu **tp;
285 struct net_device *dev;
286 char name[IFNAMSIZ];
287 struct ipip_net *ipn = net_generic(net, ipip_net_id);
288
289 for (tp = __ipip_bucket(ipn, parms);
290 (t = rtnl_dereference(*tp)) != NULL;
291 tp = &t->next) {
292 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
293 return t;
294 }
295 if (!create)
296 return NULL;
297
298 if (parms->name[0])
299 strlcpy(name, parms->name, IFNAMSIZ);
300 else
301 strcpy(name, "tunl%d");
302
303 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
304 if (dev == NULL)
305 return NULL;
306
307 dev_net_set(dev, net);
308
309 nt = netdev_priv(dev);
310 nt->parms = *parms;
311
312 if (ipip_tunnel_create(dev) < 0)
313 goto failed_free;
314
315 return nt;
316
317failed_free:
318 ipip_dev_free(dev);
319 return NULL;
320}
321
322/* called with RTNL */
323static void ipip_tunnel_uninit(struct net_device *dev)
324{
325 struct net *net = dev_net(dev);
326 struct ipip_net *ipn = net_generic(net, ipip_net_id);
327
328 if (dev == ipn->fb_tunnel_dev)
329 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
330 else
331 ipip_tunnel_unlink(ipn, netdev_priv(dev));
332 dev_put(dev);
333}
334
335static int ipip_err(struct sk_buff *skb, u32 info) 129static int ipip_err(struct sk_buff *skb, u32 info)
336{ 130{
337 131
@@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 339 8 bytes of packet payload. This means that precise relaying of 133 8 bytes of packet payload. This means that precise relaying of
340 ICMP in the real Internet is absolutely infeasible. 134 ICMP in the real Internet is absolutely infeasible.
341 */ 135 */
136 struct net *net = dev_net(skb->dev);
137 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
342 const struct iphdr *iph = (const struct iphdr *)skb->data; 138 const struct iphdr *iph = (const struct iphdr *)skb->data;
343 const int type = icmp_hdr(skb)->type;
344 const int code = icmp_hdr(skb)->code;
345 struct ip_tunnel *t; 139 struct ip_tunnel *t;
346 int err; 140 int err;
347 141 const int type = icmp_hdr(skb)->type;
348 switch (type) { 142 const int code = icmp_hdr(skb)->code;
349 default:
350 case ICMP_PARAMETERPROB:
351 return 0;
352
353 case ICMP_DEST_UNREACH:
354 switch (code) {
355 case ICMP_SR_FAILED:
356 case ICMP_PORT_UNREACH:
357 /* Impossible event. */
358 return 0;
359 default:
360 /* All others are translated to HOST_UNREACH.
361 rfc2003 contains "deep thoughts" about NET_UNREACH,
362 I believe they are just ether pollution. --ANK
363 */
364 break;
365 }
366 break;
367 case ICMP_TIME_EXCEEDED:
368 if (code != ICMP_EXC_TTL)
369 return 0;
370 break;
371 case ICMP_REDIRECT:
372 break;
373 }
374 143
375 err = -ENOENT; 144 err = -ENOENT;
376 t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); 145 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
146 iph->daddr, iph->saddr, 0);
377 if (t == NULL) 147 if (t == NULL)
378 goto out; 148 goto out;
379 149
@@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info)
403 else 173 else
404 t->err_count = 1; 174 t->err_count = 1;
405 t->err_time = jiffies; 175 t->err_time = jiffies;
406out:
407 176
177out:
408 return err; 178 return err;
409} 179}
410 180
181static const struct tnl_ptk_info tpi = {
182 /* no tunnel info required for ipip. */
183 .proto = htons(ETH_P_IP),
184};
185
411static int ipip_rcv(struct sk_buff *skb) 186static int ipip_rcv(struct sk_buff *skb)
412{ 187{
188 struct net *net = dev_net(skb->dev);
189 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
413 struct ip_tunnel *tunnel; 190 struct ip_tunnel *tunnel;
414 const struct iphdr *iph = ip_hdr(skb); 191 const struct iphdr *iph = ip_hdr(skb);
415 int err;
416
417 tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
418 if (tunnel != NULL) {
419 struct pcpu_tstats *tstats;
420 192
193 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
194 iph->saddr, iph->daddr, 0);
195 if (tunnel) {
421 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 196 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
422 goto drop; 197 goto drop;
423 198 return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
424 secpath_reset(skb);
425
426 skb->mac_header = skb->network_header;
427 skb_reset_network_header(skb);
428 skb->protocol = htons(ETH_P_IP);
429 skb->pkt_type = PACKET_HOST;
430
431 __skb_tunnel_rx(skb, tunnel->dev);
432
433 err = IP_ECN_decapsulate(iph, skb);
434 if (unlikely(err)) {
435 if (log_ecn_error)
436 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
437 &iph->saddr, iph->tos);
438 if (err > 1) {
439 ++tunnel->dev->stats.rx_frame_errors;
440 ++tunnel->dev->stats.rx_errors;
441 goto drop;
442 }
443 }
444
445 tstats = this_cpu_ptr(tunnel->dev->tstats);
446 u64_stats_update_begin(&tstats->syncp);
447 tstats->rx_packets++;
448 tstats->rx_bytes += skb->len;
449 u64_stats_update_end(&tstats->syncp);
450
451 netif_rx(skb);
452 return 0;
453 } 199 }
454 200
455 return -1; 201 return -1;
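
Because the static tnl_ptk_info above sets only .proto, its flags, key and seq are all zero: inside ip_tunnel_rcv() the TUNNEL_CSUM and TUNNEL_SEQ checks become no-ops and ip_tunnel_key_match() can only match keyless tunnels, which is precisely the semantics the hand-rolled ipip receive path had. For contrast, a keyed decapsulation would fill the structure per packet (a hypothetical sketch, not in this patch):

	struct tnl_ptk_info tpi = {
		.flags = TUNNEL_KEY,
		.proto = htons(ETH_P_IP),
		.key   = key,	/* parsed from the outer header */
	};

	return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
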
@@ -463,329 +209,64 @@ drop:
463 * This function assumes it is being called from dev_queue_xmit() 209 * This function assumes it is being called from dev_queue_xmit()
464 * and that skb is filled properly by that function. 210 * and that skb is filled properly by that function.
465 */ 211 */
466
467static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 212static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
468{ 213{
469 struct ip_tunnel *tunnel = netdev_priv(dev); 214 struct ip_tunnel *tunnel = netdev_priv(dev);
470 const struct iphdr *tiph = &tunnel->parms.iph; 215 const struct iphdr *tiph = &tunnel->parms.iph;
471 u8 tos = tunnel->parms.iph.tos;
472 __be16 df = tiph->frag_off;
473 struct rtable *rt; /* Route to the other host */
474 struct net_device *tdev; /* Device to other host */
475 const struct iphdr *old_iph;
476 struct iphdr *iph; /* Our new IP header */
477 unsigned int max_headroom; /* The extra header space needed */
478 __be32 dst = tiph->daddr;
479 struct flowi4 fl4;
480 int mtu;
481
482 if (skb->protocol != htons(ETH_P_IP))
483 goto tx_error;
484 216
485 if (skb->ip_summed == CHECKSUM_PARTIAL && 217 if (unlikely(skb->protocol != htons(ETH_P_IP)))
486 skb_checksum_help(skb))
487 goto tx_error; 218 goto tx_error;
488 219
489 old_iph = ip_hdr(skb); 220 if (likely(!skb->encapsulation)) {
490 221 skb_reset_inner_headers(skb);
491 if (tos & 1) 222 skb->encapsulation = 1;
492 tos = old_iph->tos;
493
494 if (!dst) {
495 /* NBMA tunnel */
496 if ((rt = skb_rtable(skb)) == NULL) {
497 dev->stats.tx_fifo_errors++;
498 goto tx_error;
499 }
500 dst = rt_nexthop(rt, old_iph->daddr);
501 } 223 }
502 224
503 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, 225 ip_tunnel_xmit(skb, dev, tiph);
504 dst, tiph->saddr,
505 0, 0,
506 IPPROTO_IPIP, RT_TOS(tos),
507 tunnel->parms.link);
508 if (IS_ERR(rt)) {
509 dev->stats.tx_carrier_errors++;
510 goto tx_error_icmp;
511 }
512 tdev = rt->dst.dev;
513
514 if (tdev == dev) {
515 ip_rt_put(rt);
516 dev->stats.collisions++;
517 goto tx_error;
518 }
519
520 df |= old_iph->frag_off & htons(IP_DF);
521
522 if (df) {
523 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
524
525 if (mtu < 68) {
526 dev->stats.collisions++;
527 ip_rt_put(rt);
528 goto tx_error;
529 }
530
531 if (skb_dst(skb))
532 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
533
534 if ((old_iph->frag_off & htons(IP_DF)) &&
535 mtu < ntohs(old_iph->tot_len)) {
536 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
537 htonl(mtu));
538 ip_rt_put(rt);
539 goto tx_error;
540 }
541 }
542
543 if (tunnel->err_count > 0) {
544 if (time_before(jiffies,
545 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
546 tunnel->err_count--;
547 dst_link_failure(skb);
548 } else
549 tunnel->err_count = 0;
550 }
551
552 /*
553 * Okay, now see if we can stuff it in the buffer as-is.
554 */
555 max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
556
557 if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
558 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
559 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
560 if (!new_skb) {
561 ip_rt_put(rt);
562 dev->stats.tx_dropped++;
563 dev_kfree_skb(skb);
564 return NETDEV_TX_OK;
565 }
566 if (skb->sk)
567 skb_set_owner_w(new_skb, skb->sk);
568 dev_kfree_skb(skb);
569 skb = new_skb;
570 old_iph = ip_hdr(skb);
571 }
572
573 skb->transport_header = skb->network_header;
574 skb_push(skb, sizeof(struct iphdr));
575 skb_reset_network_header(skb);
576 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
577 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
578 IPSKB_REROUTED);
579 skb_dst_drop(skb);
580 skb_dst_set(skb, &rt->dst);
581
582 /*
583 * Push down and install the IPIP header.
584 */
585
586 iph = ip_hdr(skb);
587 iph->version = 4;
588 iph->ihl = sizeof(struct iphdr)>>2;
589 iph->frag_off = df;
590 iph->protocol = IPPROTO_IPIP;
591 iph->tos = INET_ECN_encapsulate(tos, old_iph->tos);
592 iph->daddr = fl4.daddr;
593 iph->saddr = fl4.saddr;
594
595 if ((iph->ttl = tiph->ttl) == 0)
596 iph->ttl = old_iph->ttl;
597
598 iptunnel_xmit(skb, dev);
599 return NETDEV_TX_OK; 226 return NETDEV_TX_OK;
600 227
601tx_error_icmp:
602 dst_link_failure(skb);
603tx_error: 228tx_error:
604 dev->stats.tx_errors++; 229 dev->stats.tx_errors++;
605 dev_kfree_skb(skb); 230 dev_kfree_skb(skb);
606 return NETDEV_TX_OK; 231 return NETDEV_TX_OK;
607} 232}
608 233
609static void ipip_tunnel_bind_dev(struct net_device *dev)
610{
611 struct net_device *tdev = NULL;
612 struct ip_tunnel *tunnel;
613 const struct iphdr *iph;
614
615 tunnel = netdev_priv(dev);
616 iph = &tunnel->parms.iph;
617
618 if (iph->daddr) {
619 struct rtable *rt;
620 struct flowi4 fl4;
621
622 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
623 iph->daddr, iph->saddr,
624 0, 0,
625 IPPROTO_IPIP,
626 RT_TOS(iph->tos),
627 tunnel->parms.link);
628 if (!IS_ERR(rt)) {
629 tdev = rt->dst.dev;
630 ip_rt_put(rt);
631 }
632 dev->flags |= IFF_POINTOPOINT;
633 }
634
635 if (!tdev && tunnel->parms.link)
636 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
637
638 if (tdev) {
639 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
640 dev->mtu = tdev->mtu - sizeof(struct iphdr);
641 }
642 dev->iflink = tunnel->parms.link;
643}
644
645static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
646{
647 struct net *net = dev_net(t->dev);
648 struct ipip_net *ipn = net_generic(net, ipip_net_id);
649
650 ipip_tunnel_unlink(ipn, t);
651 synchronize_net();
652 t->parms.iph.saddr = p->iph.saddr;
653 t->parms.iph.daddr = p->iph.daddr;
654 memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
655 memcpy(t->dev->broadcast, &p->iph.daddr, 4);
656 ipip_tunnel_link(ipn, t);
657 t->parms.iph.ttl = p->iph.ttl;
658 t->parms.iph.tos = p->iph.tos;
659 t->parms.iph.frag_off = p->iph.frag_off;
660 if (t->parms.link != p->link) {
661 t->parms.link = p->link;
662 ipip_tunnel_bind_dev(t->dev);
663 }
664 netdev_state_change(t->dev);
665}
666
667static int 234static int
668ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) 235ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
669{ 236{
670 int err = 0; 237 int err = 0;
671 struct ip_tunnel_parm p; 238 struct ip_tunnel_parm p;
672 struct ip_tunnel *t;
673 struct net *net = dev_net(dev);
674 struct ipip_net *ipn = net_generic(net, ipip_net_id);
675
676 switch (cmd) {
677 case SIOCGETTUNNEL:
678 t = NULL;
679 if (dev == ipn->fb_tunnel_dev) {
680 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
681 err = -EFAULT;
682 break;
683 }
684 t = ipip_tunnel_locate(net, &p, 0);
685 }
686 if (t == NULL)
687 t = netdev_priv(dev);
688 memcpy(&p, &t->parms, sizeof(p));
689 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
690 err = -EFAULT;
691 break;
692
693 case SIOCADDTUNNEL:
694 case SIOCCHGTUNNEL:
695 err = -EPERM;
696 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
697 goto done;
698
699 err = -EFAULT;
700 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
701 goto done;
702
703 err = -EINVAL;
704 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
705 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
706 goto done;
707 if (p.iph.ttl)
708 p.iph.frag_off |= htons(IP_DF);
709
710 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
711
712 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
713 if (t != NULL) {
714 if (t->dev != dev) {
715 err = -EEXIST;
716 break;
717 }
718 } else {
719 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
720 (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
721 err = -EINVAL;
722 break;
723 }
724 t = netdev_priv(dev);
725 }
726
727 ipip_tunnel_update(t, &p);
728 }
729
730 if (t) {
731 err = 0;
732 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
733 err = -EFAULT;
734 } else
735 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
736 break;
737
738 case SIOCDELTUNNEL:
739 err = -EPERM;
740 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
741 goto done;
742
743 if (dev == ipn->fb_tunnel_dev) {
744 err = -EFAULT;
745 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
746 goto done;
747 err = -ENOENT;
748 if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
749 goto done;
750 err = -EPERM;
751 if (t->dev == ipn->fb_tunnel_dev)
752 goto done;
753 dev = t->dev;
754 }
755 unregister_netdevice(dev);
756 err = 0;
757 break;
758 239
759 default: 240 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
760 err = -EINVAL; 241 return -EFAULT;
761 }
762
763done:
764 return err;
765}
766 242
767static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) 243 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
768{ 244 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
769 if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) 245 return -EINVAL;
246 if (p.i_key || p.o_key || p.i_flags || p.o_flags)
770 return -EINVAL; 247 return -EINVAL;
771 dev->mtu = new_mtu; 248 if (p.iph.ttl)
249 p.iph.frag_off |= htons(IP_DF);
250
251 err = ip_tunnel_ioctl(dev, &p, cmd);
252 if (err)
253 return err;
254
255 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
256 return -EFAULT;
257
772 return 0; 258 return 0;
773} 259}
774 260
775static const struct net_device_ops ipip_netdev_ops = { 261static const struct net_device_ops ipip_netdev_ops = {
776 .ndo_uninit = ipip_tunnel_uninit, 262 .ndo_init = ipip_tunnel_init,
263 .ndo_uninit = ip_tunnel_uninit,
777 .ndo_start_xmit = ipip_tunnel_xmit, 264 .ndo_start_xmit = ipip_tunnel_xmit,
778 .ndo_do_ioctl = ipip_tunnel_ioctl, 265 .ndo_do_ioctl = ipip_tunnel_ioctl,
779 .ndo_change_mtu = ipip_tunnel_change_mtu, 266 .ndo_change_mtu = ip_tunnel_change_mtu,
780 .ndo_get_stats64 = ipip_get_stats64, 267 .ndo_get_stats64 = ip_tunnel_get_stats64,
781}; 268};
782 269
783static void ipip_dev_free(struct net_device *dev)
784{
785 free_percpu(dev->tstats);
786 free_netdev(dev);
787}
788
789#define IPIP_FEATURES (NETIF_F_SG | \ 270#define IPIP_FEATURES (NETIF_F_SG | \
790 NETIF_F_FRAGLIST | \ 271 NETIF_F_FRAGLIST | \
791 NETIF_F_HIGHDMA | \ 272 NETIF_F_HIGHDMA | \
@@ -794,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev)
794static void ipip_tunnel_setup(struct net_device *dev) 275static void ipip_tunnel_setup(struct net_device *dev)
795{ 276{
796 dev->netdev_ops = &ipip_netdev_ops; 277 dev->netdev_ops = &ipip_netdev_ops;
797 dev->destructor = ipip_dev_free;
798 278
799 dev->type = ARPHRD_TUNNEL; 279 dev->type = ARPHRD_TUNNEL;
800 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
801 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr);
802 dev->flags = IFF_NOARP; 280 dev->flags = IFF_NOARP;
803 dev->iflink = 0; 281 dev->iflink = 0;
804 dev->addr_len = 4; 282 dev->addr_len = 4;
@@ -808,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev)
808 286
809 dev->features |= IPIP_FEATURES; 287 dev->features |= IPIP_FEATURES;
810 dev->hw_features |= IPIP_FEATURES; 288 dev->hw_features |= IPIP_FEATURES;
289 ip_tunnel_setup(dev, ipip_net_id);
811} 290}
812 291
813static int ipip_tunnel_init(struct net_device *dev) 292static int ipip_tunnel_init(struct net_device *dev)
814{ 293{
815 struct ip_tunnel *tunnel = netdev_priv(dev); 294 struct ip_tunnel *tunnel = netdev_priv(dev);
816 295
817 tunnel->dev = dev;
818
819 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 296 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
820 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 297 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
821 298
822 ipip_tunnel_bind_dev(dev); 299 tunnel->hlen = 0;
823 300 tunnel->parms.iph.protocol = IPPROTO_IPIP;
824 dev->tstats = alloc_percpu(struct pcpu_tstats); 301 return ip_tunnel_init(dev);
825 if (!dev->tstats)
826 return -ENOMEM;
827
828 return 0;
829}
830
831static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
832{
833 struct ip_tunnel *tunnel = netdev_priv(dev);
834 struct iphdr *iph = &tunnel->parms.iph;
835 struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
836
837 tunnel->dev = dev;
838 strcpy(tunnel->parms.name, dev->name);
839
840 iph->version = 4;
841 iph->protocol = IPPROTO_IPIP;
842 iph->ihl = 5;
843
844 dev->tstats = alloc_percpu(struct pcpu_tstats);
845 if (!dev->tstats)
846 return -ENOMEM;
847
848 dev_hold(dev);
849 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
850 return 0;
851} 302}
852 303
853static void ipip_netlink_parms(struct nlattr *data[], 304static void ipip_netlink_parms(struct nlattr *data[],
@@ -887,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[],
887static int ipip_newlink(struct net *src_net, struct net_device *dev, 338static int ipip_newlink(struct net *src_net, struct net_device *dev,
888 struct nlattr *tb[], struct nlattr *data[]) 339 struct nlattr *tb[], struct nlattr *data[])
889{ 340{
890 struct net *net = dev_net(dev); 341 struct ip_tunnel_parm p;
891 struct ip_tunnel *nt;
892
893 nt = netdev_priv(dev);
894 ipip_netlink_parms(data, &nt->parms);
895
896 if (ipip_tunnel_locate(net, &nt->parms, 0))
897 return -EEXIST;
898 342
899 return ipip_tunnel_create(dev); 343 ipip_netlink_parms(data, &p);
344 return ip_tunnel_newlink(dev, tb, &p);
900} 345}
901 346
902static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], 347static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
903 struct nlattr *data[]) 348 struct nlattr *data[])
904{ 349{
905 struct ip_tunnel *t;
906 struct ip_tunnel_parm p; 350 struct ip_tunnel_parm p;
907 struct net *net = dev_net(dev);
908 struct ipip_net *ipn = net_generic(net, ipip_net_id);
909
910 if (dev == ipn->fb_tunnel_dev)
911 return -EINVAL;
912 351
913 ipip_netlink_parms(data, &p); 352 ipip_netlink_parms(data, &p);
914 353
@@ -916,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
916 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) 355 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
917 return -EINVAL; 356 return -EINVAL;
918 357
919 t = ipip_tunnel_locate(net, &p, 0); 358 return ip_tunnel_changelink(dev, tb, &p);
920
921 if (t) {
922 if (t->dev != dev)
923 return -EEXIST;
924 } else
925 t = netdev_priv(dev);
926
927 ipip_tunnel_update(t, &p);
928 return 0;
929} 359}
930 360
931static size_t ipip_get_size(const struct net_device *dev) 361static size_t ipip_get_size(const struct net_device *dev)
@@ -982,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {
982 .setup = ipip_tunnel_setup, 412 .setup = ipip_tunnel_setup,
983 .newlink = ipip_newlink, 413 .newlink = ipip_newlink,
984 .changelink = ipip_changelink, 414 .changelink = ipip_changelink,
415 .dellink = ip_tunnel_dellink,
985 .get_size = ipip_get_size, 416 .get_size = ipip_get_size,
986 .fill_info = ipip_fill_info, 417 .fill_info = ipip_fill_info,
987}; 418};
@@ -992,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {
992 .priority = 1, 423 .priority = 1,
993}; 424};
994 425
995static const char banner[] __initconst =
996 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
997
998static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
999{
1000 int prio;
1001
1002 for (prio = 1; prio < 4; prio++) {
1003 int h;
1004 for (h = 0; h < HASH_SIZE; h++) {
1005 struct ip_tunnel *t;
1006
1007 t = rtnl_dereference(ipn->tunnels[prio][h]);
1008 while (t != NULL) {
1009 unregister_netdevice_queue(t->dev, head);
1010 t = rtnl_dereference(t->next);
1011 }
1012 }
1013 }
1014}
1015
1016static int __net_init ipip_init_net(struct net *net) 426static int __net_init ipip_init_net(struct net *net)
1017{ 427{
1018 struct ipip_net *ipn = net_generic(net, ipip_net_id); 428 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
1019 struct ip_tunnel *t;
1020 int err;
1021
1022 ipn->tunnels[0] = ipn->tunnels_wc;
1023 ipn->tunnels[1] = ipn->tunnels_l;
1024 ipn->tunnels[2] = ipn->tunnels_r;
1025 ipn->tunnels[3] = ipn->tunnels_r_l;
1026
1027 ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
1028 "tunl0",
1029 ipip_tunnel_setup);
1030 if (!ipn->fb_tunnel_dev) {
1031 err = -ENOMEM;
1032 goto err_alloc_dev;
1033 }
1034 dev_net_set(ipn->fb_tunnel_dev, net);
1035
1036 err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
1037 if (err)
1038 goto err_reg_dev;
1039
1040 if ((err = register_netdev(ipn->fb_tunnel_dev)))
1041 goto err_reg_dev;
1042
1043 t = netdev_priv(ipn->fb_tunnel_dev);
1044
1045 strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
1046 return 0;
1047
1048err_reg_dev:
1049 ipip_dev_free(ipn->fb_tunnel_dev);
1050err_alloc_dev:
1051 /* nothing */
1052 return err;
1053} 429}
1054 430
1055static void __net_exit ipip_exit_net(struct net *net) 431static void __net_exit ipip_exit_net(struct net *net)
1056{ 432{
1057 struct ipip_net *ipn = net_generic(net, ipip_net_id); 433 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
1058 LIST_HEAD(list); 434 ip_tunnel_delete_net(itn);
1059
1060 rtnl_lock();
1061 ipip_destroy_tunnels(ipn, &list);
1062 unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
1063 unregister_netdevice_many(&list);
1064 rtnl_unlock();
1065} 435}
1066 436
1067static struct pernet_operations ipip_net_ops = { 437static struct pernet_operations ipip_net_ops = {
1068 .init = ipip_init_net, 438 .init = ipip_init_net,
1069 .exit = ipip_exit_net, 439 .exit = ipip_exit_net,
1070 .id = &ipip_net_id, 440 .id = &ipip_net_id,
1071 .size = sizeof(struct ipip_net), 441 .size = sizeof(struct ip_tunnel_net),
1072}; 442};
1073 443
1074static int __init ipip_init(void) 444static int __init ipip_init(void)
1075{ 445{
1076 int err; 446 int err;
1077 447
1078 printk(banner); 448 pr_info("ipip: IPv4 over IPv4 tunneling driver\n");
1079 449
1080 err = register_pernet_device(&ipip_net_ops); 450 err = register_pernet_device(&ipip_net_ops);
1081 if (err < 0) 451 if (err < 0)
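
The ipip.c rewrite above replaces the driver-local xmit, ioctl, device-bind, hashing, and pernet plumbing with the shared ip_tunnel core: the ioctl path validates parameters and delegates to ip_tunnel_ioctl(), the netdev ops point at ip_tunnel_* helpers, and pernet state shrinks to struct ip_tunnel_net. A minimal sketch of the post-conversion driver shape, assuming the v3.10 API from <net/ip_tunnels.h>; the example_* names and the "extun0" fallback device name are hypothetical:

#include <linux/in.h>
#include <linux/netdevice.h>
#include <net/ip_tunnels.h>
#include <net/netns/generic.h>

static int example_net_id __read_mostly;
static struct rtnl_link_ops example_link_ops;

static int example_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->hlen = 0;                          /* no extra encap header */
	tunnel->parms.iph.protocol = IPPROTO_IPIP; /* inner protocol */
	return ip_tunnel_init(dev);                /* shared per-device setup */
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_init        = example_tunnel_init,
	.ndo_uninit      = ip_tunnel_uninit,       /* shared teardown */
	.ndo_change_mtu  = ip_tunnel_change_mtu,   /* shared MTU policy */
	.ndo_get_stats64 = ip_tunnel_get_stats64,  /* shared per-cpu stats */
	/* the driver keeps thin ndo_start_xmit/ndo_do_ioctl wrappers that
	 * call ip_tunnel_xmit()/ip_tunnel_ioctl(), as ipip does above */
};

static int __net_init example_init_net(struct net *net)
{
	/* the core allocates one fallback device per namespace */
	return ip_tunnel_init_net(net, example_net_id, &example_link_ops,
				  "extun0");
}

static void __net_exit example_exit_net(struct net *net)
{
	ip_tunnel_delete_net(net_generic(net, example_net_id));
}

static struct pernet_operations example_net_ops = {
	.init = example_init_net,
	.exit = example_exit_net,
	.id   = &example_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
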
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5f95b3aa579e..9d9610ae7855 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -61,7 +61,7 @@
61#include <linux/netfilter_ipv4.h> 61#include <linux/netfilter_ipv4.h>
62#include <linux/compat.h> 62#include <linux/compat.h>
63#include <linux/export.h> 63#include <linux/export.h>
64#include <net/ipip.h> 64#include <net/ip_tunnels.h>
65#include <net/checksum.h> 65#include <net/checksum.h>
66#include <net/netlink.h> 66#include <net/netlink.h>
67#include <net/fib_rules.h> 67#include <net/fib_rules.h>
@@ -626,9 +626,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
626 if (ip_hdr(skb)->version == 0) { 626 if (ip_hdr(skb)->version == 0) {
627 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 627 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
628 nlh->nlmsg_type = NLMSG_ERROR; 628 nlh->nlmsg_type = NLMSG_ERROR;
629 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 629 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
630 skb_trim(skb, nlh->nlmsg_len); 630 skb_trim(skb, nlh->nlmsg_len);
631 e = NLMSG_DATA(nlh); 631 e = nlmsg_data(nlh);
632 e->error = -ETIMEDOUT; 632 e->error = -ETIMEDOUT;
633 memset(&e->msg, 0, sizeof(e->msg)); 633 memset(&e->msg, 0, sizeof(e->msg));
634 634
@@ -910,14 +910,14 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
910 if (ip_hdr(skb)->version == 0) { 910 if (ip_hdr(skb)->version == 0) {
911 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 911 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
912 912
913 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { 913 if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
914 nlh->nlmsg_len = skb_tail_pointer(skb) - 914 nlh->nlmsg_len = skb_tail_pointer(skb) -
915 (u8 *)nlh; 915 (u8 *)nlh;
916 } else { 916 } else {
917 nlh->nlmsg_type = NLMSG_ERROR; 917 nlh->nlmsg_type = NLMSG_ERROR;
918 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 918 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
919 skb_trim(skb, nlh->nlmsg_len); 919 skb_trim(skb, nlh->nlmsg_len);
920 e = NLMSG_DATA(nlh); 920 e = nlmsg_data(nlh);
921 e->error = -EMSGSIZE; 921 e->error = -EMSGSIZE;
922 memset(&e->msg, 0, sizeof(e->msg)); 922 memset(&e->msg, 0, sizeof(e->msg));
923 } 923 }
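
The ipmr.c hunks swap the legacy NLMSG_LENGTH/NLMSG_DATA macros for the typed nlmsg_msg_size()/nlmsg_data() accessors from <net/netlink.h>; the semantics are unchanged (header plus payload size, pointer just past the header). A small sketch of the same error-reply construction, with hypothetical naming:

#include <linux/skbuff.h>
#include <linux/string.h>
#include <net/netlink.h>

/* Turn a queued skb's netlink header into an error reply, mirroring
 * the ipmr timeout/resolve-failure paths above. */
static void example_nlmsg_mark_err(struct sk_buff *skb,
				   struct nlmsghdr *nlh, int error)
{
	struct nlmsgerr *e;

	nlh->nlmsg_type = NLMSG_ERROR;
	/* header + payload: the value nlmsg_len must carry */
	nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
	skb_trim(skb, nlh->nlmsg_len);
	e = nlmsg_data(nlh);	/* typed pointer past the header */
	e->error = error;
	memset(&e->msg, 0, sizeof(e->msg));
}
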
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 4c0cf63dd92e..c3e0adea9c27 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -1,4 +1,9 @@
1/* IPv4 specific functions of netfilter core */ 1/*
2 * IPv4 specific functions of netfilter core
3 *
4 * Rusty Russell (C) 2000 -- This code is GPL.
5 * Patrick McHardy (C) 2006-2012
6 */
2#include <linux/kernel.h> 7#include <linux/kernel.h>
3#include <linux/netfilter.h> 8#include <linux/netfilter.h>
4#include <linux/netfilter_ipv4.h> 9#include <linux/netfilter_ipv4.h>
@@ -40,14 +45,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
40 fl4.flowi4_flags = flags; 45 fl4.flowi4_flags = flags;
41 rt = ip_route_output_key(net, &fl4); 46 rt = ip_route_output_key(net, &fl4);
42 if (IS_ERR(rt)) 47 if (IS_ERR(rt))
43 return -1; 48 return PTR_ERR(rt);
44 49
45 /* Drop old route. */ 50 /* Drop old route. */
46 skb_dst_drop(skb); 51 skb_dst_drop(skb);
47 skb_dst_set(skb, &rt->dst); 52 skb_dst_set(skb, &rt->dst);
48 53
49 if (skb_dst(skb)->error) 54 if (skb_dst(skb)->error)
50 return -1; 55 return skb_dst(skb)->error;
51 56
52#ifdef CONFIG_XFRM 57#ifdef CONFIG_XFRM
53 if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 58 if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -56,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
56 skb_dst_set(skb, NULL); 61 skb_dst_set(skb, NULL);
57 dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); 62 dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
58 if (IS_ERR(dst)) 63 if (IS_ERR(dst))
 59 return -1; 64 return PTR_ERR(dst);
60 skb_dst_set(skb, dst); 65 skb_dst_set(skb, dst);
61 } 66 }
62#endif 67#endif
@@ -66,7 +71,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
66 if (skb_headroom(skb) < hh_len && 71 if (skb_headroom(skb) < hh_len &&
67 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 72 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
68 0, GFP_ATOMIC)) 73 0, GFP_ATOMIC))
69 return -1; 74 return -ENOMEM;
70 75
71 return 0; 76 return 0;
72} 77}
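
ip_route_me_harder() now propagates a real errno (PTR_ERR() of the failed route or xfrm lookup, or -ENOMEM from pskb_expand_head()) instead of a bare -1, which lets hook functions encode the cause into their verdict, as the iptable_mangle and iptable_nat hunks below do with NF_DROP_ERR(). A sketch of that caller pattern, assuming the verdict macros from <linux/netfilter.h>; the hook function itself is hypothetical:

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>

static unsigned int example_reroute_verdict(struct sk_buff *skb)
{
	int err = ip_route_me_harder(skb, RTN_UNSPEC);

	if (err < 0)
		return NF_DROP_ERR(err);	/* verdict carries the errno */
	return NF_ACCEPT;
}
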
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 0d755c50994b..e7916c193932 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -71,7 +71,7 @@ config IP_NF_MATCH_ECN
71 71
72config IP_NF_MATCH_RPFILTER 72config IP_NF_MATCH_RPFILTER
73 tristate '"rpfilter" reverse path filter match support' 73 tristate '"rpfilter" reverse path filter match support'
74 depends on NETFILTER_ADVANCED 74 depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW)
75 ---help--- 75 ---help---
76 This option allows you to match packets whose replies would 76 This option allows you to match packets whose replies would
77 go out via the interface the packet came in. 77 go out via the interface the packet came in.
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 7dc6a9743592..85a4f21aac1a 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -6,6 +6,7 @@
6 * Some ARP specific bits are: 6 * Some ARP specific bits are:
7 * 7 *
8 * Copyright (C) 2002 David S. Miller (davem@redhat.com) 8 * Copyright (C) 2002 David S. Miller (davem@redhat.com)
9 * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net>
9 * 10 *
10 */ 11 */
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 79ca5e70d497..eadab1ed6500 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -48,9 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
48 net->ipv4.arptable_filter = 48 net->ipv4.arptable_filter =
49 arpt_register_table(net, &packet_filter, repl); 49 arpt_register_table(net, &packet_filter, repl);
50 kfree(repl); 50 kfree(repl);
51 if (IS_ERR(net->ipv4.arptable_filter)) 51 return PTR_RET(net->ipv4.arptable_filter);
52 return PTR_ERR(net->ipv4.arptable_filter);
53 return 0;
54} 52}
55 53
56static void __net_exit arptable_filter_net_exit(struct net *net) 54static void __net_exit arptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3efcf87400c3..d23118d95ff9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling 4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> 5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6 * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
@@ -182,8 +183,7 @@ ipt_get_target_c(const struct ipt_entry *e)
182 return ipt_get_target((struct ipt_entry *)e); 183 return ipt_get_target((struct ipt_entry *)e);
183} 184}
184 185
185#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 186#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
186 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
187static const char *const hooknames[] = { 187static const char *const hooknames[] = {
188 [NF_INET_PRE_ROUTING] = "PREROUTING", 188 [NF_INET_PRE_ROUTING] = "PREROUTING",
189 [NF_INET_LOCAL_IN] = "INPUT", 189 [NF_INET_LOCAL_IN] = "INPUT",
@@ -259,6 +259,7 @@ static void trace_packet(const struct sk_buff *skb,
259 const char *hookname, *chainname, *comment; 259 const char *hookname, *chainname, *comment;
260 const struct ipt_entry *iter; 260 const struct ipt_entry *iter;
261 unsigned int rulenum = 0; 261 unsigned int rulenum = 0;
262 struct net *net = dev_net(in ? in : out);
262 263
263 table_base = private->entries[smp_processor_id()]; 264 table_base = private->entries[smp_processor_id()];
264 root = get_entry(table_base, private->hook_entry[hook]); 265 root = get_entry(table_base, private->hook_entry[hook]);
@@ -271,7 +272,7 @@ static void trace_packet(const struct sk_buff *skb,
271 &chainname, &comment, &rulenum) != 0) 272 &chainname, &comment, &rulenum) != 0)
272 break; 273 break;
273 274
274 nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, 275 nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
275 "TRACE: %s:%s:%s:%u ", 276 "TRACE: %s:%s:%s:%u ",
276 tablename, chainname, comment, rulenum); 277 tablename, chainname, comment, rulenum);
277} 278}
@@ -361,8 +362,7 @@ ipt_do_table(struct sk_buff *skb,
361 t = ipt_get_target(e); 362 t = ipt_get_target(e);
362 IP_NF_ASSERT(t->u.kernel.target); 363 IP_NF_ASSERT(t->u.kernel.target);
363 364
364#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ 365#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
365 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
366 /* The packet is traced: log it */ 366 /* The packet is traced: log it */
367 if (unlikely(skb->nf_trace)) 367 if (unlikely(skb->nf_trace))
368 trace_packet(skb, hook, in, out, 368 trace_packet(skb, hook, in, out,
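
The ip_tables.c hunks fold the defined(CONFIG_X) || defined(CONFIG_X_MODULE) pairs into IS_ENABLED(CONFIG_X), which covers both the built-in and module cases with one test, and thread a struct net into nf_log_packet(). A sketch of the IS_ENABLED() idiom, assuming <linux/kconfig.h>:

#include <linux/kconfig.h>

#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* compiled when the symbol is y or m; replaces the old
 * defined(CONFIG_...) || defined(CONFIG_..._MODULE) pair */
static void example_trace_stub(void)
{
}
#endif
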
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 5852b249054f..0b732efd32e2 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -105,7 +105,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
105 * functions are also incrementing the refcount on their own, 105 * functions are also incrementing the refcount on their own,
106 * so it's safe to remove the entry even if it's in use. */ 106 * so it's safe to remove the entry even if it's in use. */
107#ifdef CONFIG_PROC_FS 107#ifdef CONFIG_PROC_FS
108 remove_proc_entry(c->pde->name, c->pde->parent); 108 proc_remove(c->pde);
109#endif 109#endif
110 return; 110 return;
111 } 111 }
@@ -631,7 +631,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
631 631
632 if (!ret) { 632 if (!ret) {
633 struct seq_file *sf = file->private_data; 633 struct seq_file *sf = file->private_data;
634 struct clusterip_config *c = PDE(inode)->data; 634 struct clusterip_config *c = PDE_DATA(inode);
635 635
636 sf->private = c; 636 sf->private = c;
637 637
@@ -643,7 +643,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
643 643
644static int clusterip_proc_release(struct inode *inode, struct file *file) 644static int clusterip_proc_release(struct inode *inode, struct file *file)
645{ 645{
646 struct clusterip_config *c = PDE(inode)->data; 646 struct clusterip_config *c = PDE_DATA(inode);
647 int ret; 647 int ret;
648 648
649 ret = seq_release(inode, file); 649 ret = seq_release(inode, file);
@@ -657,7 +657,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
657static ssize_t clusterip_proc_write(struct file *file, const char __user *input, 657static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
658 size_t size, loff_t *ofs) 658 size_t size, loff_t *ofs)
659{ 659{
660 struct clusterip_config *c = PDE(file_inode(file))->data; 660 struct clusterip_config *c = PDE_DATA(file_inode(file));
661#define PROC_WRITELEN 10 661#define PROC_WRITELEN 10
662 char buffer[PROC_WRITELEN+1]; 662 char buffer[PROC_WRITELEN+1];
663 unsigned long nodenum; 663 unsigned long nodenum;
@@ -736,7 +736,7 @@ static void __exit clusterip_tg_exit(void)
736{ 736{
737 pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); 737 pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
738#ifdef CONFIG_PROC_FS 738#ifdef CONFIG_PROC_FS
739 remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); 739 proc_remove(clusterip_procdir);
740#endif 740#endif
741 nf_unregister_hook(&cip_arp_ops); 741 nf_unregister_hook(&cip_arp_ops);
742 xt_unregister_target(&clusterip_tg_reg); 742 xt_unregister_target(&clusterip_tg_reg);
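
The CLUSTERIP changes track the procfs API cleanup: proc_remove() takes the entry directly, and PDE_DATA(inode) returns the ->data cookie registered at proc_create_data() time without dereferencing the now-private struct proc_dir_entry. A sketch of a seq_file open path using that accessor; the example_config type and function names are hypothetical:

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

struct example_config {
	int num_total_nodes;
};

static int example_proc_show(struct seq_file *m, void *v)
{
	struct example_config *c = m->private; /* set by single_open() */

	seq_printf(m, "%d\n", c->num_total_nodes);
	return 0;
}

static int example_proc_open(struct inode *inode, struct file *file)
{
	/* the cookie handed to proc_create_data(); no PDE() dereference */
	return single_open(file, example_proc_show, PDE_DATA(inode));
}
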
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 7d168dcbd135..f8a222cb6448 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -4,6 +4,7 @@
4 * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> 4 * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
5 * (C) 1999-2001 Paul `Rusty' Russell 5 * (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 6 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2005-2007 Patrick McHardy <kaber@trash.net>
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
@@ -37,7 +38,7 @@
37#include <linux/skbuff.h> 38#include <linux/skbuff.h>
38#include <linux/kernel.h> 39#include <linux/kernel.h>
39#include <linux/timer.h> 40#include <linux/timer.h>
40#include <linux/netlink.h> 41#include <net/netlink.h>
41#include <linux/netdevice.h> 42#include <linux/netdevice.h>
42#include <linux/mm.h> 43#include <linux/mm.h>
43#include <linux/moduleparam.h> 44#include <linux/moduleparam.h>
@@ -45,6 +46,7 @@
45#include <linux/netfilter/x_tables.h> 46#include <linux/netfilter/x_tables.h>
46#include <linux/netfilter_ipv4/ipt_ULOG.h> 47#include <linux/netfilter_ipv4/ipt_ULOG.h>
47#include <net/netfilter/nf_log.h> 48#include <net/netfilter/nf_log.h>
49#include <net/netns/generic.h>
48#include <net/sock.h> 50#include <net/sock.h>
49#include <linux/bitops.h> 51#include <linux/bitops.h>
50#include <asm/unaligned.h> 52#include <asm/unaligned.h>
@@ -78,15 +80,23 @@ typedef struct {
78 struct timer_list timer; /* the timer function */ 80 struct timer_list timer; /* the timer function */
79} ulog_buff_t; 81} ulog_buff_t;
80 82
81static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */ 83static int ulog_net_id __read_mostly;
84struct ulog_net {
85 unsigned int nlgroup[ULOG_MAXNLGROUPS];
86 ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
87 struct sock *nflognl;
88 spinlock_t lock;
89};
82 90
83static struct sock *nflognl; /* our socket */ 91static struct ulog_net *ulog_pernet(struct net *net)
84static DEFINE_SPINLOCK(ulog_lock); /* spinlock */ 92{
93 return net_generic(net, ulog_net_id);
94}
85 95
86/* send one ulog_buff_t to userspace */ 96/* send one ulog_buff_t to userspace */
87static void ulog_send(unsigned int nlgroupnum) 97static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
88{ 98{
89 ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; 99 ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
90 100
91 pr_debug("ulog_send: timer is deleting\n"); 101 pr_debug("ulog_send: timer is deleting\n");
92 del_timer(&ub->timer); 102 del_timer(&ub->timer);
@@ -103,7 +113,8 @@ static void ulog_send(unsigned int nlgroupnum)
103 NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; 113 NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
104 pr_debug("throwing %d packets to netlink group %u\n", 114 pr_debug("throwing %d packets to netlink group %u\n",
105 ub->qlen, nlgroupnum + 1); 115 ub->qlen, nlgroupnum + 1);
106 netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); 116 netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
117 GFP_ATOMIC);
107 118
108 ub->qlen = 0; 119 ub->qlen = 0;
109 ub->skb = NULL; 120 ub->skb = NULL;
@@ -114,13 +125,16 @@ static void ulog_send(unsigned int nlgroupnum)
114/* timer function to flush queue in flushtimeout time */ 125/* timer function to flush queue in flushtimeout time */
115static void ulog_timer(unsigned long data) 126static void ulog_timer(unsigned long data)
116{ 127{
128 struct ulog_net *ulog = container_of((void *)data,
129 struct ulog_net,
130 nlgroup[*(unsigned int *)data]);
117 pr_debug("timer function called, calling ulog_send\n"); 131 pr_debug("timer function called, calling ulog_send\n");
118 132
119 /* lock to protect against somebody modifying our structure 133 /* lock to protect against somebody modifying our structure
120 * from ipt_ulog_target at the same time */ 134 * from ipt_ulog_target at the same time */
121 spin_lock_bh(&ulog_lock); 135 spin_lock_bh(&ulog->lock);
122 ulog_send(data); 136 ulog_send(ulog, data);
123 spin_unlock_bh(&ulog_lock); 137 spin_unlock_bh(&ulog->lock);
124} 138}
125 139
126static struct sk_buff *ulog_alloc_skb(unsigned int size) 140static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -160,6 +174,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
160 size_t size, copy_len; 174 size_t size, copy_len;
161 struct nlmsghdr *nlh; 175 struct nlmsghdr *nlh;
162 struct timeval tv; 176 struct timeval tv;
177 struct net *net = dev_net(in ? in : out);
178 struct ulog_net *ulog = ulog_pernet(net);
163 179
164 /* ffs == find first bit set, necessary because userspace 180 /* ffs == find first bit set, necessary because userspace
165 * is already shifting groupnumber, but we need unshifted. 181 * is already shifting groupnumber, but we need unshifted.
@@ -172,11 +188,11 @@ static void ipt_ulog_packet(unsigned int hooknum,
172 else 188 else
173 copy_len = loginfo->copy_range; 189 copy_len = loginfo->copy_range;
174 190
175 size = NLMSG_SPACE(sizeof(*pm) + copy_len); 191 size = nlmsg_total_size(sizeof(*pm) + copy_len);
176 192
177 ub = &ulog_buffers[groupnum]; 193 ub = &ulog->ulog_buffers[groupnum];
178 194
179 spin_lock_bh(&ulog_lock); 195 spin_lock_bh(&ulog->lock);
180 196
181 if (!ub->skb) { 197 if (!ub->skb) {
182 if (!(ub->skb = ulog_alloc_skb(size))) 198 if (!(ub->skb = ulog_alloc_skb(size)))
@@ -186,7 +202,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
186 /* either the queue len is too high or we don't have 202 /* either the queue len is too high or we don't have
187 * enough room in nlskb left. send it to userspace. */ 203 * enough room in nlskb left. send it to userspace. */
188 204
189 ulog_send(groupnum); 205 ulog_send(ulog, groupnum);
190 206
191 if (!(ub->skb = ulog_alloc_skb(size))) 207 if (!(ub->skb = ulog_alloc_skb(size)))
192 goto alloc_failure; 208 goto alloc_failure;
@@ -260,16 +276,16 @@ static void ipt_ulog_packet(unsigned int hooknum,
260 if (ub->qlen >= loginfo->qthreshold) { 276 if (ub->qlen >= loginfo->qthreshold) {
261 if (loginfo->qthreshold > 1) 277 if (loginfo->qthreshold > 1)
262 nlh->nlmsg_type = NLMSG_DONE; 278 nlh->nlmsg_type = NLMSG_DONE;
263 ulog_send(groupnum); 279 ulog_send(ulog, groupnum);
264 } 280 }
265out_unlock: 281out_unlock:
266 spin_unlock_bh(&ulog_lock); 282 spin_unlock_bh(&ulog->lock);
267 283
268 return; 284 return;
269 285
270alloc_failure: 286alloc_failure:
271 pr_debug("Error building netlink message\n"); 287 pr_debug("Error building netlink message\n");
272 spin_unlock_bh(&ulog_lock); 288 spin_unlock_bh(&ulog->lock);
273} 289}
274 290
275static unsigned int 291static unsigned int
@@ -376,54 +392,43 @@ static struct nf_logger ipt_ulog_logger __read_mostly = {
376 .me = THIS_MODULE, 392 .me = THIS_MODULE,
377}; 393};
378 394
379static int __init ulog_tg_init(void) 395static int __net_init ulog_tg_net_init(struct net *net)
380{ 396{
381 int ret, i; 397 int i;
398 struct ulog_net *ulog = ulog_pernet(net);
382 struct netlink_kernel_cfg cfg = { 399 struct netlink_kernel_cfg cfg = {
383 .groups = ULOG_MAXNLGROUPS, 400 .groups = ULOG_MAXNLGROUPS,
384 }; 401 };
385 402
386 pr_debug("init module\n"); 403 spin_lock_init(&ulog->lock);
387
388 if (nlbufsiz > 128*1024) {
389 pr_warning("Netlink buffer has to be <= 128kB\n");
390 return -EINVAL;
391 }
392
393 /* initialize ulog_buffers */ 404 /* initialize ulog_buffers */
394 for (i = 0; i < ULOG_MAXNLGROUPS; i++) 405 for (i = 0; i < ULOG_MAXNLGROUPS; i++)
395 setup_timer(&ulog_buffers[i].timer, ulog_timer, i); 406 setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, i);
396 407
397 nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); 408 ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
398 if (!nflognl) 409 if (!ulog->nflognl)
399 return -ENOMEM; 410 return -ENOMEM;
400 411
401 ret = xt_register_target(&ulog_tg_reg);
402 if (ret < 0) {
403 netlink_kernel_release(nflognl);
404 return ret;
405 }
406 if (nflog) 412 if (nflog)
407 nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); 413 nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
408 414
409 return 0; 415 return 0;
410} 416}
411 417
412static void __exit ulog_tg_exit(void) 418static void __net_exit ulog_tg_net_exit(struct net *net)
413{ 419{
414 ulog_buff_t *ub; 420 ulog_buff_t *ub;
415 int i; 421 int i;
416 422 struct ulog_net *ulog = ulog_pernet(net);
417 pr_debug("cleanup_module\n");
418 423
419 if (nflog) 424 if (nflog)
420 nf_log_unregister(&ipt_ulog_logger); 425 nf_log_unset(net, &ipt_ulog_logger);
421 xt_unregister_target(&ulog_tg_reg); 426
422 netlink_kernel_release(nflognl); 427 netlink_kernel_release(ulog->nflognl);
423 428
424 /* remove pending timers and free allocated skb's */ 429 /* remove pending timers and free allocated skb's */
425 for (i = 0; i < ULOG_MAXNLGROUPS; i++) { 430 for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
426 ub = &ulog_buffers[i]; 431 ub = &ulog->ulog_buffers[i];
427 pr_debug("timer is deleting\n"); 432 pr_debug("timer is deleting\n");
428 del_timer(&ub->timer); 433 del_timer(&ub->timer);
429 434
@@ -434,5 +439,50 @@ static void __exit ulog_tg_exit(void)
434 } 439 }
435} 440}
436 441
442static struct pernet_operations ulog_tg_net_ops = {
443 .init = ulog_tg_net_init,
444 .exit = ulog_tg_net_exit,
445 .id = &ulog_net_id,
446 .size = sizeof(struct ulog_net),
447};
448
449static int __init ulog_tg_init(void)
450{
451 int ret;
452 pr_debug("init module\n");
453
454 if (nlbufsiz > 128*1024) {
455 pr_warn("Netlink buffer has to be <= 128kB\n");
456 return -EINVAL;
457 }
458
459 ret = register_pernet_subsys(&ulog_tg_net_ops);
460 if (ret)
461 goto out_pernet;
462
463 ret = xt_register_target(&ulog_tg_reg);
464 if (ret < 0)
465 goto out_target;
466
467 if (nflog)
468 nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
469
470 return 0;
471
472out_target:
473 unregister_pernet_subsys(&ulog_tg_net_ops);
474out_pernet:
475 return ret;
476}
477
478static void __exit ulog_tg_exit(void)
479{
480 pr_debug("cleanup_module\n");
481 if (nflog)
482 nf_log_unregister(&ipt_ulog_logger);
483 xt_unregister_target(&ulog_tg_reg);
484 unregister_pernet_subsys(&ulog_tg_net_ops);
485}
486
437module_init(ulog_tg_init); 487module_init(ulog_tg_init);
438module_exit(ulog_tg_exit); 488module_exit(ulog_tg_exit);
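
The ULOG conversion above is the standard net-namespace pattern: file-scope globals (buffers, netlink socket, spinlock) move into a per-net struct reached through net_generic(), per-net setup and teardown move into pernet init/exit callbacks, and module init only registers the ops. A stripped-down sketch of the state half of that pattern, with a hypothetical example_net struct:

#include <linux/spinlock.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int example_net_id __read_mostly;

struct example_net {
	spinlock_t lock;	/* was a file-scope DEFINE_SPINLOCK() */
};

static struct example_net *example_pernet(struct net *net)
{
	return net_generic(net, example_net_id);
}

static int __net_init example_net_init(struct net *net)
{
	spin_lock_init(&example_pernet(net)->lock);
	return 0;
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.id   = &example_net_id,	   /* slot assigned at register time */
	.size = sizeof(struct example_net), /* allocated per namespace */
};

Module init would then call register_pernet_subsys(&example_net_ops) before any other registration and unwind it on later failure, exactly the ordering ulog_tg_init() adopts above.
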
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index c30130062cd6..c49dcd0284a0 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -66,6 +66,12 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
66 return dev_match; 66 return dev_match;
67} 67}
68 68
69static bool rpfilter_is_local(const struct sk_buff *skb)
70{
71 const struct rtable *rt = skb_rtable(skb);
72 return rt && (rt->rt_flags & RTCF_LOCAL);
73}
74
69static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) 75static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
70{ 76{
71 const struct xt_rpfilter_info *info; 77 const struct xt_rpfilter_info *info;
@@ -76,7 +82,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
76 info = par->matchinfo; 82 info = par->matchinfo;
77 invert = info->flags & XT_RPFILTER_INVERT; 83 invert = info->flags & XT_RPFILTER_INVERT;
78 84
79 if (par->in->flags & IFF_LOOPBACK) 85 if (rpfilter_is_local(skb))
80 return true ^ invert; 86 return true ^ invert;
81 87
82 iph = ip_hdr(skb); 88 iph = ip_hdr(skb);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 85d88f206447..cba5658ec82c 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -44,6 +44,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
44 u_int8_t tos; 44 u_int8_t tos;
45 __be32 saddr, daddr; 45 __be32 saddr, daddr;
46 u_int32_t mark; 46 u_int32_t mark;
47 int err;
47 48
48 /* root is playing with raw sockets. */ 49 /* root is playing with raw sockets. */
49 if (skb->len < sizeof(struct iphdr) || 50 if (skb->len < sizeof(struct iphdr) ||
@@ -66,9 +67,11 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
66 if (iph->saddr != saddr || 67 if (iph->saddr != saddr ||
67 iph->daddr != daddr || 68 iph->daddr != daddr ||
68 skb->mark != mark || 69 skb->mark != mark ||
69 iph->tos != tos) 70 iph->tos != tos) {
70 if (ip_route_me_harder(skb, RTN_UNSPEC)) 71 err = ip_route_me_harder(skb, RTN_UNSPEC);
71 ret = NF_DROP; 72 if (err < 0)
73 ret = NF_DROP_ERR(err);
74 }
72 } 75 }
73 76
74 return ret; 77 return ret;
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index eeaff7e4acb5..6383273d54e1 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -176,6 +176,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
176#ifdef CONFIG_XFRM 176#ifdef CONFIG_XFRM
177 const struct nf_conn *ct; 177 const struct nf_conn *ct;
178 enum ip_conntrack_info ctinfo; 178 enum ip_conntrack_info ctinfo;
179 int err;
179#endif 180#endif
180 unsigned int ret; 181 unsigned int ret;
181 182
@@ -195,9 +196,11 @@ nf_nat_ipv4_out(unsigned int hooknum,
195 ct->tuplehash[!dir].tuple.dst.u3.ip) || 196 ct->tuplehash[!dir].tuple.dst.u3.ip) ||
196 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 197 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
197 ct->tuplehash[dir].tuple.src.u.all != 198 ct->tuplehash[dir].tuple.src.u.all !=
198 ct->tuplehash[!dir].tuple.dst.u.all)) 199 ct->tuplehash[!dir].tuple.dst.u.all)) {
199 if (nf_xfrm_me_harder(skb, AF_INET) < 0) 200 err = nf_xfrm_me_harder(skb, AF_INET);
200 ret = NF_DROP; 201 if (err < 0)
202 ret = NF_DROP_ERR(err);
203 }
201 } 204 }
202#endif 205#endif
203 return ret; 206 return ret;
@@ -213,6 +216,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
213 const struct nf_conn *ct; 216 const struct nf_conn *ct;
214 enum ip_conntrack_info ctinfo; 217 enum ip_conntrack_info ctinfo;
215 unsigned int ret; 218 unsigned int ret;
219 int err;
216 220
217 /* root is playing with raw sockets. */ 221 /* root is playing with raw sockets. */
218 if (skb->len < sizeof(struct iphdr) || 222 if (skb->len < sizeof(struct iphdr) ||
@@ -226,16 +230,19 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
226 230
227 if (ct->tuplehash[dir].tuple.dst.u3.ip != 231 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
228 ct->tuplehash[!dir].tuple.src.u3.ip) { 232 ct->tuplehash[!dir].tuple.src.u3.ip) {
229 if (ip_route_me_harder(skb, RTN_UNSPEC)) 233 err = ip_route_me_harder(skb, RTN_UNSPEC);
230 ret = NF_DROP; 234 if (err < 0)
235 ret = NF_DROP_ERR(err);
231 } 236 }
232#ifdef CONFIG_XFRM 237#ifdef CONFIG_XFRM
233 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 238 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
234 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 239 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
235 ct->tuplehash[dir].tuple.dst.u.all != 240 ct->tuplehash[dir].tuple.dst.u.all !=
236 ct->tuplehash[!dir].tuple.src.u.all) 241 ct->tuplehash[!dir].tuple.src.u.all) {
237 if (nf_xfrm_me_harder(skb, AF_INET) < 0) 242 err = nf_xfrm_me_harder(skb, AF_INET);
238 ret = NF_DROP; 243 if (err < 0)
244 ret = NF_DROP_ERR(err);
245 }
239#endif 246#endif
240 } 247 }
241 return ret; 248 return ret;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2820aa18b542..567d84168bd2 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -1,6 +1,7 @@
1 1
2/* (C) 1999-2001 Paul `Rusty' Russell 2/* (C) 1999-2001 Paul `Rusty' Russell
3 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 3 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index f2ca12794081..4c48e434bb1f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -2,6 +2,7 @@
2 * 2 *
3 * (C) 1999-2001 Paul `Rusty' Russell 3 * (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5241d997ab75..a338dad41b7d 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -1,5 +1,6 @@
1/* (C) 1999-2001 Paul `Rusty' Russell 1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -187,8 +188,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
187 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); 188 icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
188 if (icmph == NULL) { 189 if (icmph == NULL) {
189 if (LOG_INVALID(net, IPPROTO_ICMP)) 190 if (LOG_INVALID(net, IPPROTO_ICMP))
190 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, 191 nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
191 "nf_ct_icmp: short packet "); 192 NULL, "nf_ct_icmp: short packet ");
192 return -NF_ACCEPT; 193 return -NF_ACCEPT;
193 } 194 }
194 195
@@ -196,7 +197,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
196 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 197 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
197 nf_ip_checksum(skb, hooknum, dataoff, 0)) { 198 nf_ip_checksum(skb, hooknum, dataoff, 0)) {
198 if (LOG_INVALID(net, IPPROTO_ICMP)) 199 if (LOG_INVALID(net, IPPROTO_ICMP))
199 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, 200 nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
200 "nf_ct_icmp: bad HW ICMP checksum "); 201 "nf_ct_icmp: bad HW ICMP checksum ");
201 return -NF_ACCEPT; 202 return -NF_ACCEPT;
202 } 203 }
@@ -209,7 +210,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
209 */ 210 */
210 if (icmph->type > NR_ICMP_TYPES) { 211 if (icmph->type > NR_ICMP_TYPES) {
211 if (LOG_INVALID(net, IPPROTO_ICMP)) 212 if (LOG_INVALID(net, IPPROTO_ICMP))
212 nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, 213 nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
213 "nf_ct_icmp: invalid ICMP type "); 214 "nf_ct_icmp: invalid ICMP type ");
214 return -NF_ACCEPT; 215 return -NF_ACCEPT;
215 } 216 }
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9c3db10b22d3..9eea059dd621 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -2,6 +2,7 @@
2 * H.323 extension for NAT alteration. 2 * H.323 extension for NAT alteration.
3 * 3 *
4 * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> 4 * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
5 * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * This source code is licensed under General Public License version 2. 7 * This source code is licensed under General Public License version 2.
7 * 8 *
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index a06d7d74817d..657d2307f031 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -13,6 +13,8 @@
13 * 13 *
14 * Development of this code funded by Astaro AG (http://www.astaro.com/) 14 * Development of this code funded by Astaro AG (http://www.astaro.com/)
15 * 15 *
16 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
17 *
16 * TODO: - NAT to a unique tuple, not to TCP source port 18 * TODO: - NAT to a unique tuple, not to TCP source port
17 * (needs netfilter tuple reservation) 19 * (needs netfilter tuple reservation)
18 */ 20 */
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index ea44f02563b5..690d890111bb 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -21,6 +21,8 @@
21 * 21 *
22 * Development of this code funded by Astaro AG (http://www.astaro.com/) 22 * Development of this code funded by Astaro AG (http://www.astaro.com/)
23 * 23 *
24 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
25 *
24 */ 26 */
25 27
26#include <linux/module.h> 28#include <linux/module.h>
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index bac712293fd6..5f011cc89cd9 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -38,6 +38,8 @@
38 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 38 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
39 * 39 *
40 * Author: James Morris <jmorris@intercode.com.au> 40 * Author: James Morris <jmorris@intercode.com.au>
41 *
42 * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
41 */ 43 */
42#include <linux/module.h> 44#include <linux/module.h>
43#include <linux/moduleparam.h> 45#include <linux/moduleparam.h>
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2e91006d6076..7d93d62cd5fd 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -514,9 +514,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
514 ipc.opt = NULL; 514 ipc.opt = NULL;
515 ipc.oif = sk->sk_bound_dev_if; 515 ipc.oif = sk->sk_bound_dev_if;
516 ipc.tx_flags = 0; 516 ipc.tx_flags = 0;
517 err = sock_tx_timestamp(sk, &ipc.tx_flags); 517
518 if (err) 518 sock_tx_timestamp(sk, &ipc.tx_flags);
519 return err;
520 519
521 if (msg->msg_controllen) { 520 if (msg->msg_controllen) {
522 err = ip_cmsg_send(sock_net(sk), msg, &ipc); 521 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 32030a24e776..2a5bf86d2415 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -125,6 +125,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
125 SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), 125 SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
126 SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), 126 SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
127 SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), 127 SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
128 SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),
128 SNMP_MIB_SENTINEL 129 SNMP_MIB_SENTINEL
129}; 130};
130 131
@@ -162,6 +163,7 @@ static const struct snmp_mib snmp4_tcp_list[] = {
162 SNMP_MIB_ITEM("RetransSegs", TCP_MIB_RETRANSSEGS), 163 SNMP_MIB_ITEM("RetransSegs", TCP_MIB_RETRANSSEGS),
163 SNMP_MIB_ITEM("InErrs", TCP_MIB_INERRS), 164 SNMP_MIB_ITEM("InErrs", TCP_MIB_INERRS),
164 SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS), 165 SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS),
166 SNMP_MIB_ITEM("InCsumErrors", TCP_MIB_CSUMERRORS),
165 SNMP_MIB_SENTINEL 167 SNMP_MIB_SENTINEL
166}; 168};
167 169
@@ -172,6 +174,7 @@ static const struct snmp_mib snmp4_udp_list[] = {
172 SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), 174 SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
173 SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), 175 SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
174 SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), 176 SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
177 SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS),
175 SNMP_MIB_SENTINEL 178 SNMP_MIB_SENTINEL
176}; 179};
177 180
@@ -224,6 +227,8 @@ static const struct snmp_mib snmp4_net_list[] = {
224 SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), 227 SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS),
225 SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), 228 SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
226 SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), 229 SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
230 SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES),
231 SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY),
227 SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), 232 SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
228 SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), 233 SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
229 SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), 234 SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
@@ -267,6 +272,7 @@ static const struct snmp_mib snmp4_net_list[] = {
267 SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL), 272 SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
268 SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), 273 SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
269 SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), 274 SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
275 SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
270 SNMP_MIB_SENTINEL 276 SNMP_MIB_SENTINEL
271}; 277};
272 278
@@ -319,15 +325,16 @@ static void icmp_put(struct seq_file *seq)
319 struct net *net = seq->private; 325 struct net *net = seq->private;
320 atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; 326 atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
321 327
322 seq_puts(seq, "\nIcmp: InMsgs InErrors"); 328 seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
323 for (i=0; icmpmibmap[i].name != NULL; i++) 329 for (i=0; icmpmibmap[i].name != NULL; i++)
324 seq_printf(seq, " In%s", icmpmibmap[i].name); 330 seq_printf(seq, " In%s", icmpmibmap[i].name);
325 seq_printf(seq, " OutMsgs OutErrors"); 331 seq_printf(seq, " OutMsgs OutErrors");
326 for (i=0; icmpmibmap[i].name != NULL; i++) 332 for (i=0; icmpmibmap[i].name != NULL; i++)
327 seq_printf(seq, " Out%s", icmpmibmap[i].name); 333 seq_printf(seq, " Out%s", icmpmibmap[i].name);
328 seq_printf(seq, "\nIcmp: %lu %lu", 334 seq_printf(seq, "\nIcmp: %lu %lu %lu",
329 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), 335 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
330 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); 336 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS),
337 snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
331 for (i=0; icmpmibmap[i].name != NULL; i++) 338 for (i=0; icmpmibmap[i].name != NULL; i++)
332 seq_printf(seq, " %lu", 339 seq_printf(seq, " %lu",
333 atomic_long_read(ptr + icmpmibmap[i].index)); 340 atomic_long_read(ptr + icmpmibmap[i].index));
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6e2851464f8f..550781a17b34 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2311,7 +2311,7 @@ nla_put_failure:
2311 return -EMSGSIZE; 2311 return -EMSGSIZE;
2312} 2312}
2313 2313
2314static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) 2314static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
2315{ 2315{
2316 struct net *net = sock_net(in_skb->sk); 2316 struct net *net = sock_net(in_skb->sk);
2317 struct rtmsg *rtm; 2317 struct rtmsg *rtm;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index ef54377fb11c..b05c96e7af8b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -267,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
267 struct ip_options *opt) 267 struct ip_options *opt)
268{ 268{
269 struct tcp_options_received tcp_opt; 269 struct tcp_options_received tcp_opt;
270 const u8 *hash_location;
271 struct inet_request_sock *ireq; 270 struct inet_request_sock *ireq;
272 struct tcp_request_sock *treq; 271 struct tcp_request_sock *treq;
273 struct tcp_sock *tp = tcp_sk(sk); 272 struct tcp_sock *tp = tcp_sk(sk);
@@ -294,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
         /* check for timestamp cookie support */
         memset(&tcp_opt, 0, sizeof(tcp_opt));
-        tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
+        tcp_parse_options(skb, &tcp_opt, 0, NULL);
 
         if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
                 goto out;
@@ -349,8 +348,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
          * hasn't changed since we received the original syn, but I see
          * no easy way to do this.
          */
-        flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk),
-                           RT_SCOPE_UNIVERSE, IPPROTO_TCP,
+        flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark,
+                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
                            inet_sk_flowi_flags(sk),
                            (opt && opt->srr) ? opt->faddr : ireq->rmt_addr,
                            ireq->loc_addr, th->source, th->dest);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 960fd29d9b8e..fa2f63fc453b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -28,7 +28,7 @@
 
 static int zero;
 static int one = 1;
-static int two = 2;
+static int four = 4;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -592,13 +592,6 @@ static struct ctl_table ipv4_table[] = {
                 .proc_handler = proc_dointvec
         },
         {
-                .procname = "tcp_frto_response",
-                .data = &sysctl_tcp_frto_response,
-                .maxlen = sizeof(int),
-                .mode = 0644,
-                .proc_handler = proc_dointvec
-        },
-        {
                 .procname = "tcp_low_latency",
                 .data = &sysctl_tcp_low_latency,
                 .maxlen = sizeof(int),
@@ -733,13 +726,6 @@ static struct ctl_table ipv4_table[] = {
                 .proc_handler = proc_dointvec,
         },
         {
-                .procname = "tcp_cookie_size",
-                .data = &sysctl_tcp_cookie_size,
-                .maxlen = sizeof(int),
-                .mode = 0644,
-                .proc_handler = proc_dointvec
-        },
-        {
                 .procname = "tcp_thin_linear_timeouts",
                 .data = &sysctl_tcp_thin_linear_timeouts,
                 .maxlen = sizeof(int),
@@ -760,7 +746,7 @@ static struct ctl_table ipv4_table[] = {
                 .mode = 0644,
                 .proc_handler = proc_dointvec_minmax,
                 .extra1 = &zero,
-                .extra2 = &two,
+                .extra2 = &four,
         },
         {
                 .procname = "udp_mem",
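
The widened clamp above belongs to the tcp_early_retrans entry: with tail loss probe (TLP) arriving in this release, the sysctl's valid range grows from 0-2 to 0-4 and its default moves to 3 (see the tcp_input.c hunk further down). A hedged decode of the modes as documented for 3.10's ip-sysctl.txt; the enum names are invented for this sketch:

    /* Illustrative mapping of sysctl_tcp_early_retrans values. */
    enum early_retrans_mode {
            ER_OFF         = 0, /* neither early retransmit nor TLP */
            ER_ON          = 1, /* early retransmit (RFC 5827) */
            ER_DELAYED     = 2, /* early retransmit, delayed by ~RTT/4 */
            ER_DELAYED_TLP = 3, /* delayed ER plus tail loss probe (new default) */
            ER_TLP_ONLY    = 4, /* tail loss probe without ER */
    };

    /* Mirrors the range test added to tcp_pause_early_retransmit():
     * only modes 2 and 3 use the delayed variant.
     */
    static int uses_delayed_er(int mode)
    {
            return mode >= ER_DELAYED && mode <= ER_DELAYED_TLP;
    }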
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e22020790709..dcb116dde216 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk)
 
         icsk->icsk_sync_mss = tcp_sync_mss;
 
-        /* TCP Cookie Transactions */
-        if (sysctl_tcp_cookie_size > 0) {
-                /* Default, cookies without s_data_payload. */
-                tp->cookie_values =
-                        kzalloc(sizeof(*tp->cookie_values),
-                                sk->sk_allocation);
-                if (tp->cookie_values != NULL)
-                        kref_init(&tp->cookie_values->kref);
-        }
         /* Presumed zeroed, in order of appearance:
          * cookie_in_always, cookie_out_never,
          * s_data_constant, s_data_in, s_data_out
@@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                 release_sock(sk);
                 return err;
         }
-        case TCP_COOKIE_TRANSACTIONS: {
-                struct tcp_cookie_transactions ctd;
-                struct tcp_cookie_values *cvp = NULL;
-
-                if (sizeof(ctd) > optlen)
-                        return -EINVAL;
-                if (copy_from_user(&ctd, optval, sizeof(ctd)))
-                        return -EFAULT;
-
-                if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
-                    ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
-                        return -EINVAL;
-
-                if (ctd.tcpct_cookie_desired == 0) {
-                        /* default to global value */
-                } else if ((0x1 & ctd.tcpct_cookie_desired) ||
-                           ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
-                           ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
-                        return -EINVAL;
-                }
-
-                if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
-                        /* Supercedes all other values */
-                        lock_sock(sk);
-                        if (tp->cookie_values != NULL) {
-                                kref_put(&tp->cookie_values->kref,
-                                         tcp_cookie_values_release);
-                                tp->cookie_values = NULL;
-                        }
-                        tp->rx_opt.cookie_in_always = 0; /* false */
-                        tp->rx_opt.cookie_out_never = 1; /* true */
-                        release_sock(sk);
-                        return err;
-                }
-
-                /* Allocate ancillary memory before locking.
-                 */
-                if (ctd.tcpct_used > 0 ||
-                    (tp->cookie_values == NULL &&
-                     (sysctl_tcp_cookie_size > 0 ||
-                      ctd.tcpct_cookie_desired > 0 ||
-                      ctd.tcpct_s_data_desired > 0))) {
-                        cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
-                                      GFP_KERNEL);
-                        if (cvp == NULL)
-                                return -ENOMEM;
-
-                        kref_init(&cvp->kref);
-                }
-                lock_sock(sk);
-                tp->rx_opt.cookie_in_always =
-                        (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
-                tp->rx_opt.cookie_out_never = 0; /* false */
-
-                if (tp->cookie_values != NULL) {
-                        if (cvp != NULL) {
-                                /* Changed values are recorded by a changed
-                                 * pointer, ensuring the cookie will differ,
-                                 * without separately hashing each value later.
-                                 */
-                                kref_put(&tp->cookie_values->kref,
-                                         tcp_cookie_values_release);
-                        } else {
-                                cvp = tp->cookie_values;
-                        }
-                }
-
-                if (cvp != NULL) {
-                        cvp->cookie_desired = ctd.tcpct_cookie_desired;
-
-                        if (ctd.tcpct_used > 0) {
-                                memcpy(cvp->s_data_payload, ctd.tcpct_value,
-                                       ctd.tcpct_used);
-                                cvp->s_data_desired = ctd.tcpct_used;
-                                cvp->s_data_constant = 1; /* true */
-                        } else {
-                                /* No constant payload data. */
-                                cvp->s_data_desired = ctd.tcpct_s_data_desired;
-                                cvp->s_data_constant = 0; /* false */
-                        }
-
-                        tp->cookie_values = cvp;
-                }
-                release_sock(sk);
-                return err;
-        }
         default:
                 /* fallthru */
                 break;
@@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                         return -EFAULT;
                 return 0;
 
-        case TCP_COOKIE_TRANSACTIONS: {
-                struct tcp_cookie_transactions ctd;
-                struct tcp_cookie_values *cvp = tp->cookie_values;
-
-                if (get_user(len, optlen))
-                        return -EFAULT;
-                if (len < sizeof(ctd))
-                        return -EINVAL;
-
-                memset(&ctd, 0, sizeof(ctd));
-                ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
-                                   TCP_COOKIE_IN_ALWAYS : 0)
-                                | (tp->rx_opt.cookie_out_never ?
-                                   TCP_COOKIE_OUT_NEVER : 0);
-
-                if (cvp != NULL) {
-                        ctd.tcpct_flags |= (cvp->s_data_in ?
-                                            TCP_S_DATA_IN : 0)
-                                         | (cvp->s_data_out ?
-                                            TCP_S_DATA_OUT : 0);
-
-                        ctd.tcpct_cookie_desired = cvp->cookie_desired;
-                        ctd.tcpct_s_data_desired = cvp->s_data_desired;
-
-                        memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
-                               cvp->cookie_pair_size);
-                        ctd.tcpct_used = cvp->cookie_pair_size;
-                }
-
-                if (put_user(sizeof(ctd), optlen))
-                        return -EFAULT;
-                if (copy_to_user(optval, &ctd, sizeof(ctd)))
-                        return -EFAULT;
-                return 0;
-        }
         case TCP_THIN_LINEAR_TIMEOUTS:
                 val = tp->thin_lto;
                 break;
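
With both the setsockopt and getsockopt cases gone, a TCP_COOKIE_TRANSACTIONS request now falls through to the default arm and fails with -ENOPROTOOPT. A sketch of how userspace can detect the removal, assuming the old option value (15 in pre-3.10 uapi headers; substitute the header define if it is still installed):

    #include <errno.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_STREAM, 0);
            int value = 0;
            socklen_t len = sizeof(value);

            /* 15 was TCP_COOKIE_TRANSACTIONS in older kernel headers. */
            if (getsockopt(fd, IPPROTO_TCP, 15, &value, &len) < 0 &&
                errno == ENOPROTOOPT)
                    printf("TCP cookie transactions not supported\n");
            return 0;
    }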
@@ -3015,6 +2885,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
         __be32 delta;
         unsigned int oldlen;
         unsigned int mss;
+        struct sk_buff *gso_skb = skb;
+        __sum16 newcheck;
 
         if (!pskb_may_pull(skb, sizeof(*th)))
                 goto out;
@@ -3044,6 +2916,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                                SKB_GSO_TCP_ECN |
                                SKB_GSO_TCPV6 |
                                SKB_GSO_GRE |
+                               SKB_GSO_UDP_TUNNEL |
                                0) ||
                              !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
                         goto out;
@@ -3064,11 +2937,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
         th = tcp_hdr(skb);
         seq = ntohl(th->seq);
 
+        newcheck = ~csum_fold((__force __wsum)((__force u32)th->check +
+                                               (__force u32)delta));
+
         do {
                 th->fin = th->psh = 0;
+                th->check = newcheck;
 
-                th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
-                                       (__force u32)delta));
                 if (skb->ip_summed != CHECKSUM_PARTIAL)
                         th->check =
                              csum_fold(csum_partial(skb_transport_header(skb),
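
The hoist above is valid because every segment produced by the loop has the same length, so the checksum correction relative to the original header is a single constant (the tail segment gets its own fixup after the loop). The correction itself is a one's-complement incremental update in the style of RFC 1624: add delta to the stored check and re-fold the carries. A standalone toy of the arithmetic, ignoring the kernel's __wsum/__sum16 annotations:

    #include <stdint.h>

    /* Fold a 32-bit one's-complement accumulator to 16 bits and
     * complement it, like the kernel's csum_fold().
     */
    static uint16_t csum_fold32(uint32_t sum)
    {
            sum = (sum & 0xffff) + (sum >> 16);
            sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)~sum;
    }

    /* Equivalent shape of the hoisted expression
     *   newcheck = ~csum_fold(th->check + delta);
     * applying a constant delta to a stored checksum.
     */
    static uint16_t checksum_adjust(uint16_t old_check, uint32_t delta)
    {
            return (uint16_t)~csum_fold32((uint32_t)old_check + delta);
    }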
@@ -3082,6 +2957,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
                         th->cwr = 0;
         } while (skb->next);
 
+        /* Following permits TCP Small Queues to work well with GSO :
+         * The callback to TCP stack will be called at the time last frag
+         * is freed at TX completion, and not right now when gso_skb
+         * is freed by GSO engine
+         */
+        if (gso_skb->destructor == tcp_wfree) {
+                swap(gso_skb->sk, skb->sk);
+                swap(gso_skb->destructor, skb->destructor);
+                swap(gso_skb->truesize, skb->truesize);
+        }
+
         delta = htonl(oldlen + (skb->tail - skb->transport_header) +
                       skb->data_len);
         th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
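
The swap() block re-homes the socket reference, the tcp_wfree destructor, and the accounted truesize from the aggregate gso_skb onto the last segment, so TCP Small Queues sees "TX complete" only when the final fragment is actually freed rather than when the GSO engine drops the aggregate. A generic sketch of that ownership hand-off with hypothetical types (the kernel's swap() is simply a macro exchanging two values):

    struct buf {
            void (*destructor)(struct buf *);
            void *owner;
            unsigned long accounted_bytes;
    };

    /* Move the completion callback and its accounting from the
     * aggregate buffer to the last fragment, so the callback fires
     * only when the final fragment is released.
     */
    static void transfer_completion(struct buf *aggregate, struct buf *last)
    {
            struct buf tmp = *last;

            last->destructor = aggregate->destructor;
            last->owner = aggregate->owner;
            last->accounted_bytes = aggregate->accounted_bytes;

            aggregate->destructor = tmp.destructor;
            aggregate->owner = tmp.owner;
            aggregate->accounted_bytes = tmp.accounted_bytes;
    }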
@@ -3408,134 +3294,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
 
 #endif
 
-/* Each Responder maintains up to two secret values concurrently for
- * efficient secret rollover. Each secret value has 4 states:
- *
- * Generating. (tcp_secret_generating != tcp_secret_primary)
- *    Generates new Responder-Cookies, but not yet used for primary
- *    verification. This is a short-term state, typically lasting only
- *    one round trip time (RTT).
- *
- * Primary. (tcp_secret_generating == tcp_secret_primary)
- *    Used both for generation and primary verification.
- *
- * Retiring. (tcp_secret_retiring != tcp_secret_secondary)
- *    Used for verification, until the first failure that can be
- *    verified by the newer Generating secret. At that time, this
- *    cookie's state is changed to Secondary, and the Generating
- *    cookie's state is changed to Primary. This is a short-term state,
- *    typically lasting only one round trip time (RTT).
- *
- * Secondary. (tcp_secret_retiring == tcp_secret_secondary)
- *    Used for secondary verification, after primary verification
- *    failures. This state lasts no more than twice the Maximum Segment
- *    Lifetime (2MSL). Then, the secret is discarded.
- */
-struct tcp_cookie_secret {
-        /* The secret is divided into two parts. The digest part is the
-         * equivalent of previously hashing a secret and saving the state,
-         * and serves as an initialization vector (IV). The message part
-         * serves as the trailing secret.
-         */
-        u32 secrets[COOKIE_WORKSPACE_WORDS];
-        unsigned long expires;
-};
-
-#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
-#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
-#define TCP_SECRET_LIFE (HZ * 600)
-
-static struct tcp_cookie_secret tcp_secret_one;
-static struct tcp_cookie_secret tcp_secret_two;
-
-/* Essentially a circular list, without dynamic allocation. */
-static struct tcp_cookie_secret *tcp_secret_generating;
-static struct tcp_cookie_secret *tcp_secret_primary;
-static struct tcp_cookie_secret *tcp_secret_retiring;
-static struct tcp_cookie_secret *tcp_secret_secondary;
-
-static DEFINE_SPINLOCK(tcp_secret_locker);
-
-/* Select a pseudo-random word in the cookie workspace.
- */
-static inline u32 tcp_cookie_work(const u32 *ws, const int n)
-{
-        return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
-}
-
-/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
- * Called in softirq context.
- * Returns: 0 for success.
- */
-int tcp_cookie_generator(u32 *bakery)
-{
-        unsigned long jiffy = jiffies;
-
-        if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
-                spin_lock_bh(&tcp_secret_locker);
-                if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
-                        /* refreshed by another */
-                        memcpy(bakery,
-                               &tcp_secret_generating->secrets[0],
-                               COOKIE_WORKSPACE_WORDS);
-                } else {
-                        /* still needs refreshing */
-                        get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
-
-                        /* The first time, paranoia assumes that the
-                         * randomization function isn't as strong. But,
-                         * this secret initialization is delayed until
-                         * the last possible moment (packet arrival).
-                         * Although that time is observable, it is
-                         * unpredictably variable. Mash in the most
-                         * volatile clock bits available, and expire the
-                         * secret extra quickly.
-                         */
-                        if (unlikely(tcp_secret_primary->expires ==
-                                     tcp_secret_secondary->expires)) {
-                                struct timespec tv;
-
-                                getnstimeofday(&tv);
-                                bakery[COOKIE_DIGEST_WORDS+0] ^=
-                                        (u32)tv.tv_nsec;
-
-                                tcp_secret_secondary->expires = jiffy
-                                        + TCP_SECRET_1MSL
-                                        + (0x0f & tcp_cookie_work(bakery, 0));
-                        } else {
-                                tcp_secret_secondary->expires = jiffy
-                                        + TCP_SECRET_LIFE
-                                        + (0xff & tcp_cookie_work(bakery, 1));
-                                tcp_secret_primary->expires = jiffy
-                                        + TCP_SECRET_2MSL
-                                        + (0x1f & tcp_cookie_work(bakery, 2));
-                        }
-                        memcpy(&tcp_secret_secondary->secrets[0],
-                               bakery, COOKIE_WORKSPACE_WORDS);
-
-                        rcu_assign_pointer(tcp_secret_generating,
-                                           tcp_secret_secondary);
-                        rcu_assign_pointer(tcp_secret_retiring,
-                                           tcp_secret_primary);
-                        /*
-                         * Neither call_rcu() nor synchronize_rcu() needed.
-                         * Retiring data is not freed. It is replaced after
-                         * further (locked) pointer updates, and a quiet time
-                         * (minimum 1MSL, maximum LIFE - 2MSL).
-                         */
-                }
-                spin_unlock_bh(&tcp_secret_locker);
-        } else {
-                rcu_read_lock_bh();
-                memcpy(bakery,
-                       &rcu_dereference(tcp_secret_generating)->secrets[0],
-                       COOKIE_WORKSPACE_WORDS);
-                rcu_read_unlock_bh();
-        }
-        return 0;
-}
-EXPORT_SYMBOL(tcp_cookie_generator);
-
 void tcp_done(struct sock *sk)
 {
         struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
@@ -3590,7 +3348,6 @@ void __init tcp_init(void)
         unsigned long limit;
         int max_rshare, max_wshare, cnt;
         unsigned int i;
-        unsigned long jiffy = jiffies;
 
         BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
@@ -3666,13 +3423,5 @@ void __init tcp_init(void)
 
         tcp_register_congestion_control(&tcp_reno);
 
-        memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
-        memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
-        tcp_secret_one.expires = jiffy; /* past due */
-        tcp_secret_two.expires = jiffy; /* past due */
-        tcp_secret_generating = &tcp_secret_one;
-        tcp_secret_primary = &tcp_secret_one;
-        tcp_secret_retiring = &tcp_secret_two;
-        tcp_secret_secondary = &tcp_secret_two;
         tcp_tasklet_init();
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3bd55bad230a..08bbe6096528 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -93,12 +93,11 @@ int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 int sysctl_tcp_frto __read_mostly = 2;
-int sysctl_tcp_frto_response __read_mostly;
 
 int sysctl_tcp_thin_dupack __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_early_retrans __read_mostly = 2;
+int sysctl_tcp_early_retrans __read_mostly = 3;
 
 #define FLAG_DATA          0x01 /* Incoming frame contained data. */
 #define FLAG_WIN_UPDATE    0x02 /* Incoming ACK was a window update. */
@@ -108,17 +107,16 @@ int sysctl_tcp_early_retrans __read_mostly = 2;
 #define FLAG_DATA_SACKED   0x20 /* New SACK. */
 #define FLAG_ECE           0x40 /* ECE in this ACK */
 #define FLAG_SLOWPATH      0x100 /* Do not skip RFC checks for window update.*/
-#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */
+#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
 #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
 #define FLAG_DSACKING_ACK  0x800 /* SACK blocks contained D-SACK info */
-#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */
 #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
+#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
 
 #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
 #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE)
 #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED)
-#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
 
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
@@ -1159,10 +1157,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
                                                   tcp_highest_sack_seq(tp)))
                                         state->reord = min(fack_count,
                                                            state->reord);
-
-                                /* SACK enhanced F-RTO (RFC4138; Appendix B) */
-                                if (!after(end_seq, tp->frto_highmark))
-                                        state->flag |= FLAG_ONLY_ORIG_SACKED;
+                                if (!after(end_seq, tp->high_seq))
+                                        state->flag |= FLAG_ORIG_SACK_ACKED;
                         }
 
                         if (sacked & TCPCB_LOST) {
@@ -1555,7 +1551,6 @@ static int
 tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
                         u32 prior_snd_una)
 {
-        const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         const unsigned char *ptr = (skb_transport_header(ack_skb) +
                                     TCP_SKB_CB(ack_skb)->sacked);
@@ -1728,12 +1723,6 @@ walk:
                                        start_seq, end_seq, dup_sack);
 
 advance_sp:
-                /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
-                 * due to in-order walk
-                 */
-                if (after(end_seq, tp->frto_highmark))
-                        state.flag &= ~FLAG_ONLY_ORIG_SACKED;
-
                 i++;
         }
 
@@ -1750,8 +1739,7 @@ advance_sp:
         tcp_verify_left_out(tp);
 
         if ((state.reord < tp->fackets_out) &&
-            ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
-            (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
+            ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
                 tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);
 
 out:
@@ -1825,197 +1813,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
         tp->sacked_out = 0;
 }
 
-static int tcp_is_sackfrto(const struct tcp_sock *tp)
-{
-        return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
-}
-
-/* F-RTO can only be used if TCP has never retransmitted anything other than
- * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
- */
-bool tcp_use_frto(struct sock *sk)
-{
-        const struct tcp_sock *tp = tcp_sk(sk);
-        const struct inet_connection_sock *icsk = inet_csk(sk);
-        struct sk_buff *skb;
-
-        if (!sysctl_tcp_frto)
-                return false;
-
-        /* MTU probe and F-RTO won't really play nicely along currently */
-        if (icsk->icsk_mtup.probe_size)
-                return false;
-
-        if (tcp_is_sackfrto(tp))
-                return true;
-
-        /* Avoid expensive walking of rexmit queue if possible */
-        if (tp->retrans_out > 1)
-                return false;
-
-        skb = tcp_write_queue_head(sk);
-        if (tcp_skb_is_last(sk, skb))
-                return true;
-        skb = tcp_write_queue_next(sk, skb); /* Skips head */
-        tcp_for_write_queue_from(skb, sk) {
-                if (skb == tcp_send_head(sk))
-                        break;
-                if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
-                        return false;
-                /* Short-circuit when first non-SACKed skb has been checked */
-                if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
-                        break;
-        }
-        return true;
-}
-
-/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
- * recovery a bit and use heuristics in tcp_process_frto() to detect if
- * the RTO was spurious. Only clear SACKED_RETRANS of the head here to
- * keep retrans_out counting accurate (with SACK F-RTO, other than head
- * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS
- * bits are handled if the Loss state is really to be entered (in
- * tcp_enter_frto_loss).
- *
- * Do like tcp_enter_loss() would; when RTO expires the second time it
- * does:
- *   "Reduce ssthresh if it has not yet been made inside this window."
- */
-void tcp_enter_frto(struct sock *sk)
-{
-        const struct inet_connection_sock *icsk = inet_csk(sk);
-        struct tcp_sock *tp = tcp_sk(sk);
-        struct sk_buff *skb;
-
-        if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
-            tp->snd_una == tp->high_seq ||
-            ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
-             !icsk->icsk_retransmits)) {
-                tp->prior_ssthresh = tcp_current_ssthresh(sk);
-                /* Our state is too optimistic in ssthresh() call because cwnd
-                 * is not reduced until tcp_enter_frto_loss() when previous F-RTO
-                 * recovery has not yet completed. Pattern would be this: RTO,
-                 * Cumulative ACK, RTO (2xRTO for the same segment does not end
-                 * up here twice).
-                 * RFC4138 should be more specific on what to do, even though
-                 * RTO is quite unlikely to occur after the first Cumulative ACK
-                 * due to back-off and complexity of triggering events ...
-                 */
-                if (tp->frto_counter) {
-                        u32 stored_cwnd;
-                        stored_cwnd = tp->snd_cwnd;
-                        tp->snd_cwnd = 2;
-                        tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-                        tp->snd_cwnd = stored_cwnd;
-                } else {
-                        tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
-                }
-                /* ... in theory, cong.control module could do "any tricks" in
-                 * ssthresh(), which means that ca_state, lost bits and lost_out
-                 * counter would have to be faked before the call occurs. We
-                 * consider that too expensive, unlikely and hacky, so modules
-                 * using these in ssthresh() must deal these incompatibility
-                 * issues if they receives CA_EVENT_FRTO and frto_counter != 0
-                 */
-                tcp_ca_event(sk, CA_EVENT_FRTO);
-        }
-
-        tp->undo_marker = tp->snd_una;
-        tp->undo_retrans = 0;
-
-        skb = tcp_write_queue_head(sk);
-        if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
-                tp->undo_marker = 0;
-        if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
-                TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-                tp->retrans_out -= tcp_skb_pcount(skb);
-        }
-        tcp_verify_left_out(tp);
-
-        /* Too bad if TCP was application limited */
-        tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
-
-        /* Earlier loss recovery underway (see RFC4138; Appendix B).
-         * The last condition is necessary at least in tp->frto_counter case.
-         */
-        if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
-            ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
-            after(tp->high_seq, tp->snd_una)) {
-                tp->frto_highmark = tp->high_seq;
-        } else {
-                tp->frto_highmark = tp->snd_nxt;
-        }
-        tcp_set_ca_state(sk, TCP_CA_Disorder);
-        tp->high_seq = tp->snd_nxt;
-        tp->frto_counter = 1;
-}
-
-/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
- * which indicates that we should follow the traditional RTO recovery,
- * i.e. mark everything lost and do go-back-N retransmission.
- */
-static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag)
-{
-        struct tcp_sock *tp = tcp_sk(sk);
-        struct sk_buff *skb;
-
-        tp->lost_out = 0;
-        tp->retrans_out = 0;
-        if (tcp_is_reno(tp))
-                tcp_reset_reno_sack(tp);
-
-        tcp_for_write_queue(skb, sk) {
-                if (skb == tcp_send_head(sk))
-                        break;
-
-                TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-                /*
-                 * Count the retransmission made on RTO correctly (only when
-                 * waiting for the first ACK and did not get it)...
-                 */
-                if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
-                        /* For some reason this R-bit might get cleared? */
-                        if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
-                                tp->retrans_out += tcp_skb_pcount(skb);
-                        /* ...enter this if branch just for the first segment */
-                        flag |= FLAG_DATA_ACKED;
-                } else {
-                        if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
-                                tp->undo_marker = 0;
-                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-                }
-
-                /* Marking forward transmissions that were made after RTO lost
-                 * can cause unnecessary retransmissions in some scenarios,
-                 * SACK blocks will mitigate that in some but not in all cases.
-                 * We used to not mark them but it was causing break-ups with
-                 * receivers that do only in-order receival.
-                 *
-                 * TODO: we could detect presence of such receiver and select
-                 * different behavior per flow.
-                 */
-                if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
-                        TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
-                        tp->lost_out += tcp_skb_pcount(skb);
-                        tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
-                }
-        }
-        tcp_verify_left_out(tp);
-
-        tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
-        tp->snd_cwnd_cnt = 0;
-        tp->snd_cwnd_stamp = tcp_time_stamp;
-        tp->frto_counter = 0;
-
-        tp->reordering = min_t(unsigned int, tp->reordering,
-                               sysctl_tcp_reordering);
-        tcp_set_ca_state(sk, TCP_CA_Loss);
-        tp->high_seq = tp->snd_nxt;
-        TCP_ECN_queue_cwr(tp);
-
-        tcp_clear_all_retrans_hints(tp);
-}
-
 static void tcp_clear_retrans_partial(struct tcp_sock *tp)
 {
         tp->retrans_out = 0;
@@ -2042,10 +1839,13 @@ void tcp_enter_loss(struct sock *sk, int how)
         const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *skb;
+        bool new_recovery = false;
 
         /* Reduce ssthresh if it has not yet been made inside this window. */
-        if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq ||
+        if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+            !after(tp->high_seq, tp->snd_una) ||
             (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+                new_recovery = true;
                 tp->prior_ssthresh = tcp_current_ssthresh(sk);
                 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
                 tcp_ca_event(sk, CA_EVENT_LOSS);
@@ -2087,8 +1887,14 @@ void tcp_enter_loss(struct sock *sk, int how)
         tcp_set_ca_state(sk, TCP_CA_Loss);
         tp->high_seq = tp->snd_nxt;
         TCP_ECN_queue_cwr(tp);
-        /* Abort F-RTO algorithm if one is in progress */
-        tp->frto_counter = 0;
+
+        /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
+         * loss recovery is underway except recurring timeout(s) on
+         * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+         */
+        tp->frto = sysctl_tcp_frto &&
+                   (new_recovery || icsk->icsk_retransmits) &&
+                   !inet_csk(sk)->icsk_mtup.probe_size;
 }
 
 /* If ACK arrived pointing to a remembered SACK, it means that our
@@ -2147,15 +1953,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
          * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
          * available, or RTO is scheduled to fire first.
          */
-        if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt)
+        if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
+            (flag & FLAG_ECE) || !tp->srtt)
                 return false;
 
         delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
         if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
                 return false;
 
-        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX);
-        tp->early_retrans_delayed = 1;
+        inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
+                                  TCP_RTO_MAX);
         return true;
 }
 
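
Worth spelling out the delay arithmetic above: tp->srtt stores the smoothed RTT left-shifted by 3 (i.e. 8x the RTT in jiffies), so `srtt >> 5` divides by 32 and yields one quarter of the RTT, floored at 2 ms, matching the "max(RTT/4, 2msec)" comment. A plain-C sketch of the same computation (jiffies granularity glossed over):

    /* srtt_shifted3 holds 8 * RTT, as tp->srtt does; >> 5 is /32,
     * i.e. RTT/4. two_ms is the msecs_to_jiffies(2) floor.
     */
    static unsigned long early_retrans_delay(unsigned long srtt_shifted3,
                                             unsigned long two_ms)
    {
            unsigned long quarter_rtt = srtt_shifted3 >> 5;

            return quarter_rtt > two_ms ? quarter_rtt : two_ms;
    }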
@@ -2271,10 +2078,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
         struct tcp_sock *tp = tcp_sk(sk);
         __u32 packets_out;
 
-        /* Do not perform any recovery during F-RTO algorithm */
-        if (tp->frto_counter)
-                return false;
-
         /* Trick#1: The loss is proven. */
         if (tp->lost_out)
                 return true;
@@ -2318,7 +2121,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
          * interval if appropriate.
          */
         if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
-            (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) &&
+            (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
             !tcp_may_send_now(sk))
                 return !tcp_pause_early_retransmit(sk, flag);
 
@@ -2635,12 +2438,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
         return failed;
 }
 
-/* Undo during loss recovery after partial ACK. */
-static bool tcp_try_undo_loss(struct sock *sk)
+/* Undo during loss recovery after partial ACK or using F-RTO. */
+static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
 {
         struct tcp_sock *tp = tcp_sk(sk);
 
-        if (tcp_may_undo(tp)) {
+        if (frto_undo || tcp_may_undo(tp)) {
                 struct sk_buff *skb;
                 tcp_for_write_queue(skb, sk) {
                         if (skb == tcp_send_head(sk))
@@ -2654,9 +2457,12 @@ static bool tcp_try_undo_loss(struct sock *sk)
                 tp->lost_out = 0;
                 tcp_undo_cwr(sk, true);
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
+                if (frto_undo)
+                        NET_INC_STATS_BH(sock_net(sk),
+                                         LINUX_MIB_TCPSPURIOUSRTOS);
                 inet_csk(sk)->icsk_retransmits = 0;
                 tp->undo_marker = 0;
-                if (tcp_is_sack(tp))
+                if (frto_undo || tcp_is_sack(tp))
                         tcp_set_ca_state(sk, TCP_CA_Open);
                 return true;
         }
@@ -2678,6 +2484,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
         struct tcp_sock *tp = tcp_sk(sk);
 
         tp->high_seq = tp->snd_nxt;
+        tp->tlp_high_seq = 0;
         tp->snd_cwnd_cnt = 0;
         tp->prior_cwnd = tp->snd_cwnd;
         tp->prr_delivered = 0;
@@ -2755,7 +2562,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
 
         tcp_verify_left_out(tp);
 
-        if (!tp->frto_counter && !tcp_any_retrans_done(sk))
+        if (!tcp_any_retrans_done(sk))
                 tp->retrans_stamp = 0;
 
         if (flag & FLAG_ECE)
@@ -2872,6 +2679,58 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
         tcp_set_ca_state(sk, TCP_CA_Recovery);
 }
 
+/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
+ * recovered or spurious. Otherwise retransmits more on partial ACKs.
+ */
+static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
+{
+        struct inet_connection_sock *icsk = inet_csk(sk);
+        struct tcp_sock *tp = tcp_sk(sk);
+        bool recovered = !before(tp->snd_una, tp->high_seq);
+
+        if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+                if (flag & FLAG_ORIG_SACK_ACKED) {
+                        /* Step 3.b. A timeout is spurious if not all data are
+                         * lost, i.e., never-retransmitted data are (s)acked.
+                         */
+                        tcp_try_undo_loss(sk, true);
+                        return;
+                }
+                if (after(tp->snd_nxt, tp->high_seq) &&
+                    (flag & FLAG_DATA_SACKED || is_dupack)) {
+                        tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+                } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
+                        tp->high_seq = tp->snd_nxt;
+                        __tcp_push_pending_frames(sk, tcp_current_mss(sk),
+                                                  TCP_NAGLE_OFF);
+                        if (after(tp->snd_nxt, tp->high_seq))
+                                return; /* Step 2.b */
+                        tp->frto = 0;
+                }
+        }
+
+        if (recovered) {
+                /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
+                icsk->icsk_retransmits = 0;
+                tcp_try_undo_recovery(sk);
+                return;
+        }
+        if (flag & FLAG_DATA_ACKED)
+                icsk->icsk_retransmits = 0;
+        if (tcp_is_reno(tp)) {
+                /* A Reno DUPACK means new data in F-RTO step 2.b above are
+                 * delivered. Lower inflight to clock out (re)tranmissions.
+                 */
+                if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
+                        tcp_add_reno_sack(sk);
+                else if (flag & FLAG_SND_UNA_ADVANCED)
+                        tcp_reset_reno_sack(tp);
+        }
+        if (tcp_try_undo_loss(sk, false))
+                return;
+        tcp_xmit_retransmit_queue(sk);
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
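
tcp_process_loss() above is the SACK-enhanced F-RTO of RFC 5682 section 3.1: after the timeout retransmits SND.UNA, the sender pushes previously unsent data (step 2.b); if a later ACK SACKs data that was never retransmitted (FLAG_ORIG_SACK_ACKED), the timeout was spurious and the cwnd reduction is undone, while a dupack or a SACK covering retransmitted data proves the loss was real (step 3.a). A compressed, hypothetical condensation of that decision (helper and enum names invented for the sketch; the real code also handles the non-SACK Reno variant):

    #include <stdbool.h>

    enum frto_verdict { FRTO_UNDECIDED, FRTO_SPURIOUS, FRTO_REAL_LOSS };

    /* orig_data_sacked:         never-retransmitted data was (s)acked */
    /* new_data_sent:            step 2.b data went out after the RTO   */
    /* dupack_or_rexmit_sacked:  evidence the original data was lost    */
    static enum frto_verdict frto_classify(bool orig_data_sacked,
                                           bool new_data_sent,
                                           bool dupack_or_rexmit_sacked)
    {
            if (orig_data_sacked)
                    return FRTO_SPURIOUS;   /* step 3.b: undo the reduction */
            if (new_data_sent && dupack_or_rexmit_sacked)
                    return FRTO_REAL_LOSS;  /* step 3.a: stay in loss recovery */
            return FRTO_UNDECIDED;          /* keep probing per step 2.b */
    }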
@@ -2918,12 +2777,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                         tp->retrans_stamp = 0;
                 } else if (!before(tp->snd_una, tp->high_seq)) {
                         switch (icsk->icsk_ca_state) {
-                        case TCP_CA_Loss:
-                                icsk->icsk_retransmits = 0;
-                                if (tcp_try_undo_recovery(sk))
-                                        return;
-                                break;
-
                         case TCP_CA_CWR:
                                 /* CWR is to be held something *above* high_seq
                                  * is ACKed for CWR bit to reach receiver. */
@@ -2954,18 +2807,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                         newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
                         break;
                 case TCP_CA_Loss:
-                        if (flag & FLAG_DATA_ACKED)
-                                icsk->icsk_retransmits = 0;
-                        if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
-                                tcp_reset_reno_sack(tp);
-                        if (!tcp_try_undo_loss(sk)) {
-                                tcp_moderate_cwnd(tp);
-                                tcp_xmit_retransmit_queue(sk);
-                                return;
-                        }
+                        tcp_process_loss(sk, flag, is_dupack);
                         if (icsk->icsk_ca_state != TCP_CA_Open)
                                 return;
-                        /* Loss is undone; fall through to processing in Open state. */
+                        /* Fall through to processing in Open state. */
                 default:
                         if (tcp_is_reno(tp)) {
                                 if (flag & FLAG_SND_UNA_ADVANCED)
@@ -3078,6 +2923,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
  */
 void tcp_rearm_rto(struct sock *sk)
 {
+        const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
 
         /* If the retrans timer is currently being used by Fast Open
@@ -3091,12 +2937,13 @@ void tcp_rearm_rto(struct sock *sk)
         } else {
                 u32 rto = inet_csk(sk)->icsk_rto;
                 /* Offset the time elapsed after installing regular RTO */
-                if (tp->early_retrans_delayed) {
+                if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+                    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
                         struct sk_buff *skb = tcp_write_queue_head(sk);
                         const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
                         s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
                         /* delta may not be positive if the socket is locked
-                         * when the delayed ER timer fires and is rescheduled.
+                         * when the retrans timer fires and is rescheduled.
                          */
                         if (delta > 0)
                                 rto = delta;
@@ -3104,7 +2951,6 @@ void tcp_rearm_rto(struct sock *sk)
                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
                                           TCP_RTO_MAX);
         }
-        tp->early_retrans_delayed = 0;
 }
 
 /* This function is called when the delayed ER timer fires. TCP enters
@@ -3192,8 +3038,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                                 flag |= FLAG_RETRANS_DATA_ACKED;
                         ca_seq_rtt = -1;
                         seq_rtt = -1;
-                        if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
-                                flag |= FLAG_NONHEAD_RETRANS_ACKED;
                 } else {
                         ca_seq_rtt = now - scb->when;
                         last_ackt = skb->tstamp;
@@ -3202,6 +3046,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                         }
                         if (!(sacked & TCPCB_SACKED_ACKED))
                                 reord = min(pkts_acked, reord);
+                        if (!after(scb->end_seq, tp->high_seq))
+                                flag |= FLAG_ORIG_SACK_ACKED;
                 }
 
                 if (sacked & TCPCB_SACKED_ACKED)
@@ -3402,165 +3248,74 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
         return flag;
 }
 
-/* A very conservative spurious RTO response algorithm: reduce cwnd and
- * continue in congestion avoidance.
- */
-static void tcp_conservative_spur_to_response(struct tcp_sock *tp)
-{
-        tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
-        tp->snd_cwnd_cnt = 0;
-        TCP_ECN_queue_cwr(tp);
-        tcp_moderate_cwnd(tp);
-}
-
-/* A conservative spurious RTO response algorithm: reduce cwnd using
- * PRR and continue in congestion avoidance.
- */
-static void tcp_cwr_spur_to_response(struct sock *sk)
-{
-        tcp_enter_cwr(sk, 0);
-}
-
-static void tcp_undo_spur_to_response(struct sock *sk, int flag)
-{
-        if (flag & FLAG_ECE)
-                tcp_cwr_spur_to_response(sk);
-        else
-                tcp_undo_cwr(sk, true);
-}
-
-/* F-RTO spurious RTO detection algorithm (RFC4138)
- *
- * F-RTO affects during two new ACKs following RTO (well, almost, see inline
- * comments). State (ACK number) is kept in frto_counter. When ACK advances
- * window (but not to or beyond highest sequence sent before RTO):
- *   On First ACK, send two new segments out.
- *   On Second ACK, RTO was likely spurious. Do spurious response (response
- *                  algorithm is not part of the F-RTO detection algorithm
- *                  given in RFC4138 but can be selected separately).
- * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
- * and TCP falls back to conventional RTO recovery. F-RTO allows overriding
- * of Nagle, this is done using frto_counter states 2 and 3, when a new data
- * segment of any size sent during F-RTO, state 2 is upgraded to 3.
- *
- * Rationale: if the RTO was spurious, new ACKs should arrive from the
- * original window even after we transmit two new data segments.
- *
- * SACK version:
- *   on first step, wait until first cumulative ACK arrives, then move to
- *   the second step. In second step, the next ACK decides.
- *
- * F-RTO is implemented (mainly) in four functions:
- *   - tcp_use_frto() is used to determine if TCP is can use F-RTO
- *   - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
- *     called when tcp_use_frto() showed green light
- *   - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
- *   - tcp_enter_frto_loss() is called if there is not enough evidence
- *     to prove that the RTO is indeed spurious. It transfers the control
- *     from F-RTO to the conventional RTO recovery
- */
-static bool tcp_process_frto(struct sock *sk, int flag)
-{
-        struct tcp_sock *tp = tcp_sk(sk);
-
-        tcp_verify_left_out(tp);
-
-        /* Duplicate the behavior from Loss state (fastretrans_alert) */
-        if (flag & FLAG_DATA_ACKED)
-                inet_csk(sk)->icsk_retransmits = 0;
-
-        if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
-            ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
-                tp->undo_marker = 0;
-
-        if (!before(tp->snd_una, tp->frto_highmark)) {
-                tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
-                return true;
-        }
-
-        if (!tcp_is_sackfrto(tp)) {
-                /* RFC4138 shortcoming in step 2; should also have case c):
-                 * ACK isn't duplicate nor advances window, e.g., opposite dir
-                 * data, winupdate
-                 */
-                if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
-                        return true;
-
-                if (!(flag & FLAG_DATA_ACKED)) {
-                        tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
-                                            flag);
-                        return true;
-                }
-        } else {
-                if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
-                        if (!tcp_packets_in_flight(tp)) {
-                                tcp_enter_frto_loss(sk, 2, flag);
-                                return true;
-                        }
-
-                        /* Prevent sending of new data. */
-                        tp->snd_cwnd = min(tp->snd_cwnd,
-                                           tcp_packets_in_flight(tp));
-                        return true;
-                }
-
-                if ((tp->frto_counter >= 2) &&
-                    (!(flag & FLAG_FORWARD_PROGRESS) ||
-                     ((flag & FLAG_DATA_SACKED) &&
-                      !(flag & FLAG_ONLY_ORIG_SACKED)))) {
-                        /* RFC4138 shortcoming (see comment above) */
-                        if (!(flag & FLAG_FORWARD_PROGRESS) &&
-                            (flag & FLAG_NOT_DUP))
-                                return true;
-
-                        tcp_enter_frto_loss(sk, 3, flag);
-                        return true;
-                }
-        }
-
-        if (tp->frto_counter == 1) {
-                /* tcp_may_send_now needs to see updated state */
-                tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
-                tp->frto_counter = 2;
-
-                if (!tcp_may_send_now(sk))
-                        tcp_enter_frto_loss(sk, 2, flag);
-
-                return true;
-        } else {
-                switch (sysctl_tcp_frto_response) {
-                case 2:
-                        tcp_undo_spur_to_response(sk, flag);
-                        break;
-                case 1:
-                        tcp_conservative_spur_to_response(tp);
-                        break;
-                default:
-                        tcp_cwr_spur_to_response(sk);
-                        break;
-                }
-                tp->frto_counter = 0;
-                tp->undo_marker = 0;
-                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS);
-        }
-        return false;
-}
-
-/* RFC 5961 7 [ACK Throttling] */
-static void tcp_send_challenge_ack(struct sock *sk)
-{
-        /* unprotected vars, we dont care of overwrites */
-        static u32 challenge_timestamp;
-        static unsigned int challenge_count;
-        u32 now = jiffies / HZ;
-
-        if (now != challenge_timestamp) {
-                challenge_timestamp = now;
-                challenge_count = 0;
-        }
-        if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
-                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
-                tcp_send_ack(sk);
+/* RFC 5961 7 [ACK Throttling] */
+static void tcp_send_challenge_ack(struct sock *sk)
+{
+        /* unprotected vars, we dont care of overwrites */
+        static u32 challenge_timestamp;
+        static unsigned int challenge_count;
+        u32 now = jiffies / HZ;
+
+        if (now != challenge_timestamp) {
+                challenge_timestamp = now;
+                challenge_count = 0;
+        }
+        if (++challenge_count <= sysctl_tcp_challenge_ack_limit) {
+                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK);
+                tcp_send_ack(sk);
+        }
+}
+
+static void tcp_store_ts_recent(struct tcp_sock *tp)
+{
+        tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
+        tp->rx_opt.ts_recent_stamp = get_seconds();
+}
+
+static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
+{
+        if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
+                /* PAWS bug workaround wrt. ACK frames, the PAWS discard
+                 * extra check below makes sure this can only happen
+                 * for pure ACK frames. -DaveM
+                 *
+                 * Not only, also it occurs for expired timestamps.
+                 */
+
+                if (tcp_paws_check(&tp->rx_opt, 0))
+                        tcp_store_ts_recent(tp);
+        }
+}
+
+/* This routine deals with acks during a TLP episode.
+ * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
+ */
+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
+{
+        struct tcp_sock *tp = tcp_sk(sk);
+        bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
+                             !(flag & (FLAG_SND_UNA_ADVANCED |
+                                       FLAG_NOT_DUP | FLAG_DATA_SACKED));
+
+        /* Mark the end of TLP episode on receiving TLP dupack or when
+         * ack is after tlp_high_seq.
+         */
+        if (is_tlp_dupack) {
+                tp->tlp_high_seq = 0;
+                return;
+        }
+
+        if (after(ack, tp->tlp_high_seq)) {
+                tp->tlp_high_seq = 0;
+                /* Don't reduce cwnd if DSACK arrives for TLP retrans. */
+                if (!(flag & FLAG_DSACKING_ACK)) {
+                        tcp_init_cwnd_reduction(sk, true);
+                        tcp_set_ca_state(sk, TCP_CA_CWR);
+                        tcp_end_cwnd_reduction(sk);
+                        tcp_set_ca_state(sk, TCP_CA_Open);
+                        NET_INC_STATS_BH(sock_net(sk),
+                                         LINUX_MIB_TCPLOSSPROBERECOVERY);
+                }
         }
 }
 
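
tcp_send_challenge_ack() (moved up here unchanged) rate-limits RFC 5961 challenge ACKs with a deliberately racy one-second bucket: `jiffies / HZ` changes once per second, which resets the counter, and at most sysctl_tcp_challenge_ack_limit ACKs go out per bucket. A standalone sketch of the same pattern, assuming a caller-supplied seconds clock:

    #include <stdbool.h>
    #include <stdint.h>

    /* Coarse token bucket: allow at most 'limit' events per wall-clock
     * second. Like the kernel version, this tolerates races on the two
     * static variables; an occasional over- or under-count is fine.
     */
    static bool challenge_allowed(uint32_t now_seconds, unsigned int limit)
    {
            static uint32_t window;
            static unsigned int count;

            if (now_seconds != window) {
                    window = now_seconds;
                    count = 0;
            }
            return ++count <= limit;
    }

Leaving the counters unlocked is a deliberate design choice: the limiter only needs to be approximately right, and taking a lock on every inbound segment would cost far more than a rare miscount.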
@@ -3578,7 +3333,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         int prior_packets;
         int prior_sacked = tp->sacked_out;
         int pkts_acked = 0;
-        bool frto_cwnd = false;
 
         /* If the ack is older than previous acks
          * then we can probably ignore it.
@@ -3598,7 +3352,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         if (after(ack, tp->snd_nxt))
                 goto invalid_ack;
 
-        if (tp->early_retrans_delayed)
+        if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+            icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
                 tcp_rearm_rto(sk);
 
         if (after(ack, prior_snd_una))
@@ -3607,6 +3362,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         prior_fackets = tp->fackets_out;
         prior_in_flight = tcp_packets_in_flight(tp);
 
+        /* ts_recent update must be made after we are sure that the packet
+         * is in window.
+         */
+        if (flag & FLAG_UPDATE_TS_RECENT)
+                tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
+
         if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
                 /* Window is constant, pure forward advance.
                  * No more checks are required.
@@ -3651,30 +3412,29 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
         pkts_acked = prior_packets - tp->packets_out;
 
-        if (tp->frto_counter)
-                frto_cwnd = tcp_process_frto(sk, flag);
-        /* Guarantee sacktag reordering detection against wrap-arounds */
-        if (before(tp->frto_highmark, tp->snd_una))
-                tp->frto_highmark = 0;
-
         if (tcp_ack_is_dubious(sk, flag)) {
                 /* Advance CWND, if state allows this. */
-                if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
-                    tcp_may_raise_cwnd(sk, flag))
+                if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
                         tcp_cong_avoid(sk, ack, prior_in_flight);
                 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
                 tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
                                       is_dupack, flag);
         } else {
-                if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
+                if (flag & FLAG_DATA_ACKED)
                         tcp_cong_avoid(sk, ack, prior_in_flight);
         }
 
+        if (tp->tlp_high_seq)
+                tcp_process_tlp_ack(sk, ack, flag);
+
         if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
                 struct dst_entry *dst = __sk_dst_get(sk);
                 if (dst)
                         dst_confirm(dst);
         }
+
+        if (icsk->icsk_pending == ICSK_TIME_RETRANS)
+                tcp_schedule_loss_probe(sk);
         return 1;
 
 no_queue:
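
Two TLP hooks land in tcp_ack() above: tcp_process_tlp_ack() closes a probe episode, and tcp_schedule_loss_probe() re-arms a probe whenever only the plain retransmission timer is pending. A "TLP dupack" is an ACK that equals tlp_high_seq exactly and signals nothing else, meaning the probe segment itself was a duplicate. A sketch of that classification, mirroring the bitmask test with illustrative flag values (not the kernel's):

    #include <stdbool.h>
    #include <stdint.h>

    #define FLG_SND_UNA_ADVANCED 0x01 /* cumulative ACK made progress */
    #define FLG_NOT_DUP          0x02 /* carried data, window update, or ack */
    #define FLG_DATA_SACKED      0x04 /* carried new SACK information */

    /* An ACK ends a TLP episode as a dupack when it acks exactly the
     * probe sequence and carries no progress, data, or SACK info.
     */
    static bool is_tlp_dupack(uint32_t ack, uint32_t tlp_high_seq, int flag)
    {
            return ack == tlp_high_seq &&
                   !(flag & (FLG_SND_UNA_ADVANCED | FLG_NOT_DUP |
                             FLG_DATA_SACKED));
    }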
@@ -3688,6 +3448,9 @@ no_queue:
          */
         if (tcp_send_head(sk))
                 tcp_ack_probe(sk);
+
+        if (tp->tlp_high_seq)
+                tcp_process_tlp_ack(sk, ack, flag);
         return 1;
 
 invalid_ack:
@@ -3712,8 +3475,8 @@ old_ack:
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
  */
-void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
-                       const u8 **hvpp, int estab,
+void tcp_parse_options(const struct sk_buff *skb,
+                       struct tcp_options_received *opt_rx, int estab,
                        struct tcp_fastopen_cookie *foc)
 {
         const unsigned char *ptr;
@@ -3797,31 +3560,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
                          */
                         break;
 #endif
-                case TCPOPT_COOKIE:
-                        /* This option is variable length.
-                         */
-                        switch (opsize) {
-                        case TCPOLEN_COOKIE_BASE:
-                                /* not yet implemented */
-                                break;
-                        case TCPOLEN_COOKIE_PAIR:
-                                /* not yet implemented */
-                                break;
-                        case TCPOLEN_COOKIE_MIN+0:
-                        case TCPOLEN_COOKIE_MIN+2:
-                        case TCPOLEN_COOKIE_MIN+4:
-                        case TCPOLEN_COOKIE_MIN+6:
-                        case TCPOLEN_COOKIE_MAX:
-                                /* 16-bit multiple */
-                                opt_rx->cookie_plus = opsize;
-                                *hvpp = ptr;
-                                break;
-                        default:
-                                /* ignore option */
-                                break;
-                        }
-                        break;
-
                 case TCPOPT_EXP:
                         /* Fast Open option shares code 254 using a
                          * 16 bits magic number. It's valid only in
@@ -3867,8 +3605,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
  * If it is wrong it falls back on tcp_parse_options().
  */
 static bool tcp_fast_parse_options(const struct sk_buff *skb,
-                                   const struct tcphdr *th,
-                                   struct tcp_sock *tp, const u8 **hvpp)
+                                   const struct tcphdr *th, struct tcp_sock *tp)
 {
         /* In the spirit of fast parsing, compare doff directly to constant
          * values. Because equality is used, short doff can be ignored here.
@@ -3882,7 +3619,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
3882 return true; 3619 return true;
3883 } 3620 }
3884 3621
3885 tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); 3622 tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
3886 if (tp->rx_opt.saw_tstamp) 3623 if (tp->rx_opt.saw_tstamp)
3887 tp->rx_opt.rcv_tsecr -= tp->tsoffset; 3624 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
3888 3625
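With the cookie-pair plumbing gone, tcp_parse_options() is back to the classic kind/length walk over the option bytes. For reference, here is a minimal userspace sketch of that walk, with simplified types and only MSS and timestamps handled; the names are illustrative, not the kernel's:

#include <stdint.h>
#include <string.h>

#define TCPOPT_EOL       0
#define TCPOPT_NOP       1
#define TCPOPT_MSS       2
#define TCPOPT_TIMESTAMP 8

struct parsed_opts {
	uint16_t mss;
	uint32_t tsval, tsecr;
	int      saw_tstamp;
};

static uint32_t get_be32(const uint8_t *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8) | p[3];
}

/* Walk the TCP option bytes: every option except NOP/EOL is laid out as
 * kind, length, payload.  Returns 0 on success, -1 on a malformed length.
 */
static int parse_tcp_options(const uint8_t *ptr, int len, struct parsed_opts *out)
{
	memset(out, 0, sizeof(*out));
	while (len > 0) {
		uint8_t kind = *ptr++;
		int opsize;

		if (kind == TCPOPT_EOL)
			return 0;
		if (kind == TCPOPT_NOP) {
			len--;
			continue;
		}
		if (len < 2)
			return -1;
		opsize = *ptr++;
		if (opsize < 2 || opsize > len)
			return -1;              /* don't parse partial options */
		if (kind == TCPOPT_MSS && opsize == 4) {
			out->mss = (uint16_t)((ptr[0] << 8) | ptr[1]);
		} else if (kind == TCPOPT_TIMESTAMP && opsize == 10) {
			out->tsval = get_be32(ptr);
			out->tsecr = get_be32(ptr + 4);
			out->saw_tstamp = 1;
		}
		ptr += opsize - 2;
		len -= opsize;
	}
	return 0;
}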
@@ -3927,27 +3664,6 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th)
3927EXPORT_SYMBOL(tcp_parse_md5sig_option); 3664EXPORT_SYMBOL(tcp_parse_md5sig_option);
3928#endif 3665#endif
3929 3666
3930static inline void tcp_store_ts_recent(struct tcp_sock *tp)
3931{
3932 tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
3933 tp->rx_opt.ts_recent_stamp = get_seconds();
3934}
3935
3936static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3937{
3938 if (tp->rx_opt.saw_tstamp && !after(seq, tp->rcv_wup)) {
3939 /* PAWS bug workaround wrt. ACK frames, the PAWS discard
3940 * extra check below makes sure this can only happen
3941 * for pure ACK frames. -DaveM
3942 *
3943 * Not only, also it occurs for expired timestamps.
3944 */
3945
3946 if (tcp_paws_check(&tp->rx_opt, 0))
3947 tcp_store_ts_recent(tp);
3948 }
3949}
3950
3951/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM 3667/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
3952 * 3668 *
3953 * It is not fatal. If this ACK does _not_ change critical state (seqs, window) 3669 * It is not fatal. If this ACK does _not_ change critical state (seqs, window)
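The two helpers are not lost: this hunk removes them here only because the ts_recent update is folded into tcp_ack() behind the new FLAG_UPDATE_TS_RECENT bit (see the step5 hunk further down, where the explicit tcp_replace_ts_recent() call disappears). A hedged userspace model of the replacement rule, using the serial-number compare that all TCP sequence and timestamp math relies on:

#include <stdint.h>
#include <stdbool.h>

/* Serial-number arithmetic: true if a is "after" b modulo 2^32. */
static bool ts_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

struct ts_state {
	uint32_t ts_recent;        /* last tsval accepted from the peer */
	uint32_t ts_recent_stamp;  /* wall-clock seconds when it was taken */
};

/* Simplified model of tcp_replace_ts_recent(): record the peer's tsval
 * only for segments that do not start beyond rcv_wup, and only when the
 * new tsval is not older than what we already hold (the kernel's
 * tcp_paws_check() additionally lets a long-expired ts_recent be replaced).
 */
static void replace_ts_recent(struct ts_state *st, uint32_t seq,
			      uint32_t rcv_wup, uint32_t tsval, uint32_t now)
{
	if (!ts_after(seq, rcv_wup) && !ts_after(st->ts_recent, tsval)) {
		st->ts_recent = tsval;
		st->ts_recent_stamp = now;
	}
}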
@@ -5263,12 +4979,10 @@ out:
5263static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, 4979static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
5264 const struct tcphdr *th, int syn_inerr) 4980 const struct tcphdr *th, int syn_inerr)
5265{ 4981{
5266 const u8 *hash_location;
5267 struct tcp_sock *tp = tcp_sk(sk); 4982 struct tcp_sock *tp = tcp_sk(sk);
5268 4983
5269 /* RFC1323: H1. Apply PAWS check first. */ 4984 /* RFC1323: H1. Apply PAWS check first. */
5270 if (tcp_fast_parse_options(skb, th, tp, &hash_location) && 4985 if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
5271 tp->rx_opt.saw_tstamp &&
5272 tcp_paws_discard(sk, skb)) { 4986 tcp_paws_discard(sk, skb)) {
5273 if (!th->rst) { 4987 if (!th->rst) {
5274 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); 4988 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@@ -5543,14 +5257,9 @@ slow_path:
5543 return 0; 5257 return 0;
5544 5258
5545step5: 5259step5:
5546 if (tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) 5260 if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
5547 goto discard; 5261 goto discard;
5548 5262
5549 /* ts_recent update must be made after we are sure that the packet
5550 * is in window.
5551 */
5552 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
5553
5554 tcp_rcv_rtt_measure_ts(sk, skb); 5263 tcp_rcv_rtt_measure_ts(sk, skb);
5555 5264
5556 /* Process urgent data. */ 5265 /* Process urgent data. */
@@ -5564,6 +5273,7 @@ step5:
5564 return 0; 5273 return 0;
5565 5274
5566csum_error: 5275csum_error:
5276 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
5567 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 5277 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
5568 5278
5569discard: 5279discard:
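The new TCP_MIB_CSUMERRORS bump lands just above the existing TCP_MIB_INERRS one, so a checksum failure still counts as a generic input error and additionally gets its own subtotal. A toy version of that layered-counter convention (plain globals here; the kernel uses per-cpu SNMP counters):

#include <stdio.h>

enum { MIB_INERRS, MIB_CSUMERRORS, MIB_MAX };
static unsigned long mib[MIB_MAX];

/* Error paths fall through: a checksum failure hits both counters, so
 * CSUMERRORS is always a subset of INERRS and tools can subtract them.
 */
static void on_bad_segment(int csum_failed)
{
	if (csum_failed)
		mib[MIB_CSUMERRORS]++;
	mib[MIB_INERRS]++;
}

int main(void)
{
	on_bad_segment(1);
	on_bad_segment(0);
	printf("InErrs=%lu CsumErrors=%lu\n",
	       mib[MIB_INERRS], mib[MIB_CSUMERRORS]);
	return 0;   /* prints InErrs=2 CsumErrors=1 */
}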
@@ -5622,12 +5332,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5622 5332
5623 if (mss == tp->rx_opt.user_mss) { 5333 if (mss == tp->rx_opt.user_mss) {
5624 struct tcp_options_received opt; 5334 struct tcp_options_received opt;
5625 const u8 *hash_location;
5626 5335
5627 /* Get original SYNACK MSS value if user MSS sets mss_clamp */ 5336 /* Get original SYNACK MSS value if user MSS sets mss_clamp */
5628 tcp_clear_options(&opt); 5337 tcp_clear_options(&opt);
5629 opt.user_mss = opt.mss_clamp = 0; 5338 opt.user_mss = opt.mss_clamp = 0;
5630 tcp_parse_options(synack, &opt, &hash_location, 0, NULL); 5339 tcp_parse_options(synack, &opt, 0, NULL);
5631 mss = opt.mss_clamp; 5340 mss = opt.mss_clamp;
5632 } 5341 }
5633 5342
@@ -5658,14 +5367,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
5658static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5367static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5659 const struct tcphdr *th, unsigned int len) 5368 const struct tcphdr *th, unsigned int len)
5660{ 5369{
5661 const u8 *hash_location;
5662 struct inet_connection_sock *icsk = inet_csk(sk); 5370 struct inet_connection_sock *icsk = inet_csk(sk);
5663 struct tcp_sock *tp = tcp_sk(sk); 5371 struct tcp_sock *tp = tcp_sk(sk);
5664 struct tcp_cookie_values *cvp = tp->cookie_values;
5665 struct tcp_fastopen_cookie foc = { .len = -1 }; 5372 struct tcp_fastopen_cookie foc = { .len = -1 };
5666 int saved_clamp = tp->rx_opt.mss_clamp; 5373 int saved_clamp = tp->rx_opt.mss_clamp;
5667 5374
5668 tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); 5375 tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
5669 if (tp->rx_opt.saw_tstamp) 5376 if (tp->rx_opt.saw_tstamp)
5670 tp->rx_opt.rcv_tsecr -= tp->tsoffset; 5377 tp->rx_opt.rcv_tsecr -= tp->tsoffset;
5671 5378
@@ -5762,30 +5469,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5762 * is initialized. */ 5469 * is initialized. */
5763 tp->copied_seq = tp->rcv_nxt; 5470 tp->copied_seq = tp->rcv_nxt;
5764 5471
5765 if (cvp != NULL &&
5766 cvp->cookie_pair_size > 0 &&
5767 tp->rx_opt.cookie_plus > 0) {
5768 int cookie_size = tp->rx_opt.cookie_plus
5769 - TCPOLEN_COOKIE_BASE;
5770 int cookie_pair_size = cookie_size
5771 + cvp->cookie_desired;
5772
5773 /* A cookie extension option was sent and returned.
5774 * Note that each incoming SYNACK replaces the
5775 * Responder cookie. The initial exchange is most
5776 * fragile, as protection against spoofing relies
5777 * entirely upon the sequence and timestamp (above).
5778 * This replacement strategy allows the correct pair to
5779 * pass through, while any others will be filtered via
5780 * Responder verification later.
5781 */
5782 if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
5783 memcpy(&cvp->cookie_pair[cvp->cookie_desired],
5784 hash_location, cookie_size);
5785 cvp->cookie_pair_size = cookie_pair_size;
5786 }
5787 }
5788
5789 smp_mb(); 5472 smp_mb();
5790 5473
5791 tcp_finish_connect(sk, skb); 5474 tcp_finish_connect(sk, skb);
@@ -5986,7 +5669,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5986 5669
5987 /* step 5: check the ACK field */ 5670 /* step 5: check the ACK field */
5988 if (true) { 5671 if (true) {
5989 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; 5672 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
5673 FLAG_UPDATE_TS_RECENT) > 0;
5990 5674
5991 switch (sk->sk_state) { 5675 switch (sk->sk_state) {
5992 case TCP_SYN_RECV: 5676 case TCP_SYN_RECV:
@@ -6137,11 +5821,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
6137 } 5821 }
6138 } 5822 }
6139 5823
6140 /* ts_recent update must be made after we are sure that the packet
6141 * is in window.
6142 */
6143 tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
6144
6145 /* step 6: check the URG bit */ 5824 /* step 6: check the URG bit */
6146 tcp_urg(sk, skb, th); 5825 tcp_urg(sk, skb, th);
6147 5826
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d09203c63264..719652305a29 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -838,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
838 */ 838 */
839static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, 839static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
840 struct request_sock *req, 840 struct request_sock *req,
841 struct request_values *rvp,
842 u16 queue_mapping, 841 u16 queue_mapping,
843 bool nocache) 842 bool nocache)
844{ 843{
@@ -851,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
851 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 850 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
852 return -1; 851 return -1;
853 852
854 skb = tcp_make_synack(sk, dst, req, rvp, NULL); 853 skb = tcp_make_synack(sk, dst, req, NULL);
855 854
856 if (skb) { 855 if (skb) {
857 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); 856 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
@@ -868,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
868 return err; 867 return err;
869} 868}
870 869
871static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, 870static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
872 struct request_values *rvp)
873{ 871{
874 int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); 872 int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
875 873
876 if (!res) 874 if (!res)
877 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 875 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1371,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
1371static int tcp_v4_conn_req_fastopen(struct sock *sk, 1369static int tcp_v4_conn_req_fastopen(struct sock *sk,
1372 struct sk_buff *skb, 1370 struct sk_buff *skb,
1373 struct sk_buff *skb_synack, 1371 struct sk_buff *skb_synack,
1374 struct request_sock *req, 1372 struct request_sock *req)
1375 struct request_values *rvp)
1376{ 1373{
1377 struct tcp_sock *tp = tcp_sk(sk); 1374 struct tcp_sock *tp = tcp_sk(sk);
1378 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 1375 struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
@@ -1467,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
1467 1464
1468int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) 1465int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1469{ 1466{
1470 struct tcp_extend_values tmp_ext;
1471 struct tcp_options_received tmp_opt; 1467 struct tcp_options_received tmp_opt;
1472 const u8 *hash_location;
1473 struct request_sock *req; 1468 struct request_sock *req;
1474 struct inet_request_sock *ireq; 1469 struct inet_request_sock *ireq;
1475 struct tcp_sock *tp = tcp_sk(sk); 1470 struct tcp_sock *tp = tcp_sk(sk);
@@ -1519,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1519 tcp_clear_options(&tmp_opt); 1514 tcp_clear_options(&tmp_opt);
1520 tmp_opt.mss_clamp = TCP_MSS_DEFAULT; 1515 tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
1521 tmp_opt.user_mss = tp->rx_opt.user_mss; 1516 tmp_opt.user_mss = tp->rx_opt.user_mss;
1522 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, 1517 tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
1523 want_cookie ? NULL : &foc);
1524
1525 if (tmp_opt.cookie_plus > 0 &&
1526 tmp_opt.saw_tstamp &&
1527 !tp->rx_opt.cookie_out_never &&
1528 (sysctl_tcp_cookie_size > 0 ||
1529 (tp->cookie_values != NULL &&
1530 tp->cookie_values->cookie_desired > 0))) {
1531 u8 *c;
1532 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1533 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1534
1535 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1536 goto drop_and_release;
1537
1538 /* Secret recipe starts with IP addresses */
1539 *mess++ ^= (__force u32)daddr;
1540 *mess++ ^= (__force u32)saddr;
1541
1542 /* plus variable length Initiator Cookie */
1543 c = (u8 *)mess;
1544 while (l-- > 0)
1545 *c++ ^= *hash_location++;
1546
1547 want_cookie = false; /* not our kind of cookie */
1548 tmp_ext.cookie_out_never = 0; /* false */
1549 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1550 } else if (!tp->rx_opt.cookie_in_always) {
1551 /* redundant indications, but ensure initialization. */
1552 tmp_ext.cookie_out_never = 1; /* true */
1553 tmp_ext.cookie_plus = 0;
1554 } else {
1555 goto drop_and_release;
1556 }
1557 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1558 1518
1559 if (want_cookie && !tmp_opt.saw_tstamp) 1519 if (want_cookie && !tmp_opt.saw_tstamp)
1560 tcp_clear_options(&tmp_opt); 1520 tcp_clear_options(&tmp_opt);
@@ -1636,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1636 * of tcp_v4_send_synack()->tcp_select_initial_window(). 1596 * of tcp_v4_send_synack()->tcp_select_initial_window().
1637 */ 1597 */
1638 skb_synack = tcp_make_synack(sk, dst, req, 1598 skb_synack = tcp_make_synack(sk, dst, req,
1639 (struct request_values *)&tmp_ext,
1640 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); 1599 fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
1641 1600
1642 if (skb_synack) { 1601 if (skb_synack) {
@@ -1660,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1660 if (fastopen_cookie_present(&foc) && foc.len != 0) 1619 if (fastopen_cookie_present(&foc) && foc.len != 0)
1661 NET_INC_STATS_BH(sock_net(sk), 1620 NET_INC_STATS_BH(sock_net(sk),
1662 LINUX_MIB_TCPFASTOPENPASSIVEFAIL); 1621 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
1663 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, 1622 } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
1664 (struct request_values *)&tmp_ext))
1665 goto drop_and_free; 1623 goto drop_and_free;
1666 1624
1667 return 0; 1625 return 0;
@@ -1908,6 +1866,7 @@ discard:
1908 return 0; 1866 return 0;
1909 1867
1910csum_err: 1868csum_err:
1869 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
1911 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 1870 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1912 goto discard; 1871 goto discard;
1913} 1872}
@@ -1950,6 +1909,51 @@ void tcp_v4_early_demux(struct sk_buff *skb)
1950 } 1909 }
1951} 1910}
1952 1911
1912/* Packet is added to VJ-style prequeue for processing in process
1913 * context, if a reader task is waiting. Apparently, this exciting
1914 * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1915 * failed somewhere. Latency? Burstiness? Well, at least now we will
1916 * see, why it failed. 8)8) --ANK
1917 *
1918 */
1919bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1920{
1921 struct tcp_sock *tp = tcp_sk(sk);
1922
1923 if (sysctl_tcp_low_latency || !tp->ucopy.task)
1924 return false;
1925
1926 if (skb->len <= tcp_hdrlen(skb) &&
1927 skb_queue_len(&tp->ucopy.prequeue) == 0)
1928 return false;
1929
1930 skb_dst_force(skb);
1931 __skb_queue_tail(&tp->ucopy.prequeue, skb);
1932 tp->ucopy.memory += skb->truesize;
1933 if (tp->ucopy.memory > sk->sk_rcvbuf) {
1934 struct sk_buff *skb1;
1935
1936 BUG_ON(sock_owned_by_user(sk));
1937
1938 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) {
1939 sk_backlog_rcv(sk, skb1);
1940 NET_INC_STATS_BH(sock_net(sk),
1941 LINUX_MIB_TCPPREQUEUEDROPPED);
1942 }
1943
1944 tp->ucopy.memory = 0;
1945 } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1946 wake_up_interruptible_sync_poll(sk_sleep(sk),
1947 POLLIN | POLLRDNORM | POLLRDBAND);
1948 if (!inet_csk_ack_scheduled(sk))
1949 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1950 (3 * tcp_rto_min(sk)) / 4,
1951 TCP_RTO_MAX);
1952 }
1953 return true;
1954}
1955EXPORT_SYMBOL(tcp_prequeue);
1956
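tcp_prequeue() arrives here un-inlined from include/net/tcp.h. Structurally it is a bounded producer queue: enqueue while a reader task waits, drain everything through the normal receive path once accounted memory passes sk_rcvbuf, and wake the reader on the first queued packet. A single-threaded userspace model of the overflow-drain part (invented types; no locking, wakeups, or timers):

#include <stddef.h>

#define RCVBUF 4096   /* stand-in for sk->sk_rcvbuf */

struct pkt {
	size_t      truesize;
	struct pkt *next;
};

struct prequeue {
	struct pkt  *head;
	struct pkt **tail;
	size_t       memory;
};

static void pq_init(struct prequeue *q)
{
	q->head = NULL;
	q->tail = &q->head;
	q->memory = 0;
}

static void deliver_now(struct pkt *p)   /* stand-in for sk_backlog_rcv() */
{
	(void)p;
}

/* Queue for the waiting reader; if accounted memory exceeds the receive
 * buffer, drain the whole queue through the slow path instead of letting
 * it grow, mirroring the sk_rcvbuf check in tcp_prequeue().
 */
static int pq_add(struct prequeue *q, struct pkt *p)
{
	p->next = NULL;
	*q->tail = p;
	q->tail = &p->next;
	q->memory += p->truesize;

	if (q->memory > RCVBUF) {
		struct pkt *it = q->head, *next;

		while (it) {
			next = it->next;
			deliver_now(it);
			it = next;
		}
		pq_init(q);
		return 0;   /* drained */
	}
	return 1;           /* queued; first packet would wake the reader */
}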
1953/* 1957/*
1954 * From tcp_input.c 1958 * From tcp_input.c
1955 */ 1959 */
@@ -1983,7 +1987,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1983 * provided case of th->doff==0 is eliminated. 1987 * provided case of th->doff==0 is eliminated.
1984 * So, we defer the checks. */ 1988 * So, we defer the checks. */
1985 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) 1989 if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
1986 goto bad_packet; 1990 goto csum_error;
1987 1991
1988 th = tcp_hdr(skb); 1992 th = tcp_hdr(skb);
1989 iph = ip_hdr(skb); 1993 iph = ip_hdr(skb);
@@ -2049,6 +2053,8 @@ no_tcp_socket:
2049 goto discard_it; 2053 goto discard_it;
2050 2054
2051 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 2055 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
2056csum_error:
2057 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
2052bad_packet: 2058bad_packet:
2053 TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 2059 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
2054 } else { 2060 } else {
@@ -2070,10 +2076,13 @@ do_time_wait:
2070 goto discard_it; 2076 goto discard_it;
2071 } 2077 }
2072 2078
2073 if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { 2079 if (skb->len < (th->doff << 2)) {
2074 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
2075 inet_twsk_put(inet_twsk(sk)); 2080 inet_twsk_put(inet_twsk(sk));
2076 goto discard_it; 2081 goto bad_packet;
2082 }
2083 if (tcp_checksum_complete(skb)) {
2084 inet_twsk_put(inet_twsk(sk));
2085 goto csum_error;
2077 } 2086 }
2078 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 2087 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
2079 case TCP_TW_SYN: { 2088 case TCP_TW_SYN: {
@@ -2197,12 +2206,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
2197 if (inet_csk(sk)->icsk_bind_hash) 2206 if (inet_csk(sk)->icsk_bind_hash)
2198 inet_put_port(sk); 2207 inet_put_port(sk);
2199 2208
2200 /* TCP Cookie Transactions */
2201 if (tp->cookie_values != NULL) {
2202 kref_put(&tp->cookie_values->kref,
2203 tcp_cookie_values_release);
2204 tp->cookie_values = NULL;
2205 }
2206 BUG_ON(tp->fastopen_rsk != NULL); 2209 BUG_ON(tp->fastopen_rsk != NULL);
2207 2210
2208 /* If socket is aborted during connect operation */ 2211 /* If socket is aborted during connect operation */
@@ -2580,7 +2583,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
2580 2583
2581int tcp_seq_open(struct inode *inode, struct file *file) 2584int tcp_seq_open(struct inode *inode, struct file *file)
2582{ 2585{
2583 struct tcp_seq_afinfo *afinfo = PDE(inode)->data; 2586 struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2584 struct tcp_iter_state *s; 2587 struct tcp_iter_state *s;
2585 int err; 2588 int err;
2586 2589
@@ -2659,7 +2662,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
2659 __u16 srcp = ntohs(inet->inet_sport); 2662 __u16 srcp = ntohs(inet->inet_sport);
2660 int rx_queue; 2663 int rx_queue;
2661 2664
2662 if (icsk->icsk_pending == ICSK_TIME_RETRANS) { 2665 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2666 icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2667 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2663 timer_active = 1; 2668 timer_active = 1;
2664 timer_expires = icsk->icsk_timeout; 2669 timer_expires = icsk->icsk_timeout;
2665 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { 2670 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index b6f3583ddfe8..da14436c1735 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -64,7 +64,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
64{ 64{
65 struct cg_proto *cg_proto; 65 struct cg_proto *cg_proto;
66 struct tcp_memcontrol *tcp; 66 struct tcp_memcontrol *tcp;
67 u64 val;
68 67
69 cg_proto = tcp_prot.proto_cgroup(memcg); 68 cg_proto = tcp_prot.proto_cgroup(memcg);
70 if (!cg_proto) 69 if (!cg_proto)
@@ -72,8 +71,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
72 71
73 tcp = tcp_from_cgproto(cg_proto); 72 tcp = tcp_from_cgproto(cg_proto);
74 percpu_counter_destroy(&tcp->tcp_sockets_allocated); 73 percpu_counter_destroy(&tcp->tcp_sockets_allocated);
75
76 val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
77} 74}
78EXPORT_SYMBOL(tcp_destroy_cgroup); 75EXPORT_SYMBOL(tcp_destroy_cgroup);
79 76
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f696d7c2e9fa..f6a005c485a9 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -96,7 +96,8 @@ struct tcpm_hash_bucket {
96 96
97static DEFINE_SPINLOCK(tcp_metrics_lock); 97static DEFINE_SPINLOCK(tcp_metrics_lock);
98 98
99static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst) 99static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
100 bool fastopen_clear)
100{ 101{
101 u32 val; 102 u32 val;
102 103
@@ -122,9 +123,11 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst)
122 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); 123 tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
123 tm->tcpm_ts = 0; 124 tm->tcpm_ts = 0;
124 tm->tcpm_ts_stamp = 0; 125 tm->tcpm_ts_stamp = 0;
125 tm->tcpm_fastopen.mss = 0; 126 if (fastopen_clear) {
126 tm->tcpm_fastopen.syn_loss = 0; 127 tm->tcpm_fastopen.mss = 0;
127 tm->tcpm_fastopen.cookie.len = 0; 128 tm->tcpm_fastopen.syn_loss = 0;
129 tm->tcpm_fastopen.cookie.len = 0;
130 }
128} 131}
129 132
130static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, 133static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
@@ -154,7 +157,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
154 } 157 }
155 tm->tcpm_addr = *addr; 158 tm->tcpm_addr = *addr;
156 159
157 tcpm_suck_dst(tm, dst); 160 tcpm_suck_dst(tm, dst, true);
158 161
159 if (likely(!reclaim)) { 162 if (likely(!reclaim)) {
160 tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain; 163 tm->tcpm_next = net->ipv4.tcp_metrics_hash[hash].chain;
@@ -171,7 +174,7 @@ out_unlock:
171static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) 174static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
172{ 175{
173 if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) 176 if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
174 tcpm_suck_dst(tm, dst); 177 tcpm_suck_dst(tm, dst, false);
175} 178}
176 179
177#define TCP_METRICS_RECLAIM_DEPTH 5 180#define TCP_METRICS_RECLAIM_DEPTH 5
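The new fastopen_clear flag splits the two callers: tcpm_new() wipes everything for a fresh entry, while the timeout-driven refresh in tcpm_check_stamp() re-reads the dst metrics but keeps the learned Fast Open state. The pattern in miniature, with an invented struct rather than the kernel's:

#include <string.h>
#include <stdbool.h>

struct fastopen_state { int mss; int syn_loss; };

struct metrics {
	unsigned int rtt;
	unsigned int cwnd;
	struct fastopen_state fo;
};

/* Refresh the volatile path metrics; clear the learned Fast Open state
 * only when the entry is (re)created, never on a periodic refresh.
 */
static void metrics_refresh(struct metrics *m, unsigned int rtt,
			    unsigned int cwnd, bool fastopen_clear)
{
	m->rtt = rtt;
	m->cwnd = cwnd;
	if (fastopen_clear)
		memset(&m->fo, 0, sizeof(m->fo));
}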
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b83a49cc3816..0f0178827259 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -93,13 +93,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
93 const struct tcphdr *th) 93 const struct tcphdr *th)
94{ 94{
95 struct tcp_options_received tmp_opt; 95 struct tcp_options_received tmp_opt;
96 const u8 *hash_location;
97 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); 96 struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
98 bool paws_reject = false; 97 bool paws_reject = false;
99 98
100 tmp_opt.saw_tstamp = 0; 99 tmp_opt.saw_tstamp = 0;
101 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { 100 if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
102 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 101 tcp_parse_options(skb, &tmp_opt, 0, NULL);
103 102
104 if (tmp_opt.saw_tstamp) { 103 if (tmp_opt.saw_tstamp) {
105 tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; 104 tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset;
@@ -388,32 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
388 struct tcp_request_sock *treq = tcp_rsk(req); 387 struct tcp_request_sock *treq = tcp_rsk(req);
389 struct inet_connection_sock *newicsk = inet_csk(newsk); 388 struct inet_connection_sock *newicsk = inet_csk(newsk);
390 struct tcp_sock *newtp = tcp_sk(newsk); 389 struct tcp_sock *newtp = tcp_sk(newsk);
391 struct tcp_sock *oldtp = tcp_sk(sk);
392 struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
393
394 /* TCP Cookie Transactions require space for the cookie pair,
395 * as it differs for each connection. There is no need to
396 * copy any s_data_payload stored at the original socket.
397 * Failure will prevent resuming the connection.
398 *
399 * Presumed copied, in order of appearance:
400 * cookie_in_always, cookie_out_never
401 */
402 if (oldcvp != NULL) {
403 struct tcp_cookie_values *newcvp =
404 kzalloc(sizeof(*newtp->cookie_values),
405 GFP_ATOMIC);
406
407 if (newcvp != NULL) {
408 kref_init(&newcvp->kref);
409 newcvp->cookie_desired =
410 oldcvp->cookie_desired;
411 newtp->cookie_values = newcvp;
412 } else {
413 /* Not Yet Implemented */
414 newtp->cookie_values = NULL;
415 }
416 }
417 390
418 /* Now setup tcp_sock */ 391 /* Now setup tcp_sock */
419 newtp->pred_flags = 0; 392 newtp->pred_flags = 0;
@@ -422,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
422 newtp->rcv_nxt = treq->rcv_isn + 1; 395 newtp->rcv_nxt = treq->rcv_isn + 1;
423 396
424 newtp->snd_sml = newtp->snd_una = 397 newtp->snd_sml = newtp->snd_una =
425 newtp->snd_nxt = newtp->snd_up = 398 newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
426 treq->snt_isn + 1 + tcp_s_data_size(oldtp);
427 399
428 tcp_prequeue_init(newtp); 400 tcp_prequeue_init(newtp);
429 INIT_LIST_HEAD(&newtp->tsq_node); 401 INIT_LIST_HEAD(&newtp->tsq_node);
@@ -440,6 +412,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
440 newtp->fackets_out = 0; 412 newtp->fackets_out = 0;
441 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; 413 newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
442 tcp_enable_early_retrans(newtp); 414 tcp_enable_early_retrans(newtp);
415 newtp->tlp_high_seq = 0;
443 416
444 /* So many TCP implementations out there (incorrectly) count the 417 /* So many TCP implementations out there (incorrectly) count the
445 * initial SYN frame in their delayed-ACK and congestion control 418 * initial SYN frame in their delayed-ACK and congestion control
@@ -449,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
449 newtp->snd_cwnd = TCP_INIT_CWND; 422 newtp->snd_cwnd = TCP_INIT_CWND;
450 newtp->snd_cwnd_cnt = 0; 423 newtp->snd_cwnd_cnt = 0;
451 424
452 newtp->frto_counter = 0;
453 newtp->frto_highmark = 0;
454
455 if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && 425 if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
456 !try_module_get(newicsk->icsk_ca_ops->owner)) 426 !try_module_get(newicsk->icsk_ca_ops->owner))
457 newicsk->icsk_ca_ops = &tcp_init_congestion_ops; 427 newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
@@ -459,8 +429,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
459 tcp_set_ca_state(newsk, TCP_CA_Open); 429 tcp_set_ca_state(newsk, TCP_CA_Open);
460 tcp_init_xmit_timers(newsk); 430 tcp_init_xmit_timers(newsk);
461 skb_queue_head_init(&newtp->out_of_order_queue); 431 skb_queue_head_init(&newtp->out_of_order_queue);
462 newtp->write_seq = newtp->pushed_seq = 432 newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
463 treq->snt_isn + 1 + tcp_s_data_size(oldtp);
464 433
465 newtp->rx_opt.saw_tstamp = 0; 434 newtp->rx_opt.saw_tstamp = 0;
466 435
@@ -537,7 +506,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
537 bool fastopen) 506 bool fastopen)
538{ 507{
539 struct tcp_options_received tmp_opt; 508 struct tcp_options_received tmp_opt;
540 const u8 *hash_location;
541 struct sock *child; 509 struct sock *child;
542 const struct tcphdr *th = tcp_hdr(skb); 510 const struct tcphdr *th = tcp_hdr(skb);
543 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); 511 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
@@ -547,7 +515,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
547 515
548 tmp_opt.saw_tstamp = 0; 516 tmp_opt.saw_tstamp = 0;
549 if (th->doff > (sizeof(struct tcphdr)>>2)) { 517 if (th->doff > (sizeof(struct tcphdr)>>2)) {
550 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 518 tcp_parse_options(skb, &tmp_opt, 0, NULL);
551 519
552 if (tmp_opt.saw_tstamp) { 520 if (tmp_opt.saw_tstamp) {
553 tmp_opt.ts_recent = req->ts_recent; 521 tmp_opt.ts_recent = req->ts_recent;
@@ -583,8 +551,13 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
583 * 551 *
584 * Note that even if there is new data in the SYN packet 552 * Note that even if there is new data in the SYN packet
585 * they will be thrown away too. 553 * they will be thrown away too.
554 *
555 * Reset timer after retransmitting SYNACK, similar to
556 * the idea of fast retransmit in recovery.
586 */ 557 */
587 inet_rtx_syn_ack(sk, req); 558 if (!inet_rtx_syn_ack(sk, req))
559 req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout,
560 TCP_RTO_MAX) + jiffies;
588 return NULL; 561 return NULL;
589 } 562 }
590 563
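The rearmed expiry is plain binary exponential backoff with a ceiling: TCP_TIMEOUT_INIT doubled per retransmission, capped at TCP_RTO_MAX. Worked out in milliseconds, assuming the usual 1 s initial timeout and 120 s cap:

#include <stdio.h>

#define TIMEOUT_INIT 1000U     /* ms; TCP_TIMEOUT_INIT is 1 second */
#define RTO_MAX      120000U   /* ms; TCP_RTO_MAX is 120 seconds   */

static unsigned int synack_rtx_timeout(unsigned int num_timeout)
{
	unsigned int t = TIMEOUT_INIT << num_timeout;   /* 1s, 2s, 4s, ... */

	return t < RTO_MAX ? t : RTO_MAX;
}

int main(void)
{
	for (unsigned int n = 0; n < 9; n++)
		printf("retry %u -> %u ms\n", n, synack_rtx_timeout(n));
	return 0;   /* caps at 120000 ms from retry 7 onward */
}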
@@ -647,7 +620,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
647 */ 620 */
648 if ((flg & TCP_FLAG_ACK) && !fastopen && 621 if ((flg & TCP_FLAG_ACK) && !fastopen &&
649 (TCP_SKB_CB(skb)->ack_seq != 622 (TCP_SKB_CB(skb)->ack_seq !=
650 tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) 623 tcp_rsk(req)->snt_isn + 1))
651 return sk; 624 return sk;
652 625
653 /* Also, it would be not so bad idea to check rcv_tsecr, which 626 /* Also, it would be not so bad idea to check rcv_tsecr, which
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5d0b4387cba6..536d40929ba6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,28 +65,24 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
65/* By default, RFC2861 behavior. */ 65/* By default, RFC2861 behavior. */
66int sysctl_tcp_slow_start_after_idle __read_mostly = 1; 66int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
67 67
68int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
69EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
70
71static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 68static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
72 int push_one, gfp_t gfp); 69 int push_one, gfp_t gfp);
73 70
74/* Account for new data that has been sent to the network. */ 71/* Account for new data that has been sent to the network. */
75static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) 72static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
76{ 73{
74 struct inet_connection_sock *icsk = inet_csk(sk);
77 struct tcp_sock *tp = tcp_sk(sk); 75 struct tcp_sock *tp = tcp_sk(sk);
78 unsigned int prior_packets = tp->packets_out; 76 unsigned int prior_packets = tp->packets_out;
79 77
80 tcp_advance_send_head(sk, skb); 78 tcp_advance_send_head(sk, skb);
81 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; 79 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
82 80
83 /* Don't override Nagle indefinitely with F-RTO */
84 if (tp->frto_counter == 2)
85 tp->frto_counter = 3;
86
87 tp->packets_out += tcp_skb_pcount(skb); 81 tp->packets_out += tcp_skb_pcount(skb);
88 if (!prior_packets || tp->early_retrans_delayed) 82 if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
83 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
89 tcp_rearm_rto(sk); 84 tcp_rearm_rto(sk);
85 }
90} 86}
91 87
92/* SND.NXT, if window was not shrunk. 88/* SND.NXT, if window was not shrunk.
@@ -384,7 +380,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
384#define OPTION_TS (1 << 1) 380#define OPTION_TS (1 << 1)
385#define OPTION_MD5 (1 << 2) 381#define OPTION_MD5 (1 << 2)
386#define OPTION_WSCALE (1 << 3) 382#define OPTION_WSCALE (1 << 3)
387#define OPTION_COOKIE_EXTENSION (1 << 4)
388#define OPTION_FAST_OPEN_COOKIE (1 << 8) 383#define OPTION_FAST_OPEN_COOKIE (1 << 8)
389 384
390struct tcp_out_options { 385struct tcp_out_options {
@@ -398,36 +393,6 @@ struct tcp_out_options {
398 struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ 393 struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
399}; 394};
400 395
401/* The sysctl int routines are generic, so check consistency here.
402 */
403static u8 tcp_cookie_size_check(u8 desired)
404{
405 int cookie_size;
406
407 if (desired > 0)
408 /* previously specified */
409 return desired;
410
411 cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
412 if (cookie_size <= 0)
413 /* no default specified */
414 return 0;
415
416 if (cookie_size <= TCP_COOKIE_MIN)
417 /* value too small, specify minimum */
418 return TCP_COOKIE_MIN;
419
420 if (cookie_size >= TCP_COOKIE_MAX)
421 /* value too large, specify maximum */
422 return TCP_COOKIE_MAX;
423
424 if (cookie_size & 1)
425 /* 8-bit multiple, illegal, fix it */
426 cookie_size++;
427
428 return (u8)cookie_size;
429}
430
431/* Write previously computed TCP options to the packet. 396/* Write previously computed TCP options to the packet.
432 * 397 *
433 * Beware: Something in the Internet is very sensitive to the ordering of 398 * Beware: Something in the Internet is very sensitive to the ordering of
@@ -446,27 +411,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
446{ 411{
447 u16 options = opts->options; /* mungable copy */ 412 u16 options = opts->options; /* mungable copy */
448 413
449 /* Having both authentication and cookies for security is redundant,
450 * and there's certainly not enough room. Instead, the cookie-less
451 * extension variant is proposed.
452 *
453 * Consider the pessimal case with authentication. The options
454 * could look like:
455 * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40
456 */
457 if (unlikely(OPTION_MD5 & options)) { 414 if (unlikely(OPTION_MD5 & options)) {
458 if (unlikely(OPTION_COOKIE_EXTENSION & options)) { 415 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
459 *ptr++ = htonl((TCPOPT_COOKIE << 24) | 416 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
460 (TCPOLEN_COOKIE_BASE << 16) |
461 (TCPOPT_MD5SIG << 8) |
462 TCPOLEN_MD5SIG);
463 } else {
464 *ptr++ = htonl((TCPOPT_NOP << 24) |
465 (TCPOPT_NOP << 16) |
466 (TCPOPT_MD5SIG << 8) |
467 TCPOLEN_MD5SIG);
468 }
469 options &= ~OPTION_COOKIE_EXTENSION;
470 /* overload cookie hash location */ 417 /* overload cookie hash location */
471 opts->hash_location = (__u8 *)ptr; 418 opts->hash_location = (__u8 *)ptr;
472 ptr += 4; 419 ptr += 4;
@@ -495,44 +442,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
495 *ptr++ = htonl(opts->tsecr); 442 *ptr++ = htonl(opts->tsecr);
496 } 443 }
497 444
498 /* Specification requires after timestamp, so do it now.
499 *
500 * Consider the pessimal case without authentication. The options
501 * could look like:
502 * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40
503 */
504 if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
505 __u8 *cookie_copy = opts->hash_location;
506 u8 cookie_size = opts->hash_size;
507
508 /* 8-bit multiple handled in tcp_cookie_size_check() above,
509 * and elsewhere.
510 */
511 if (0x2 & cookie_size) {
512 __u8 *p = (__u8 *)ptr;
513
514 /* 16-bit multiple */
515 *p++ = TCPOPT_COOKIE;
516 *p++ = TCPOLEN_COOKIE_BASE + cookie_size;
517 *p++ = *cookie_copy++;
518 *p++ = *cookie_copy++;
519 ptr++;
520 cookie_size -= 2;
521 } else {
522 /* 32-bit multiple */
523 *ptr++ = htonl(((TCPOPT_NOP << 24) |
524 (TCPOPT_NOP << 16) |
525 (TCPOPT_COOKIE << 8) |
526 TCPOLEN_COOKIE_BASE) +
527 cookie_size);
528 }
529
530 if (cookie_size > 0) {
531 memcpy(ptr, cookie_copy, cookie_size);
532 ptr += (cookie_size / 4);
533 }
534 }
535
536 if (unlikely(OPTION_SACK_ADVERTISE & options)) { 445 if (unlikely(OPTION_SACK_ADVERTISE & options)) {
537 *ptr++ = htonl((TCPOPT_NOP << 24) | 446 *ptr++ = htonl((TCPOPT_NOP << 24) |
538 (TCPOPT_NOP << 16) | 447 (TCPOPT_NOP << 16) |
@@ -591,11 +500,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
591 struct tcp_md5sig_key **md5) 500 struct tcp_md5sig_key **md5)
592{ 501{
593 struct tcp_sock *tp = tcp_sk(sk); 502 struct tcp_sock *tp = tcp_sk(sk);
594 struct tcp_cookie_values *cvp = tp->cookie_values;
595 unsigned int remaining = MAX_TCP_OPTION_SPACE; 503 unsigned int remaining = MAX_TCP_OPTION_SPACE;
596 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
597 tcp_cookie_size_check(cvp->cookie_desired) :
598 0;
599 struct tcp_fastopen_request *fastopen = tp->fastopen_req; 504 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
600 505
601#ifdef CONFIG_TCP_MD5SIG 506#ifdef CONFIG_TCP_MD5SIG
@@ -647,52 +552,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
647 tp->syn_fastopen = 1; 552 tp->syn_fastopen = 1;
648 } 553 }
649 } 554 }
650 /* Note that timestamps are required by the specification.
651 *
652 * Odd numbers of bytes are prohibited by the specification, ensuring
653 * that the cookie is 16-bit aligned, and the resulting cookie pair is
654 * 32-bit aligned.
655 */
656 if (*md5 == NULL &&
657 (OPTION_TS & opts->options) &&
658 cookie_size > 0) {
659 int need = TCPOLEN_COOKIE_BASE + cookie_size;
660
661 if (0x2 & need) {
662 /* 32-bit multiple */
663 need += 2; /* NOPs */
664
665 if (need > remaining) {
666 /* try shrinking cookie to fit */
667 cookie_size -= 2;
668 need -= 4;
669 }
670 }
671 while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
672 cookie_size -= 4;
673 need -= 4;
674 }
675 if (TCP_COOKIE_MIN <= cookie_size) {
676 opts->options |= OPTION_COOKIE_EXTENSION;
677 opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
678 opts->hash_size = cookie_size;
679
680 /* Remember for future incarnations. */
681 cvp->cookie_desired = cookie_size;
682
683 if (cvp->cookie_desired != cvp->cookie_pair_size) {
684 /* Currently use random bytes as a nonce,
685 * assuming these are completely unpredictable
686 * by hostile users of the same system.
687 */
688 get_random_bytes(&cvp->cookie_pair[0],
689 cookie_size);
690 cvp->cookie_pair_size = cookie_size;
691 }
692 555
693 remaining -= need;
694 }
695 }
696 return MAX_TCP_OPTION_SPACE - remaining; 556 return MAX_TCP_OPTION_SPACE - remaining;
697} 557}
698 558
@@ -702,14 +562,10 @@ static unsigned int tcp_synack_options(struct sock *sk,
702 unsigned int mss, struct sk_buff *skb, 562 unsigned int mss, struct sk_buff *skb,
703 struct tcp_out_options *opts, 563 struct tcp_out_options *opts,
704 struct tcp_md5sig_key **md5, 564 struct tcp_md5sig_key **md5,
705 struct tcp_extend_values *xvp,
706 struct tcp_fastopen_cookie *foc) 565 struct tcp_fastopen_cookie *foc)
707{ 566{
708 struct inet_request_sock *ireq = inet_rsk(req); 567 struct inet_request_sock *ireq = inet_rsk(req);
709 unsigned int remaining = MAX_TCP_OPTION_SPACE; 568 unsigned int remaining = MAX_TCP_OPTION_SPACE;
710 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
711 xvp->cookie_plus :
712 0;
713 569
714#ifdef CONFIG_TCP_MD5SIG 570#ifdef CONFIG_TCP_MD5SIG
715 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); 571 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
@@ -757,28 +613,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
757 remaining -= need; 613 remaining -= need;
758 } 614 }
759 } 615 }
760 /* Similar rationale to tcp_syn_options() applies here, too. 616
761 * If the <SYN> options fit, the same options should fit now!
762 */
763 if (*md5 == NULL &&
764 ireq->tstamp_ok &&
765 cookie_plus > TCPOLEN_COOKIE_BASE) {
766 int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
767
768 if (0x2 & need) {
769 /* 32-bit multiple */
770 need += 2; /* NOPs */
771 }
772 if (need <= remaining) {
773 opts->options |= OPTION_COOKIE_EXTENSION;
774 opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
775 remaining -= need;
776 } else {
777 /* There's no error return, so flag it. */
778 xvp->cookie_out_never = 1; /* true */
779 opts->hash_size = 0;
780 }
781 }
782 return MAX_TCP_OPTION_SPACE - remaining; 617 return MAX_TCP_OPTION_SPACE - remaining;
783} 618}
784 619
@@ -953,7 +788,7 @@ void __init tcp_tasklet_init(void)
953 * We cant xmit new skbs from this context, as we might already 788 * We cant xmit new skbs from this context, as we might already
954 * hold qdisc lock. 789 * hold qdisc lock.
955 */ 790 */
956static void tcp_wfree(struct sk_buff *skb) 791void tcp_wfree(struct sk_buff *skb)
957{ 792{
958 struct sock *sk = skb->sk; 793 struct sock *sk = skb->sk;
959 struct tcp_sock *tp = tcp_sk(sk); 794 struct tcp_sock *tp = tcp_sk(sk);
@@ -1012,6 +847,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1012 __net_timestamp(skb); 847 __net_timestamp(skb);
1013 848
1014 if (likely(clone_it)) { 849 if (likely(clone_it)) {
850 const struct sk_buff *fclone = skb + 1;
851
852 if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
853 fclone->fclone == SKB_FCLONE_CLONE))
854 NET_INC_STATS_BH(sock_net(sk),
855 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
856
1015 if (unlikely(skb_cloned(skb))) 857 if (unlikely(skb_cloned(skb)))
1016 skb = pskb_copy(skb, gfp_mask); 858 skb = pskb_copy(skb, gfp_mask);
1017 else 859 else
@@ -1632,11 +1474,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
1632 if (nonagle & TCP_NAGLE_PUSH) 1474 if (nonagle & TCP_NAGLE_PUSH)
1633 return true; 1475 return true;
1634 1476
1635 /* Don't use the nagle rule for urgent data (or for the final FIN). 1477 /* Don't use the nagle rule for urgent data (or for the final FIN). */
1636 * Nagle can be ignored during F-RTO too (see RFC4138). 1478 if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1637 */
1638 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1639 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
1640 return true; 1479 return true;
1641 1480
1642 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) 1481 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1961,6 +1800,9 @@ static int tcp_mtu_probe(struct sock *sk)
1961 * snd_up-64k-mss .. snd_up cannot be large. However, taking into 1800 * snd_up-64k-mss .. snd_up cannot be large. However, taking into
1962 * account rare use of URG, this is not a big flaw. 1801 * account rare use of URG, this is not a big flaw.
1963 * 1802 *
1803 * Send at most one packet when push_one > 0. Temporarily ignore
1804 * cwnd limit to force at most one packet out when push_one == 2.
 1805 *
1964 * Returns true, if no segments are in flight and we have queued segments, 1806 * Returns true, if no segments are in flight and we have queued segments,
1965 * but cannot send anything now because of SWS or another problem. 1807 * but cannot send anything now because of SWS or another problem.
1966 */ 1808 */
@@ -1996,8 +1838,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1996 goto repair; /* Skip network transmission */ 1838 goto repair; /* Skip network transmission */
1997 1839
1998 cwnd_quota = tcp_cwnd_test(tp, skb); 1840 cwnd_quota = tcp_cwnd_test(tp, skb);
1999 if (!cwnd_quota) 1841 if (!cwnd_quota) {
2000 break; 1842 if (push_one == 2)
1843 /* Force out a loss probe pkt. */
1844 cwnd_quota = 1;
1845 else
1846 break;
1847 }
2001 1848
2002 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) 1849 if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
2003 break; 1850 break;
@@ -2051,10 +1898,129 @@ repair:
2051 if (likely(sent_pkts)) { 1898 if (likely(sent_pkts)) {
2052 if (tcp_in_cwnd_reduction(sk)) 1899 if (tcp_in_cwnd_reduction(sk))
2053 tp->prr_out += sent_pkts; 1900 tp->prr_out += sent_pkts;
1901
1902 /* Send one loss probe per tail loss episode. */
1903 if (push_one != 2)
1904 tcp_schedule_loss_probe(sk);
2054 tcp_cwnd_validate(sk); 1905 tcp_cwnd_validate(sk);
2055 return false; 1906 return false;
2056 } 1907 }
2057 return !tp->packets_out && tcp_send_head(sk); 1908 return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
1909}
1910
1911bool tcp_schedule_loss_probe(struct sock *sk)
1912{
1913 struct inet_connection_sock *icsk = inet_csk(sk);
1914 struct tcp_sock *tp = tcp_sk(sk);
1915 u32 timeout, tlp_time_stamp, rto_time_stamp;
1916 u32 rtt = tp->srtt >> 3;
1917
1918 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
1919 return false;
1920 /* No consecutive loss probes. */
1921 if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
1922 tcp_rearm_rto(sk);
1923 return false;
1924 }
1925 /* Don't do any loss probe on a Fast Open connection before 3WHS
1926 * finishes.
1927 */
1928 if (sk->sk_state == TCP_SYN_RECV)
1929 return false;
1930
1931 /* TLP is only scheduled when next timer event is RTO. */
1932 if (icsk->icsk_pending != ICSK_TIME_RETRANS)
1933 return false;
1934
1935 /* Schedule a loss probe in 2*RTT for SACK capable connections
1936 * in Open state, that are either limited by cwnd or application.
1937 */
1938 if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
1939 !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
1940 return false;
1941
1942 if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
1943 tcp_send_head(sk))
1944 return false;
1945
1946 /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
1947 * for delayed ack when there's one outstanding packet.
1948 */
1949 timeout = rtt << 1;
1950 if (tp->packets_out == 1)
1951 timeout = max_t(u32, timeout,
1952 (rtt + (rtt >> 1) + TCP_DELACK_MAX));
1953 timeout = max_t(u32, timeout, msecs_to_jiffies(10));
1954
1955 /* If RTO is shorter, just schedule TLP in its place. */
1956 tlp_time_stamp = tcp_time_stamp + timeout;
1957 rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
1958 if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
1959 s32 delta = rto_time_stamp - tcp_time_stamp;
1960 if (delta > 0)
1961 timeout = delta;
1962 }
1963
1964 inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
1965 TCP_RTO_MAX);
1966 return true;
1967}
1968
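The probe timeout (PTO) computed above is 2*SRTT, raised to 1.5*SRTT plus the maximum delayed-ACK time when only one packet is in flight, floored at 10 ms, and clamped so it never fires after the pending RTO. A worked sketch in milliseconds, taking TCP_DELACK_MAX as 200 ms:

#include <stdio.h>

#define DELACK_MAX 200U   /* ms; TCP_DELACK_MAX */

static unsigned int tlp_timeout(unsigned int srtt_ms, unsigned int packets_out,
				unsigned int rto_remaining_ms)
{
	unsigned int timeout = 2 * srtt_ms;

	/* With one packet outstanding the probe must outlive a delayed ACK. */
	if (packets_out == 1 && timeout < srtt_ms + srtt_ms / 2 + DELACK_MAX)
		timeout = srtt_ms + srtt_ms / 2 + DELACK_MAX;
	if (timeout < 10)
		timeout = 10;
	/* Never schedule the probe past the retransmission timer. */
	if (timeout > rto_remaining_ms)
		timeout = rto_remaining_ms;
	return timeout;
}

int main(void)
{
	printf("%u\n", tlp_timeout(50, 4, 1000));  /* 100 ms: plain 2*SRTT   */
	printf("%u\n", tlp_timeout(50, 1, 1000));  /* 275 ms: 1.5*SRTT + 200 */
	printf("%u\n", tlp_timeout(50, 4, 60));    /* 60 ms: clamped to RTO  */
	return 0;
}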
1969/* When probe timeout (PTO) fires, send a new segment if one exists, else
1970 * retransmit the last segment.
1971 */
1972void tcp_send_loss_probe(struct sock *sk)
1973{
1974 struct tcp_sock *tp = tcp_sk(sk);
1975 struct sk_buff *skb;
1976 int pcount;
1977 int mss = tcp_current_mss(sk);
1978 int err = -1;
1979
1980 if (tcp_send_head(sk) != NULL) {
1981 err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
1982 goto rearm_timer;
1983 }
1984
1985 /* At most one outstanding TLP retransmission. */
1986 if (tp->tlp_high_seq)
1987 goto rearm_timer;
1988
1989 /* Retransmit last segment. */
1990 skb = tcp_write_queue_tail(sk);
1991 if (WARN_ON(!skb))
1992 goto rearm_timer;
1993
1994 pcount = tcp_skb_pcount(skb);
1995 if (WARN_ON(!pcount))
1996 goto rearm_timer;
1997
1998 if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
1999 if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
2000 goto rearm_timer;
2001 skb = tcp_write_queue_tail(sk);
2002 }
2003
2004 if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
2005 goto rearm_timer;
2006
2007 /* Probe with zero data doesn't trigger fast recovery. */
2008 if (skb->len > 0)
2009 err = __tcp_retransmit_skb(sk, skb);
2010
2011 /* Record snd_nxt for loss detection. */
2012 if (likely(!err))
2013 tp->tlp_high_seq = tp->snd_nxt;
2014
2015rearm_timer:
2016 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2017 inet_csk(sk)->icsk_rto,
2018 TCP_RTO_MAX);
2019
2020 if (likely(!err))
2021 NET_INC_STATS_BH(sock_net(sk),
2022 LINUX_MIB_TCPLOSSPROBES);
2023 return;
2058} 2024}
2059 2025
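tcp_send_loss_probe() records snd_nxt in tlp_high_seq so that at most one probe is outstanding per tail-loss episode; the ACK path (tcp_process_tlp_ack, added in the tcp_input.c hunks above) and the RTO handler clear it. A minimal model of that invariant, hedged because the real ACK-side loss detection has more cases than shown:

#include <stdint.h>
#include <stdbool.h>

struct tlp_state {
	uint32_t tlp_high_seq;   /* 0 = no probe outstanding */
};

static bool seq_after(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

/* Fire a probe only if none is outstanding; remember what it covered. */
static bool tlp_fire(struct tlp_state *st, uint32_t snd_nxt)
{
	if (st->tlp_high_seq)
		return false;         /* at most one per episode */
	st->tlp_high_seq = snd_nxt;
	return true;
}

/* Once an ACK moves past everything the probe covered, the episode ends. */
static void tlp_on_ack(struct tlp_state *st, uint32_t ack)
{
	if (st->tlp_high_seq && !seq_after(st->tlp_high_seq, ack))
		st->tlp_high_seq = 0;
}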
2060/* Push out any pending frames which were held back due to 2026/* Push out any pending frames which were held back due to
@@ -2388,8 +2354,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2388 */ 2354 */
2389 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2355 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2390 2356
2391 /* make sure skb->data is aligned on arches that require it */ 2357 /* make sure skb->data is aligned on arches that require it
2392 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { 2358 * and check if ack-trimming & collapsing extended the headroom
2359 * beyond what csum_start can cover.
2360 */
2361 if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
2362 skb_headroom(skb) >= 0xFFFF)) {
2393 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, 2363 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2394 GFP_ATOMIC); 2364 GFP_ATOMIC);
2395 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : 2365 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
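The added headroom test guards checksum offload: skb->csum_start is a 16-bit offset from skb->head, so once ACK-trimming and collapsing have grown the headroom past 0xFFFF the offset can no longer be represented and the skb must be recopied first. The truncation, illustrated:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned long headroom = 0x10010;            /* > 0xFFFF after collapsing */
	uint16_t csum_start = (uint16_t)headroom;    /* what the field can hold   */

	printf("%lu truncates to %u\n", headroom, csum_start);   /* 65552 -> 16 */
	return 0;
}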
@@ -2675,32 +2645,24 @@ int tcp_send_synack(struct sock *sk)
2675 * sk: listener socket 2645 * sk: listener socket
2676 * dst: dst entry attached to the SYNACK 2646 * dst: dst entry attached to the SYNACK
2677 * req: request_sock pointer 2647 * req: request_sock pointer
2678 * rvp: request_values pointer
2679 * 2648 *
2680 * Allocate one skb and build a SYNACK packet. 2649 * Allocate one skb and build a SYNACK packet.
2681 * @dst is consumed : Caller should not use it again. 2650 * @dst is consumed : Caller should not use it again.
2682 */ 2651 */
2683struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2652struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2684 struct request_sock *req, 2653 struct request_sock *req,
2685 struct request_values *rvp,
2686 struct tcp_fastopen_cookie *foc) 2654 struct tcp_fastopen_cookie *foc)
2687{ 2655{
2688 struct tcp_out_options opts; 2656 struct tcp_out_options opts;
2689 struct tcp_extend_values *xvp = tcp_xv(rvp);
2690 struct inet_request_sock *ireq = inet_rsk(req); 2657 struct inet_request_sock *ireq = inet_rsk(req);
2691 struct tcp_sock *tp = tcp_sk(sk); 2658 struct tcp_sock *tp = tcp_sk(sk);
2692 const struct tcp_cookie_values *cvp = tp->cookie_values;
2693 struct tcphdr *th; 2659 struct tcphdr *th;
2694 struct sk_buff *skb; 2660 struct sk_buff *skb;
2695 struct tcp_md5sig_key *md5; 2661 struct tcp_md5sig_key *md5;
2696 int tcp_header_size; 2662 int tcp_header_size;
2697 int mss; 2663 int mss;
2698 int s_data_desired = 0;
2699 2664
2700 if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) 2665 skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));
2701 s_data_desired = cvp->s_data_desired;
2702 skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
2703 sk_gfp_atomic(sk, GFP_ATOMIC));
2704 if (unlikely(!skb)) { 2666 if (unlikely(!skb)) {
2705 dst_release(dst); 2667 dst_release(dst);
2706 return NULL; 2668 return NULL;
@@ -2709,6 +2671,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2709 skb_reserve(skb, MAX_TCP_HEADER); 2671 skb_reserve(skb, MAX_TCP_HEADER);
2710 2672
2711 skb_dst_set(skb, dst); 2673 skb_dst_set(skb, dst);
2674 security_skb_owned_by(skb, sk);
2712 2675
2713 mss = dst_metric_advmss(dst); 2676 mss = dst_metric_advmss(dst);
2714 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) 2677 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
@@ -2742,9 +2705,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2742 else 2705 else
2743#endif 2706#endif
2744 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2707 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2745 tcp_header_size = tcp_synack_options(sk, req, mss, 2708 tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
2746 skb, &opts, &md5, xvp, foc) 2709 foc) + sizeof(*th);
2747 + sizeof(*th);
2748 2710
2749 skb_push(skb, tcp_header_size); 2711 skb_push(skb, tcp_header_size);
2750 skb_reset_transport_header(skb); 2712 skb_reset_transport_header(skb);
@@ -2762,40 +2724,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2762 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, 2724 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2763 TCPHDR_SYN | TCPHDR_ACK); 2725 TCPHDR_SYN | TCPHDR_ACK);
2764 2726
2765 if (OPTION_COOKIE_EXTENSION & opts.options) {
2766 if (s_data_desired) {
2767 u8 *buf = skb_put(skb, s_data_desired);
2768
2769 /* copy data directly from the listening socket. */
2770 memcpy(buf, cvp->s_data_payload, s_data_desired);
2771 TCP_SKB_CB(skb)->end_seq += s_data_desired;
2772 }
2773
2774 if (opts.hash_size > 0) {
2775 __u32 workspace[SHA_WORKSPACE_WORDS];
2776 u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
2777 u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
2778
2779 /* Secret recipe depends on the Timestamp, (future)
2780 * Sequence and Acknowledgment Numbers, Initiator
2781 * Cookie, and others handled by IP variant caller.
2782 */
2783 *tail-- ^= opts.tsval;
2784 *tail-- ^= tcp_rsk(req)->rcv_isn + 1;
2785 *tail-- ^= TCP_SKB_CB(skb)->seq + 1;
2786
2787 /* recommended */
2788 *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
2789 *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
2790
2791 sha_transform((__u32 *)&xvp->cookie_bakery[0],
2792 (char *)mess,
2793 &workspace[0]);
2794 opts.hash_location =
2795 (__u8 *)&xvp->cookie_bakery[0];
2796 }
2797 }
2798
2799 th->seq = htonl(TCP_SKB_CB(skb)->seq); 2727 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2800 /* XXX data is queued and acked as is. No buffer/window check */ 2728 /* XXX data is queued and acked as is. No buffer/window check */
2801 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); 2729 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b78aac30c498..4b85e6f636c9 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk)
342 struct tcp_sock *tp = tcp_sk(sk); 342 struct tcp_sock *tp = tcp_sk(sk);
343 struct inet_connection_sock *icsk = inet_csk(sk); 343 struct inet_connection_sock *icsk = inet_csk(sk);
344 344
345 if (tp->early_retrans_delayed) {
346 tcp_resume_early_retransmit(sk);
347 return;
348 }
349 if (tp->fastopen_rsk) { 345 if (tp->fastopen_rsk) {
350 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && 346 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
351 sk->sk_state != TCP_FIN_WAIT1); 347 sk->sk_state != TCP_FIN_WAIT1);
@@ -360,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk)
360 356
361 WARN_ON(tcp_write_queue_empty(sk)); 357 WARN_ON(tcp_write_queue_empty(sk));
362 358
359 tp->tlp_high_seq = 0;
360
363 if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && 361 if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
364 !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { 362 !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
365 /* Receiver dastardly shrinks window. Our retransmits 363 /* Receiver dastardly shrinks window. Our retransmits
@@ -418,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk)
418 NET_INC_STATS_BH(sock_net(sk), mib_idx); 416 NET_INC_STATS_BH(sock_net(sk), mib_idx);
419 } 417 }
420 418
421 if (tcp_use_frto(sk)) { 419 tcp_enter_loss(sk, 0);
422 tcp_enter_frto(sk);
423 } else {
424 tcp_enter_loss(sk, 0);
425 }
426 420
427 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { 421 if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
428 /* Retransmission failed because of local congestion, 422 /* Retransmission failed because of local congestion,
@@ -495,13 +489,20 @@ void tcp_write_timer_handler(struct sock *sk)
495 } 489 }
496 490
497 event = icsk->icsk_pending; 491 event = icsk->icsk_pending;
498 icsk->icsk_pending = 0;
499 492
500 switch (event) { 493 switch (event) {
494 case ICSK_TIME_EARLY_RETRANS:
495 tcp_resume_early_retransmit(sk);
496 break;
497 case ICSK_TIME_LOSS_PROBE:
498 tcp_send_loss_probe(sk);
499 break;
501 case ICSK_TIME_RETRANS: 500 case ICSK_TIME_RETRANS:
501 icsk->icsk_pending = 0;
502 tcp_retransmit_timer(sk); 502 tcp_retransmit_timer(sk);
503 break; 503 break;
504 case ICSK_TIME_PROBE0: 504 case ICSK_TIME_PROBE0:
505 icsk->icsk_pending = 0;
505 tcp_probe_timer(sk); 506 tcp_probe_timer(sk);
506 break; 507 break;
507 } 508 }
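Note the asymmetry this hunk introduces: icsk_pending is no longer cleared up front. The early-retransmit and loss-probe handlers own the field (they may rearm a different timer themselves), while the classic RTO and zero-window-probe paths still clear it before running. Schematically, with invented names:

#include <stddef.h>

enum timer_event { T_NONE, T_EARLY_RETRANS, T_LOSS_PROBE, T_RETRANS, T_PROBE0 };

struct conn { enum timer_event pending; };

/* Dispatch mirrors tcp_write_timer_handler(): ER/TLP handlers manage
 * (and possibly rearm) the pending field themselves, while the RTO and
 * zero-window-probe paths clear it before their handler runs.
 */
static void write_timer(struct conn *c)
{
	switch (c->pending) {
	case T_EARLY_RETRANS:
		/* resume_early_retransmit(c); */
		break;
	case T_LOSS_PROBE:
		/* send_loss_probe(c); */
		break;
	case T_RETRANS:
		c->pending = T_NONE;
		/* retransmit_timer(c); */
		break;
	case T_PROBE0:
		c->pending = T_NONE;
		/* probe_timer(c); */
		break;
	default:
		break;
	}
}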
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 1b91bf48e277..76a1e23259e1 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
236 tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 236 tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
237 break; 237 break;
238 238
239 case CA_EVENT_FRTO: 239 case CA_EVENT_LOSS:
240 tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); 240 tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
241 /* Update RTT_min when next ack arrives */ 241 /* Update RTT_min when next ack arrives */
242 w->reset_rtt_min = 1; 242 w->reset_rtt_min = 1;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0a073a263720..0bf5d399a03c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -902,9 +902,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		ipc.addr = inet->inet_saddr;
 
 	ipc.oif = sk->sk_bound_dev_if;
-	err = sock_tx_timestamp(sk, &ipc.tx_flags);
-	if (err)
-		return err;
+
+	sock_tx_timestamp(sk, &ipc.tx_flags);
+
 	if (msg->msg_controllen) {
 		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
 		if (err)
@@ -1131,6 +1131,8 @@ static unsigned int first_packet_length(struct sock *sk)
 	spin_lock_bh(&rcvq->lock);
 	while ((skb = skb_peek(rcvq)) != NULL &&
 		udp_lib_checksum_complete(skb)) {
+		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS,
+				 IS_UDPLITE(sk));
 		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
 				 IS_UDPLITE(sk));
 		atomic_inc(&sk->sk_drops);
@@ -1286,8 +1288,10 @@ out:
 
 csum_copy_err:
 	slow = lock_sock_fast(sk);
-	if (!skb_kill_datagram(sk, skb, flags))
+	if (!skb_kill_datagram(sk, skb, flags)) {
+		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
 		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+	}
 	unlock_sock_fast(sk, slow);
 
 	if (noblock)
@@ -1513,7 +1517,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	if (rcu_access_pointer(sk->sk_filter) &&
 	    udp_lib_checksum_complete(skb))
-		goto drop;
+		goto csum_error;
 
 
 	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
@@ -1533,6 +1537,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	return rc;
 
+csum_error:
+	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
 drop:
 	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	atomic_inc(&sk->sk_drops);
@@ -1749,6 +1755,7 @@ csum_error:
 		       proto == IPPROTO_UDPLITE ? "Lite" : "",
 		       &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
 		       ulen);
+	UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
 drop:
 	UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
@@ -2093,7 +2100,7 @@ static void udp_seq_stop(struct seq_file *seq, void *v)
 
 int udp_seq_open(struct inode *inode, struct file *file)
 {
-	struct udp_seq_afinfo *afinfo = PDE(inode)->data;
+	struct udp_seq_afinfo *afinfo = PDE_DATA(inode);
 	struct udp_iter_state *s;
 	int err;
 
@@ -2279,31 +2286,91 @@ void __init udp_init(void)
 
 int udp4_ufo_send_check(struct sk_buff *skb)
 {
-	const struct iphdr *iph;
-	struct udphdr *uh;
-
-	if (!pskb_may_pull(skb, sizeof(*uh)))
+	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
 		return -EINVAL;
 
-	iph = ip_hdr(skb);
-	uh = udp_hdr(skb);
+	if (likely(!skb->encapsulation)) {
+		const struct iphdr *iph;
+		struct udphdr *uh;
+
+		iph = ip_hdr(skb);
+		uh = udp_hdr(skb);
 
-	uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
-				       IPPROTO_UDP, 0);
-	skb->csum_start = skb_transport_header(skb) - skb->head;
-	skb->csum_offset = offsetof(struct udphdr, check);
-	skb->ip_summed = CHECKSUM_PARTIAL;
+		uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
+					       IPPROTO_UDP, 0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		skb->ip_summed = CHECKSUM_PARTIAL;
+	}
 	return 0;
 }
 
+static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+					      netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	int mac_len = skb->mac_len;
+	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
+	__be16 protocol = skb->protocol;
+	netdev_features_t enc_features;
+	int outer_hlen;
+
+	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
+		goto out;
+
+	skb->encapsulation = 0;
+	__skb_pull(skb, tnl_hlen);
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, skb_inner_network_offset(skb));
+	skb->mac_len = skb_inner_network_offset(skb);
+	skb->protocol = htons(ETH_P_TEB);
+
+	/* segment inner packet. */
+	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+	segs = skb_mac_gso_segment(skb, enc_features);
+	if (!segs || IS_ERR(segs))
+		goto out;
+
+	outer_hlen = skb_tnl_header_len(skb);
+	skb = segs;
+	do {
+		struct udphdr *uh;
+		int udp_offset = outer_hlen - tnl_hlen;
+
+		skb->mac_len = mac_len;
+
+		skb_push(skb, outer_hlen);
+		skb_reset_mac_header(skb);
+		skb_set_network_header(skb, mac_len);
+		skb_set_transport_header(skb, udp_offset);
+		uh = udp_hdr(skb);
+		uh->len = htons(skb->len - udp_offset);
+
+		/* csum segment if tunnel sets skb with csum. */
+		if (unlikely(uh->check)) {
+			struct iphdr *iph = ip_hdr(skb);
+
+			uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+						       skb->len - udp_offset,
+						       IPPROTO_UDP, 0);
+			uh->check = csum_fold(skb_checksum(skb, udp_offset,
+							   skb->len - udp_offset, 0));
+			if (uh->check == 0)
+				uh->check = CSUM_MANGLED_0;
+
+		}
+		skb->ip_summed = CHECKSUM_NONE;
+		skb->protocol = protocol;
+	} while ((skb = skb->next));
+out:
+	return segs;
+}
+
 struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 				  netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
-	int offset;
-	__wsum csum;
-
 	mss = skb_shinfo(skb)->gso_size;
 	if (unlikely(skb->len <= mss))
 		goto out;
@@ -2313,6 +2380,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 		int type = skb_shinfo(skb)->gso_type;
 
 		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
+				      SKB_GSO_UDP_TUNNEL |
 				      SKB_GSO_GRE) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
@@ -2323,20 +2391,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 		goto out;
 	}
 
-	/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
-	 * do checksum of UDP packets sent as multiple IP fragments.
-	 */
-	offset = skb_checksum_start_offset(skb);
-	csum = skb_checksum(skb, offset, skb->len - offset, 0);
-	offset += skb->csum_offset;
-	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
-	skb->ip_summed = CHECKSUM_NONE;
-
 	/* Fragment the skb. IP headers of the fragments are updated in
 	 * inet_gso_segment()
 	 */
-	segs = skb_segment(skb, features);
+	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+		segs = skb_udp_tunnel_segment(skb, features);
+	else {
+		int offset;
+		__wsum csum;
+
+		/* Do software UFO. Complete and fill in the UDP checksum as
+		 * HW cannot do checksum of UDP packets sent as multiple
+		 * IP fragments.
+		 */
+		offset = skb_checksum_start_offset(skb);
+		csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		offset += skb->csum_offset;
+		*(__sum16 *)(skb->data + offset) = csum_fold(csum);
+		skb->ip_summed = CHECKSUM_NONE;
+
+		segs = skb_segment(skb, features);
+	}
 out:
 	return segs;
 }
-
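
Note: the accounting pattern added across udp.c is deliberate: every checksum failure first bumps the new UDP_MIB_CSUMERRORS counter and then falls through into the pre-existing UDP_MIB_INERRORS path, so the old counter keeps meaning "all input errors" and the new one is a strict subset. A condensed sketch of that control flow (illustrative, not literal kernel code):

	static void example_udp_rx_error(struct sock *sk, struct sk_buff *skb,
					 bool is_udplite, bool csum_bad)
	{
		if (!csum_bad)
			goto drop;

		/* csum_error: counted first, then falls through ... */
		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
	drop:	/* ... into the pre-existing error accounting */
		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
		atomic_inc(&sk->sk_drops);
		kfree_skb(skb);
	}
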
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 505b30ad9182..7927db0a9279 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -25,7 +25,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
 		return 0;
 
 	return inet_sk_diag_fill(sk, NULL, skb, req,
-			sk_user_ns(NETLINK_CB(cb->skb).ssk),
+			sk_user_ns(NETLINK_CB(cb->skb).sk),
 			NETLINK_CB(cb->skb).portid,
 			cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
 }
@@ -64,14 +64,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 		goto out;
 
 	err = -ENOMEM;
-	rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
-				     sizeof(struct inet_diag_meminfo) +
-				     64)), GFP_KERNEL);
+	rep = nlmsg_new(sizeof(struct inet_diag_msg) +
+			sizeof(struct inet_diag_meminfo) + 64,
+			GFP_KERNEL);
 	if (!rep)
 		goto out;
 
 	err = inet_sk_diag_fill(sk, NULL, rep, req,
-			sk_user_ns(NETLINK_CB(in_skb).ssk),
+			sk_user_ns(NETLINK_CB(in_skb).sk),
 			NETLINK_CB(in_skb).portid,
 			nlh->nlmsg_seq, 0, nlh);
 	if (err < 0) {
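
Note: the alloc_skb(NLMSG_SPACE(...)) conversion here is part of a tree-wide netlink cleanup; nlmsg_new() takes the payload size directly and does the header and alignment arithmetic itself. Roughly, as defined in include/net/netlink.h:

	static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags)
	{
		return alloc_skb(nlmsg_total_size(payload), flags);
	}
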
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index fe5189e2e114..eb1dd4d643f2 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
 
-	/* DS disclosed */
-	top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
+	/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
+	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+		top_iph->tos = 0;
+	else
+		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
 					    XFRM_MODE_SKB_CB(skb)->tos);
 
 	flags = x->props.flags;
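
Note: pulled out of the hunk above for readability, the new outer-TOS selection amounts to the following standalone logic (a sketch, not kernel code): with XFRM_SA_XFLAG_DONT_ENCAP_DSCP set, the inner DSCP bits are not copied to the outer header, while ECN is still encapsulated from the inner TOS.

	/* Sketch of the outer-header TOS computation shown in the diff. */
	static u8 example_outer_tos(u32 extra_flags, u8 inner_tos)
	{
		u8 tos = (extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) ?
			 0 : inner_tos;

		return INET_ECN_encapsulate(tos, inner_tos);
	}
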
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ed0b9e2e797a..11b13ea69db4 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -156,6 +156,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION
 config IPV6_SIT
 	tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"
 	select INET_TUNNEL
+	select NET_IP_TUNNEL
 	select IPV6_NDISC_NODETYPE
 	default y
 	---help---
@@ -201,6 +202,7 @@ config IPV6_TUNNEL
 config IPV6_GRE
 	tristate "IPv6: GRE tunnel"
 	select IPV6_TUNNEL
+	select NET_IP_TUNNEL
 	---help---
 	  Tunneling means encapsulating data of one protocol type within
 	  another protocol and sending it over a channel that understands the
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 309af19a0a0a..9af088d2cdaa 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o
 obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
 obj-$(CONFIG_IPV6_GRE) += ip6_gre.o
 
-obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o
+obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o
 obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)
 
 obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a459c4f5b769..d1ab6ab29a55 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -70,6 +70,7 @@
 #include <net/snmp.h>
 
 #include <net/af_ieee802154.h>
+#include <net/firewire.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
 #include <net/ndisc.h>
@@ -168,8 +169,6 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
 			       struct net_device *dev);
 
-static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
-
 static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.forwarding = 0,
 	.hop_limit = IPV6_DEFAULT_HOPLIMIT,
@@ -421,6 +420,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 		ipv6_regen_rndid((unsigned long) ndev);
 	}
 #endif
+	ndev->token = in6addr_any;
 
 	if (netif_running(dev) && addrconf_qdisc_ok(dev))
 		ndev->if_flags |= IF_READY;
@@ -544,8 +544,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
 };
 
 static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
-				     struct nlmsghdr *nlh,
-				     void *arg)
+				     struct nlmsghdr *nlh)
 {
 	struct net *net = sock_net(in_skb->sk);
 	struct nlattr *tb[NETCONFA_MAX+1];
@@ -605,6 +604,77 @@ errout:
 	return err;
 }
 
+static int inet6_netconf_dump_devconf(struct sk_buff *skb,
+				      struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	int h, s_h;
+	int idx, s_idx;
+	struct net_device *dev;
+	struct inet6_dev *idev;
+	struct hlist_head *head;
+
+	s_h = cb->args[0];
+	s_idx = idx = cb->args[1];
+
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		rcu_read_lock();
+		cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
+			  net->dev_base_seq;
+		hlist_for_each_entry_rcu(dev, head, index_hlist) {
+			if (idx < s_idx)
+				goto cont;
+			idev = __in6_dev_get(dev);
+			if (!idev)
+				goto cont;
+
+			if (inet6_netconf_fill_devconf(skb, dev->ifindex,
+						       &idev->cnf,
+						       NETLINK_CB(cb->skb).portid,
+						       cb->nlh->nlmsg_seq,
+						       RTM_NEWNETCONF,
+						       NLM_F_MULTI,
+						       -1) <= 0) {
+				rcu_read_unlock();
+				goto done;
+			}
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+			idx++;
+		}
+		rcu_read_unlock();
+	}
+	if (h == NETDEV_HASHENTRIES) {
+		if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL,
+					       net->ipv6.devconf_all,
+					       NETLINK_CB(cb->skb).portid,
+					       cb->nlh->nlmsg_seq,
+					       RTM_NEWNETCONF, NLM_F_MULTI,
+					       -1) <= 0)
+			goto done;
+		else
+			h++;
+	}
+	if (h == NETDEV_HASHENTRIES + 1) {
+		if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT,
+					       net->ipv6.devconf_dflt,
+					       NETLINK_CB(cb->skb).portid,
+					       cb->nlh->nlmsg_seq,
+					       RTM_NEWNETCONF, NLM_F_MULTI,
+					       -1) <= 0)
+			goto done;
+		else
+			h++;
+	}
+done:
+	cb->args[0] = h;
+	cb->args[1] = idx;
+
+	return skb->len;
+}
+
 #ifdef CONFIG_SYSCTL
 static void dev_forward_change(struct inet6_dev *idev)
 {
@@ -806,6 +876,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	ifa->prefix_len = pfxlen;
 	ifa->flags = flags | IFA_F_TENTATIVE;
 	ifa->cstamp = ifa->tstamp = jiffies;
+	ifa->tokenized = false;
 
 	ifa->rt = rt;
 
@@ -837,7 +908,7 @@ out2:
 	rcu_read_unlock_bh();
 
 	if (likely(err == 0))
-		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
+		inet6addr_notifier_call_chain(NETDEV_UP, ifa);
 	else {
 		kfree(ifa);
 		ifa = ERR_PTR(err);
@@ -927,7 +998,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	ipv6_ifa_notify(RTM_DELADDR, ifp);
 
-	atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp);
+	inet6addr_notifier_call_chain(NETDEV_DOWN, ifp);
 
 	/*
 	 * Purge or update corresponding prefix
@@ -1668,6 +1739,20 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)
 	return 0;
 }
 
+static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev)
+{
+	union fwnet_hwaddr *ha;
+
+	if (dev->addr_len != FWNET_ALEN)
+		return -1;
+
+	ha = (union fwnet_hwaddr *)dev->dev_addr;
+
+	memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id));
+	eui[0] ^= 2;
+	return 0;
+}
+
 static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)
 {
 	/* XXX: inherit EUI-64 from other interface -- yoshfuji */
@@ -1732,6 +1817,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
 		return addrconf_ifid_gre(eui, dev);
 	case ARPHRD_IEEE802154:
 		return addrconf_ifid_eui64(eui, dev);
+	case ARPHRD_IEEE1394:
+		return addrconf_ifid_ieee1394(eui, dev);
 	}
 	return -1;
 }
@@ -2046,11 +2133,19 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 	struct inet6_ifaddr *ifp;
 	struct in6_addr addr;
 	int create = 0, update_lft = 0;
+	bool tokenized = false;
 
 	if (pinfo->prefix_len == 64) {
 		memcpy(&addr, &pinfo->prefix, 8);
-		if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
-		    ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
+
+		if (!ipv6_addr_any(&in6_dev->token)) {
+			read_lock_bh(&in6_dev->lock);
+			memcpy(addr.s6_addr + 8,
+			       in6_dev->token.s6_addr + 8, 8);
+			read_unlock_bh(&in6_dev->lock);
+			tokenized = true;
+		} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
+			   ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
 			in6_dev_put(in6_dev);
 			return;
 		}
@@ -2091,6 +2186,7 @@ ok:
 
 		update_lft = create = 1;
 		ifp->cstamp = jiffies;
+		ifp->tokenized = tokenized;
 		addrconf_dad_start(ifp);
 	}
 
@@ -2600,7 +2696,8 @@ static void addrconf_dev_config(struct net_device *dev)
 	    (dev->type != ARPHRD_FDDI) &&
 	    (dev->type != ARPHRD_ARCNET) &&
 	    (dev->type != ARPHRD_INFINIBAND) &&
-	    (dev->type != ARPHRD_IEEE802154)) {
+	    (dev->type != ARPHRD_IEEE802154) &&
+	    (dev->type != ARPHRD_IEEE1394)) {
 		/* Alas, we support only Ethernet autoconfiguration. */
 		return;
 	}
@@ -2988,7 +3085,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	if (state != INET6_IFADDR_STATE_DEAD) {
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
-		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
+		inet6addr_notifier_call_chain(NETDEV_DOWN, ifa);
 	}
 	in6_ifa_put(ifa);
 
@@ -3537,7 +3634,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
 };
 
 static int
-inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct net *net = sock_net(skb->sk);
 	struct ifaddrmsg *ifm;
@@ -3603,7 +3700,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
 }
 
 static int
-inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct net *net = sock_net(skb->sk);
 	struct ifaddrmsg *ifm;
@@ -3834,6 +3931,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
 						NLM_F_MULTI);
 			if (err <= 0)
 				break;
+			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
 		}
 		break;
 	}
@@ -3891,6 +3989,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 	s_ip_idx = ip_idx = cb->args[2];
 
 	rcu_read_lock();
+	cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq;
 	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
 		idx = 0;
 		head = &net->dev_index_head[h];
@@ -3942,8 +4041,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
 	return inet6_dump_addr(skb, cb, type);
 }
 
-static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
-			     void *arg)
+static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)
 {
 	struct net *net = sock_net(in_skb->sk);
 	struct ifaddrmsg *ifm;
@@ -4076,7 +4174,8 @@ static inline size_t inet6_ifla6_size(void)
 	       + nla_total_size(sizeof(struct ifla_cacheinfo))
 	       + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
 	       + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
-	       + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */
+	       + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
+	       + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -4163,6 +4262,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
 		goto nla_put_failure;
 	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
 
+	nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
+	if (nla == NULL)
+		goto nla_put_failure;
+	read_lock_bh(&idev->lock);
+	memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
+	read_unlock_bh(&idev->lock);
+
 	return 0;
 
 nla_put_failure:
@@ -4190,6 +4296,80 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
 	return 0;
 }
 
+static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
+{
+	struct inet6_ifaddr *ifp;
+	struct net_device *dev = idev->dev;
+	bool update_rs = false;
+
+	if (token == NULL)
+		return -EINVAL;
+	if (ipv6_addr_any(token))
+		return -EINVAL;
+	if (dev->flags & (IFF_LOOPBACK | IFF_NOARP))
+		return -EINVAL;
+	if (!ipv6_accept_ra(idev))
+		return -EINVAL;
+	if (idev->cnf.rtr_solicits <= 0)
+		return -EINVAL;
+
+	write_lock_bh(&idev->lock);
+
+	BUILD_BUG_ON(sizeof(token->s6_addr) != 16);
+	memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8);
+
+	write_unlock_bh(&idev->lock);
+
+	if (!idev->dead && (idev->if_flags & IF_READY)) {
+		struct in6_addr ll_addr;
+
+		ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE |
+				IFA_F_OPTIMISTIC);
+
+		/* If we're not ready, then normal ifup will take care
+		 * of this. Otherwise, we need to request our rs here.
+		 */
+		ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters);
+		update_rs = true;
+	}
+
+	write_lock_bh(&idev->lock);
+
+	if (update_rs)
+		idev->if_flags |= IF_RS_SENT;
+
+	/* Well, that's kinda nasty ... */
+	list_for_each_entry(ifp, &idev->addr_list, if_list) {
+		spin_lock(&ifp->lock);
+		if (ifp->tokenized) {
+			ifp->valid_lft = 0;
+			ifp->prefered_lft = 0;
+		}
+		spin_unlock(&ifp->lock);
+	}
+
+	write_unlock_bh(&idev->lock);
+	return 0;
+}
+
+static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+	int err = -EINVAL;
+	struct inet6_dev *idev = __in6_dev_get(dev);
+	struct nlattr *tb[IFLA_INET6_MAX + 1];
+
+	if (!idev)
+		return -EAFNOSUPPORT;
+
+	if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
+		BUG();
+
+	if (tb[IFLA_INET6_TOKEN])
+		err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+
+	return err;
+}
+
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 			     u32 portid, u32 seq, int event, unsigned int flags)
 {
@@ -4368,6 +4548,8 @@ errout:
 
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 {
+	struct net *net = dev_net(ifp->idev->dev);
+
 	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);
 
 	switch (event) {
@@ -4393,6 +4575,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		dst_free(&ifp->rt->dst);
 		break;
 	}
+	atomic_inc(&net->ipv6.dev_addr_genid);
 }
 
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
@@ -4869,26 +5052,11 @@ static struct pernet_operations addrconf_ops = {
 	.exit = addrconf_exit_net,
 };
 
-/*
- *	Device notifier
- */
-
-int register_inet6addr_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&inet6addr_chain, nb);
-}
-EXPORT_SYMBOL(register_inet6addr_notifier);
-
-int unregister_inet6addr_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
-}
-EXPORT_SYMBOL(unregister_inet6addr_notifier);
-
 static struct rtnl_af_ops inet6_ops = {
 	.family = AF_INET6,
 	.fill_link_af = inet6_fill_link_af,
 	.get_link_af_size = inet6_get_link_af_size,
+	.set_link_af = inet6_set_link_af,
 };
 
 /*
@@ -4961,7 +5129,7 @@ int __init addrconf_init(void)
 	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
 			inet6_dump_ifacaddr, NULL);
 	__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
-			NULL, NULL);
+			inet6_netconf_dump_devconf, NULL);
 
 	ipv6_addr_label_rtnl_register();
 
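
Note: the tokenized-address support reduces to one construction rule: if an administrator configured a token on the interface, its low 64 bits replace the EUI-64-derived interface identifier under every advertised /64 prefix. A minimal sketch of that combination step (the real addrconf_prefix_rcv() additionally takes idev->lock and marks the result via ifp->tokenized):

	/* Sketch: prefix (high 64 bits) + token (low 64 bits) -> address. */
	static void example_addr_from_token(struct in6_addr *addr,
					    const struct in6_addr *prefix,
					    const struct in6_addr *token)
	{
		memcpy(addr->s6_addr, prefix->s6_addr, 8);
		memcpy(addr->s6_addr + 8, token->s6_addr + 8, 8);
	}
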
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index d051e5f4bf34..72104562c864 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -78,3 +78,22 @@ int __ipv6_addr_type(const struct in6_addr *addr)
 }
 EXPORT_SYMBOL(__ipv6_addr_type);
 
+static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
+
+int register_inet6addr_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&inet6addr_chain, nb);
+}
+EXPORT_SYMBOL(register_inet6addr_notifier);
+
+int unregister_inet6addr_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
+}
+EXPORT_SYMBOL(unregister_inet6addr_notifier);
+
+int inet6addr_notifier_call_chain(unsigned long val, void *v)
+{
+	return atomic_notifier_call_chain(&inet6addr_chain, val, v);
+}
+EXPORT_SYMBOL(inet6addr_notifier_call_chain);
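
Note: moving the chain into addrconf_core.c makes the notifier usable from always-built code even when CONFIG_IPV6=m, since addrconf_core.o is linked via obj-y. The public API is unchanged; a minimal consumer sketch:

	/* Sketch: reacting to IPv6 address add/remove events. */
	static int example_inet6addr_event(struct notifier_block *nb,
					   unsigned long event, void *ptr)
	{
		struct inet6_ifaddr *ifa = ptr;

		if (event == NETDEV_UP)
			pr_info("IPv6 address added on %s\n",
				ifa->idev->dev->name);
		return NOTIFY_DONE;
	}

	static struct notifier_block example_nb = {
		.notifier_call = example_inet6addr_event,
	};

	/* register_inet6addr_notifier(&example_nb) at init time,
	 * unregister_inet6addr_notifier(&example_nb) on teardown.
	 */
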
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index aad64352cb60..f083a583a05c 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -414,8 +414,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
 	[IFAL_LABEL] = { .len = sizeof(u32), },
 };
 
-static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
-			     void *arg)
+static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct net *net = sock_net(skb->sk);
 	struct ifaddrlblmsg *ifal;
@@ -436,10 +435,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (!tb[IFAL_ADDRESS])
 		return -EINVAL;
-
 	pfx = nla_data(tb[IFAL_ADDRESS]);
-	if (!pfx)
-		return -EINVAL;
 
 	if (!tb[IFAL_LABEL])
 		return -EINVAL;
@@ -533,8 +529,7 @@ static inline int ip6addrlbl_msgsize(void)
 		+ nla_total_size(4);	/* IFAL_LABEL */
 }
 
-static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
-			  void *arg)
+static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)
 {
 	struct net *net = sock_net(in_skb->sk);
 	struct ifaddrlblmsg *ifal;
@@ -561,10 +556,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 
 	if (!tb[IFAL_ADDRESS])
 		return -EINVAL;
-
 	addr = nla_data(tb[IFAL_ADDRESS]);
-	if (!addr)
-		return -EINVAL;
 
 	rcu_read_lock();
 	p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 6b793bfc0e10..ab5c7ad482cd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -49,7 +49,6 @@
 #include <net/udp.h>
 #include <net/udplite.h>
 #include <net/tcp.h>
-#include <net/ipip.h>
 #include <net/protocol.h>
 #include <net/inet_common.h>
 #include <net/route.h>
@@ -323,7 +322,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		struct net_device *dev = NULL;
 
 		rcu_read_lock();
-		if (addr_type & IPV6_ADDR_LINKLOCAL) {
+		if (__ipv6_addr_needs_scope_id(addr_type)) {
 			if (addr_len >= sizeof(struct sockaddr_in6) &&
 			    addr->sin6_scope_id) {
 				/* Override any existing binding, if another one
@@ -471,8 +470,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 
 		sin->sin6_port = inet->inet_sport;
 	}
-	if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-		sin->sin6_scope_id = sk->sk_bound_dev_if;
+	sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
+						 sk->sk_bound_dev_if);
 	*uaddr_len = sizeof(*sin);
 	return 0;
 }
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f5a54782a340..4b56cbbc7890 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -124,7 +124,7 @@ ipv4_connected:
 			goto out;
 		}
 
-		if (addr_type&IPV6_ADDR_LINKLOCAL) {
+		if (__ipv6_addr_needs_scope_id(addr_type)) {
 			if (addr_len >= sizeof(struct sockaddr_in6) &&
 			    usin->sin6_scope_id) {
 				if (sk->sk_bound_dev_if &&
@@ -355,18 +355,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
 		sin->sin6_port = serr->port;
-		sin->sin6_scope_id = 0;
 		if (skb->protocol == htons(ETH_P_IPV6)) {
 			const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),
 								  struct ipv6hdr, daddr);
 			sin->sin6_addr = ip6h->daddr;
 			if (np->sndflow)
 				sin->sin6_flowinfo = ip6_flowinfo(ip6h);
-			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-				sin->sin6_scope_id = IP6CB(skb)->iif;
+			sin->sin6_scope_id =
+				ipv6_iface_scope_id(&sin->sin6_addr,
+						    IP6CB(skb)->iif);
 		} else {
 			ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),
 					       &sin->sin6_addr);
+			sin->sin6_scope_id = 0;
 		}
 	}
 
@@ -376,18 +377,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 	if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
 		sin->sin6_family = AF_INET6;
 		sin->sin6_flowinfo = 0;
-		sin->sin6_scope_id = 0;
 		if (skb->protocol == htons(ETH_P_IPV6)) {
 			sin->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (np->rxopt.all)
 				ip6_datagram_recv_ctl(sk, msg, skb);
-			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-				sin->sin6_scope_id = IP6CB(skb)->iif;
+			sin->sin6_scope_id =
+				ipv6_iface_scope_id(&sin->sin6_addr,
+						    IP6CB(skb)->iif);
 		} else {
 			struct inet_sock *inet = inet_sk(sk);
 
 			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
 					       &sin->sin6_addr);
+			sin->sin6_scope_id = 0;
 			if (inet->cmsg_flags)
 				ip_cmsg_recv(msg, skb);
 		}
@@ -592,7 +594,9 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
 		sin6.sin6_addr = ipv6_hdr(skb)->daddr;
 		sin6.sin6_port = ports[1];
 		sin6.sin6_flowinfo = 0;
-		sin6.sin6_scope_id = 0;
+		sin6.sin6_scope_id =
+			ipv6_iface_scope_id(&ipv6_hdr(skb)->daddr,
+					    opt->iif);
 
 		put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
 	}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index fff5bdd8b680..b4ff0a42b8c7 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -124,15 +124,6 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk)
 }
 
 /*
- * Slightly more convenient version of icmpv6_send.
- */
-void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
-{
-	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos);
-	kfree_skb(skb);
-}
-
-/*
  * Figure out, may we reply to this packet with icmp error.
  *
  * We do not reply, if:
@@ -332,7 +323,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk
 	 * anycast.
 	 */
 	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n");
+		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");
 		dst_release(dst);
 		return ERR_PTR(-EINVAL);
 	}
@@ -381,7 +372,7 @@ relookup_failed:
 /*
  *	Send an ICMP message in response to a packet in error
  */
-void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 {
 	struct net *net = dev_net(skb->dev);
 	struct inet6_dev *idev = NULL;
@@ -406,7 +397,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	/*
 	 *	Make sure we respect the rules
 	 *	i.e. RFC 1885 2.4(e)
-	 *	Rule (e.1) is enforced by not using icmpv6_send
+	 *	Rule (e.1) is enforced by not using icmp6_send
 	 *	in any code that processes icmp errors.
 	 */
 	addr_type = ipv6_addr_type(&hdr->daddr);
@@ -434,7 +425,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	 *	Source addr check
 	 */
 
-	if (addr_type & IPV6_ADDR_LINKLOCAL)
+	if (__ipv6_addr_needs_scope_id(addr_type))
 		iif = skb->dev->ifindex;
 
 	/*
@@ -444,7 +435,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	 *	and anycast addresses will be checked later.
 	 */
 	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
+		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");
 		return;
 	}
 
@@ -452,7 +443,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 	 *	Never answer to a ICMP packet.
 	 */
 	if (is_ineligible(skb)) {
-		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n");
+		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");
 		return;
 	}
 
@@ -529,7 +520,14 @@ out_dst_release:
 out:
 	icmpv6_xmit_unlock(sk);
 }
-EXPORT_SYMBOL(icmpv6_send);
+
+/* Slightly more convenient version of icmp6_send.
+ */
+void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
+{
+	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos);
+	kfree_skb(skb);
+}
 
 static void icmpv6_echo_reply(struct sk_buff *skb)
 {
@@ -701,7 +699,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		if (__skb_checksum_complete(skb)) {
 			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
 				       saddr, daddr);
-			goto discard_it;
+			goto csum_error;
 		}
 	}
 
@@ -787,6 +785,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	kfree_skb(skb);
 	return 0;
 
+csum_error:
+	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 discard_it:
 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 drop_no_count:
@@ -885,8 +885,14 @@ int __init icmpv6_init(void)
 	err = -EAGAIN;
 	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
 		goto fail;
+
+	err = inet6_register_icmp_sender(icmp6_send);
+	if (err)
+		goto sender_reg_err;
 	return 0;
 
+sender_reg_err:
+	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 fail:
 	pr_err("Failed to register ICMP6 protocol\n");
 	unregister_pernet_subsys(&icmpv6_sk_ops);
@@ -895,6 +901,7 @@ fail:
 
 void icmpv6_cleanup(void)
 {
+	inet6_unregister_icmp_sender(icmp6_send);
 	unregister_pernet_subsys(&icmpv6_sk_ops);
 	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
 }
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 9bfab19ff3c0..e4311cbc8b4e 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -54,6 +54,10 @@ int inet6_csk_bind_conflict(const struct sock *sk,
 				if (ipv6_rcv_saddr_equal(sk, sk2))
 					break;
 			}
+			if (!relax && reuse && sk2->sk_reuse &&
+			    sk2->sk_state != TCP_LISTEN &&
+			    ipv6_rcv_saddr_equal(sk, sk2))
+				break;
 		}
 	}
 
@@ -169,10 +173,8 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
 	sin6->sin6_port = inet_sk(sk)->inet_dport;
 	/* We do not store received flowlabel for TCP */
 	sin6->sin6_flowinfo = 0;
-	sin6->sin6_scope_id = 0;
-	if (sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-		sin6->sin6_scope_id = sk->sk_bound_dev_if;
+	sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
+						  sk->sk_bound_dev_if);
 }
 
 EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b973ed3d06cf..46e88433ec7d 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -144,7 +144,9 @@ static void ip6_fl_gc(unsigned long dummy)
 	spin_lock(&ip6_fl_lock);
 
 	for (i=0; i<=FL_HASH_MASK; i++) {
-		struct ip6_flowlabel *fl, **flp;
+		struct ip6_flowlabel *fl;
+		struct ip6_flowlabel __rcu **flp;
+
 		flp = &fl_ht[i];
 		while ((fl = rcu_dereference_protected(*flp,
 				lockdep_is_held(&ip6_fl_lock))) != NULL) {
@@ -179,7 +181,9 @@ static void __net_exit ip6_fl_purge(struct net *net)
 
 	spin_lock(&ip6_fl_lock);
 	for (i = 0; i <= FL_HASH_MASK; i++) {
-		struct ip6_flowlabel *fl, **flp;
+		struct ip6_flowlabel *fl;
+		struct ip6_flowlabel __rcu **flp;
+
 		flp = &fl_ht[i];
 		while ((fl = rcu_dereference_protected(*flp,
 				lockdep_is_held(&ip6_fl_lock))) != NULL) {
@@ -506,7 +510,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_flowlabel_req freq;
 	struct ipv6_fl_socklist *sfl1=NULL;
-	struct ipv6_fl_socklist *sfl, **sflp;
+	struct ipv6_fl_socklist *sfl;
+	struct ipv6_fl_socklist __rcu **sflp;
 	struct ip6_flowlabel *fl, *fl1 = NULL;
 
 
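
Note: these hunks change no behaviour; they only add the __rcu sparse annotation that rcu_dereference_protected() expects when a list is walked through a double pointer. The general pattern (a sketch):

	/* Sketch: walking an RCU-protected singly linked list through an
	 * annotated double pointer; the caller must hold head_lock, which
	 * is what licenses rcu_dereference_protected() here.
	 */
	struct item {
		struct item __rcu *next;
	};

	static struct item __rcu *head;
	static DEFINE_SPINLOCK(head_lock);

	static void example_walk_locked(void)
	{
		struct item __rcu **pp = &head;
		struct item *p;

		while ((p = rcu_dereference_protected(*pp,
				lockdep_is_held(&head_lock))) != NULL)
			pp = &p->next;
	}
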
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index e4efffe2522e..d3ddd8400354 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -38,6 +38,7 @@
 
 #include <net/sock.h>
 #include <net/ip.h>
+#include <net/ip_tunnels.h>
 #include <net/icmp.h>
 #include <net/protocol.h>
 #include <net/addrconf.h>
@@ -110,46 +111,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr)
 #define tunnels_l	tunnels[1]
 #define tunnels_wc	tunnels[0]
 
-static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev,
-		struct rtnl_link_stats64 *tot)
-{
-	int i;
-
-	for_each_possible_cpu(i) {
-		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
-		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
-		unsigned int start;
-
-		do {
-			start = u64_stats_fetch_begin_bh(&tstats->syncp);
-			rx_packets = tstats->rx_packets;
-			tx_packets = tstats->tx_packets;
-			rx_bytes = tstats->rx_bytes;
-			tx_bytes = tstats->tx_bytes;
-		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
-		tot->rx_packets += rx_packets;
-		tot->tx_packets += tx_packets;
-		tot->rx_bytes += rx_bytes;
-		tot->tx_bytes += tx_bytes;
-	}
-
-	tot->multicast = dev->stats.multicast;
-	tot->rx_crc_errors = dev->stats.rx_crc_errors;
-	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
-	tot->rx_length_errors = dev->stats.rx_length_errors;
-	tot->rx_frame_errors = dev->stats.rx_frame_errors;
-	tot->rx_errors = dev->stats.rx_errors;
-
-	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
-	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
-	tot->tx_dropped = dev->stats.tx_dropped;
-	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
-	tot->tx_errors = dev->stats.tx_errors;
-
-	return tot;
-}
-
 /* Given src, dst and key, find appropriate for input tunnel. */
 
 static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
@@ -667,7 +628,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 	struct net_device_stats *stats = &tunnel->dev->stats;
 	int err = -1;
 	u8 proto;
-	int pkt_len;
 	struct sk_buff *new_skb;
 
 	if (dev->type == ARPHRD_ETHER)
@@ -801,23 +761,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
 		}
 	}
 
-	nf_reset(skb);
-	pkt_len = skb->len;
-	err = ip6_local_out(skb);
-
-	if (net_xmit_eval(err) == 0) {
-		struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats);
-
-		tstats->tx_bytes += pkt_len;
-		tstats->tx_packets++;
-	} else {
-		stats->tx_errors++;
-		stats->tx_aborted_errors++;
-	}
-
+	ip6tunnel_xmit(skb, dev);
 	if (ndst)
 		ip6_tnl_dst_store(tunnel, ndst);
-
 	return 0;
 tx_err_link_failure:
 	stats->tx_carrier_errors++;
@@ -1271,7 +1217,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
 	.ndo_start_xmit = ip6gre_tunnel_xmit,
 	.ndo_do_ioctl = ip6gre_tunnel_ioctl,
 	.ndo_change_mtu = ip6gre_tunnel_change_mtu,
-	.ndo_get_stats64 = ip6gre_get_stats64,
+	.ndo_get_stats64 = ip_tunnel_get_stats64,
 };
 
 static void ip6gre_dev_free(struct net_device *dev)
@@ -1520,7 +1466,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
 	.ndo_set_mac_address = eth_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_change_mtu = ip6gre_tunnel_change_mtu,
-	.ndo_get_stats64 = ip6gre_get_stats64,
+	.ndo_get_stats64 = ip_tunnel_get_stats64,
 };
 
 static void ip6gre_tap_setup(struct net_device *dev)
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
new file mode 100644
index 000000000000..4578e23834f7
--- /dev/null
+++ b/net/ipv6/ip6_icmp.c
@@ -0,0 +1,47 @@
+#include <linux/export.h>
+#include <linux/icmpv6.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+
+#include <net/ipv6.h>
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static ip6_icmp_send_t __rcu *ip6_icmp_send;
+
+int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
+{
+	return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ?
+		0 : -EBUSY;
+}
+EXPORT_SYMBOL(inet6_register_icmp_sender);
+
+int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
+{
+	int ret;
+
+	ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ?
+	      0 : -EINVAL;
+
+	synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL(inet6_unregister_icmp_sender);
+
+void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+{
+	ip6_icmp_send_t *send;
+
+	rcu_read_lock();
+	send = rcu_dereference(ip6_icmp_send);
+
+	if (!send)
+		goto out;
+	send(skb, type, code, info);
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(icmpv6_send);
+#endif
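
Note: this indirection lets icmpv6_send() live in always-built code while the real sender stays with the ipv6 module; the icmp.c hunks above register icmp6_send through it at init time. From a sender's point of view the pattern is simply:

	/* Sketch: registering an ICMPv6 sender, mirroring the
	 * icmpv6_init()/icmpv6_cleanup() hunks above.
	 */
	static void example_send(struct sk_buff *skb, u8 type, u8 code,
				 __u32 info)
	{
		/* build and transmit the ICMPv6 error for skb */
	}

	static int __init example_init(void)
	{
		return inet6_register_icmp_sender(example_send);
	}

	static void __exit example_exit(void)
	{
		inet6_unregister_icmp_sender(example_send);
	}
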
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 8234c1dcdf72..71b766ee821d 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -92,14 +92,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 	u8 *prevhdr;
 	int offset = 0;
 
-	if (!(features & NETIF_F_V6_CSUM))
-		features &= ~NETIF_F_SG;
-
 	if (unlikely(skb_shinfo(skb)->gso_type &
 		     ~(SKB_GSO_UDP |
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_UDP_TUNNEL |
 		       SKB_GSO_TCPV6 |
 		       0)))
 		goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 155eccfa7760..d2eedf192330 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1224,11 +1224,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	}
 
 	/* For UDP, check if TX timestamp is enabled */
-	if (sk->sk_type == SOCK_DGRAM) {
-		err = sock_tx_timestamp(sk, &tx_flags);
-		if (err)
-			goto error;
-	}
+	if (sk->sk_type == SOCK_DGRAM)
+		sock_tx_timestamp(sk, &tx_flags);
 
 	/*
 	 * Let's try using as much space as possible.
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index fff83cbc197f..1e55866cead7 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -47,6 +47,7 @@
 
 #include <net/icmp.h>
 #include <net/ip.h>
+#include <net/ip_tunnels.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
@@ -955,7 +956,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	unsigned int max_headroom = sizeof(struct ipv6hdr);
 	u8 proto;
 	int err = -1;
-	int pkt_len;
 
 	if (!fl6->flowi6_mark)
 		dst = ip6_tnl_dst_check(t);
@@ -1035,19 +1035,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	ipv6h->nexthdr = proto;
 	ipv6h->saddr = fl6->saddr;
 	ipv6h->daddr = fl6->daddr;
-	nf_reset(skb);
-	pkt_len = skb->len;
-	err = ip6_local_out(skb);
-
-	if (net_xmit_eval(err) == 0) {
-		struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats);
-
-		tstats->tx_bytes += pkt_len;
-		tstats->tx_packets++;
-	} else {
-		stats->tx_errors++;
-		stats->tx_aborted_errors++;
-	}
+	ip6tunnel_xmit(skb, dev);
 	if (ndst)
 		ip6_tnl_dst_store(t, ndst);
 	return 0;
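
Note: both IPv6 tunnel drivers now defer to ip6tunnel_xmit() from the new <net/ip_tunnels.h>. Judging by the code it replaces in these two files, the helper presumably folds the nf_reset()/ip6_local_out()/per-cpu stats sequence into one place, roughly:

	/* Assumed shape of the shared helper, reconstructed from the removed
	 * driver code above; see include/net/ip_tunnels.h for the real one.
	 */
	static inline void example_ip6tunnel_xmit(struct sk_buff *skb,
						  struct net_device *dev)
	{
		int pkt_len, err;

		nf_reset(skb);
		pkt_len = skb->len;
		err = ip6_local_out(skb);

		if (net_xmit_eval(err) == 0) {
			struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);

			tstats->tx_bytes += pkt_len;
			tstats->tx_packets++;
		} else {
			dev->stats.tx_errors++;
			dev->stats.tx_aborted_errors++;
		}
	}
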
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 96bfb4e4b820..241fb8ad9fcf 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -842,9 +842,9 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 		if (ipv6_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 			nlh->nlmsg_type = NLMSG_ERROR;
-			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 			skb_trim(skb, nlh->nlmsg_len);
-			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
+			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 		} else
 			kfree_skb(skb);
@@ -1100,13 +1100,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 		if (ipv6_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 
-			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
+			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
 			} else {
 				nlh->nlmsg_type = NLMSG_ERROR;
-				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
+				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
 				skb_trim(skb, nlh->nlmsg_len);
-				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
+				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
 			}
 			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
 		} else
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 76ef4353d518..2712ab22a174 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -610,8 +610,6 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
 		}
 	}
 #endif
-	if (!dev->addr_len)
-		send_sllao = 0;
 	if (send_sllao)
 		optlen += ndisc_opt_addr_space(dev);
 
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 429089cb073d..72836f40b730 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -1,3 +1,9 @@
1/*
2 * IPv6 specific functions of netfilter core
3 *
4 * Rusty Russell (C) 2000 -- This code is GPL.
5 * Patrick McHardy (C) 2006-2012
6 */
1#include <linux/kernel.h> 7#include <linux/kernel.h>
2#include <linux/init.h> 8#include <linux/init.h>
3#include <linux/ipv6.h> 9#include <linux/ipv6.h>
@@ -29,7 +35,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
29 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 35 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
30 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); 36 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
31 dst_release(dst); 37 dst_release(dst);
32 return -EINVAL; 38 return dst->error;
33 } 39 }
34 40
35 /* Drop old route. */ 41 /* Drop old route. */
@@ -43,7 +49,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
43 skb_dst_set(skb, NULL); 49 skb_dst_set(skb, NULL);
44 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0); 50 dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
45 if (IS_ERR(dst)) 51 if (IS_ERR(dst))
46 return -1; 52 return PTR_ERR(dst);
47 skb_dst_set(skb, dst); 53 skb_dst_set(skb, dst);
48 } 54 }
49#endif 55#endif
@@ -53,7 +59,7 @@ int ip6_route_me_harder(struct sk_buff *skb)
53 if (skb_headroom(skb) < hh_len && 59 if (skb_headroom(skb) < hh_len &&
54 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 60 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
55 0, GFP_ATOMIC)) 61 0, GFP_ATOMIC))
56 return -1; 62 return -ENOMEM;
57 63
58 return 0; 64 return 0;
59} 65}
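
ip6_route_me_harder() now reports why it failed (-ENOMEM, the route's own error, or the xfrm lookup error) instead of a bare -1. Callers can then encode the errno into the netfilter verdict; in this era of the tree the wrapper is defined roughly as:

    /* Pack a negative errno into the upper bits of an NF_DROP verdict: */
    #define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)

    err = ip6_route_me_harder(skb);
    if (err < 0)
            ret = NF_DROP_ERR(err);         /* drop, but preserve the cause */

so the stack can propagate a meaningful error (e.g. back to the sending socket) rather than a generic drop. The mangle and NAT hunks below adopt the same pattern.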
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index c72532a60d88..4433ab40e7de 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -105,7 +105,7 @@ config IP6_NF_MATCH_MH
105 105
106config IP6_NF_MATCH_RPFILTER 106config IP6_NF_MATCH_RPFILTER
107 tristate '"rpfilter" reverse path filter match support' 107 tristate '"rpfilter" reverse path filter match support'
108 depends on NETFILTER_ADVANCED 108 depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW)
109 ---help--- 109 ---help---
110 This option allows you to match packets whose replies would 110 This option allows you to match packets whose replies would
111 go out via the interface the packet came in. 111 go out via the interface the packet came in.
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 341b54ade72c..44400c216dc6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling 4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> 5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6 * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
@@ -284,6 +285,7 @@ static void trace_packet(const struct sk_buff *skb,
284 const char *hookname, *chainname, *comment; 285 const char *hookname, *chainname, *comment;
285 const struct ip6t_entry *iter; 286 const struct ip6t_entry *iter;
286 unsigned int rulenum = 0; 287 unsigned int rulenum = 0;
288 struct net *net = dev_net(in ? in : out);
287 289
288 table_base = private->entries[smp_processor_id()]; 290 table_base = private->entries[smp_processor_id()];
289 root = get_entry(table_base, private->hook_entry[hook]); 291 root = get_entry(table_base, private->hook_entry[hook]);
@@ -296,7 +298,7 @@ static void trace_packet(const struct sk_buff *skb,
296 &chainname, &comment, &rulenum) != 0) 298 &chainname, &comment, &rulenum) != 0)
297 break; 299 break;
298 300
299 nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, 301 nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
300 "TRACE: %s:%s:%s:%u ", 302 "TRACE: %s:%s:%s:%u ",
301 tablename, chainname, comment, rulenum); 303 tablename, chainname, comment, rulenum);
302} 304}
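
nf_log_packet() grows a struct net * argument so logging is resolved per network namespace; trace_packet() derives it from whichever device is available (dev_net(in ? in : out)). The post-change prototype, for reference (see net/netfilter/nf_log.c for the authoritative signature):

    void nf_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum,
                       const struct sk_buff *skb,
                       const struct net_device *in,
                       const struct net_device *out,
                       const struct nf_loginfo *li,
                       const char *fmt, ...);

The icmpv6 conntrack hunks further down are the same mechanical conversion.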
diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c
index cb631143721c..590f767db5d4 100644
--- a/net/ipv6/netfilter/ip6t_NPT.c
+++ b/net/ipv6/netfilter/ip6t_NPT.c
@@ -18,9 +18,8 @@
18static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) 18static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
19{ 19{
20 struct ip6t_npt_tginfo *npt = par->targinfo; 20 struct ip6t_npt_tginfo *npt = par->targinfo;
21 __wsum src_sum = 0, dst_sum = 0;
22 struct in6_addr pfx; 21 struct in6_addr pfx;
23 unsigned int i; 22 __wsum src_sum, dst_sum;
24 23
25 if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) 24 if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
26 return -EINVAL; 25 return -EINVAL;
@@ -33,12 +32,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
33 if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) 32 if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
34 return -EINVAL; 33 return -EINVAL;
35 34
36 for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { 35 src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0);
37 src_sum = csum_add(src_sum, 36 dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0);
38 (__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
39 dst_sum = csum_add(dst_sum,
40 (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
41 }
42 37
43 npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); 38 npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
44 return 0; 39 return 0;
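
The hand-rolled loop summed the eight 16-bit words of each prefix with csum_add(); csum_partial() over the whole in6_addr produces the same ones'-complement accumulation, because the Internet checksum is associative over any word split. A sketch of the equivalence:

    /* Both of these fold to the same 16-bit checksum: */
    __wsum a = csum_partial(&npt->src_pfx.in6, sizeof(struct in6_addr), 0);

    __wsum b = 0;
    unsigned int i;
    for (i = 0; i < 8; i++)
            b = csum_add(b, (__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
    /* csum_fold(a) == csum_fold(b) */

The precomputed npt->adjustment is then the per-packet checksum delta applied when the prefix is rewritten.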
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index ed3b427b2841..70f9abc0efe9 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -7,6 +7,8 @@
7 * Authors: 7 * Authors:
8 * Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp> 8 * Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
9 * 9 *
10 * Copyright (c) 2005-2007 Patrick McHardy <kaber@trash.net>
11 *
10 * Based on net/ipv4/netfilter/ipt_REJECT.c 12 * Based on net/ipv4/netfilter/ipt_REJECT.c
11 * 13 *
12 * This program is free software; you can redistribute it and/or 14 * This program is free software; you can redistribute it and/or
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 5060d54199ab..e0983f3648a6 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -71,6 +71,12 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
71 return ret; 71 return ret;
72} 72}
73 73
74static bool rpfilter_is_local(const struct sk_buff *skb)
75{
76 const struct rt6_info *rt = (const void *) skb_dst(skb);
77 return rt && (rt->rt6i_flags & RTF_LOCAL);
78}
79
74static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) 80static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
75{ 81{
76 const struct xt_rpfilter_info *info = par->matchinfo; 82 const struct xt_rpfilter_info *info = par->matchinfo;
@@ -78,7 +84,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
78 struct ipv6hdr *iph; 84 struct ipv6hdr *iph;
79 bool invert = info->flags & XT_RPFILTER_INVERT; 85 bool invert = info->flags & XT_RPFILTER_INVERT;
80 86
81 if (par->in->flags & IFF_LOOPBACK) 87 if (rpfilter_is_local(skb))
82 return true ^ invert; 88 return true ^ invert;
83 89
84 iph = ipv6_hdr(skb); 90 iph = ipv6_hdr(skb);
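
Matching IFF_LOOPBACK on the input device only caught packets arriving on lo; checking RTF_LOCAL on the packet's own route also covers locally addressed packets that arrive on other interfaces. The verdict idiom is the usual polarity XOR:

    if (rpfilter_is_local(skb))
            return true ^ invert;   /* match, unless the rule is inverted */

i.e. local traffic matches a plain rule and is excluded by an --invert rule.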
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 6134a1ebfb1b..e075399d8b72 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -38,7 +38,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
38 struct in6_addr saddr, daddr; 38 struct in6_addr saddr, daddr;
39 u_int8_t hop_limit; 39 u_int8_t hop_limit;
40 u_int32_t flowlabel, mark; 40 u_int32_t flowlabel, mark;
41 41 int err;
42#if 0 42#if 0
43 /* root is playing with raw sockets. */ 43 /* root is playing with raw sockets. */
44 if (skb->len < sizeof(struct iphdr) || 44 if (skb->len < sizeof(struct iphdr) ||
@@ -65,8 +65,11 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
65 !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || 65 !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||
66 skb->mark != mark || 66 skb->mark != mark ||
67 ipv6_hdr(skb)->hop_limit != hop_limit || 67 ipv6_hdr(skb)->hop_limit != hop_limit ||
68 flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) 68 flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
69 return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; 69 err = ip6_route_me_harder(skb);
70 if (err < 0)
71 ret = NF_DROP_ERR(err);
72 }
70 73
71 return ret; 74 return ret;
72} 75}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index e0e788d25b14..6383f90efda8 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -179,6 +179,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
179#ifdef CONFIG_XFRM 179#ifdef CONFIG_XFRM
180 const struct nf_conn *ct; 180 const struct nf_conn *ct;
181 enum ip_conntrack_info ctinfo; 181 enum ip_conntrack_info ctinfo;
182 int err;
182#endif 183#endif
183 unsigned int ret; 184 unsigned int ret;
184 185
@@ -197,9 +198,11 @@ nf_nat_ipv6_out(unsigned int hooknum,
197 &ct->tuplehash[!dir].tuple.dst.u3) || 198 &ct->tuplehash[!dir].tuple.dst.u3) ||
198 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && 199 (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
199 ct->tuplehash[dir].tuple.src.u.all != 200 ct->tuplehash[dir].tuple.src.u.all !=
200 ct->tuplehash[!dir].tuple.dst.u.all)) 201 ct->tuplehash[!dir].tuple.dst.u.all)) {
201 if (nf_xfrm_me_harder(skb, AF_INET6) < 0) 202 err = nf_xfrm_me_harder(skb, AF_INET6);
202 ret = NF_DROP; 203 if (err < 0)
204 ret = NF_DROP_ERR(err);
205 }
203 } 206 }
204#endif 207#endif
205 return ret; 208 return ret;
@@ -215,6 +218,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
215 const struct nf_conn *ct; 218 const struct nf_conn *ct;
216 enum ip_conntrack_info ctinfo; 219 enum ip_conntrack_info ctinfo;
217 unsigned int ret; 220 unsigned int ret;
221 int err;
218 222
219 /* root is playing with raw sockets. */ 223 /* root is playing with raw sockets. */
220 if (skb->len < sizeof(struct ipv6hdr)) 224 if (skb->len < sizeof(struct ipv6hdr))
@@ -227,16 +231,19 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
227 231
228 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, 232 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
229 &ct->tuplehash[!dir].tuple.src.u3)) { 233 &ct->tuplehash[!dir].tuple.src.u3)) {
230 if (ip6_route_me_harder(skb)) 234 err = ip6_route_me_harder(skb);
231 ret = NF_DROP; 235 if (err < 0)
236 ret = NF_DROP_ERR(err);
232 } 237 }
233#ifdef CONFIG_XFRM 238#ifdef CONFIG_XFRM
234 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && 239 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
235 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && 240 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
236 ct->tuplehash[dir].tuple.dst.u.all != 241 ct->tuplehash[dir].tuple.dst.u.all !=
237 ct->tuplehash[!dir].tuple.src.u.all) 242 ct->tuplehash[!dir].tuple.src.u.all) {
238 if (nf_xfrm_me_harder(skb, AF_INET6)) 243 err = nf_xfrm_me_harder(skb, AF_INET6);
239 ret = NF_DROP; 244 if (err < 0)
245 ret = NF_DROP_ERR(err);
246 }
240#endif 247#endif
241 } 248 }
242 return ret; 249 return ret;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 2b6c226f5198..97bcf2bae857 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -330,12 +330,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
330 sizeof(sin6.sin6_addr)); 330 sizeof(sin6.sin6_addr));
331 331
332 nf_ct_put(ct); 332 nf_ct_put(ct);
333 333 sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr,
334 if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) 334 sk->sk_bound_dev_if);
335 sin6.sin6_scope_id = sk->sk_bound_dev_if;
336 else
337 sin6.sin6_scope_id = 0;
338
339 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; 335 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
340} 336}
341 337
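
The open-coded link-local test is replaced by the new ipv6_iface_scope_id() helper, which the raw and UDP receive paths below also adopt. A sketch of what it plausibly does (the in-tree version lives with the addrconf core; treat the body as an assumption):

    u32 ipv6_iface_scope_id(const struct in6_addr *addr, int iface)
    {
            /* Return the interface index only for addresses whose scope
             * is interface-bound (link-local, and similarly scoped
             * multicast), else 0. */
            return __ipv6_addr_needs_scope_id(__ipv6_addr_type(addr)) ?
                   iface : 0;
    }

Centralizing the test matters because __ipv6_addr_needs_scope_id() covers more than the old IPV6_ADDR_LINKLOCAL check, so every call site picks up the wider definition at once.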
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 24df3dde0076..b3807c5cb888 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
131 type + 128); 131 type + 128);
132 nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); 132 nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
133 if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) 133 if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
134 nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, 134 nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL,
135 NULL, NULL,
135 "nf_ct_icmpv6: invalid new with type %d ", 136 "nf_ct_icmpv6: invalid new with type %d ",
136 type + 128); 137 type + 128);
137 return false; 138 return false;
@@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
203 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); 204 icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
204 if (icmp6h == NULL) { 205 if (icmp6h == NULL) {
205 if (LOG_INVALID(net, IPPROTO_ICMPV6)) 206 if (LOG_INVALID(net, IPPROTO_ICMPV6))
206 nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, 207 nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
207 "nf_ct_icmpv6: short packet "); 208 "nf_ct_icmpv6: short packet ");
208 return -NF_ACCEPT; 209 return -NF_ACCEPT;
209 } 210 }
@@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
211 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 212 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
212 nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { 213 nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {
213 if (LOG_INVALID(net, IPPROTO_ICMPV6)) 214 if (LOG_INVALID(net, IPPROTO_ICMPV6))
214 nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, 215 nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,
215 "nf_ct_icmpv6: ICMPv6 checksum failed "); 216 "nf_ct_icmpv6: ICMPv6 checksum failed ");
216 return -NF_ACCEPT; 217 return -NF_ACCEPT;
217 } 218 }
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6700069949dd..dffdc1a389c5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -41,6 +41,7 @@
41#include <net/rawv6.h> 41#include <net/rawv6.h>
42#include <net/ndisc.h> 42#include <net/ndisc.h>
43#include <net/addrconf.h> 43#include <net/addrconf.h>
44#include <net/inet_ecn.h>
44#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 45#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
45#include <linux/sysctl.h> 46#include <linux/sysctl.h>
46#include <linux/netfilter.h> 47#include <linux/netfilter.h>
@@ -138,6 +139,11 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
138} 139}
139#endif 140#endif
140 141
142static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
143{
144 return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
145}
146
141static unsigned int nf_hashfn(struct inet_frag_queue *q) 147static unsigned int nf_hashfn(struct inet_frag_queue *q)
142{ 148{
143 const struct frag_queue *nq; 149 const struct frag_queue *nq;
@@ -166,7 +172,7 @@ static void nf_ct_frag6_expire(unsigned long data)
166/* Creation primitives. */ 172/* Creation primitives. */
167static inline struct frag_queue *fq_find(struct net *net, __be32 id, 173static inline struct frag_queue *fq_find(struct net *net, __be32 id,
168 u32 user, struct in6_addr *src, 174 u32 user, struct in6_addr *src,
169 struct in6_addr *dst) 175 struct in6_addr *dst, u8 ecn)
170{ 176{
171 struct inet_frag_queue *q; 177 struct inet_frag_queue *q;
172 struct ip6_create_arg arg; 178 struct ip6_create_arg arg;
@@ -176,6 +182,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
176 arg.user = user; 182 arg.user = user;
177 arg.src = src; 183 arg.src = src;
178 arg.dst = dst; 184 arg.dst = dst;
185 arg.ecn = ecn;
179 186
180 read_lock_bh(&nf_frags.lock); 187 read_lock_bh(&nf_frags.lock);
181 hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); 188 hash = inet6_hash_frag(id, src, dst, nf_frags.rnd);
@@ -196,6 +203,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
196 struct sk_buff *prev, *next; 203 struct sk_buff *prev, *next;
197 unsigned int payload_len; 204 unsigned int payload_len;
198 int offset, end; 205 int offset, end;
206 u8 ecn;
199 207
200 if (fq->q.last_in & INET_FRAG_COMPLETE) { 208 if (fq->q.last_in & INET_FRAG_COMPLETE) {
201 pr_debug("Already completed\n"); 209 pr_debug("Already completed\n");
@@ -213,6 +221,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
213 return -1; 221 return -1;
214 } 222 }
215 223
224 ecn = ip6_frag_ecn(ipv6_hdr(skb));
225
216 if (skb->ip_summed == CHECKSUM_COMPLETE) { 226 if (skb->ip_summed == CHECKSUM_COMPLETE) {
217 const unsigned char *nh = skb_network_header(skb); 227 const unsigned char *nh = skb_network_header(skb);
218 skb->csum = csum_sub(skb->csum, 228 skb->csum = csum_sub(skb->csum,
@@ -317,6 +327,7 @@ found:
317 } 327 }
318 fq->q.stamp = skb->tstamp; 328 fq->q.stamp = skb->tstamp;
319 fq->q.meat += skb->len; 329 fq->q.meat += skb->len;
330 fq->ecn |= ecn;
320 if (payload_len > fq->q.max_size) 331 if (payload_len > fq->q.max_size)
321 fq->q.max_size = payload_len; 332 fq->q.max_size = payload_len;
322 add_frag_mem_limit(&fq->q, skb->truesize); 333 add_frag_mem_limit(&fq->q, skb->truesize);
@@ -352,12 +363,17 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
352{ 363{
353 struct sk_buff *fp, *op, *head = fq->q.fragments; 364 struct sk_buff *fp, *op, *head = fq->q.fragments;
354 int payload_len; 365 int payload_len;
366 u8 ecn;
355 367
356 inet_frag_kill(&fq->q, &nf_frags); 368 inet_frag_kill(&fq->q, &nf_frags);
357 369
358 WARN_ON(head == NULL); 370 WARN_ON(head == NULL);
359 WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); 371 WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
360 372
373 ecn = ip_frag_ecn_table[fq->ecn];
374 if (unlikely(ecn == 0xff))
375 goto out_fail;
376
361 /* Unfragmented part is taken from the first segment. */ 377 /* Unfragmented part is taken from the first segment. */
362 payload_len = ((head->data - skb_network_header(head)) - 378 payload_len = ((head->data - skb_network_header(head)) -
363 sizeof(struct ipv6hdr) + fq->q.len - 379 sizeof(struct ipv6hdr) + fq->q.len -
@@ -428,6 +444,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
428 head->dev = dev; 444 head->dev = dev;
429 head->tstamp = fq->q.stamp; 445 head->tstamp = fq->q.stamp;
430 ipv6_hdr(head)->payload_len = htons(payload_len); 446 ipv6_hdr(head)->payload_len = htons(payload_len);
447 ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
431 IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; 448 IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
432 449
433 /* Yes, and fold redundant checksum back. 8) */ 450 /* Yes, and fold redundant checksum back. 8) */
@@ -572,7 +589,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
572 inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); 589 inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
573 local_bh_enable(); 590 local_bh_enable();
574 591
575 fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); 592 fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
593 ip6_frag_ecn(hdr));
576 if (fq == NULL) { 594 if (fq == NULL) {
577 pr_debug("Can't find and can't create new queue\n"); 595 pr_debug("Can't find and can't create new queue\n");
578 goto ret_orig; 596 goto ret_orig;
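
The new ECN plumbing records, per fragment queue, the set of ECN codepoints seen. ip6_frag_ecn() one-hot encodes the 2-bit field so that OR-ing across fragments accumulates a 4-bit set, and a shared lookup table decides the reassembled header's ECN value:

    /* Not-ECT -> 0x01, ECT(1) -> 0x02, ECT(0) -> 0x04, CE -> 0x08 */
    ecn = 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
    fq->ecn |= ecn;                         /* per-fragment accumulation */

    /* At reassembly time: */
    u8 out = ip_frag_ecn_table[fq->ecn];    /* 0xff == forbidden mix */
    if (unlikely(out == 0xff))
            goto out_fail;                  /* e.g. Not-ECT mixed with CE */
    ipv6_change_dsfield(ipv6_hdr(head), 0xff, out);

Roughly, mixing Not-ECT fragments with ECN-capable ones is invalid and drops the datagram, while CE mixed with ECT propagates CE, per the RFC 3168 reassembly rules. net/ipv6/reassembly.c below gets the identical treatment.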
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index bbbe53a99b57..f3c1ff4357ff 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -90,6 +90,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
90 SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS), 90 SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
91 SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS), 91 SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
92 SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), 92 SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
93 SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),
93 SNMP_MIB_SENTINEL 94 SNMP_MIB_SENTINEL
94}; 95};
95 96
@@ -99,6 +100,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
99 SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), 100 SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
100 SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), 101 SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
101 SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS), 102 SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
103 SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),
102 SNMP_MIB_SENTINEL 104 SNMP_MIB_SENTINEL
103}; 105};
104 106
@@ -129,6 +131,7 @@ static const struct snmp_mib snmp6_udp6_list[] = {
129 SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS), 131 SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
130 SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS), 132 SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
131 SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), 133 SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
134 SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),
132 SNMP_MIB_SENTINEL 135 SNMP_MIB_SENTINEL
133}; 136};
134 137
@@ -139,6 +142,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
139 SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), 142 SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
140 SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS), 143 SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
141 SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS), 144 SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
145 SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS),
142 SNMP_MIB_SENTINEL 146 SNMP_MIB_SENTINEL
143}; 147};
144 148
@@ -247,7 +251,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)
247 251
248static int snmp6_dev_seq_open(struct inode *inode, struct file *file) 252static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
249{ 253{
250 return single_open(file, snmp6_dev_seq_show, PDE(inode)->data); 254 return single_open(file, snmp6_dev_seq_show, PDE_DATA(inode));
251} 255}
252 256
253static const struct file_operations snmp6_dev_seq_fops = { 257static const struct file_operations snmp6_dev_seq_fops = {
@@ -287,8 +291,7 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
287 return -ENOENT; 291 return -ENOENT;
288 if (!idev->stats.proc_dir_entry) 292 if (!idev->stats.proc_dir_entry)
289 return -EINVAL; 293 return -EINVAL;
290 remove_proc_entry(idev->stats.proc_dir_entry->name, 294 proc_remove(idev->stats.proc_dir_entry);
291 net->mib.proc_net_devsnmp6);
292 idev->stats.proc_dir_entry = NULL; 295 idev->stats.proc_dir_entry = NULL;
293 return 0; 296 return 0;
294} 297}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 330b5e7b7df6..eedff8ccded5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -263,7 +263,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
263 if (addr_type != IPV6_ADDR_ANY) { 263 if (addr_type != IPV6_ADDR_ANY) {
264 struct net_device *dev = NULL; 264 struct net_device *dev = NULL;
265 265
266 if (addr_type & IPV6_ADDR_LINKLOCAL) { 266 if (__ipv6_addr_needs_scope_id(addr_type)) {
267 if (addr_len >= sizeof(struct sockaddr_in6) && 267 if (addr_len >= sizeof(struct sockaddr_in6) &&
268 addr->sin6_scope_id) { 268 addr->sin6_scope_id) {
269 /* Override any existing binding, if another 269 /* Override any existing binding, if another
@@ -498,9 +498,8 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
498 sin6->sin6_port = 0; 498 sin6->sin6_port = 0;
499 sin6->sin6_addr = ipv6_hdr(skb)->saddr; 499 sin6->sin6_addr = ipv6_hdr(skb)->saddr;
500 sin6->sin6_flowinfo = 0; 500 sin6->sin6_flowinfo = 0;
501 sin6->sin6_scope_id = 0; 501 sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
502 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 502 IP6CB(skb)->iif);
503 sin6->sin6_scope_id = IP6CB(skb)->iif;
504 } 503 }
505 504
506 sock_recv_ts_and_drops(msg, sk, skb); 505 sock_recv_ts_and_drops(msg, sk, skb);
@@ -802,7 +801,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
802 801
803 if (addr_len >= sizeof(struct sockaddr_in6) && 802 if (addr_len >= sizeof(struct sockaddr_in6) &&
804 sin6->sin6_scope_id && 803 sin6->sin6_scope_id &&
805 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) 804 __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
806 fl6.flowi6_oif = sin6->sin6_scope_id; 805 fl6.flowi6_oif = sin6->sin6_scope_id;
807 } else { 806 } else {
808 if (sk->sk_state != TCP_ESTABLISHED) 807 if (sk->sk_state != TCP_ESTABLISHED)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 196ab9347ad1..790d9f4b8b0b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -58,6 +58,7 @@
58#include <net/ndisc.h> 58#include <net/ndisc.h>
59#include <net/addrconf.h> 59#include <net/addrconf.h>
60#include <net/inet_frag.h> 60#include <net/inet_frag.h>
61#include <net/inet_ecn.h>
61 62
62struct ip6frag_skb_cb 63struct ip6frag_skb_cb
63{ 64{
@@ -67,6 +68,10 @@ struct ip6frag_skb_cb
67 68
68#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) 69#define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb))
69 70
71static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
72{
73 return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
74}
70 75
71static struct inet_frags ip6_frags; 76static struct inet_frags ip6_frags;
72 77
@@ -119,6 +124,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
119 fq->user = arg->user; 124 fq->user = arg->user;
120 fq->saddr = *arg->src; 125 fq->saddr = *arg->src;
121 fq->daddr = *arg->dst; 126 fq->daddr = *arg->dst;
127 fq->ecn = arg->ecn;
122} 128}
123EXPORT_SYMBOL(ip6_frag_init); 129EXPORT_SYMBOL(ip6_frag_init);
124 130
@@ -173,7 +179,8 @@ static void ip6_frag_expire(unsigned long data)
173} 179}
174 180
175static __inline__ struct frag_queue * 181static __inline__ struct frag_queue *
176fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst) 182fq_find(struct net *net, __be32 id, const struct in6_addr *src,
183 const struct in6_addr *dst, u8 ecn)
177{ 184{
178 struct inet_frag_queue *q; 185 struct inet_frag_queue *q;
179 struct ip6_create_arg arg; 186 struct ip6_create_arg arg;
@@ -183,6 +190,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6
183 arg.user = IP6_DEFRAG_LOCAL_DELIVER; 190 arg.user = IP6_DEFRAG_LOCAL_DELIVER;
184 arg.src = src; 191 arg.src = src;
185 arg.dst = dst; 192 arg.dst = dst;
193 arg.ecn = ecn;
186 194
187 read_lock(&ip6_frags.lock); 195 read_lock(&ip6_frags.lock);
188 hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); 196 hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd);
@@ -202,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
202 struct net_device *dev; 210 struct net_device *dev;
203 int offset, end; 211 int offset, end;
204 struct net *net = dev_net(skb_dst(skb)->dev); 212 struct net *net = dev_net(skb_dst(skb)->dev);
213 u8 ecn;
205 214
206 if (fq->q.last_in & INET_FRAG_COMPLETE) 215 if (fq->q.last_in & INET_FRAG_COMPLETE)
207 goto err; 216 goto err;
@@ -219,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
219 return -1; 228 return -1;
220 } 229 }
221 230
231 ecn = ip6_frag_ecn(ipv6_hdr(skb));
232
222 if (skb->ip_summed == CHECKSUM_COMPLETE) { 233 if (skb->ip_summed == CHECKSUM_COMPLETE) {
223 const unsigned char *nh = skb_network_header(skb); 234 const unsigned char *nh = skb_network_header(skb);
224 skb->csum = csum_sub(skb->csum, 235 skb->csum = csum_sub(skb->csum,
@@ -319,6 +330,7 @@ found:
319 } 330 }
320 fq->q.stamp = skb->tstamp; 331 fq->q.stamp = skb->tstamp;
321 fq->q.meat += skb->len; 332 fq->q.meat += skb->len;
333 fq->ecn |= ecn;
322 add_frag_mem_limit(&fq->q, skb->truesize); 334 add_frag_mem_limit(&fq->q, skb->truesize);
323 335
324 /* The first fragment. 336 /* The first fragment.
@@ -330,9 +342,17 @@ found:
330 } 342 }
331 343
332 if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && 344 if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
333 fq->q.meat == fq->q.len) 345 fq->q.meat == fq->q.len) {
334 return ip6_frag_reasm(fq, prev, dev); 346 int res;
347 unsigned long orefdst = skb->_skb_refdst;
348
349 skb->_skb_refdst = 0UL;
350 res = ip6_frag_reasm(fq, prev, dev);
351 skb->_skb_refdst = orefdst;
352 return res;
353 }
335 354
355 skb_dst_drop(skb);
336 inet_frag_lru_move(&fq->q); 356 inet_frag_lru_move(&fq->q);
337 return -1; 357 return -1;
338 358
@@ -362,9 +382,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
362 int payload_len; 382 int payload_len;
363 unsigned int nhoff; 383 unsigned int nhoff;
364 int sum_truesize; 384 int sum_truesize;
385 u8 ecn;
365 386
366 inet_frag_kill(&fq->q, &ip6_frags); 387 inet_frag_kill(&fq->q, &ip6_frags);
367 388
389 ecn = ip_frag_ecn_table[fq->ecn];
390 if (unlikely(ecn == 0xff))
391 goto out_fail;
392
368 /* Make the one we just received the head. */ 393 /* Make the one we just received the head. */
369 if (prev) { 394 if (prev) {
370 head = prev->next; 395 head = prev->next;
@@ -463,6 +488,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
463 head->dev = dev; 488 head->dev = dev;
464 head->tstamp = fq->q.stamp; 489 head->tstamp = fq->q.stamp;
465 ipv6_hdr(head)->payload_len = htons(payload_len); 490 ipv6_hdr(head)->payload_len = htons(payload_len);
491 ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
466 IP6CB(head)->nhoff = nhoff; 492 IP6CB(head)->nhoff = nhoff;
467 493
468 /* Yes, and fold redundant checksum back. 8) */ 494 /* Yes, and fold redundant checksum back. 8) */
@@ -526,7 +552,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
526 IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), 552 IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
527 IPSTATS_MIB_REASMFAILS, evicted); 553 IPSTATS_MIB_REASMFAILS, evicted);
528 554
529 fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); 555 fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
556 ip6_frag_ecn(hdr));
530 if (fq != NULL) { 557 if (fq != NULL) {
531 int ret; 558 int ret;
532 559
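
Besides the same ECN accumulation as the conntrack reassembler above, this file gains two dst-related tweaks: fragments parked in the queue drop their dst reference (skb_dst_drop()) so routes are not pinned for the whole reassembly window, and the final fragment's dst is stashed and restored around ip6_frag_reasm() so the reassembled packet still carries a valid route:

    unsigned long orefdst = skb->_skb_refdst;

    skb->_skb_refdst = 0UL;         /* keep reasm from consuming our ref */
    res = ip6_frag_reasm(fq, prev, dev);
    skb->_skb_refdst = orefdst;     /* caller's dst survives reassembly */

_skb_refdst holds the dst pointer plus a ref/noref flag bit, which is why it is saved as a raw unsigned long rather than through skb_dst().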
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e5fe0041adfa..ad0aa6b0b86a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2355,7 +2355,7 @@ beginning:
2355 return last_err; 2355 return last_err;
2356} 2356}
2357 2357
2358static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2358static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2359{ 2359{
2360 struct fib6_config cfg; 2360 struct fib6_config cfg;
2361 int err; 2361 int err;
@@ -2370,7 +2370,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
2370 return ip6_route_del(&cfg); 2370 return ip6_route_del(&cfg);
2371} 2371}
2372 2372
2373static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2373static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2374{ 2374{
2375 struct fib6_config cfg; 2375 struct fib6_config cfg;
2376 int err; 2376 int err;
@@ -2562,7 +2562,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2562 prefix, 0, NLM_F_MULTI); 2562 prefix, 0, NLM_F_MULTI);
2563} 2563}
2564 2564
2565static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2565static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2566{ 2566{
2567 struct net *net = sock_net(in_skb->sk); 2567 struct net *net = sock_net(in_skb->sk);
2568 struct nlattr *tb[RTA_MAX+1]; 2568 struct nlattr *tb[RTA_MAX+1];
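
All three rtnetlink handlers shed the unused void *arg parameter; this follows a tree-wide change to the doit callback type, which after this series reads roughly:

    typedef int (*rtnl_doit_func)(struct sk_buff *, struct nlmsghdr *);

    /* Registration is unchanged apart from the narrower callback type: */
    rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL);

Nothing behavioral changes here; the argument was never used by these handlers.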
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 02f96dcbcf02..335363478bbf 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -49,7 +49,7 @@
49#include <net/ip.h> 49#include <net/ip.h>
50#include <net/udp.h> 50#include <net/udp.h>
51#include <net/icmp.h> 51#include <net/icmp.h>
52#include <net/ipip.h> 52#include <net/ip_tunnels.h>
53#include <net/inet_ecn.h> 53#include <net/inet_ecn.h>
54#include <net/xfrm.h> 54#include <net/xfrm.h>
55#include <net/dsfield.h> 55#include <net/dsfield.h>
@@ -87,41 +87,6 @@ struct sit_net {
87 struct net_device *fb_tunnel_dev; 87 struct net_device *fb_tunnel_dev;
88}; 88};
89 89
90static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev,
91 struct rtnl_link_stats64 *tot)
92{
93 int i;
94
95 for_each_possible_cpu(i) {
96 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
97 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
98 unsigned int start;
99
100 do {
101 start = u64_stats_fetch_begin_bh(&tstats->syncp);
102 rx_packets = tstats->rx_packets;
103 tx_packets = tstats->tx_packets;
104 rx_bytes = tstats->rx_bytes;
105 tx_bytes = tstats->tx_bytes;
106 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
107
108 tot->rx_packets += rx_packets;
109 tot->tx_packets += tx_packets;
110 tot->rx_bytes += rx_bytes;
111 tot->tx_bytes += tx_bytes;
112 }
113
114 tot->rx_errors = dev->stats.rx_errors;
115 tot->rx_frame_errors = dev->stats.rx_frame_errors;
116 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
117 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
118 tot->tx_dropped = dev->stats.tx_dropped;
119 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
120 tot->tx_errors = dev->stats.tx_errors;
121
122 return tot;
123}
124
125/* 90/*
126 * Must be invoked with rcu_read_lock 91 * Must be invoked with rcu_read_lock
127 */ 92 */
@@ -899,6 +864,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
899 if ((iph->ttl = tiph->ttl) == 0) 864 if ((iph->ttl = tiph->ttl) == 0)
900 iph->ttl = iph6->hop_limit; 865 iph->ttl = iph6->hop_limit;
901 866
867 skb->ip_summed = CHECKSUM_NONE;
868 ip_select_ident(iph, skb_dst(skb), NULL);
902 iptunnel_xmit(skb, dev); 869 iptunnel_xmit(skb, dev);
903 return NETDEV_TX_OK; 870 return NETDEV_TX_OK;
904 871
@@ -1200,7 +1167,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
1200 .ndo_start_xmit = ipip6_tunnel_xmit, 1167 .ndo_start_xmit = ipip6_tunnel_xmit,
1201 .ndo_do_ioctl = ipip6_tunnel_ioctl, 1168 .ndo_do_ioctl = ipip6_tunnel_ioctl,
1202 .ndo_change_mtu = ipip6_tunnel_change_mtu, 1169 .ndo_change_mtu = ipip6_tunnel_change_mtu,
1203 .ndo_get_stats64= ipip6_get_stats64, 1170 .ndo_get_stats64 = ip_tunnel_get_stats64,
1204}; 1171};
1205 1172
1206static void ipip6_dev_free(struct net_device *dev) 1173static void ipip6_dev_free(struct net_device *dev)
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 8a0848b60b35..d5dda20bd717 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
149struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) 149struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
150{ 150{
151 struct tcp_options_received tcp_opt; 151 struct tcp_options_received tcp_opt;
152 const u8 *hash_location;
153 struct inet_request_sock *ireq; 152 struct inet_request_sock *ireq;
154 struct inet6_request_sock *ireq6; 153 struct inet6_request_sock *ireq6;
155 struct tcp_request_sock *treq; 154 struct tcp_request_sock *treq;
@@ -177,7 +176,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
177 176
178 /* check for timestamp cookie support */ 177 /* check for timestamp cookie support */
179 memset(&tcp_opt, 0, sizeof(tcp_opt)); 178 memset(&tcp_opt, 0, sizeof(tcp_opt));
180 tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); 179 tcp_parse_options(skb, &tcp_opt, 0, NULL);
181 180
182 if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) 181 if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
183 goto out; 182 goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f6d629fd6aee..71167069b394 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -386,6 +386,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
386 386
387 if (dst) 387 if (dst)
388 dst->ops->redirect(dst, sk, skb); 388 dst->ops->redirect(dst, sk, skb);
389 goto out;
389 } 390 }
390 391
391 if (type == ICMPV6_PKT_TOOBIG) { 392 if (type == ICMPV6_PKT_TOOBIG) {
@@ -461,7 +462,6 @@ out:
461static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, 462static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
462 struct flowi6 *fl6, 463 struct flowi6 *fl6,
463 struct request_sock *req, 464 struct request_sock *req,
464 struct request_values *rvp,
465 u16 queue_mapping) 465 u16 queue_mapping)
466{ 466{
467 struct inet6_request_sock *treq = inet6_rsk(req); 467 struct inet6_request_sock *treq = inet6_rsk(req);
@@ -473,7 +473,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
473 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) 473 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
474 goto done; 474 goto done;
475 475
476 skb = tcp_make_synack(sk, dst, req, rvp, NULL); 476 skb = tcp_make_synack(sk, dst, req, NULL);
477 477
478 if (skb) { 478 if (skb) {
479 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); 479 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
@@ -488,13 +488,12 @@ done:
488 return err; 488 return err;
489} 489}
490 490
491static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, 491static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
492 struct request_values *rvp)
493{ 492{
494 struct flowi6 fl6; 493 struct flowi6 fl6;
495 int res; 494 int res;
496 495
497 res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); 496 res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
498 if (!res) 497 if (!res)
499 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 498 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
500 return res; 499 return res;
@@ -947,9 +946,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
947 */ 946 */
948static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 947static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
949{ 948{
950 struct tcp_extend_values tmp_ext;
951 struct tcp_options_received tmp_opt; 949 struct tcp_options_received tmp_opt;
952 const u8 *hash_location;
953 struct request_sock *req; 950 struct request_sock *req;
954 struct inet6_request_sock *treq; 951 struct inet6_request_sock *treq;
955 struct ipv6_pinfo *np = inet6_sk(sk); 952 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -987,50 +984,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
987 tcp_clear_options(&tmp_opt); 984 tcp_clear_options(&tmp_opt);
988 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 985 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
989 tmp_opt.user_mss = tp->rx_opt.user_mss; 986 tmp_opt.user_mss = tp->rx_opt.user_mss;
990 tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); 987 tcp_parse_options(skb, &tmp_opt, 0, NULL);
991
992 if (tmp_opt.cookie_plus > 0 &&
993 tmp_opt.saw_tstamp &&
994 !tp->rx_opt.cookie_out_never &&
995 (sysctl_tcp_cookie_size > 0 ||
996 (tp->cookie_values != NULL &&
997 tp->cookie_values->cookie_desired > 0))) {
998 u8 *c;
999 u32 *d;
1000 u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
1001 int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
1002
1003 if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
1004 goto drop_and_free;
1005
1006 /* Secret recipe starts with IP addresses */
1007 d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
1008 *mess++ ^= *d++;
1009 *mess++ ^= *d++;
1010 *mess++ ^= *d++;
1011 *mess++ ^= *d++;
1012 d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
1013 *mess++ ^= *d++;
1014 *mess++ ^= *d++;
1015 *mess++ ^= *d++;
1016 *mess++ ^= *d++;
1017
1018 /* plus variable length Initiator Cookie */
1019 c = (u8 *)mess;
1020 while (l-- > 0)
1021 *c++ ^= *hash_location++;
1022
1023 want_cookie = false; /* not our kind of cookie */
1024 tmp_ext.cookie_out_never = 0; /* false */
1025 tmp_ext.cookie_plus = tmp_opt.cookie_plus;
1026 } else if (!tp->rx_opt.cookie_in_always) {
1027 /* redundant indications, but ensure initialization. */
1028 tmp_ext.cookie_out_never = 1; /* true */
1029 tmp_ext.cookie_plus = 0;
1030 } else {
1031 goto drop_and_free;
1032 }
1033 tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
1034 988
1035 if (want_cookie && !tmp_opt.saw_tstamp) 989 if (want_cookie && !tmp_opt.saw_tstamp)
1036 tcp_clear_options(&tmp_opt); 990 tcp_clear_options(&tmp_opt);
@@ -1108,7 +1062,6 @@ have_isn:
1108 goto drop_and_release; 1062 goto drop_and_release;
1109 1063
1110 if (tcp_v6_send_synack(sk, dst, &fl6, req, 1064 if (tcp_v6_send_synack(sk, dst, &fl6, req,
1111 (struct request_values *)&tmp_ext,
1112 skb_get_queue_mapping(skb)) || 1065 skb_get_queue_mapping(skb)) ||
1113 want_cookie) 1066 want_cookie)
1114 goto drop_and_free; 1067 goto drop_and_free;
@@ -1452,6 +1405,7 @@ discard:
1452 kfree_skb(skb); 1405 kfree_skb(skb);
1453 return 0; 1406 return 0;
1454csum_err: 1407csum_err:
1408 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
1455 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 1409 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1456 goto discard; 1410 goto discard;
1457 1411
@@ -1513,7 +1467,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1513 goto discard_it; 1467 goto discard_it;
1514 1468
1515 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) 1469 if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
1516 goto bad_packet; 1470 goto csum_error;
1517 1471
1518 th = tcp_hdr(skb); 1472 th = tcp_hdr(skb);
1519 hdr = ipv6_hdr(skb); 1473 hdr = ipv6_hdr(skb);
@@ -1577,6 +1531,8 @@ no_tcp_socket:
1577 goto discard_it; 1531 goto discard_it;
1578 1532
1579 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { 1533 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1534csum_error:
1535 TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
1580bad_packet: 1536bad_packet:
1581 TCP_INC_STATS_BH(net, TCP_MIB_INERRS); 1537 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1582 } else { 1538 } else {
@@ -1584,11 +1540,6 @@ bad_packet:
1584 } 1540 }
1585 1541
1586discard_it: 1542discard_it:
1587
1588 /*
1589 * Discard frame
1590 */
1591
1592 kfree_skb(skb); 1543 kfree_skb(skb);
1593 return 0; 1544 return 0;
1594 1545
@@ -1602,10 +1553,13 @@ do_time_wait:
1602 goto discard_it; 1553 goto discard_it;
1603 } 1554 }
1604 1555
1605 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { 1556 if (skb->len < (th->doff<<2)) {
1606 TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
1607 inet_twsk_put(inet_twsk(sk)); 1557 inet_twsk_put(inet_twsk(sk));
1608 goto discard_it; 1558 goto bad_packet;
1559 }
1560 if (tcp_checksum_complete(skb)) {
1561 inet_twsk_put(inet_twsk(sk));
1562 goto csum_error;
1609 } 1563 }
1610 1564
1611 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1565 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
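
TCP gains a dedicated checksum-error counter. The label layout makes the accounting cumulative, and the time-wait path now distinguishes a short header (bad_packet) from a checksum failure (csum_error):

    csum_error:
            TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);  /* new counter */
    bad_packet:
            TCP_INC_STATS_BH(net, TCP_MIB_INERRS);      /* falls through */
    ...
    discard_it:
            kfree_skb(skb);
            return 0;

The new counter should surface alongside the existing Tcp statistics (as InCsumErrors in /proc/net/snmp); the UDP and UDP-Lite hunks below add the equivalent UDP_MIB_CSUMERRORS bookkeeping.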
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d8e5e852fc7a..d4defdd44937 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -450,15 +450,16 @@ try_again:
450 sin6->sin6_family = AF_INET6; 450 sin6->sin6_family = AF_INET6;
451 sin6->sin6_port = udp_hdr(skb)->source; 451 sin6->sin6_port = udp_hdr(skb)->source;
452 sin6->sin6_flowinfo = 0; 452 sin6->sin6_flowinfo = 0;
453 sin6->sin6_scope_id = 0;
454 453
455 if (is_udp4) 454 if (is_udp4) {
456 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, 455 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
457 &sin6->sin6_addr); 456 &sin6->sin6_addr);
458 else { 457 sin6->sin6_scope_id = 0;
458 } else {
459 sin6->sin6_addr = ipv6_hdr(skb)->saddr; 459 sin6->sin6_addr = ipv6_hdr(skb)->saddr;
460 if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) 460 sin6->sin6_scope_id =
461 sin6->sin6_scope_id = IP6CB(skb)->iif; 461 ipv6_iface_scope_id(&sin6->sin6_addr,
462 IP6CB(skb)->iif);
462 } 463 }
463 464
464 } 465 }
@@ -482,12 +483,17 @@ out:
482csum_copy_err: 483csum_copy_err:
483 slow = lock_sock_fast(sk); 484 slow = lock_sock_fast(sk);
484 if (!skb_kill_datagram(sk, skb, flags)) { 485 if (!skb_kill_datagram(sk, skb, flags)) {
485 if (is_udp4) 486 if (is_udp4) {
487 UDP_INC_STATS_USER(sock_net(sk),
488 UDP_MIB_CSUMERRORS, is_udplite);
486 UDP_INC_STATS_USER(sock_net(sk), 489 UDP_INC_STATS_USER(sock_net(sk),
487 UDP_MIB_INERRORS, is_udplite); 490 UDP_MIB_INERRORS, is_udplite);
488 else 491 } else {
492 UDP6_INC_STATS_USER(sock_net(sk),
493 UDP_MIB_CSUMERRORS, is_udplite);
489 UDP6_INC_STATS_USER(sock_net(sk), 494 UDP6_INC_STATS_USER(sock_net(sk),
490 UDP_MIB_INERRORS, is_udplite); 495 UDP_MIB_INERRORS, is_udplite);
496 }
491 } 497 }
492 unlock_sock_fast(sk, slow); 498 unlock_sock_fast(sk, slow);
493 499
@@ -636,7 +642,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
636 642
637 if (rcu_access_pointer(sk->sk_filter)) { 643 if (rcu_access_pointer(sk->sk_filter)) {
638 if (udp_lib_checksum_complete(skb)) 644 if (udp_lib_checksum_complete(skb))
639 goto drop; 645 goto csum_error;
640 } 646 }
641 647
642 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) 648 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
@@ -655,6 +661,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
655 bh_unlock_sock(sk); 661 bh_unlock_sock(sk);
656 662
657 return rc; 663 return rc;
664csum_error:
665 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
658drop: 666drop:
659 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 667 UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
660 atomic_inc(&sk->sk_drops); 668 atomic_inc(&sk->sk_drops);
@@ -816,7 +824,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
816 } 824 }
817 825
818 if (udp6_csum_init(skb, uh, proto)) 826 if (udp6_csum_init(skb, uh, proto))
819 goto discard; 827 goto csum_error;
820 828
821 /* 829 /*
822 * Multicast receive code 830 * Multicast receive code
@@ -849,7 +857,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
849 goto discard; 857 goto discard;
850 858
851 if (udp_lib_checksum_complete(skb)) 859 if (udp_lib_checksum_complete(skb))
852 goto discard; 860 goto csum_error;
853 861
854 UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); 862 UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
855 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); 863 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
@@ -866,7 +874,9 @@ short_packet:
866 skb->len, 874 skb->len,
867 daddr, 875 daddr,
868 ntohs(uh->dest)); 876 ntohs(uh->dest));
869 877 goto discard;
878csum_error:
879 UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
870discard: 880discard:
871 UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); 881 UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
872 kfree_skb(skb); 882 kfree_skb(skb);
@@ -1118,7 +1128,7 @@ do_udp_sendmsg:
1118 1128
1119 if (addr_len >= sizeof(struct sockaddr_in6) && 1129 if (addr_len >= sizeof(struct sockaddr_in6) &&
1120 sin6->sin6_scope_id && 1130 sin6->sin6_scope_id &&
1121 ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) 1131 __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
1122 fl6.flowi6_oif = sin6->sin6_scope_id; 1132 fl6.flowi6_oif = sin6->sin6_scope_id;
1123 } else { 1133 } else {
1124 if (sk->sk_state != TCP_ESTABLISHED) 1134 if (sk->sk_state != TCP_ESTABLISHED)
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index cf05cf073c51..3bb3a891a424 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
21 const struct ipv6hdr *ipv6h; 21 const struct ipv6hdr *ipv6h;
22 struct udphdr *uh; 22 struct udphdr *uh;
23 23
24 /* UDP Tunnel offload on ipv6 is not yet supported. */
25 if (skb->encapsulation)
26 return -EINVAL;
27
24 if (!pskb_may_pull(skb, sizeof(*uh))) 28 if (!pskb_may_pull(skb, sizeof(*uh)))
25 return -EINVAL; 29 return -EINVAL;
26 30
@@ -56,7 +60,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
56 /* Packet is from an untrusted source, reset gso_segs. */ 60 /* Packet is from an untrusted source, reset gso_segs. */
57 int type = skb_shinfo(skb)->gso_type; 61 int type = skb_shinfo(skb)->gso_type;
58 62
59 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | 63 if (unlikely(type & ~(SKB_GSO_UDP |
64 SKB_GSO_DODGY |
65 SKB_GSO_UDP_TUNNEL |
60 SKB_GSO_GRE) || 66 SKB_GSO_GRE) ||
61 !(type & (SKB_GSO_UDP)))) 67 !(type & (SKB_GSO_UDP))))
62 goto out; 68 goto out;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9bf6a74a71d2..4770d515c2c8 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
49 sizeof(top_iph->flow_lbl)); 49 sizeof(top_iph->flow_lbl));
50 top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); 50 top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
51 51
52 dsfield = XFRM_MODE_SKB_CB(skb)->tos; 52 if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
53 dsfield = INET_ECN_encapsulate(dsfield, dsfield); 53 dsfield = 0;
54 else
55 dsfield = XFRM_MODE_SKB_CB(skb)->tos;
56 dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
54 if (x->props.flags & XFRM_STATE_NOECN) 57 if (x->props.flags & XFRM_STATE_NOECN)
55 dsfield &= ~INET_ECN_MASK; 58 dsfield &= ~INET_ECN_MASK;
56 ipv6_change_dsfield(top_iph, 0, dsfield); 59 ipv6_change_dsfield(top_iph, 0, dsfield);
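
Previously the outer dsfield passed the inner TOS for both arguments of INET_ECN_encapsulate(), which was only correct because outer and inner DSCP were identical. With XFRM_SA_XFLAG_DONT_ENCAP_DSCP the outer DSCP starts at zero, so the ECN bits must still be taken from the inner header explicitly. The helper's contract, sketched from include/net/inet_ecn.h:

    static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
    {
            /* Keep the outer DSCP; copy ECN from the inner header,
             * mapping CE to ECT(0) so a congestion mark cannot be
             * forged into the tunnel. */
            outer &= ~INET_ECN_MASK;
            outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK)
                                            : INET_ECN_ECT_0;
            return outer;
    }

XFRM_STATE_NOECN then clears the ECN bits entirely, as before.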
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index d28e7f014cc6..0578d4fa00a9 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -305,8 +305,7 @@ static void irda_connect_response(struct irda_sock *self)
305 305
306 IRDA_DEBUG(2, "%s()\n", __func__); 306 IRDA_DEBUG(2, "%s()\n", __func__);
307 307
308 skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, 308 skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL);
309 GFP_ATOMIC);
310 if (skb == NULL) { 309 if (skb == NULL) {
311 IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n", 310 IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n",
312 __func__); 311 __func__);
@@ -1120,7 +1119,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
1120 } 1119 }
1121 1120
1122 /* Allocate networking socket */ 1121 /* Allocate networking socket */
1123 sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto); 1122 sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto);
1124 if (sk == NULL) 1123 if (sk == NULL)
1125 return -ENOMEM; 1124 return -ENOMEM;
1126 1125
@@ -1386,6 +1385,8 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
1386 1385
1387 IRDA_DEBUG(4, "%s()\n", __func__); 1386 IRDA_DEBUG(4, "%s()\n", __func__);
1388 1387
1388 msg->msg_namelen = 0;
1389
1389 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, 1390 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
1390 flags & MSG_DONTWAIT, &err); 1391 flags & MSG_DONTWAIT, &err);
1391 if (!skb) 1392 if (!skb)
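
Both allocation sites run in process context (socket creation and a connect response driven from user space), so they may sleep; GFP_KERNEL lets the allocator reclaim memory instead of failing outright, while GFP_ATOMIC remains reserved for interrupt/softirq paths:

    skb = alloc_skb(len, GFP_KERNEL);   /* process context: may sleep */
    skb = alloc_skb(len, GFP_ATOMIC);   /* irq/softirq: must not sleep */

The msg->msg_namelen = 0 initialisation is a separate fix: recvmsg callers copy msg_namelen bytes of address back to user space, so a path that returns without filling in the name must zero the length or it leaks stale kernel stack. The same one-liner appears in af_iucv.c below.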
diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c
index 52079f19bbbe..b797daac063c 100644
--- a/net/irda/ircomm/ircomm_core.c
+++ b/net/irda/ircomm/ircomm_core.c
@@ -117,7 +117,7 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line)
117 117
118 IRDA_ASSERT(ircomm != NULL, return NULL;); 118 IRDA_ASSERT(ircomm != NULL, return NULL;);
119 119
120 self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC); 120 self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL);
121 if (self == NULL) 121 if (self == NULL)
122 return NULL; 122 return NULL;
123 123
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 362ba47968e4..41ac7938268b 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -328,7 +328,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self,
328 spin_unlock_irqrestore(&port->lock, flags); 328 spin_unlock_irqrestore(&port->lock, flags);
329 329
330 while (1) { 330 while (1) {
331 if (tty->termios.c_cflag & CBAUD) 331 if (C_BAUD(tty) && test_bit(ASYNCB_INITIALIZED, &port->flags))
332 tty_port_raise_dtr_rts(port); 332 tty_port_raise_dtr_rts(port);
333 333
334 set_current_state(TASK_INTERRUPTIBLE); 334 set_current_state(TASK_INTERRUPTIBLE);
diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c
index edab393e0c82..a2a508f5f268 100644
--- a/net/irda/ircomm/ircomm_tty_attach.c
+++ b/net/irda/ircomm/ircomm_tty_attach.c
@@ -997,12 +997,8 @@ static int ircomm_tty_state_ready(struct ircomm_tty_cb *self,
997 self->settings.dce = IRCOMM_DELTA_CD; 997 self->settings.dce = IRCOMM_DELTA_CD;
998 ircomm_tty_check_modem_status(self); 998 ircomm_tty_check_modem_status(self);
999 } else { 999 } else {
1000 struct tty_struct *tty = tty_port_tty_get(&self->port);
1001 IRDA_DEBUG(0, "%s(), hanging up!\n", __func__ ); 1000 IRDA_DEBUG(0, "%s(), hanging up!\n", __func__ );
1002 if (tty) { 1001 tty_port_tty_hangup(&self->port, false);
1003 tty_hangup(tty);
1004 tty_kref_put(tty);
1005 }
1006 } 1002 }
1007 break; 1003 break;
1008 default: 1004 default:
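
The get/hangup/put triple is folded into the tty_port_tty_hangup() helper introduced in this release. A sketch, assuming the helper mirrors the open-coded sequence it replaces (cf. drivers/tty/tty_port.c; the body here is an assumption):

    void tty_port_tty_hangup(struct tty_port *port, bool check_clocal)
    {
            struct tty_struct *tty = tty_port_tty_get(port);

            /* check_clocal == false, as used here, hangs up
             * unconditionally; true skips CLOCAL ("local") lines. */
            if (tty && (!check_clocal || !C_CLOCAL(tty)))
                    tty_hangup(tty);
            tty_kref_put(tty);
    }

tty_kref_put() tolerates NULL, so the helper needs no separate error path.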
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 29340a9a6fb9..e1b37f5a2691 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -303,7 +303,8 @@ static void iriap_disconnect_indication(void *instance, void *sap,
303{ 303{
304 struct iriap_cb *self; 304 struct iriap_cb *self;
305 305
306 IRDA_DEBUG(4, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); 306 IRDA_DEBUG(4, "%s(), reason=%s [%d]\n", __func__,
307 irlmp_reason_str(reason), reason);
307 308
308 self = instance; 309 self = instance;
309 310
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 6115a44c0a24..98ad6ec4bd3c 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -58,7 +58,7 @@ int sysctl_discovery_slots = 6; /* 6 slots by default */
58int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ; 58int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ;
59char sysctl_devname[65]; 59char sysctl_devname[65];
60 60
61const char *irlmp_reasons[] = { 61static const char *irlmp_reasons[] = {
62 "ERROR, NOT USED", 62 "ERROR, NOT USED",
63 "LM_USER_REQUEST", 63 "LM_USER_REQUEST",
64 "LM_LAP_DISCONNECT", 64 "LM_LAP_DISCONNECT",
@@ -66,8 +66,15 @@ const char *irlmp_reasons[] = {
66 "LM_LAP_RESET", 66 "LM_LAP_RESET",
67 "LM_INIT_DISCONNECT", 67 "LM_INIT_DISCONNECT",
68 "ERROR, NOT USED", 68 "ERROR, NOT USED",
69 "UNKNOWN",
69}; 70};
70 71
72const char *irlmp_reason_str(LM_REASON reason)
73{
74 reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1);
75 return irlmp_reasons[reason];
76}
77
71/* 78/*
72 * Function irlmp_init (void) 79 * Function irlmp_init (void)
73 * 80 *
@@ -747,7 +754,8 @@ void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason,
747{ 754{
748 struct lsap_cb *lsap; 755 struct lsap_cb *lsap;
749 756
750 IRDA_DEBUG(1, "%s(), reason=%s\n", __func__, irlmp_reasons[reason]); 757 IRDA_DEBUG(1, "%s(), reason=%s [%d]\n", __func__,
758 irlmp_reason_str(reason), reason);
751 IRDA_ASSERT(self != NULL, return;); 759 IRDA_ASSERT(self != NULL, return;);
752 IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); 760 IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;);
753 761
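
irlmp_reasons[] is indexed with a reason code that ultimately comes from the peer, so a bad value could read past the array. The new accessor clamps instead:

    /* Out-of-range codes map onto the trailing "UNKNOWN" entry: */
    reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1);
    return irlmp_reasons[reason];

Making the array static ensures every user goes through the bounds-checked accessor; the iriap.c hunk above is one such conversion.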
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a7d11ffe4284..ae691651b721 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -49,12 +49,6 @@ static const u8 iprm_shutdown[8] =
 
 #define TRGCLS_SIZE	(sizeof(((struct iucv_message *)0)->class))
 
-/* macros to set/get socket control buffer at correct offset */
-#define CB_TAG(skb)	((skb)->cb)		/* iucv message tag */
-#define CB_TAG_LEN	(sizeof(((struct iucv_message *) 0)->tag))
-#define CB_TRGCLS(skb)	((skb)->cb + CB_TAG_LEN) /* iucv msg target class */
-#define CB_TRGCLS_LEN	(TRGCLS_SIZE)
-
 #define __iucv_sock_wait(sk, condition, timeo, ret)			\
 do {									\
 	DEFINE_WAIT(__wait);						\
@@ -1141,7 +1135,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	/* increment and save iucv message tag for msg_completion cbk */
 	txmsg.tag = iucv->send_tag++;
-	memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN);
+	IUCV_SKB_CB(skb)->tag = txmsg.tag;
 
 	if (iucv->transport == AF_IUCV_TRANS_HIPER) {
 		atomic_inc(&iucv->msg_sent);
@@ -1224,7 +1218,7 @@ static int iucv_fragment_skb(struct sock *sk, struct sk_buff *skb, int len)
 		return -ENOMEM;
 
 	/* copy target class to control buffer of new skb */
-	memcpy(CB_TRGCLS(nskb), CB_TRGCLS(skb), CB_TRGCLS_LEN);
+	IUCV_SKB_CB(nskb)->class = IUCV_SKB_CB(skb)->class;
 
 	/* copy data fragment */
 	memcpy(nskb->data, skb->data + copied, size);
@@ -1256,7 +1250,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
 
 	/* store msg target class in the second 4 bytes of skb ctrl buffer */
 	/* Note: the first 4 bytes are reserved for msg tag */
-	memcpy(CB_TRGCLS(skb), &msg->class, CB_TRGCLS_LEN);
+	IUCV_SKB_CB(skb)->class = msg->class;
 
 	/* check for special IPRM messages (e.g. iucv_sock_shutdown) */
 	if ((msg->flags & IUCV_IPRMDATA) && len > 7) {
@@ -1292,6 +1286,7 @@ static void iucv_process_message(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
+	IUCV_SKB_CB(skb)->offset = 0;
 	if (sock_queue_rcv_skb(sk, skb))
 		skb_queue_head(&iucv_sk(sk)->backlog_skb_q, skb);
 }
@@ -1327,6 +1322,9 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	unsigned int copied, rlen;
 	struct sk_buff *skb, *rskb, *cskb;
 	int err = 0;
+	u32 offset;
+
+	msg->msg_namelen = 0;
 
 	if ((sk->sk_state == IUCV_DISCONN) &&
 	    skb_queue_empty(&iucv->backlog_skb_q) &&
@@ -1346,13 +1344,14 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return err;
 	}
 
-	rlen = skb->len;		/* real length of skb */
+	offset = IUCV_SKB_CB(skb)->offset;
+	rlen = skb->len - offset;	/* real length of skb */
 	copied = min_t(unsigned int, rlen, len);
 	if (!rlen)
 		sk->sk_shutdown = sk->sk_shutdown | RCV_SHUTDOWN;
 
 	cskb = skb;
-	if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) {
+	if (skb_copy_datagram_iovec(cskb, offset, msg->msg_iov, copied)) {
 		if (!(flags & MSG_PEEK))
 			skb_queue_head(&sk->sk_receive_queue, skb);
 		return -EFAULT;
@@ -1370,7 +1369,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		 * get the trgcls from the control buffer of the skb due to
 		 * fragmentation of original iucv message. */
 		err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS,
-			       CB_TRGCLS_LEN, CB_TRGCLS(skb));
+			       sizeof(IUCV_SKB_CB(skb)->class),
+			       (void *)&IUCV_SKB_CB(skb)->class);
 		if (err) {
 			if (!(flags & MSG_PEEK))
 				skb_queue_head(&sk->sk_receive_queue, skb);
@@ -1382,9 +1382,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	/* SOCK_STREAM: re-queue skb if it contains unreceived data */
 	if (sk->sk_type == SOCK_STREAM) {
-		skb_pull(skb, copied);
-		if (skb->len) {
-			skb_queue_head(&sk->sk_receive_queue, skb);
+		if (copied < rlen) {
+			IUCV_SKB_CB(skb)->offset = offset + copied;
 			goto done;
 		}
 	}
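
Replacing skb_pull() with a per-skb offset stored in the control buffer means a partially read message is left intact on the queue: MSG_PEEK and the error paths that requeue the skb always see the original data, and only the offset records how much was consumed. A plain-C sketch of that bookkeeping, with stand-in types:

#include <stdio.h>
#include <string.h>

struct msg_buf {
	const char *data;
	size_t len;
	size_t offset;		/* like IUCV_SKB_CB(skb)->offset */
};

/* returns bytes copied; advances offset only when not peeking */
static size_t msg_recv(struct msg_buf *m, char *out, size_t want, int peek)
{
	size_t rlen = m->len - m->offset;	/* real remaining length */
	size_t copied = want < rlen ? want : rlen;

	memcpy(out, m->data + m->offset, copied);
	if (!peek && copied < rlen)
		m->offset += copied;		/* rest stays queued */
	else if (!peek)
		m->offset = m->len;		/* fully consumed */
	return copied;
}

int main(void)
{
	struct msg_buf m = { "hello world", 11, 0 };
	char out[6] = { 0 };

	msg_recv(&m, out, 5, 0);
	printf("%s / offset=%zu\n", out, m.offset);	/* hello / 5 */
	msg_recv(&m, out, 5, 0);			/* continues at 5 */
	return 0;
}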
@@ -1403,6 +1402,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		spin_lock_bh(&iucv->message_q.lock);
 		rskb = skb_dequeue(&iucv->backlog_skb_q);
 		while (rskb) {
+			IUCV_SKB_CB(rskb)->offset = 0;
 			if (sock_queue_rcv_skb(sk, rskb)) {
 				skb_queue_head(&iucv->backlog_skb_q,
 						rskb);
@@ -1461,7 +1461,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
 		return iucv_accept_poll(sk);
 
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP;
@@ -1830,7 +1831,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
 	spin_lock_irqsave(&list->lock, flags);
 
 	while (list_skb != (struct sk_buff *)list) {
-		if (!memcmp(&msg->tag, CB_TAG(list_skb), CB_TAG_LEN)) {
+		if (msg->tag != IUCV_SKB_CB(list_skb)->tag) {
 			this = list_skb;
 			break;
 		}
@@ -2091,6 +2092,7 @@ static int afiucv_hs_callback_rx(struct sock *sk, struct sk_buff *skb)
 	skb_pull(skb, sizeof(struct af_iucv_trans_hdr));
 	skb_reset_transport_header(skb);
 	skb_reset_network_header(skb);
+	IUCV_SKB_CB(skb)->offset = 0;
 	spin_lock(&iucv->message_q.lock);
 	if (skb_queue_empty(&iucv->backlog_skb_q)) {
 		if (sock_queue_rcv_skb(sk, skb)) {
@@ -2195,8 +2197,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
 		/* fall through and receive zero length data */
 	case 0:
 		/* plain data frame */
-		memcpy(CB_TRGCLS(skb), &trans_hdr->iucv_hdr.class,
-		       CB_TRGCLS_LEN);
+		IUCV_SKB_CB(skb)->class = trans_hdr->iucv_hdr.class;
 		err = afiucv_hs_callback_rx(sk, skb);
 		break;
 	default:
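
Across the af_iucv diff, the CB_TAG/CB_TRGCLS byte-offset macros give way to a typed view of skb->cb (the IUCV_SKB_CB() accessor). A minimal sketch of that pattern, using stand-in struct and field names rather than the kernel's, including the size check that keeps the typed view inside the control buffer:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct fake_skb {
	char cb[48];				/* like sk_buff::cb */
};

struct sock_skb_cb {				/* stand-in, not the kernel's */
	uint32_t trgcls;
	uint32_t tag;
	uint32_t offset;
};

#define SKB_CB(skb) ((struct sock_skb_cb *)&(skb)->cb[0])

int main(void)
{
	struct fake_skb skb;

	/* the typed view must fit inside the control buffer */
	static_assert(sizeof(struct sock_skb_cb) <= sizeof(skb.cb),
		      "cb too small");

	memset(&skb, 0, sizeof(skb));
	SKB_CB(&skb)->tag = 42;			/* no manual byte offsets */
	SKB_CB(&skb)->offset = 0;
	printf("tag=%u\n", (unsigned)SKB_CB(&skb)->tag);
	return 0;
}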
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 8aecf5df6656..6984c3a353cd 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1777,7 +1777,7 @@ int l2tp_session_delete(struct l2tp_session *session)
 	if (session->session_close != NULL)
 		(*session->session_close)(session);
 	if (session->deref)
-		(*session->ref)(session);
+		(*session->deref)(session);
 	l2tp_session_dec_refcount(session);
 	return 0;
 }
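
The one-character l2tp fix above matters because calling the ref hook where deref was intended leaves the session's count permanently one too high. A toy demonstration of the leak, with stand-in types and hypothetical hooks:

#include <stdio.h>

struct session {
	int refcount;
	void (*ref)(struct session *);
	void (*deref)(struct session *);
};

static void take_ref(struct session *s) { s->refcount++; }
static void drop_ref(struct session *s) { s->refcount--; }

int main(void)
{
	struct session s = { 1, take_ref, drop_ref };

	(*s.ref)(&s);		/* the typo: count goes up... */
	printf("buggy: refcount=%d\n", s.refcount);	/* 2, leaked */

	s.refcount = 1;
	(*s.deref)(&s);		/* the fix: count goes down */
	printf("fixed: refcount=%d\n", s.refcount);	/* 0, freeable */
	return 0;
}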
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index c74f5a91ff6a..b8a6039314e8 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -690,6 +690,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
 		lsa->l2tp_addr = ipv6_hdr(skb)->saddr;
 		lsa->l2tp_flowinfo = 0;
 		lsa->l2tp_scope_id = 0;
+		lsa->l2tp_conn_id = 0;
 		if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL)
 			lsa->l2tp_scope_id = IP6CB(skb)->iif;
 	}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 88709882c464..48aaa89253e0 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -720,6 +720,8 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
 	int target;	/* Read at least this many bytes */
 	long timeo;
 
+	msg->msg_namelen = 0;
+
 	lock_sock(sk);
 	copied = -ENOTCONN;
 	if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN))
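
Both the af_iucv and af_llc hunks zero msg_namelen on entry to recvmsg paths that never fill in a source address; without that, the uninitialized length is copied back and userspace can read stale kernel stack bytes. A sketch of the rule, with a hypothetical msghdr stand-in rather than the kernel's struct:

#include <stddef.h>
#include <stdio.h>

struct fake_msghdr {
	void *msg_name;
	size_t msg_namelen;	/* copied back to userspace */
};

static int proto_recvmsg(struct fake_msghdr *msg)
{
	msg->msg_namelen = 0;	/* no address for this protocol */
	/* ... copy payload, never touching msg->msg_name ... */
	return 0;
}

int main(void)
{
	struct fake_msghdr msg = { NULL, 0xdeadbeef };	/* garbage in */

	proto_recvmsg(&msg);
	printf("namelen=%zu\n", msg.msg_namelen);	/* 0, not garbage */
	return 0;
}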
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a6893602f87a..1a89c80e6407 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -175,7 +175,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 	 * add it to the device after the station.
 	 */
 	if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) {
-		ieee80211_key_free(sdata->local, key);
+		ieee80211_key_free_unused(key);
 		err = -ENOENT;
 		goto out_unlock;
 	}
@@ -214,8 +214,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	err = ieee80211_key_link(key, sdata, sta);
-	if (err)
-		ieee80211_key_free(sdata->local, key);
 
  out_unlock:
 	mutex_unlock(&sdata->local->sta_mtx);
@@ -254,7 +252,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
 		goto out_unlock;
 	}
 
-	__ieee80211_key_free(key);
+	ieee80211_key_free(key, true);
 
 	ret = 0;
  out_unlock:
@@ -445,12 +443,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
 	struct timespec uptime;
+	u64 packets = 0;
+	int ac;
 
 	sinfo->generation = sdata->local->sta_generation;
 
 	sinfo->filled = STATION_INFO_INACTIVE_TIME |
-			STATION_INFO_RX_BYTES |
-			STATION_INFO_TX_BYTES |
+			STATION_INFO_RX_BYTES64 |
+			STATION_INFO_TX_BYTES64 |
 			STATION_INFO_RX_PACKETS |
 			STATION_INFO_TX_PACKETS |
 			STATION_INFO_TX_RETRIES |
@@ -467,10 +467,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 	sinfo->connected_time = uptime.tv_sec - sta->last_connected;
 
 	sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
+	sinfo->tx_bytes = 0;
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		sinfo->tx_bytes += sta->tx_bytes[ac];
+		packets += sta->tx_packets[ac];
+	}
+	sinfo->tx_packets = packets;
 	sinfo->rx_bytes = sta->rx_bytes;
-	sinfo->tx_bytes = sta->tx_bytes;
 	sinfo->rx_packets = sta->rx_packets;
-	sinfo->tx_packets = sta->tx_packets;
 	sinfo->tx_retries = sta->tx_retry_count;
 	sinfo->tx_failed = sta->tx_retry_failed;
 	sinfo->rx_dropped_misc = sta->rx_dropped;
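
With tx_bytes and tx_packets now kept per access category, totals are computed only when a report is assembled. A sketch of that accumulation; the AC count and field names are stand-ins for mac80211's internals:

#include <stdint.h>
#include <stdio.h>

#define NUM_ACS 4	/* voice, video, best effort, background */

struct sta_counters {
	uint64_t tx_bytes[NUM_ACS];
	uint64_t tx_packets[NUM_ACS];
};

static void fill_report(const struct sta_counters *c,
			uint64_t *bytes, uint64_t *packets)
{
	int ac;

	*bytes = 0;
	*packets = 0;
	for (ac = 0; ac < NUM_ACS; ac++) {
		*bytes += c->tx_bytes[ac];
		*packets += c->tx_packets[ac];
	}
}

int main(void)
{
	struct sta_counters c = {
		.tx_bytes = { 100, 200, 300, 400 },
		.tx_packets = { 1, 2, 3, 4 },
	};
	uint64_t b, p;

	fill_report(&c, &b, &p);
	printf("%llu bytes, %llu packets\n",
	       (unsigned long long)b, (unsigned long long)p);
	return 0;
}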
@@ -598,8 +602,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
 	data[i++] += sta->rx_fragments;		\
 	data[i++] += sta->rx_dropped;		\
 						\
-	data[i++] += sta->tx_packets;		\
-	data[i++] += sta->tx_bytes;		\
+	data[i++] += sinfo.tx_packets;		\
+	data[i++] += sinfo.tx_bytes;		\
 	data[i++] += sta->tx_fragments;		\
 	data[i++] += sta->tx_filtered_count;	\
 	data[i++] += sta->tx_retry_failed;	\
@@ -621,13 +625,14 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
 	if (!(sta && !WARN_ON(sta->sdata->dev != dev)))
 		goto do_survey;
 
+	sinfo.filled = 0;
+	sta_set_sinfo(sta, &sinfo);
+
 	i = 0;
 	ADD_STA_STATS(sta);
 
 	data[i++] = sta->sta_state;
 
-	sinfo.filled = 0;
-	sta_set_sinfo(sta, &sinfo);
 
 	if (sinfo.filled & STATION_INFO_TX_BITRATE)
 		data[i] = 100000 *
@@ -800,8 +805,7 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
 					       IEEE80211_CHANCTX_EXCLUSIVE);
 		}
 	} else if (local->open_count == local->monitors) {
-		local->_oper_channel = chandef->chan;
-		local->_oper_channel_type = cfg80211_get_chandef_type(chandef);
+		local->_oper_chandef = *chandef;
 		ieee80211_hw_config(local, 0);
 	}
 
@@ -960,8 +964,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 	sdata->vif.bss_conf.hidden_ssid =
 		(params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE);
 
-	sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow;
-	sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps;
+	memset(&sdata->vif.bss_conf.p2p_noa_attr, 0,
+	       sizeof(sdata->vif.bss_conf.p2p_noa_attr));
+	sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow =
+		params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK;
+	if (params->p2p_opp_ps)
+		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |=
+			IEEE80211_P2P_OPPPS_ENABLE_BIT;
 
 	err = ieee80211_assign_beacon(sdata, &params->beacon);
 	if (err < 0)
@@ -1034,12 +1043,17 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
 		sta_info_flush_defer(vlan);
 	sta_info_flush_defer(sdata);
+	synchronize_net();
 	rcu_barrier();
-	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {
 		sta_info_flush_cleanup(vlan);
+		ieee80211_free_keys(vlan);
+	}
 	sta_info_flush_cleanup(sdata);
+	ieee80211_free_keys(sdata);
 
 	sdata->vif.bss_conf.enable_beacon = false;
+	sdata->vif.bss_conf.ssid_len = 0;
 	clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
 
@@ -1177,6 +1191,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 			mask |= BIT(NL80211_STA_FLAG_ASSOCIATED);
 		if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED))
 			set |= BIT(NL80211_STA_FLAG_ASSOCIATED);
+	} else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+		/*
+		 * TDLS -- everything follows authorized, but
+		 * only becoming authorized is possible, not
+		 * going back
+		 */
+		if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) {
+			set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+			       BIT(NL80211_STA_FLAG_ASSOCIATED);
+			mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+				BIT(NL80211_STA_FLAG_ASSOCIATED);
+		}
 	}
 
 	ret = sta_apply_auth_flags(local, sta, mask, set);
@@ -1261,7 +1287,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
 #ifdef CONFIG_MAC80211_MESH
 		u32 changed = 0;
-		if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) {
+
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) {
 			switch (params->plink_state) {
 			case NL80211_PLINK_ESTAB:
 				if (sta->plink_state != NL80211_PLINK_ESTAB)
@@ -1292,15 +1319,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 				/* nothing */
 				break;
 			}
-		} else {
-			switch (params->plink_action) {
-			case PLINK_ACTION_OPEN:
-				changed |= mesh_plink_open(sta);
-				break;
-			case PLINK_ACTION_BLOCK:
-				changed |= mesh_plink_block(sta);
-				break;
-			}
+		}
+
+		switch (params->plink_action) {
+		case NL80211_PLINK_ACTION_NO_ACTION:
+			/* nothing */
+			break;
+		case NL80211_PLINK_ACTION_OPEN:
+			changed |= mesh_plink_open(sta);
+			break;
+		case NL80211_PLINK_ACTION_BLOCK:
+			changed |= mesh_plink_block(sta);
+			break;
 		}
 
 		if (params->local_pm)
@@ -1346,8 +1376,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	 * defaults -- if userspace wants something else we'll
 	 * change it accordingly in sta_apply_parameters()
 	 */
-	sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
-	sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+	if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) {
+		sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
+		sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+	}
 
 	err = sta_apply_parameters(local, sta, params);
 	if (err) {
@@ -1356,8 +1388,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	/*
-	 * for TDLS, rate control should be initialized only when supported
-	 * rates are known.
+	 * for TDLS, rate control should be initialized only when
+	 * rates are known and station is marked authorized
 	 */
 	if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER))
 		rate_control_rate_init(sta);
@@ -1394,50 +1426,67 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ieee80211_change_station(struct wiphy *wiphy,
-				    struct net_device *dev,
-				    u8 *mac,
+				    struct net_device *dev, u8 *mac,
 				    struct station_parameters *params)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *vlansdata;
+	enum cfg80211_station_type statype;
 	int err;
 
 	mutex_lock(&local->sta_mtx);
 
 	sta = sta_info_get_bss(sdata, mac);
 	if (!sta) {
-		mutex_unlock(&local->sta_mtx);
-		return -ENOENT;
+		err = -ENOENT;
+		goto out_err;
 	}
 
-	/* in station mode, some updates are only valid with TDLS */
-	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
-	    (params->supported_rates || params->ht_capa || params->vht_capa ||
-	     params->sta_modify_mask ||
-	     (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME))) &&
-	    !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
-		mutex_unlock(&local->sta_mtx);
-		return -EINVAL;
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_MESH_POINT:
+		if (sdata->u.mesh.user_mpm)
+			statype = CFG80211_STA_MESH_PEER_USER;
+		else
+			statype = CFG80211_STA_MESH_PEER_KERNEL;
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		statype = CFG80211_STA_IBSS;
+		break;
+	case NL80211_IFTYPE_STATION:
+		if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+			statype = CFG80211_STA_AP_STA;
+			break;
+		}
+		if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+			statype = CFG80211_STA_TDLS_PEER_ACTIVE;
+		else
+			statype = CFG80211_STA_TDLS_PEER_SETUP;
+		break;
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
+		statype = CFG80211_STA_AP_CLIENT;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		goto out_err;
 	}
 
+	err = cfg80211_check_station_change(wiphy, params, statype);
+	if (err)
+		goto out_err;
+
 	if (params->vlan && params->vlan != sta->sdata->dev) {
 		bool prev_4addr = false;
 		bool new_4addr = false;
 
 		vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
 
-		if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
-		    vlansdata->vif.type != NL80211_IFTYPE_AP) {
-			mutex_unlock(&local->sta_mtx);
-			return -EINVAL;
-		}
-
 		if (params->vlan->ieee80211_ptr->use_4addr) {
 			if (vlansdata->u.vlan.sta) {
-				mutex_unlock(&local->sta_mtx);
-				return -EBUSY;
+				err = -EBUSY;
+				goto out_err;
 			}
 
 			rcu_assign_pointer(vlansdata->u.vlan.sta, sta);
@@ -1464,12 +1513,12 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 	}
 
 	err = sta_apply_parameters(local, sta, params);
-	if (err) {
-		mutex_unlock(&local->sta_mtx);
-		return err;
-	}
+	if (err)
+		goto out_err;
 
-	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && params->supported_rates)
+	/* When peer becomes authorized, init rate control as well */
+	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+	    test_sta_flag(sta, WLAN_STA_AUTHORIZED))
 		rate_control_rate_init(sta);
 
 	mutex_unlock(&local->sta_mtx);
@@ -1479,7 +1528,11 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 		ieee80211_recalc_ps(local, -1);
 		ieee80211_recalc_ps_vif(sdata);
 	}
+
 	return 0;
+out_err:
+	mutex_unlock(&local->sta_mtx);
+	return err;
 }
 
 #ifdef CONFIG_MAC80211_MESH
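
The change_station() rewrite above converges every failure path on a single out_err label that drops sta_mtx, instead of pairing mutex_unlock() with return at each early exit. A compact sketch of that single-exit pattern, with a pthread mutex standing in for the kernel mutex (link with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static int do_update(int arg)
{
	int err = 0;

	pthread_mutex_lock(&lock);

	if (arg < 0) {			/* validation failure */
		err = -22;		/* -EINVAL */
		goto out_err;
	}
	/* ... apply the update under the lock ... */

out_err:
	pthread_mutex_unlock(&lock);	/* one unlock site for all paths */
	return err;
}

int main(void)
{
	printf("%d\n", do_update(-1));	/* -22 */
	printf("%d\n", do_update(1));	/* 0 */
	return 0;
}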
@@ -1489,7 +1542,6 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_sub_if_data *sdata;
 	struct mesh_path *mpath;
 	struct sta_info *sta;
-	int err;
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
@@ -1500,17 +1552,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
 		return -ENOENT;
 	}
 
-	err = mesh_path_add(sdata, dst);
-	if (err) {
+	mpath = mesh_path_add(sdata, dst);
+	if (IS_ERR(mpath)) {
 		rcu_read_unlock();
-		return err;
+		return PTR_ERR(mpath);
 	}
 
-	mpath = mesh_path_lookup(sdata, dst);
-	if (!mpath) {
-		rcu_read_unlock();
-		return -ENXIO;
-	}
 	mesh_path_fix_nexthop(mpath, sta);
 
 	rcu_read_unlock();
@@ -1687,6 +1734,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
 	ifmsh->mesh_sp_id = setup->sync_method;
 	ifmsh->mesh_pp_id = setup->path_sel_proto;
 	ifmsh->mesh_pm_id = setup->path_metric;
+	ifmsh->user_mpm = setup->user_mpm;
 	ifmsh->security = IEEE80211_MESH_SEC_NONE;
 	if (setup->is_authenticated)
 		ifmsh->security |= IEEE80211_MESH_SEC_AUTHED;
@@ -1730,8 +1778,11 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
 		conf->dot11MeshTTL = nconf->dot11MeshTTL;
 	if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask))
 		conf->element_ttl = nconf->element_ttl;
-	if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask))
+	if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) {
+		if (ifmsh->user_mpm)
+			return -EBUSY;
 		conf->auto_open_plinks = nconf->auto_open_plinks;
+	}
 	if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask))
 		conf->dot11MeshNbrOffsetMaxNeighbor =
 			nconf->dot11MeshNbrOffsetMaxNeighbor;
@@ -1910,12 +1961,20 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 	}
 
 	if (params->p2p_ctwindow >= 0) {
-		sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow;
+		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &=
+					~IEEE80211_P2P_OPPPS_CTWINDOW_MASK;
+		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |=
+			params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK;
 		changed |= BSS_CHANGED_P2P_PS;
 	}
 
-	if (params->p2p_opp_ps >= 0) {
-		sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps;
+	if (params->p2p_opp_ps > 0) {
+		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |=
+					IEEE80211_P2P_OPPPS_ENABLE_BIT;
+		changed |= BSS_CHANGED_P2P_PS;
+	} else if (params->p2p_opp_ps == 0) {
+		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &=
+					~IEEE80211_P2P_OPPPS_ENABLE_BIT;
 		changed |= BSS_CHANGED_P2P_PS;
 	}
 
1921 1980
@@ -2359,9 +2418,22 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
 	}
 
 	for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
+		struct ieee80211_supported_band *sband = wiphy->bands[i];
+		int j;
+
 		sdata->rc_rateidx_mask[i] = mask->control[i].legacy;
 		memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].mcs,
 		       sizeof(mask->control[i].mcs));
+
+		sdata->rc_has_mcs_mask[i] = false;
+		if (!sband)
+			continue;
+
+		for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++)
+			if (~sdata->rc_rateidx_mcs_mask[i][j]) {
+				sdata->rc_has_mcs_mask[i] = true;
+				break;
+			}
 	}
 
 	return 0;
@@ -2371,7 +2443,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
 				      struct ieee80211_sub_if_data *sdata,
 				      struct ieee80211_channel *channel,
 				      unsigned int duration, u64 *cookie,
-				      struct sk_buff *txskb)
+				      struct sk_buff *txskb,
+				      enum ieee80211_roc_type type)
 {
 	struct ieee80211_roc_work *roc, *tmp;
 	bool queued = false;
@@ -2390,6 +2463,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
 	roc->duration = duration;
 	roc->req_duration = duration;
 	roc->frame = txskb;
+	roc->type = type;
 	roc->mgmt_tx_cookie = (unsigned long)txskb;
 	roc->sdata = sdata;
 	INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work);
@@ -2420,7 +2494,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
 	if (!duration)
 		duration = 10;
 
-	ret = drv_remain_on_channel(local, sdata, channel, duration);
+	ret = drv_remain_on_channel(local, sdata, channel, duration, type);
 	if (ret) {
 		kfree(roc);
 		return ret;
@@ -2439,10 +2513,13 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
 	 *
 	 * If it hasn't started yet, just increase the duration
 	 * and add the new one to the list of dependents.
+	 * If the type of the new ROC has higher priority, modify the
+	 * type of the previous one to match that of the new one.
 	 */
 	if (!tmp->started) {
 		list_add_tail(&roc->list, &tmp->dependents);
 		tmp->duration = max(tmp->duration, roc->duration);
+		tmp->type = max(tmp->type, roc->type);
 		queued = true;
 		break;
 	}
@@ -2454,16 +2531,18 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
 	/*
 	 * In the offloaded ROC case, if it hasn't begun, add
 	 * this new one to the dependent list to be handled
-	 * when the the master one begins. If it has begun,
+	 * when the master one begins. If it has begun,
 	 * check that there's still a minimum time left and
 	 * if so, start this one, transmitting the frame, but
-	 * add it to the list directly after this one with a
-	 * a reduced time so we'll ask the driver to execute
+	 * add it to the list directly after this one with
+	 * a reduced time so we'll ask the driver to execute
 	 * it right after finishing the previous one, in the
 	 * hope that it'll also be executed right afterwards,
 	 * effectively extending the old one.
 	 * If there's no minimum time left, just add it to the
 	 * normal list.
+	 * TODO: the ROC type is ignored here, assuming that it
+	 * is better to immediately use the current ROC.
 	 */
 	if (!tmp->hw_begun) {
 		list_add_tail(&roc->list, &tmp->dependents);
@@ -2557,7 +2636,8 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy,
 
 	mutex_lock(&local->mtx);
 	ret = ieee80211_start_roc_work(local, sdata, chan,
-				       duration, cookie, NULL);
+				       duration, cookie, NULL,
+				       IEEE80211_ROC_TYPE_NORMAL);
 	mutex_unlock(&local->mtx);
 
 	return ret;
@@ -2792,7 +2872,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
 
 	/* This will handle all kinds of coalescing and immediate TX */
 	ret = ieee80211_start_roc_work(local, sdata, chan,
-				       wait, cookie, skb);
+				       wait, cookie, skb,
+				       IEEE80211_ROC_TYPE_MGMT_TX);
 	if (ret)
 		kfree_skb(skb);
  out_unlock:
@@ -3302,9 +3383,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy,
 		if (local->use_chanctx)
 			*chandef = local->monitor_chandef;
 		else
-			cfg80211_chandef_create(chandef,
-						local->_oper_channel,
-						local->_oper_channel_type);
+			*chandef = local->_oper_chandef;
 		ret = 0;
 	}
 	rcu_read_unlock();
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 931be419ab5a..03e8d2e3270e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -22,7 +22,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,
 	drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH);
 
 	if (!local->use_chanctx) {
-		local->_oper_channel_type = cfg80211_get_chandef_type(chandef);
+		local->_oper_chandef = *chandef;
 		ieee80211_hw_config(local, 0);
 	}
 }
@@ -57,6 +57,22 @@ ieee80211_find_chanctx(struct ieee80211_local *local,
 	return NULL;
 }
 
+static bool ieee80211_is_radar_required(struct ieee80211_local *local)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+		if (sdata->radar_required) {
+			rcu_read_unlock();
+			return true;
+		}
+	}
+	rcu_read_unlock();
+
+	return false;
+}
+
 static struct ieee80211_chanctx *
 ieee80211_new_chanctx(struct ieee80211_local *local,
 		      const struct cfg80211_chan_def *chandef,
@@ -76,6 +92,9 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
 	ctx->conf.rx_chains_static = 1;
 	ctx->conf.rx_chains_dynamic = 1;
 	ctx->mode = mode;
+	ctx->conf.radar_enabled = ieee80211_is_radar_required(local);
+	if (!local->use_chanctx)
+		local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
 
 	/* acquire mutex to prevent idle from changing */
 	mutex_lock(&local->mtx);
@@ -85,9 +104,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
 	ieee80211_hw_config(local, changed);
 
 	if (!local->use_chanctx) {
-		local->_oper_channel_type =
-			cfg80211_get_chandef_type(chandef);
-		local->_oper_channel = chandef->chan;
+		local->_oper_chandef = *chandef;
 		ieee80211_hw_config(local, 0);
 	} else {
 		err = drv_add_chanctx(local, ctx);
@@ -112,12 +129,24 @@ ieee80211_new_chanctx(struct ieee80211_local *local,
 static void ieee80211_free_chanctx(struct ieee80211_local *local,
 				   struct ieee80211_chanctx *ctx)
 {
+	bool check_single_channel = false;
 	lockdep_assert_held(&local->chanctx_mtx);
 
 	WARN_ON_ONCE(ctx->refcount != 0);
 
 	if (!local->use_chanctx) {
-		local->_oper_channel_type = NL80211_CHAN_NO_HT;
+		struct cfg80211_chan_def *chandef = &local->_oper_chandef;
+		chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
+		chandef->center_freq1 = chandef->chan->center_freq;
+		chandef->center_freq2 = 0;
+
+		/* NOTE: Disabling radar is only valid here for
+		 * single channel context. To be sure, check it ...
+		 */
+		if (local->hw.conf.radar_enabled)
+			check_single_channel = true;
+		local->hw.conf.radar_enabled = false;
+
 		ieee80211_hw_config(local, 0);
 	} else {
 		drv_remove_chanctx(local, ctx);
@@ -126,6 +155,9 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local,
 	list_del_rcu(&ctx->list);
 	kfree_rcu(ctx, rcu_head);
 
+	/* throw a warning if this wasn't the only channel context. */
+	WARN_ON(check_single_channel && !list_empty(&local->chanctx_list));
+
 	mutex_lock(&local->mtx);
 	ieee80211_recalc_idle(local);
 	mutex_unlock(&local->mtx);
@@ -237,19 +269,11 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)
 void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,
 				    struct ieee80211_chanctx *chanctx)
 {
-	struct ieee80211_sub_if_data *sdata;
-	bool radar_enabled = false;
+	bool radar_enabled;
 
 	lockdep_assert_held(&local->chanctx_mtx);
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-		if (sdata->radar_required) {
-			radar_enabled = true;
-			break;
-		}
-	}
-	rcu_read_unlock();
+	radar_enabled = ieee80211_is_radar_required(local);
 
 	if (radar_enabled == chanctx->conf.radar_enabled)
 		return;
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index c3a3082b72e5..1521cabad3d6 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -295,7 +295,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
 	char buf[50];
 	struct ieee80211_key *key;
 
-	if (!sdata->debugfs.dir)
+	if (!sdata->vif.debugfs_dir)
 		return;
 
 	lockdep_assert_held(&sdata->local->key_mtx);
@@ -311,7 +311,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
 		sprintf(buf, "../keys/%d", key->debugfs.cnt);
 		sdata->debugfs.default_unicast_key =
 			debugfs_create_symlink("default_unicast_key",
-					       sdata->debugfs.dir, buf);
+					       sdata->vif.debugfs_dir, buf);
 	}
 
 	if (sdata->debugfs.default_multicast_key) {
@@ -325,7 +325,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)
 		sprintf(buf, "../keys/%d", key->debugfs.cnt);
 		sdata->debugfs.default_multicast_key =
 			debugfs_create_symlink("default_multicast_key",
-					       sdata->debugfs.dir, buf);
+					       sdata->vif.debugfs_dir, buf);
 	}
 }
 
@@ -334,7 +334,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
 	char buf[50];
 	struct ieee80211_key *key;
 
-	if (!sdata->debugfs.dir)
+	if (!sdata->vif.debugfs_dir)
 		return;
 
 	key = key_mtx_dereference(sdata->local,
@@ -343,7 +343,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
 		sprintf(buf, "../keys/%d", key->debugfs.cnt);
 		sdata->debugfs.default_mgmt_key =
 			debugfs_create_symlink("default_mgmt_key",
-					       sdata->debugfs.dir, buf);
+					       sdata->vif.debugfs_dir, buf);
 	} else
 		ieee80211_debugfs_key_remove_mgmt_default(sdata);
 }
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 059bbb82e84f..14abcf44f974 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -124,6 +124,15 @@ static ssize_t ieee80211_if_fmt_##name( \
 	return scnprintf(buf, buflen, "%d\n", sdata->field / 16);	\
 }
 
+#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field)			\
+static ssize_t ieee80211_if_fmt_##name(					\
+	const struct ieee80211_sub_if_data *sdata,			\
+	char *buf, int buflen)						\
+{									\
+	return scnprintf(buf, buflen, "%d\n",				\
+			 jiffies_to_msecs(sdata->field));		\
+}
+
 #define __IEEE80211_IF_FILE(name, _write)				\
 static ssize_t ieee80211_if_read_##name(struct file *file,		\
 					char __user *userbuf,		\
@@ -197,6 +206,7 @@ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
 IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
 IEEE80211_IF_FILE(last_beacon, u.mgd.last_beacon_signal, DEC);
 IEEE80211_IF_FILE(ave_beacon, u.mgd.ave_beacon_signal, DEC_DIV_16);
+IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS);
 
 static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
 			      enum ieee80211_smps_mode smps_mode)
@@ -521,7 +531,7 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration,
 #endif
 
 #define DEBUGFS_ADD_MODE(name, mode) \
-	debugfs_create_file(#name, mode, sdata->debugfs.dir, \
+	debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \
 			    sdata, &name##_ops);
 
 #define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400)
@@ -542,6 +552,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 	DEBUGFS_ADD(aid);
 	DEBUGFS_ADD(last_beacon);
 	DEBUGFS_ADD(ave_beacon);
+	DEBUGFS_ADD(beacon_timeout);
 	DEBUGFS_ADD_MODE(smps, 0600);
 	DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
 	DEBUGFS_ADD_MODE(uapsd_queues, 0600);
@@ -577,7 +588,7 @@ static void add_mesh_files(struct ieee80211_sub_if_data *sdata)
 static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
 {
 	struct dentry *dir = debugfs_create_dir("mesh_stats",
-						sdata->debugfs.dir);
+						sdata->vif.debugfs_dir);
 #define MESHSTATS_ADD(name)\
 	debugfs_create_file(#name, 0400, dir, sdata, &name##_ops);
 
@@ -594,7 +605,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
 static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
 {
 	struct dentry *dir = debugfs_create_dir("mesh_config",
-						sdata->debugfs.dir);
+						sdata->vif.debugfs_dir);
 
 #define MESHPARAMS_ADD(name) \
 	debugfs_create_file(#name, 0600, dir, sdata, &name##_ops);
@@ -631,7 +642,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
 
 static void add_files(struct ieee80211_sub_if_data *sdata)
 {
-	if (!sdata->debugfs.dir)
+	if (!sdata->vif.debugfs_dir)
 		return;
 
 	DEBUGFS_ADD(flags);
@@ -673,21 +684,21 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
 	char buf[10+IFNAMSIZ];
 
 	sprintf(buf, "netdev:%s", sdata->name);
-	sdata->debugfs.dir = debugfs_create_dir(buf,
+	sdata->vif.debugfs_dir = debugfs_create_dir(buf,
 		sdata->local->hw.wiphy->debugfsdir);
-	if (sdata->debugfs.dir)
+	if (sdata->vif.debugfs_dir)
 		sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
-			sdata->debugfs.dir);
+			sdata->vif.debugfs_dir);
 	add_files(sdata);
 }
 
 void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
 {
-	if (!sdata->debugfs.dir)
+	if (!sdata->vif.debugfs_dir)
 		return;
 
-	debugfs_remove_recursive(sdata->debugfs.dir);
-	sdata->debugfs.dir = NULL;
+	debugfs_remove_recursive(sdata->vif.debugfs_dir);
+	sdata->vif.debugfs_dir = NULL;
 }
 
 void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
@@ -695,7 +706,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
 	struct dentry *dir;
 	char buf[10 + IFNAMSIZ];
 
-	dir = sdata->debugfs.dir;
+	dir = sdata->vif.debugfs_dir;
 
 	if (!dir)
 		return;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index c7591f73dbc3..44e201d60a13 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -54,6 +54,7 @@ STA_FILE(aid, sta.aid, D);
54STA_FILE(dev, sdata->name, S); 54STA_FILE(dev, sdata->name, S);
55STA_FILE(last_signal, last_signal, D); 55STA_FILE(last_signal, last_signal, D);
56STA_FILE(last_ack_signal, last_ack_signal, D); 56STA_FILE(last_ack_signal, last_ack_signal, D);
57STA_FILE(beacon_loss_count, beacon_loss_count, D);
57 58
58static ssize_t sta_flags_read(struct file *file, char __user *userbuf, 59static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
59 size_t count, loff_t *ppos) 60 size_t count, loff_t *ppos)
@@ -325,6 +326,36 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
 }
 STA_OPS(ht_capa);
 
+static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
+				 size_t count, loff_t *ppos)
+{
+	char buf[128], *p = buf;
+	struct sta_info *sta = file->private_data;
+	struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap;
+
+	p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n",
+		       vhtc->vht_supported ? "" : "not ");
+	if (vhtc->vht_supported) {
+		p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.8x\n", vhtc->cap);
+
+		p += scnprintf(p, sizeof(buf)+buf-p, "RX MCS: %.4x\n",
+			       le16_to_cpu(vhtc->vht_mcs.rx_mcs_map));
+		if (vhtc->vht_mcs.rx_highest)
+			p += scnprintf(p, sizeof(buf)+buf-p,
+				       "MCS RX highest: %d Mbps\n",
+				       le16_to_cpu(vhtc->vht_mcs.rx_highest));
+		p += scnprintf(p, sizeof(buf)+buf-p, "TX MCS: %.4x\n",
+			       le16_to_cpu(vhtc->vht_mcs.tx_mcs_map));
+		if (vhtc->vht_mcs.tx_highest)
+			p += scnprintf(p, sizeof(buf)+buf-p,
+				       "MCS TX highest: %d Mbps\n",
+				       le16_to_cpu(vhtc->vht_mcs.tx_highest));
+	}
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+}
+STA_OPS(vht_capa);
+
 static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf,
 					size_t count, loff_t *ppos)
 {
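
sta_vht_capa_read() above uses the p += scnprintf(p, sizeof(buf) + buf - p, ...) idiom: each call is offered exactly the space left between the cursor and the end of the buffer, so the sequence cannot overrun, and the kernel's scnprintf (unlike snprintf) never reports more than it actually wrote. A userspace sketch with snprintf standing in, sized so nothing truncates:

#include <stdio.h>

int main(void)
{
	char buf[64], *p = buf;

	/* each write is offered only the space left after the cursor */
	p += snprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n", "");
	p += snprintf(p, sizeof(buf) + buf - p, "cap: %#.8x\n", 0x03c0u);
	p += snprintf(p, sizeof(buf) + buf - p, "RX MCS: %.4x\n", 0xfffau);

	/* note: snprintf returns the would-be length on truncation,
	 * while the kernel's scnprintf caps it at the space given --
	 * which is what makes the kernel idiom safe even when full */
	fwrite(buf, 1, p - buf, stdout);
	return 0;
}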
@@ -404,7 +435,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
 	DEBUGFS_ADD(agg_status);
 	DEBUGFS_ADD(dev);
 	DEBUGFS_ADD(last_signal);
+	DEBUGFS_ADD(beacon_loss_count);
 	DEBUGFS_ADD(ht_capa);
+	DEBUGFS_ADD(vht_capa);
 	DEBUGFS_ADD(last_ack_signal);
 	DEBUGFS_ADD(current_tx_rate);
 	DEBUGFS_ADD(last_rx_rate);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index ee56d0779d8b..169664c122e2 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -241,6 +241,22 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
 	return ret;
 }
 
+static inline void drv_set_multicast_list(struct ieee80211_local *local,
+					  struct ieee80211_sub_if_data *sdata,
+					  struct netdev_hw_addr_list *mc_list)
+{
+	bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI;
+
+	trace_drv_set_multicast_list(local, sdata, mc_list->count);
+
+	check_sdata_in_driver(sdata);
+
+	if (local->ops->set_multicast_list)
+		local->ops->set_multicast_list(&local->hw, &sdata->vif,
+					       allmulti, mc_list);
+	trace_drv_return_void(local);
+}
+
 static inline void drv_configure_filter(struct ieee80211_local *local,
 					unsigned int changed_flags,
 					unsigned int *total_flags,
@@ -531,43 +547,6 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local,
 	local->ops->sta_remove_debugfs(&local->hw, &sdata->vif,
 				       sta, dir);
 }
-
-static inline
-void drv_add_interface_debugfs(struct ieee80211_local *local,
-			       struct ieee80211_sub_if_data *sdata)
-{
-	might_sleep();
-
-	check_sdata_in_driver(sdata);
-
-	if (!local->ops->add_interface_debugfs)
-		return;
-
-	local->ops->add_interface_debugfs(&local->hw, &sdata->vif,
-					  sdata->debugfs.dir);
-}
-
-static inline
-void drv_remove_interface_debugfs(struct ieee80211_local *local,
-				  struct ieee80211_sub_if_data *sdata)
-{
-	might_sleep();
-
-	check_sdata_in_driver(sdata);
-
-	if (!local->ops->remove_interface_debugfs)
-		return;
-
-	local->ops->remove_interface_debugfs(&local->hw, &sdata->vif,
-					     sdata->debugfs.dir);
-}
-#else
-static inline
-void drv_add_interface_debugfs(struct ieee80211_local *local,
-			       struct ieee80211_sub_if_data *sdata) {}
-static inline
-void drv_remove_interface_debugfs(struct ieee80211_local *local,
-				  struct ieee80211_sub_if_data *sdata) {}
 #endif
 
 static inline __must_check
@@ -741,13 +720,14 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local)
 	local->ops->rfkill_poll(&local->hw);
 }
 
-static inline void drv_flush(struct ieee80211_local *local, bool drop)
+static inline void drv_flush(struct ieee80211_local *local,
+			     u32 queues, bool drop)
 {
 	might_sleep();
 
-	trace_drv_flush(local, drop);
+	trace_drv_flush(local, queues, drop);
 	if (local->ops->flush)
-		local->ops->flush(&local->hw, drop);
+		local->ops->flush(&local->hw, queues, drop);
 	trace_drv_return_void(local);
 }
 
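
drv_flush() now forwards a u32 queue bitmap, letting drivers flush a subset of hardware queues rather than everything. A sketch of how a driver might walk such a mask; the queue count and flush_one() are hypothetical:

#include <stdint.h>
#include <stdio.h>

#define HW_QUEUES 4

static void flush_one(int q, int drop)
{
	printf("flush queue %d%s\n", q, drop ? " (drop)" : "");
}

static void flush(uint32_t queues, int drop)
{
	int q;

	/* visit only the queues whose bit is set in the mask */
	for (q = 0; q < HW_QUEUES; q++)
		if (queues & (1u << q))
			flush_one(q, drop);
}

int main(void)
{
	flush(0x5, 0);	/* queues 0 and 2 only */
	return 0;
}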
@@ -787,15 +767,16 @@ static inline int drv_get_antenna(struct ieee80211_local *local,
 static inline int drv_remain_on_channel(struct ieee80211_local *local,
 					struct ieee80211_sub_if_data *sdata,
 					struct ieee80211_channel *chan,
-					unsigned int duration)
+					unsigned int duration,
+					enum ieee80211_roc_type type)
 {
 	int ret;
 
 	might_sleep();
 
-	trace_drv_remain_on_channel(local, sdata, chan, duration);
+	trace_drv_remain_on_channel(local, sdata, chan, duration, type);
 	ret = local->ops->remain_on_channel(&local->hw, &sdata->vif,
-					    chan, duration);
+					    chan, duration, type);
 	trace_drv_return_int(local, ret);
 
 	return ret;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 0db25d4bb223..af8cee06e4f3 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -40,13 +40,6 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,
 	if (!ht_cap->ht_supported)
 		return;
 
-	if (sdata->vif.type != NL80211_IFTYPE_STATION) {
-		/* AP interfaces call this code when adding new stations,
-		 * so just silently ignore non station interfaces.
-		 */
-		return;
-	}
-
 	/* NOTE: If you add more over-rides here, update register_hw
 	 * ht_capa_mod_msk logic in main.c as well.
 	 * And, if this method can ever change ht_cap.ht_supported, fix
@@ -97,7 +90,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
97 const struct ieee80211_ht_cap *ht_cap_ie, 90 const struct ieee80211_ht_cap *ht_cap_ie,
98 struct sta_info *sta) 91 struct sta_info *sta)
99{ 92{
100 struct ieee80211_sta_ht_cap ht_cap; 93 struct ieee80211_sta_ht_cap ht_cap, own_cap;
101 u8 ampdu_info, tx_mcs_set_cap; 94 u8 ampdu_info, tx_mcs_set_cap;
102 int i, max_tx_streams; 95 int i, max_tx_streams;
103 bool changed; 96 bool changed;
@@ -111,6 +104,18 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
111 104
112 ht_cap.ht_supported = true; 105 ht_cap.ht_supported = true;
113 106
107 own_cap = sband->ht_cap;
108
109 /*
110 * If user has specified capability over-rides, take care
111 * of that if the station we're setting up is the AP that
112 * we advertised a restricted capability set to. Override
113 * our own capabilities and then use those below.
114 */
115 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
116 !test_sta_flag(sta, WLAN_STA_TDLS_PEER))
117 ieee80211_apply_htcap_overrides(sdata, &own_cap);
118
114 /* 119 /*
115 * The bits listed in this expression should be 120 * The bits listed in this expression should be
116 * the same for the peer and us, if the station 121 * the same for the peer and us, if the station
@@ -118,21 +123,20 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
118 * we mask them out. 123 * we mask them out.
119 */ 124 */
120 ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) & 125 ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) &
121 (sband->ht_cap.cap | 126 (own_cap.cap | ~(IEEE80211_HT_CAP_LDPC_CODING |
122 ~(IEEE80211_HT_CAP_LDPC_CODING | 127 IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
123 IEEE80211_HT_CAP_SUP_WIDTH_20_40 | 128 IEEE80211_HT_CAP_GRN_FLD |
124 IEEE80211_HT_CAP_GRN_FLD | 129 IEEE80211_HT_CAP_SGI_20 |
125 IEEE80211_HT_CAP_SGI_20 | 130 IEEE80211_HT_CAP_SGI_40 |
126 IEEE80211_HT_CAP_SGI_40 | 131 IEEE80211_HT_CAP_DSSSCCK40));
127 IEEE80211_HT_CAP_DSSSCCK40));
128 132
129 /* 133 /*
130 * The STBC bits are asymmetric -- if we don't have 134 * The STBC bits are asymmetric -- if we don't have
131 * TX then mask out the peer's RX and vice versa. 135 * TX then mask out the peer's RX and vice versa.
132 */ 136 */
133 if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)) 137 if (!(own_cap.cap & IEEE80211_HT_CAP_TX_STBC))
134 ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC; 138 ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC;
135 if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)) 139 if (!(own_cap.cap & IEEE80211_HT_CAP_RX_STBC))
136 ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC; 140 ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC;
137 141
138 ampdu_info = ht_cap_ie->ampdu_params_info; 142 ampdu_info = ht_cap_ie->ampdu_params_info;
@@ -142,7 +146,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
142 (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; 146 (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2;
143 147
144 /* own MCS TX capabilities */ 148 /* own MCS TX capabilities */
145 tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; 149 tx_mcs_set_cap = own_cap.mcs.tx_params;
146 150
147 /* Copy peer MCS TX capabilities, the driver might need them. */ 151 /* Copy peer MCS TX capabilities, the driver might need them. */
148 ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params; 152 ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params;
@@ -168,26 +172,20 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,
168 */ 172 */
169 for (i = 0; i < max_tx_streams; i++) 173 for (i = 0; i < max_tx_streams; i++)
170 ht_cap.mcs.rx_mask[i] = 174 ht_cap.mcs.rx_mask[i] =
171 sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; 175 own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i];
172 176
173 if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION) 177 if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION)
174 for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE; 178 for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE;
175 i < IEEE80211_HT_MCS_MASK_LEN; i++) 179 i < IEEE80211_HT_MCS_MASK_LEN; i++)
176 ht_cap.mcs.rx_mask[i] = 180 ht_cap.mcs.rx_mask[i] =
177 sband->ht_cap.mcs.rx_mask[i] & 181 own_cap.mcs.rx_mask[i] &
178 ht_cap_ie->mcs.rx_mask[i]; 182 ht_cap_ie->mcs.rx_mask[i];
179 183
180 /* handle MCS rate 32 too */ 184 /* handle MCS rate 32 too */
181 if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) 185 if (own_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1)
182 ht_cap.mcs.rx_mask[32/8] |= 1; 186 ht_cap.mcs.rx_mask[32/8] |= 1;
183 187
184 apply: 188 apply:
185 /*
186 * If user has specified capability over-rides, take care
187 * of that here.
188 */
189 ieee80211_apply_htcap_overrides(sdata, &ht_cap);
190
191 changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); 189 changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
192 190
193 memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); 191 memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));
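The ht.c change above moves the user HT-capability overrides to before the intersection with the peer's capabilities: the overrides are applied to a copy of our own caps (own_cap) and the masking then runs against that copy. A minimal sketch of the masking logic itself, with made-up flag values (not the real IEEE80211_HT_CAP_* bits):

#include <stdio.h>

#define CAP_SGI_20  0x0020
#define CAP_SGI_40  0x0040
#define CAP_TX_STBC 0x0080
#define CAP_RX_STBC 0x0100

int main(void)
{
    unsigned int own  = CAP_SGI_20 | CAP_RX_STBC;              /* after overrides */
    unsigned int peer = CAP_SGI_20 | CAP_SGI_40 | CAP_TX_STBC; /* from the IE */

    /* Symmetric features are usable only if both sides support them. */
    unsigned int symmetric = CAP_SGI_20 | CAP_SGI_40;
    unsigned int cap = peer & (own | ~symmetric);

    /* STBC is asymmetric: our missing TX masks the peer's RX and vice versa. */
    if (!(own & CAP_TX_STBC))
        cap &= ~CAP_RX_STBC;
    if (!(own & CAP_RX_STBC))
        cap &= ~CAP_TX_STBC;

    printf("negotiated caps: 0x%04x\n", cap & 0xffff); /* SGI_20 | TX_STBC */
    return 0;
}
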
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 40b71dfcc79d..170f9a7fa319 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -44,7 +44,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
 	struct ieee80211_local *local = sdata->local;
 	int rates, i;
-	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos;
 	struct ieee80211_supported_band *sband;
@@ -52,20 +51,14 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	u32 bss_change;
 	u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
 	struct cfg80211_chan_def chandef;
+	struct beacon_data *presp;
+	int frame_len;
 
 	lockdep_assert_held(&ifibss->mtx);
 
 	/* Reset own TSF to allow time synchronization work. */
 	drv_reset_tsf(local, sdata);
 
-	skb = ifibss->skb;
-	RCU_INIT_POINTER(ifibss->presp, NULL);
-	synchronize_rcu();
-	skb->data = skb->head;
-	skb->len = 0;
-	skb_reset_tail_pointer(skb);
-	skb_reserve(skb, sdata->local->hw.extra_tx_headroom);
-
 	if (!ether_addr_equal(ifibss->bssid, bssid))
 		sta_info_flush(sdata);
 
@@ -73,10 +66,19 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	if (sdata->vif.bss_conf.ibss_joined) {
 		sdata->vif.bss_conf.ibss_joined = false;
 		sdata->vif.bss_conf.ibss_creator = false;
+		sdata->vif.bss_conf.enable_beacon = false;
 		netif_carrier_off(sdata->dev);
-		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS);
+		ieee80211_bss_info_change_notify(sdata,
+						 BSS_CHANGED_IBSS |
+						 BSS_CHANGED_BEACON_ENABLED);
 	}
 
+	presp = rcu_dereference_protected(ifibss->presp,
+					  lockdep_is_held(&ifibss->mtx));
+	rcu_assign_pointer(ifibss->presp, NULL);
+	if (presp)
+		kfree_rcu(presp, rcu_head);
+
 	sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
 
 	cfg80211_chandef_create(&chandef, chan, ifibss->channel_type);
@@ -98,19 +100,24 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 
 	sband = local->hw.wiphy->bands[chan->band];
 
-	/* build supported rates array */
-	pos = supp_rates;
-	for (i = 0; i < sband->n_bitrates; i++) {
-		int rate = sband->bitrates[i].bitrate;
-		u8 basic = 0;
-		if (basic_rates & BIT(i))
-			basic = 0x80;
-		*pos++ = basic | (u8) (rate / 5);
-	}
-
 	/* Build IBSS probe response */
-	mgmt = (void *) skb_put(skb, 24 + sizeof(mgmt->u.beacon));
-	memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon));
+	frame_len = sizeof(struct ieee80211_hdr_3addr) +
+		    12 /* struct ieee80211_mgmt.u.beacon */ +
+		    2 + IEEE80211_MAX_SSID_LEN /* max SSID */ +
+		    2 + 8 /* max Supported Rates */ +
+		    3 /* max DS params */ +
+		    4 /* IBSS params */ +
+		    2 + (IEEE80211_MAX_SUPP_RATES - 8) +
+		    2 + sizeof(struct ieee80211_ht_cap) +
+		    2 + sizeof(struct ieee80211_ht_operation) +
+		    ifibss->ie_len;
+	presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL);
+	if (!presp)
+		return;
+
+	presp->head = (void *)(presp + 1);
+
+	mgmt = (void *) presp->head;
 	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
 					  IEEE80211_STYPE_PROBE_RESP);
 	eth_broadcast_addr(mgmt->da);
@@ -120,27 +127,30 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	mgmt->u.beacon.timestamp = cpu_to_le64(tsf);
 	mgmt->u.beacon.capab_info = cpu_to_le16(capability);
 
-	pos = skb_put(skb, 2 + ifibss->ssid_len);
+	pos = (u8 *)mgmt + offsetof(struct ieee80211_mgmt, u.beacon.variable);
+
 	*pos++ = WLAN_EID_SSID;
 	*pos++ = ifibss->ssid_len;
 	memcpy(pos, ifibss->ssid, ifibss->ssid_len);
+	pos += ifibss->ssid_len;
 
-	rates = sband->n_bitrates;
-	if (rates > 8)
-		rates = 8;
-	pos = skb_put(skb, 2 + rates);
+	rates = min_t(int, 8, sband->n_bitrates);
 	*pos++ = WLAN_EID_SUPP_RATES;
 	*pos++ = rates;
-	memcpy(pos, supp_rates, rates);
+	for (i = 0; i < rates; i++) {
+		int rate = sband->bitrates[i].bitrate;
+		u8 basic = 0;
+		if (basic_rates & BIT(i))
+			basic = 0x80;
+		*pos++ = basic | (u8) (rate / 5);
+	}
 
 	if (sband->band == IEEE80211_BAND_2GHZ) {
-		pos = skb_put(skb, 2 + 1);
 		*pos++ = WLAN_EID_DS_PARAMS;
 		*pos++ = 1;
 		*pos++ = ieee80211_frequency_to_channel(chan->center_freq);
 	}
 
-	pos = skb_put(skb, 2 + 2);
 	*pos++ = WLAN_EID_IBSS_PARAMS;
 	*pos++ = 2;
 	/* FIX: set ATIM window based on scan results */
@@ -148,23 +158,25 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	*pos++ = 0;
 
 	if (sband->n_bitrates > 8) {
-		rates = sband->n_bitrates - 8;
-		pos = skb_put(skb, 2 + rates);
 		*pos++ = WLAN_EID_EXT_SUPP_RATES;
-		*pos++ = rates;
-		memcpy(pos, &supp_rates[8], rates);
+		*pos++ = sband->n_bitrates - 8;
+		for (i = 8; i < sband->n_bitrates; i++) {
+			int rate = sband->bitrates[i].bitrate;
+			u8 basic = 0;
+			if (basic_rates & BIT(i))
+				basic = 0x80;
+			*pos++ = basic | (u8) (rate / 5);
+		}
 	}
 
-	if (ifibss->ie_len)
-		memcpy(skb_put(skb, ifibss->ie_len),
-		       ifibss->ie, ifibss->ie_len);
+	if (ifibss->ie_len) {
+		memcpy(pos, ifibss->ie, ifibss->ie_len);
+		pos += ifibss->ie_len;
+	}
 
 	/* add HT capability and information IEs */
 	if (chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&
 	    sband->ht_cap.ht_supported) {
-		pos = skb_put(skb, 4 +
-			      sizeof(struct ieee80211_ht_cap) +
-			      sizeof(struct ieee80211_ht_operation));
 		pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap,
 						sband->ht_cap.cap);
 		/*
@@ -177,7 +189,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (local->hw.queues >= IEEE80211_NUM_ACS) {
-		pos = skb_put(skb, 9);
 		*pos++ = WLAN_EID_VENDOR_SPECIFIC;
 		*pos++ = 7; /* len */
 		*pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
@@ -189,11 +200,17 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 		*pos++ = 0; /* U-APSD no in use */
 	}
 
-	rcu_assign_pointer(ifibss->presp, skb);
+	presp->head_len = pos - presp->head;
+	if (WARN_ON(presp->head_len > frame_len))
+		return;
+
+	rcu_assign_pointer(ifibss->presp, presp);
 
 	sdata->vif.bss_conf.enable_beacon = true;
 	sdata->vif.bss_conf.beacon_int = beacon_int;
 	sdata->vif.bss_conf.basic_rates = basic_rates;
+	sdata->vif.bss_conf.ssid_len = ifibss->ssid_len;
+	memcpy(sdata->vif.bss_conf.ssid, ifibss->ssid, ifibss->ssid_len);
 	bss_change = BSS_CHANGED_BEACON_INT;
 	bss_change |= ieee80211_reset_erp_info(sdata);
 	bss_change |= BSS_CHANGED_BSSID;
@@ -202,6 +219,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	bss_change |= BSS_CHANGED_BASIC_RATES;
 	bss_change |= BSS_CHANGED_HT;
 	bss_change |= BSS_CHANGED_IBSS;
+	bss_change |= BSS_CHANGED_SSID;
 
 	/*
 	 * In 5 GHz/802.11a, we can always use short slot time.
@@ -227,7 +245,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 			  round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
 
 	bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan,
-					mgmt, skb->len, 0, GFP_KERNEL);
+					mgmt, presp->head_len, 0, GFP_KERNEL);
 	cfg80211_put_bss(local->hw.wiphy, bss);
 	netif_carrier_on(sdata->dev);
 	cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL);
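The rework above replaces the preallocated skb with a beacon_data allocation sized to a worst-case IE length, filled with a moving write pointer, and measured afterwards. A reduced, self-contained sketch of that size-then-fill-then-check pattern (frame_buf is a stand-in, not the kernel's beacon_data):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Header struct followed by the frame bytes in one allocation,
 * like kzalloc(sizeof(*presp) + frame_len). */
struct frame_buf {
    uint8_t *head;
    size_t head_len;
};

int main(void)
{
    const char ssid[] = "demo";
    /* Worst-case length first, then fill and measure. */
    size_t max_len = 2 + 32 /* SSID IE */ + 2 + 8 /* rates IE */;
    struct frame_buf *buf = calloc(1, sizeof(*buf) + max_len);
    if (!buf)
        return 1;
    buf->head = (uint8_t *)(buf + 1);

    uint8_t *pos = buf->head;
    *pos++ = 0;                    /* element ID: SSID */
    *pos++ = sizeof(ssid) - 1;     /* element length */
    memcpy(pos, ssid, sizeof(ssid) - 1);
    pos += sizeof(ssid) - 1;

    buf->head_len = pos - buf->head;
    assert(buf->head_len <= max_len);  /* the WARN_ON in the patch */
    printf("built %zu of at most %zu bytes\n", buf->head_len, max_len);
    free(buf);
    return 0;
}
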
@@ -448,7 +466,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
 	bool rates_updated = false;
 
-	if (elems->ds_params && elems->ds_params_len == 1)
+	if (elems->ds_params)
 		freq = ieee80211_channel_to_frequency(elems->ds_params[0],
 						      band);
 	else
@@ -822,8 +840,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_local *local = sdata->local;
 	int tx_last_beacon, len = req->len;
 	struct sk_buff *skb;
-	struct ieee80211_mgmt *resp;
-	struct sk_buff *presp;
+	struct beacon_data *presp;
 	u8 *pos, *end;
 
 	lockdep_assert_held(&ifibss->mtx);
@@ -864,13 +881,15 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 	}
 
 	/* Reply with ProbeResp */
-	skb = skb_copy(presp, GFP_KERNEL);
+	skb = dev_alloc_skb(local->tx_headroom + presp->head_len);
 	if (!skb)
 		return;
 
-	resp = (struct ieee80211_mgmt *) skb->data;
-	memcpy(resp->da, mgmt->sa, ETH_ALEN);
-	ibss_dbg(sdata, "Sending ProbeResp to %pM\n", resp->da);
+	skb_reserve(skb, local->tx_headroom);
+	memcpy(skb_put(skb, presp->head_len), presp->head, presp->head_len);
+
+	memcpy(((struct ieee80211_mgmt *) skb->data)->da, mgmt->sa, ETH_ALEN);
+	ibss_dbg(sdata, "Sending ProbeResp to %pM\n", mgmt->sa);
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
 	ieee80211_tx_skb(sdata, skb);
 }
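Since the stored probe response is now a flat template rather than an skb, the reply is rebuilt by reserving TX headroom and appending the template bytes. A toy model of that skb_reserve()/skb_put() discipline (struct buf and its helpers are illustrative, not kernel APIs):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy buffer with reserved headroom. */
struct buf {
    unsigned char *data;  /* start of payload */
    size_t len;
    unsigned char mem[256];
};

static struct buf *alloc_buf(size_t headroom)
{
    struct buf *b = calloc(1, sizeof(*b));
    if (b)
        b->data = b->mem + headroom;  /* like skb_reserve() */
    return b;
}

static unsigned char *buf_put(struct buf *b, size_t n)
{
    unsigned char *p = b->data + b->len;  /* like skb_put() */
    b->len += n;
    return p;
}

int main(void)
{
    const unsigned char tmpl[] = "probe-response-template";
    struct buf *b = alloc_buf(32 /* tx headroom */);
    if (!b)
        return 1;
    memcpy(buf_put(b, sizeof(tmpl)), tmpl, sizeof(tmpl));
    printf("copied %zu bytes after 32 bytes of headroom\n", b->len);
    free(b);
    return 0;
}
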
@@ -895,7 +914,7 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata,
 		return;
 
 	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
-			       &elems);
+			       false, &elems);
 
 	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
 }
@@ -985,36 +1004,9 @@ static void ieee80211_ibss_timer(unsigned long data)
 {
 	struct ieee80211_sub_if_data *sdata =
 		(struct ieee80211_sub_if_data *) data;
-	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
-	struct ieee80211_local *local = sdata->local;
-
-	if (local->quiescing) {
-		ifibss->timer_running = true;
-		return;
-	}
-
-	ieee80211_queue_work(&local->hw, &sdata->work);
-}
-
-#ifdef CONFIG_PM
-void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata)
-{
-	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
 
-	if (del_timer_sync(&ifibss->timer))
-		ifibss->timer_running = true;
-}
-
-void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata)
-{
-	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
-
-	if (ifibss->timer_running) {
-		add_timer(&ifibss->timer);
-		ifibss->timer_running = false;
-	}
+	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 }
-#endif
 
 void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
 {
@@ -1047,23 +1039,8 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
 int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 			struct cfg80211_ibss_params *params)
 {
-	struct sk_buff *skb;
 	u32 changed = 0;
 
-	skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom +
-			    sizeof(struct ieee80211_hdr_3addr) +
-			    12 /* struct ieee80211_mgmt.u.beacon */ +
-			    2 + IEEE80211_MAX_SSID_LEN /* max SSID */ +
-			    2 + 8 /* max Supported Rates */ +
-			    3 /* max DS params */ +
-			    4 /* IBSS params */ +
-			    2 + (IEEE80211_MAX_SUPP_RATES - 8) +
-			    2 + sizeof(struct ieee80211_ht_cap) +
-			    2 + sizeof(struct ieee80211_ht_operation) +
-			    params->ie_len);
-	if (!skb)
-		return -ENOMEM;
-
 	mutex_lock(&sdata->u.ibss.mtx);
 
 	if (params->bssid) {
@@ -1092,7 +1069,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 		sdata->u.ibss.ie_len = params->ie_len;
 	}
 
-	sdata->u.ibss.skb = skb;
 	sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH;
 	sdata->u.ibss.ibss_join_req = jiffies;
 
@@ -1128,13 +1104,13 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 
 int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
 {
-	struct sk_buff *skb;
 	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
 	struct ieee80211_local *local = sdata->local;
 	struct cfg80211_bss *cbss;
 	u16 capability;
 	int active_ibss;
 	struct sta_info *sta;
+	struct beacon_data *presp;
 
 	mutex_lock(&sdata->u.ibss.mtx);
 
@@ -1180,17 +1156,18 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
 
 	/* remove beacon */
 	kfree(sdata->u.ibss.ie);
-	skb = rcu_dereference_protected(sdata->u.ibss.presp,
-					lockdep_is_held(&sdata->u.ibss.mtx));
+	presp = rcu_dereference_protected(ifibss->presp,
+					  lockdep_is_held(&sdata->u.ibss.mtx));
 	RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);
 	sdata->vif.bss_conf.ibss_joined = false;
 	sdata->vif.bss_conf.ibss_creator = false;
 	sdata->vif.bss_conf.enable_beacon = false;
+	sdata->vif.bss_conf.ssid_len = 0;
 	clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
 						BSS_CHANGED_IBSS);
 	synchronize_rcu();
-	kfree(presp);
+	kfree(presp);
 
 	skb_queue_purge(&sdata->skb_queue);
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5672533a0832..158e6eb188d3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -156,6 +156,7 @@ struct ieee80211_tx_data {
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta;
 	struct ieee80211_key *key;
+	struct ieee80211_tx_rate rate;
 
 	unsigned int flags;
 };
@@ -316,6 +317,7 @@ struct ieee80211_roc_work {
 	u32 duration, req_duration;
 	struct sk_buff *frame;
 	u64 cookie, mgmt_tx_cookie;
+	enum ieee80211_roc_type type;
 };
 
 /* flags used in struct ieee80211_if_managed.flags */
@@ -401,7 +403,6 @@ struct ieee80211_if_managed {
 
 	u16 aid;
 
-	unsigned long timers_running; /* used for quiesce/restart */
 	bool powersave; /* powersave requested for this iface */
 	bool broken_ap; /* AP is broken -- turn off powersave */
 	u8 dtim_period;
@@ -443,7 +444,7 @@ struct ieee80211_if_managed {
 
 	u8 use_4addr;
 
-	u8 p2p_noa_index;
+	s16 p2p_noa_index;
 
 	/* Signal strength from the last Beacon frame in the current BSS. */
 	int last_beacon_signal;
@@ -480,6 +481,8 @@ struct ieee80211_if_managed {
 
 	struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */
 	struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */
+	struct ieee80211_vht_cap vht_capa; /* configured VHT overrides */
+	struct ieee80211_vht_cap vht_capa_mask; /* Valid parts of vht_capa */
 };
 
 struct ieee80211_if_ibss {
@@ -491,8 +494,6 @@ struct ieee80211_if_ibss {
 
 	u32 basic_rates;
 
-	bool timer_running;
-
 	bool fixed_bssid;
 	bool fixed_channel;
 	bool privacy;
@@ -509,8 +510,7 @@ struct ieee80211_if_ibss {
 
 	unsigned long ibss_join_req;
 	/* probe response/beacon for IBSS */
-	struct sk_buff __rcu *presp;
-	struct sk_buff *skb;
+	struct beacon_data __rcu *presp;
 
 	spinlock_t incomplete_lock;
 	struct list_head incomplete_stations;
@@ -544,8 +544,6 @@ struct ieee80211_if_mesh {
 	struct timer_list mesh_path_timer;
 	struct timer_list mesh_path_root_timer;
 
-	unsigned long timers_running;
-
 	unsigned long wrkq_flags;
 
 	u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN];
@@ -591,6 +589,7 @@ struct ieee80211_if_mesh {
 		IEEE80211_MESH_SEC_AUTHED = 0x1,
 		IEEE80211_MESH_SEC_SECURED = 0x2,
 	} security;
+	bool user_mpm;
 	/* Extensible Synchronization Framework */
 	const struct ieee80211_mesh_sync_ops *sync_ops;
 	s64 sync_offset_clockdrift_max;
@@ -683,6 +682,8 @@ struct ieee80211_sub_if_data {
 
 	/* count for keys needing tailroom space allocation */
 	int crypto_tx_tailroom_needed_cnt;
+	int crypto_tx_tailroom_pending_dec;
+	struct delayed_work dec_tailroom_needed_wk;
 
 	struct net_device *dev;
 	struct ieee80211_local *local;
@@ -740,6 +741,8 @@ struct ieee80211_sub_if_data {
 
 	/* bitmap of allowed (non-MCS) rate indexes for rate control */
 	u32 rc_rateidx_mask[IEEE80211_NUM_BANDS];
+
+	bool rc_has_mcs_mask[IEEE80211_NUM_BANDS];
 	u8 rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN];
 
 	union {
@@ -758,7 +761,6 @@ struct ieee80211_sub_if_data {
 
 #ifdef CONFIG_MAC80211_DEBUGFS
 	struct {
-		struct dentry *dir;
 		struct dentry *subdir_stations;
 		struct dentry *default_unicast_key;
 		struct dentry *default_multicast_key;
@@ -766,10 +768,6 @@ struct ieee80211_sub_if_data {
 	} debugfs;
 #endif
 
-#ifdef CONFIG_PM
-	struct ieee80211_bss_conf suspend_bss_conf;
-#endif
-
 	/* must be last, dynamically sized area in this! */
 	struct ieee80211_vif vif;
 };
@@ -804,11 +802,6 @@ enum sdata_queue_type {
 enum {
 	IEEE80211_RX_MSG	= 1,
 	IEEE80211_TX_STATUS_MSG	= 2,
-	IEEE80211_EOSP_MSG	= 3,
-};
-
-struct skb_eosp_msg_data {
-	u8 sta[ETH_ALEN], iface[ETH_ALEN];
 };
 
 enum queue_stop_reason {
@@ -819,6 +812,7 @@ enum queue_stop_reason {
 	IEEE80211_QUEUE_STOP_REASON_SUSPEND,
 	IEEE80211_QUEUE_STOP_REASON_SKB_ADD,
 	IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL,
+	IEEE80211_QUEUE_STOP_REASON_FLUSH,
 };
 
 #ifdef CONFIG_MAC80211_LEDS
@@ -1029,10 +1023,9 @@ struct ieee80211_local {
 	enum mac80211_scan_state next_scan_state;
 	struct delayed_work scan_work;
 	struct ieee80211_sub_if_data __rcu *scan_sdata;
-	struct ieee80211_channel *csa_channel;
+	struct cfg80211_chan_def csa_chandef;
 	/* For backward compatibility only -- do not use */
-	struct ieee80211_channel *_oper_channel;
-	enum nl80211_channel_type _oper_channel_type;
+	struct cfg80211_chan_def _oper_chandef;
 
 	/* Temporary remain-on-channel for off-channel operations */
 	struct ieee80211_channel *tmp_channel;
@@ -1137,11 +1130,6 @@ struct ieee80211_local {
 
 	struct ieee80211_sub_if_data __rcu *p2p_sdata;
 
-	/* dummy netdev for use w/ NAPI */
-	struct net_device napi_dev;
-
-	struct napi_struct napi;
-
 	/* virtual monitor interface */
 	struct ieee80211_sub_if_data __rcu *monitor_sdata;
 	struct cfg80211_chan_def monitor_chandef;
@@ -1173,11 +1161,8 @@ struct ieee802_11_elems {
 	/* pointers to IEs */
 	const u8 *ssid;
 	const u8 *supp_rates;
-	const u8 *fh_params;
 	const u8 *ds_params;
-	const u8 *cf_params;
 	const struct ieee80211_tim_ie *tim;
-	const u8 *ibss_params;
 	const u8 *challenge;
 	const u8 *rsn;
 	const u8 *erp_info;
@@ -1197,23 +1182,20 @@ struct ieee802_11_elems {
 	const u8 *perr;
 	const struct ieee80211_rann_ie *rann;
 	const struct ieee80211_channel_sw_ie *ch_switch_ie;
+	const struct ieee80211_ext_chansw_ie *ext_chansw_ie;
+	const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
 	const u8 *country_elem;
 	const u8 *pwr_constr_elem;
-	const u8 *quiet_elem; /* first quite element */
-	const u8 *timeout_int;
+	const struct ieee80211_timeout_interval_ie *timeout_int;
 	const u8 *opmode_notif;
+	const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
 
 	/* length of them, respectively */
 	u8 ssid_len;
 	u8 supp_rates_len;
-	u8 fh_params_len;
-	u8 ds_params_len;
-	u8 cf_params_len;
 	u8 tim_len;
-	u8 ibss_params_len;
 	u8 challenge_len;
 	u8 rsn_len;
-	u8 erp_info_len;
 	u8 ext_supp_rates_len;
 	u8 wmm_info_len;
 	u8 wmm_param_len;
@@ -1223,9 +1205,6 @@ struct ieee802_11_elems {
 	u8 prep_len;
 	u8 perr_len;
 	u8 country_elem_len;
-	u8 quiet_elem_len;
-	u8 num_of_quiet_elem; /* can be more the one */
-	u8 timeout_int_len;
 
 	/* whether a parse error occurred while retrieving these elements */
 	bool parse_error;
@@ -1280,12 +1259,6 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata);
 int ieee80211_max_network_latency(struct notifier_block *nb,
 				  unsigned long data, void *dummy);
 int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata);
-void
-ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
-				 const struct ieee80211_channel_sw_ie *sw_elem,
-				 struct ieee80211_bss *bss, u64 timestamp);
-void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata);
-void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 				  struct sk_buff *skb);
@@ -1303,8 +1276,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
 int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 			struct cfg80211_ibss_params *params);
 int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata);
-void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata);
-void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata);
 void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata);
 void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 				   struct sk_buff *skb);
@@ -1347,7 +1318,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local);
 void ieee80211_offchannel_return(struct ieee80211_local *local);
 void ieee80211_roc_setup(struct ieee80211_local *local);
 void ieee80211_start_next_roc(struct ieee80211_local *local);
-void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata);
+void ieee80211_roc_purge(struct ieee80211_local *local,
+			 struct ieee80211_sub_if_data *sdata);
 void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free);
 void ieee80211_sw_roc_work(struct work_struct *work);
 void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc);
@@ -1368,6 +1340,8 @@ void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
 				    const int offset);
 int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up);
 void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata);
+int ieee80211_add_virtual_monitor(struct ieee80211_local *local);
+void ieee80211_del_virtual_monitor(struct ieee80211_local *local);
 
 bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
 void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);
@@ -1443,6 +1417,8 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta);
 void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
 				 struct sta_info *sta, u8 opmode,
 				 enum ieee80211_band band, bool nss_only);
+void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
+				      struct ieee80211_sta_vht_cap *vht_cap);
 
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1520,11 +1496,15 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
 	ieee80211_tx_skb_tid(sdata, skb, 7);
 }
 
-void ieee802_11_parse_elems(u8 *start, size_t len,
-			    struct ieee802_11_elems *elems);
-u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
+u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, bool action,
 			       struct ieee802_11_elems *elems,
 			       u64 filter, u32 crc);
+static inline void ieee802_11_parse_elems(u8 *start, size_t len, bool action,
+					  struct ieee802_11_elems *elems)
+{
+	ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0);
+}
+
 u32 ieee80211_mandatory_rates(struct ieee80211_local *local,
 			      enum ieee80211_band band);
 
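The header change above collapses ieee802_11_parse_elems() into a static inline wrapper over the CRC-tracking variant, passing filter and crc as 0. A stripped-down sketch of that common-case-wrapper pattern (the functions below are toy stand-ins, not the mac80211 parser):

#include <stdint.h>
#include <stdio.h>

/* General worker: the filter/crc arguments matter only to callers
 * that track beacon changes; everyone else passes zeros. */
static uint32_t parse_elems_crc(const uint8_t *start, size_t len, int action,
                                uint64_t filter, uint32_t crc)
{
    /* element parsing elided; just thread the crc through */
    (void)start; (void)len; (void)action; (void)filter;
    return crc;
}

/* Thin wrapper for the common case, mirroring the new static inline. */
static inline void parse_elems(const uint8_t *start, size_t len, int action)
{
    parse_elems_crc(start, len, action, 0, 0);
}

int main(void)
{
    uint8_t ies[] = { 0x00, 0x04, 'd', 'e', 'm', 'o' };
    parse_elems(ies, sizeof(ies), 0);
    puts("parsed");
    return 0;
}
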
@@ -1540,8 +1520,10 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_hdr *hdr, bool ack);
 
 void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
+				     unsigned long queues,
 				     enum queue_stop_reason reason);
 void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
+				     unsigned long queues,
 				     enum queue_stop_reason reason);
 void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
 				    enum queue_stop_reason reason);
@@ -1558,6 +1540,8 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local,
 {
 	ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL);
 }
+void ieee80211_flush_queues(struct ieee80211_local *local,
+			    struct ieee80211_sub_if_data *sdata);
 
 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
 			 u16 transaction, u16 auth_alg, u16 status,
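Together with the new IEEE80211_QUEUE_STOP_REASON_FLUSH, the bitmap arguments above let a flush stop and wake only the affected queues. A minimal model of per-queue stop-reason bookkeeping, with invented names (a queue runs only while its reason mask is zero):

#include <stdint.h>
#include <stdio.h>

enum stop_reason { STOP_REASON_FLUSH, STOP_REASON_SUSPEND };

/* One stop-reason bitmask per queue. */
static uint32_t stop_reasons[4];

static void stop_queues(unsigned long queues, enum stop_reason r)
{
    for (int q = 0; q < 4; q++)
        if (queues & (1ul << q))
            stop_reasons[q] |= 1u << r;
}

static void wake_queues(unsigned long queues, enum stop_reason r)
{
    for (int q = 0; q < 4; q++)
        if (queues & (1ul << q))
            stop_reasons[q] &= ~(1u << r);
}

int main(void)
{
    stop_queues(0x3, STOP_REASON_FLUSH);  /* flush only queues 0 and 1 */
    wake_queues(0x3, STOP_REASON_FLUSH);
    for (int q = 0; q < 4; q++)
        printf("queue %d %s\n", q, stop_reasons[q] ? "stopped" : "running");
    return 0;
}
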
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 58150f877ec3..60f1ce5e5e52 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1,5 +1,5 @@
 /*
- * Interface handling (except master interface)
+ * Interface handling
  *
  * Copyright 2002-2005, Instant802 Networks, Inc.
  * Copyright 2005-2006, Devicescape Software, Inc.
@@ -78,7 +78,7 @@ void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
 		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER);
 }
 
-u32 ieee80211_idle_off(struct ieee80211_local *local)
+static u32 __ieee80211_idle_off(struct ieee80211_local *local)
 {
 	if (!(local->hw.conf.flags & IEEE80211_CONF_IDLE))
 		return 0;
@@ -87,27 +87,29 @@ u32 ieee80211_idle_off(struct ieee80211_local *local)
 	return IEEE80211_CONF_CHANGE_IDLE;
 }
 
-static u32 ieee80211_idle_on(struct ieee80211_local *local)
+static u32 __ieee80211_idle_on(struct ieee80211_local *local)
 {
 	if (local->hw.conf.flags & IEEE80211_CONF_IDLE)
 		return 0;
 
-	drv_flush(local, false);
+	ieee80211_flush_queues(local, NULL);
 
 	local->hw.conf.flags |= IEEE80211_CONF_IDLE;
 	return IEEE80211_CONF_CHANGE_IDLE;
 }
 
-void ieee80211_recalc_idle(struct ieee80211_local *local)
+static u32 __ieee80211_recalc_idle(struct ieee80211_local *local,
+				   bool force_active)
 {
 	bool working = false, scanning, active;
 	unsigned int led_trig_start = 0, led_trig_stop = 0;
 	struct ieee80211_roc_work *roc;
-	u32 change;
 
 	lockdep_assert_held(&local->mtx);
 
-	active = !list_empty(&local->chanctx_list) || local->monitors;
+	active = force_active ||
+		 !list_empty(&local->chanctx_list) ||
+		 local->monitors;
 
 	if (!local->ops->remain_on_channel) {
 		list_for_each_entry(roc, &local->roc_list, list) {
@@ -132,9 +134,18 @@ void ieee80211_recalc_idle(struct ieee80211_local *local)
 	ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop);
 
 	if (working || scanning || active)
-		change = ieee80211_idle_off(local);
-	else
-		change = ieee80211_idle_on(local);
+		return __ieee80211_idle_off(local);
+	return __ieee80211_idle_on(local);
+}
+
+u32 ieee80211_idle_off(struct ieee80211_local *local)
+{
+	return __ieee80211_recalc_idle(local, true);
+}
+
+void ieee80211_recalc_idle(struct ieee80211_local *local)
+{
+	u32 change = __ieee80211_recalc_idle(local, false);
 	if (change)
 		ieee80211_hw_config(local, change);
 }
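After this refactor both entry points funnel into one decision function; forcing idle off is just a recalculation with force_active set. A compact model of that single-decision-point shape (all names here are illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool idle;

/* Single decision point; callers differ only in force_active. */
static int recalc_idle(bool force_active, bool busy)
{
    bool want_idle = !(force_active || busy);
    if (want_idle == idle)
        return 0;   /* nothing changed */
    idle = want_idle;
    return 1;       /* a CONF_CHANGE_IDLE-style flag */
}

int main(void)
{
    printf("change=%d idle=%d\n", recalc_idle(false, false), idle);
    printf("change=%d idle=%d\n", recalc_idle(true, false), idle);
    return 0;
}
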
@@ -346,7 +357,7 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata)
 		sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;
 }
 
-static int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
+int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
 {
 	struct ieee80211_sub_if_data *sdata;
 	int ret;
@@ -399,7 +410,7 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
 	return 0;
 }
 
-static void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
+void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
 {
 	struct ieee80211_sub_if_data *sdata;
 
@@ -488,8 +499,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 		res = drv_start(local);
 		if (res)
 			goto err_del_bss;
-		if (local->ops->napi_poll)
-			napi_enable(&local->napi);
 		/* we're brought up, everything changes */
 		hw_reconf_flags = ~0;
 		ieee80211_led_radio(local, true);
@@ -562,8 +571,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 			goto err_del_interface;
 		}
 
-		drv_add_interface_debugfs(local, sdata);
-
 		if (sdata->vif.type == NL80211_IFTYPE_AP) {
 			local->fif_pspoll++;
 			local->fif_probe_req++;
@@ -588,7 +595,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 	case NL80211_IFTYPE_P2P_DEVICE:
 		break;
 	default:
-		netif_carrier_on(dev);
+		/* not reached */
+		WARN_ON(1);
 	}
 
 	/*
@@ -645,8 +653,28 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 
 	ieee80211_recalc_ps(local, -1);
 
-	if (dev)
-		netif_tx_start_all_queues(dev);
+	if (dev) {
+		unsigned long flags;
+		int n_acs = IEEE80211_NUM_ACS;
+		int ac;
+
+		if (local->hw.queues < IEEE80211_NUM_ACS)
+			n_acs = 1;
+
+		spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
+		if (sdata->vif.cab_queue == IEEE80211_INVAL_HW_QUEUE ||
+		    (local->queue_stop_reasons[sdata->vif.cab_queue] == 0 &&
+		     skb_queue_empty(&local->pending[sdata->vif.cab_queue]))) {
+			for (ac = 0; ac < n_acs; ac++) {
+				int ac_queue = sdata->vif.hw_queue[ac];
+
+				if (local->queue_stop_reasons[ac_queue] == 0 &&
+				    skb_queue_empty(&local->pending[ac_queue]))
+					netif_start_subqueue(dev, ac);
+			}
+		}
+		spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
+	}
 
 	return 0;
 err_del_interface:
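Rather than unconditionally starting every TX queue on open, the hunk above starts each AC subqueue only if its backing hardware queue has no stop reasons and no pending frames. A small sketch of that per-AC gating, with invented queue numbers:

#include <stdbool.h>
#include <stdio.h>

#define NUM_ACS 4

static int hw_queue_of_ac[NUM_ACS] = { 4, 5, 6, 7 };
static unsigned int stop_reasons[8];
static unsigned int pending[8];

int main(void)
{
    pending[5] = 3; /* AC 1's hardware queue still has queued frames */

    for (int ac = 0; ac < NUM_ACS; ac++) {
        int q = hw_queue_of_ac[ac];
        bool start = stop_reasons[q] == 0 && pending[q] == 0;
        printf("AC %d: %s\n", ac, start ? "start" : "leave stopped");
    }
    return 0;
}
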
@@ -700,7 +728,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	if (sdata->dev)
 		netif_tx_stop_all_queues(sdata->dev);
 
-	ieee80211_roc_purge(sdata);
+	ieee80211_roc_purge(local, sdata);
 
 	if (sdata->vif.type == NL80211_IFTYPE_STATION)
 		ieee80211_mgd_stop(sdata);
@@ -725,12 +753,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) ||
 		     (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1));
 
-	/*
-	 * Don't count this interface for promisc/allmulti while it
-	 * is down. dev_mc_unsync() will invoke set_multicast_list
-	 * on the master interface which will sync these down to the
-	 * hardware as filter flags.
-	 */
+	/* don't count this interface for promisc/allmulti while it is down */
 	if (sdata->flags & IEEE80211_SDATA_ALLMULTI)
 		atomic_dec(&local->iff_allmultis);
 
@@ -751,8 +774,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 			       sdata->dev->addr_len);
 		spin_unlock_bh(&local->filter_lock);
 		netif_addr_unlock_bh(sdata->dev);
-
-		ieee80211_configure_filter(local);
 	}
 
 	del_timer_sync(&local->dynamic_ps_timer);
@@ -763,6 +784,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);
 
 	if (sdata->wdev.cac_started) {
+		WARN_ON(local->suspended);
 		mutex_lock(&local->iflist_mtx);
 		ieee80211_vif_release_channel(sdata);
 		mutex_unlock(&local->iflist_mtx);
@@ -813,14 +835,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		if (local->monitors == 0) {
 			local->hw.conf.flags &= ~IEEE80211_CONF_MONITOR;
 			hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR;
-			ieee80211_del_virtual_monitor(local);
 		}
 
 		ieee80211_adjust_monitor_flags(sdata, -1);
-		ieee80211_configure_filter(local);
-		mutex_lock(&local->mtx);
-		ieee80211_recalc_idle(local);
-		mutex_unlock(&local->mtx);
 		break;
 	case NL80211_IFTYPE_P2P_DEVICE:
 		/* relies on synchronize_rcu() below */
@@ -833,46 +850,28 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 		 *
 		 * sta_info_flush_cleanup() requires rcu_barrier()
 		 * first to wait for the station call_rcu() calls
-		 * to complete, here we need at least sychronize_rcu()
-		 * it to wait for the RX path in case it is using the
+		 * to complete, and we also need synchronize_rcu()
+		 * to wait for the RX path in case it is using the
 		 * interface and enqueuing frames at this very time on
 		 * another CPU.
 		 */
+		synchronize_rcu();
 		rcu_barrier();
 		sta_info_flush_cleanup(sdata);
 
-		skb_queue_purge(&sdata->skb_queue);
-
 		/*
 		 * Free all remaining keys, there shouldn't be any,
-		 * except maybe group keys in AP more or WDS?
+		 * except maybe in WDS mode?
 		 */
 		ieee80211_free_keys(sdata);
 
-		drv_remove_interface_debugfs(local, sdata);
-
-		if (going_down)
-			drv_remove_interface(local, sdata);
+		/* fall through */
+	case NL80211_IFTYPE_AP:
+		skb_queue_purge(&sdata->skb_queue);
 	}
 
 	sdata->bss = NULL;
 
-	ieee80211_recalc_ps(local, -1);
-
-	if (local->open_count == 0) {
-		if (local->ops->napi_poll)
-			napi_disable(&local->napi);
-		ieee80211_clear_tx_pending(local);
-		ieee80211_stop_device(local);
-
-		/* no reconfiguring after stop! */
-		hw_reconf_flags = 0;
-	}
-
-	/* do after stop to avoid reconfiguring when we stop anyway */
-	if (hw_reconf_flags)
-		ieee80211_hw_config(local, hw_reconf_flags);
-
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 	for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
 		skb_queue_walk_safe(&local->pending[i], skb, tmp) {
@@ -885,7 +884,54 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	}
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 
-	if (local->monitors == local->open_count && local->monitors > 0)
+	if (local->open_count == 0)
+		ieee80211_clear_tx_pending(local);
+
+	/*
+	 * If the interface goes down while suspended, presumably because
+	 * the device was unplugged and that happens before our resume,
+	 * then the driver is already unconfigured and the remainder of
+	 * this function isn't needed.
+	 * XXX: what about WoWLAN? If the device has software state, e.g.
+	 *	memory allocated, it might expect teardown commands from
+	 *	mac80211 here?
+	 */
+	if (local->suspended) {
+		WARN_ON(local->wowlan);
+		WARN_ON(rtnl_dereference(local->monitor_sdata));
+		return;
+	}
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_AP_VLAN:
+		break;
+	case NL80211_IFTYPE_MONITOR:
+		if (local->monitors == 0)
+			ieee80211_del_virtual_monitor(local);
+
+		mutex_lock(&local->mtx);
+		ieee80211_recalc_idle(local);
+		mutex_unlock(&local->mtx);
+		break;
+	default:
+		if (going_down)
+			drv_remove_interface(local, sdata);
+	}
+
+	ieee80211_recalc_ps(local, -1);
+
+	if (local->open_count == 0) {
+		ieee80211_stop_device(local);
+
+		/* no reconfiguring after stop! */
+		return;
+	}
+
+	/* do after stop to avoid reconfiguring when we stop anyway */
+	ieee80211_configure_filter(local);
+	ieee80211_hw_config(local, hw_reconf_flags);
+
+	if (local->monitors == local->open_count)
 		ieee80211_add_virtual_monitor(local);
 }
 
@@ -924,6 +970,17 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
 			atomic_dec(&local->iff_promiscs);
 		sdata->flags ^= IEEE80211_SDATA_PROMISC;
 	}
+
+	/*
+	 * TODO: If somebody needs this on AP interfaces,
+	 *	 it can be enabled easily but multicast
+	 *	 addresses from VLANs need to be synced.
+	 */
+	if (sdata->vif.type != NL80211_IFTYPE_MONITOR &&
+	    sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
+	    sdata->vif.type != NL80211_IFTYPE_AP)
+		drv_set_multicast_list(local, sdata, &dev->mc);
+
 	spin_lock_bh(&local->filter_lock);
 	__hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
 	spin_unlock_bh(&local->filter_lock);
@@ -1550,6 +1607,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 	INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk);
 	INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work,
 			  ieee80211_dfs_cac_timer_work);
+	INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk,
+			  ieee80211_delayed_tailroom_dec);
 
 	for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
 		struct ieee80211_supported_band *sband;
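The restructured ieee80211_do_stop() above bails out early while suspended (the driver is already unconfigured) and fully stops the device only when the last interface goes away. A tiny model of that early-return teardown ordering (names and counters are illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool suspended;
static int open_count = 1;

/* Mirrors the reordered do_stop: skip teardown while suspended,
 * stop the device only when the last interface went away. */
static void do_stop(void)
{
    open_count--;

    if (suspended) {
        puts("suspended: driver already unconfigured, skip teardown");
        return;
    }
    if (open_count == 0) {
        puts("last interface down: stop device, no reconfig after");
        return;
    }
    puts("reconfigure filters for remaining interfaces");
}

int main(void)
{
    do_stop();
    return 0;
}
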
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index ef252eb58c36..67059b88fea5 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -248,11 +248,11 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
 }
 
 
-static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
-				    struct sta_info *sta,
-				    bool pairwise,
-				    struct ieee80211_key *old,
-				    struct ieee80211_key *new)
+static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
+				  struct sta_info *sta,
+				  bool pairwise,
+				  struct ieee80211_key *old,
+				  struct ieee80211_key *new)
 {
 	int idx;
 	bool defunikey, defmultikey, defmgmtkey;
@@ -397,7 +397,41 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
 	return key;
 }
 
-static void __ieee80211_key_destroy(struct ieee80211_key *key)
+static void ieee80211_key_free_common(struct ieee80211_key *key)
+{
+	if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
+		ieee80211_aes_key_free(key->u.ccmp.tfm);
+	if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
+		ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
+	kfree(key);
+}
+
+static void __ieee80211_key_destroy(struct ieee80211_key *key,
+				    bool delay_tailroom)
+{
+	if (key->local)
+		ieee80211_key_disable_hw_accel(key);
+
+	if (key->local) {
+		struct ieee80211_sub_if_data *sdata = key->sdata;
+
+		ieee80211_debugfs_key_remove(key);
+
+		if (delay_tailroom) {
+			/* see ieee80211_delayed_tailroom_dec */
+			sdata->crypto_tx_tailroom_pending_dec++;
+			schedule_delayed_work(&sdata->dec_tailroom_needed_wk,
+					      HZ/2);
+		} else {
+			sdata->crypto_tx_tailroom_needed_cnt--;
+		}
+	}
+
+	ieee80211_key_free_common(key);
+}
+
+static void ieee80211_key_destroy(struct ieee80211_key *key,
+				  bool delay_tailroom)
 {
 	if (!key)
 		return;
@@ -408,19 +442,13 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
 	 */
 	synchronize_net();
 
-	if (key->local)
-		ieee80211_key_disable_hw_accel(key);
-
-	if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
-		ieee80211_aes_key_free(key->u.ccmp.tfm);
-	if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
-		ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
-	if (key->local) {
-		ieee80211_debugfs_key_remove(key);
-		key->sdata->crypto_tx_tailroom_needed_cnt--;
-	}
+	__ieee80211_key_destroy(key, delay_tailroom);
+}
 
-	kfree(key);
+void ieee80211_key_free_unused(struct ieee80211_key *key)
+{
+	WARN_ON(key->sdata || key->local);
+	ieee80211_key_free_common(key);
 }
 
 int ieee80211_key_link(struct ieee80211_key *key,
@@ -440,32 +468,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
440 key->sdata = sdata; 468 key->sdata = sdata;
441 key->sta = sta; 469 key->sta = sta;
442 470
443 if (sta) {
444 /*
445 * some hardware cannot handle TKIP with QoS, so
446 * we indicate whether QoS could be in use.
447 */
448 if (test_sta_flag(sta, WLAN_STA_WME))
449 key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA;
450 } else {
451 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
452 struct sta_info *ap;
453
454 /*
455 * We're getting a sta pointer in, so must be under
456 * appropriate locking for sta_info_get().
457 */
458
459 /* same here, the AP could be using QoS */
460 ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid);
461 if (ap) {
462 if (test_sta_flag(ap, WLAN_STA_WME))
463 key->conf.flags |=
464 IEEE80211_KEY_FLAG_WMM_STA;
465 }
466 }
467 }
468
469 mutex_lock(&sdata->local->key_mtx); 471 mutex_lock(&sdata->local->key_mtx);
470 472
471 if (sta && pairwise) 473 if (sta && pairwise)
@@ -477,19 +479,22 @@ int ieee80211_key_link(struct ieee80211_key *key,
477 479
478 increment_tailroom_need_count(sdata); 480 increment_tailroom_need_count(sdata);
479 481
480 __ieee80211_key_replace(sdata, sta, pairwise, old_key, key); 482 ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
481 __ieee80211_key_destroy(old_key); 483 ieee80211_key_destroy(old_key, true);
482 484
483 ieee80211_debugfs_key_add(key); 485 ieee80211_debugfs_key_add(key);
484 486
485 ret = ieee80211_key_enable_hw_accel(key); 487 ret = ieee80211_key_enable_hw_accel(key);
486 488
489 if (ret)
490 ieee80211_key_free(key, true);
491
487 mutex_unlock(&sdata->local->key_mtx); 492 mutex_unlock(&sdata->local->key_mtx);
488 493
489 return ret; 494 return ret;
490} 495}
491 496
492void __ieee80211_key_free(struct ieee80211_key *key) 497void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
493{ 498{
494 if (!key) 499 if (!key)
495 return; 500 return;
@@ -498,18 +503,10 @@ void __ieee80211_key_free(struct ieee80211_key *key)
498 * Replace key with nothingness if it was ever used. 503 * Replace key with nothingness if it was ever used.
499 */ 504 */
500 if (key->sdata) 505 if (key->sdata)
501 __ieee80211_key_replace(key->sdata, key->sta, 506 ieee80211_key_replace(key->sdata, key->sta,
502 key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, 507 key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
503 key, NULL); 508 key, NULL);
504 __ieee80211_key_destroy(key); 509 ieee80211_key_destroy(key, delay_tailroom);
505}
506
507void ieee80211_key_free(struct ieee80211_local *local,
508 struct ieee80211_key *key)
509{
510 mutex_lock(&local->key_mtx);
511 __ieee80211_key_free(key);
512 mutex_unlock(&local->key_mtx);
513} 510}
514 511
515void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) 512void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
@@ -566,36 +563,109 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,
566} 563}
567EXPORT_SYMBOL(ieee80211_iter_keys); 564EXPORT_SYMBOL(ieee80211_iter_keys);
568 565
569void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) 566void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
570{ 567{
571 struct ieee80211_key *key; 568 struct ieee80211_key *key, *tmp;
569 LIST_HEAD(keys);
572 570
573 ASSERT_RTNL(); 571 cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk);
574 572
575 mutex_lock(&sdata->local->key_mtx); 573 mutex_lock(&sdata->local->key_mtx);
576 574
577 list_for_each_entry(key, &sdata->key_list, list) 575 sdata->crypto_tx_tailroom_needed_cnt -=
578 ieee80211_key_disable_hw_accel(key); 576 sdata->crypto_tx_tailroom_pending_dec;
577 sdata->crypto_tx_tailroom_pending_dec = 0;
578
579 ieee80211_debugfs_key_remove_mgmt_default(sdata);
580
581 list_for_each_entry_safe(key, tmp, &sdata->key_list, list) {
582 ieee80211_key_replace(key->sdata, key->sta,
583 key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
584 key, NULL);
585 list_add_tail(&key->list, &keys);
586 }
587
588 ieee80211_debugfs_key_update_default(sdata);
589
590 if (!list_empty(&keys)) {
591 synchronize_net();
592 list_for_each_entry_safe(key, tmp, &keys, list)
593 __ieee80211_key_destroy(key, false);
594 }
595
596 WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
597 sdata->crypto_tx_tailroom_pending_dec);
579 598
580 mutex_unlock(&sdata->local->key_mtx); 599 mutex_unlock(&sdata->local->key_mtx);
581} 600}
582 601
583void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) 602void ieee80211_free_sta_keys(struct ieee80211_local *local,
603 struct sta_info *sta)
584{ 604{
585 struct ieee80211_key *key, *tmp; 605 struct ieee80211_key *key, *tmp;
606 LIST_HEAD(keys);
607 int i;
586 608
587 mutex_lock(&sdata->local->key_mtx); 609 mutex_lock(&local->key_mtx);
610 for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
611 key = key_mtx_dereference(local, sta->gtk[i]);
612 if (!key)
613 continue;
614 ieee80211_key_replace(key->sdata, key->sta,
615 key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
616 key, NULL);
617 list_add(&key->list, &keys);
618 }
588 619
589 ieee80211_debugfs_key_remove_mgmt_default(sdata); 620 key = key_mtx_dereference(local, sta->ptk);
621 if (key) {
622 ieee80211_key_replace(key->sdata, key->sta,
623 key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
624 key, NULL);
625 list_add(&key->list, &keys);
626 }
590 627
591 list_for_each_entry_safe(key, tmp, &sdata->key_list, list) 628 /*
592 __ieee80211_key_free(key); 629 * NB: the station code relies on this being
630 * done even if there aren't any keys
631 */
632 synchronize_net();
593 633
594 ieee80211_debugfs_key_update_default(sdata); 634 list_for_each_entry_safe(key, tmp, &keys, list)
635 __ieee80211_key_destroy(key, true);
595 636
596 mutex_unlock(&sdata->local->key_mtx); 637 mutex_unlock(&local->key_mtx);
597} 638}
598 639
640void ieee80211_delayed_tailroom_dec(struct work_struct *wk)
641{
642 struct ieee80211_sub_if_data *sdata;
643
644 sdata = container_of(wk, struct ieee80211_sub_if_data,
645 dec_tailroom_needed_wk.work);
646
647 /*
648 * The reason for the delayed tailroom needed decrementing is to
649 * make roaming faster: during roaming, all keys are first deleted
650 * and then new keys are installed. The first new key causes the
651 * crypto_tx_tailroom_needed_cnt to go from 0 to 1, which invokes
652 * the cost of synchronize_net() (which can be slow). Avoid this
653 * by deferring the crypto_tx_tailroom_needed_cnt decrementing on
654 * key removal for a while, so if we roam the value is larger than
655 * zero and no 0->1 transition happens.
656 *
657 * The cost is that if the AP switching was from an AP with keys
658 * to one without, we still allocate tailroom while it would no
659 * longer be needed. However, in the typical (fast) roaming case
660 * within an ESS this usually won't happen.
661 */
662
663 mutex_lock(&sdata->local->key_mtx);
664 sdata->crypto_tx_tailroom_needed_cnt -=
665 sdata->crypto_tx_tailroom_pending_dec;
666 sdata->crypto_tx_tailroom_pending_dec = 0;
667 mutex_unlock(&sdata->local->key_mtx);
668}
599 669
600void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid, 670void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid,
601 const u8 *replay_ctr, gfp_t gfp) 671 const u8 *replay_ctr, gfp_t gfp)
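Worth noting in the two free paths above: keys are first unlinked from their RCU-visible slots under key_mtx and collected on a private list, and only then is a single synchronize_net() paid for the whole batch before destruction. A generic sketch of that batching shape, assuming a plain kfree()-able item rather than the mac80211 structures:

    #include <linux/list.h>
    #include <linux/netdevice.h>    /* synchronize_net() */
    #include <linux/slab.h>

    struct item {
            struct list_head list;
    };

    static LIST_HEAD(active);

    static void destroy_all(void)
    {
            struct item *it, *tmp;
            LIST_HEAD(doomed);

            /* unlink everything first, under the writer-side lock */
            list_for_each_entry_safe(it, tmp, &active, list)
                    list_move_tail(&it->list, &doomed);

            /* one RCU grace period for the whole batch, not one per item */
            if (!list_empty(&doomed))
                    synchronize_net();

            list_for_each_entry_safe(it, tmp, &doomed, list)
                    kfree(it);
    }
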
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 382dc44ed330..e8de3e6d7804 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -129,23 +129,25 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
129 size_t seq_len, const u8 *seq); 129 size_t seq_len, const u8 *seq);
130/* 130/*
131 * Insert a key into data structures (sdata, sta if necessary) 131 * Insert a key into data structures (sdata, sta if necessary)
132 * to make it used, free old key. 132 * to make it used, free old key. On failure, also free the new key.
133 */ 133 */
134int __must_check ieee80211_key_link(struct ieee80211_key *key, 134int ieee80211_key_link(struct ieee80211_key *key,
135 struct ieee80211_sub_if_data *sdata, 135 struct ieee80211_sub_if_data *sdata,
136 struct sta_info *sta); 136 struct sta_info *sta);
137void __ieee80211_key_free(struct ieee80211_key *key); 137void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom);
138void ieee80211_key_free(struct ieee80211_local *local, 138void ieee80211_key_free_unused(struct ieee80211_key *key);
139 struct ieee80211_key *key);
140void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, 139void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,
141 bool uni, bool multi); 140 bool uni, bool multi);
142void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, 141void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
143 int idx); 142 int idx);
144void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); 143void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata);
144void ieee80211_free_sta_keys(struct ieee80211_local *local,
145 struct sta_info *sta);
145void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); 146void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
146void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata);
147 147
148#define key_mtx_dereference(local, ref) \ 148#define key_mtx_dereference(local, ref) \
149 rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) 149 rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx)))
150 150
151void ieee80211_delayed_tailroom_dec(struct work_struct *wk);
152
151#endif /* IEEE80211_KEY_H */ 153#endif /* IEEE80211_KEY_H */
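The header changes also shift ownership rules: ieee80211_key_link() now frees the key itself when it fails (hence the dropped __must_check), and ieee80211_key_free_unused() covers keys that were allocated but never linked. Under the new contract a caller must not touch the key after a failed link, whereas previously it still owned the key and had to clean it up.
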
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 1a8591b77a13..8a7bfc47d577 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -95,43 +95,47 @@ static void ieee80211_reconfig_filter(struct work_struct *work)
95static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) 95static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
96{ 96{
97 struct ieee80211_sub_if_data *sdata; 97 struct ieee80211_sub_if_data *sdata;
98 struct ieee80211_channel *chan; 98 struct cfg80211_chan_def chandef = {};
99 u32 changed = 0; 99 u32 changed = 0;
100 int power; 100 int power;
101 enum nl80211_channel_type channel_type;
102 u32 offchannel_flag; 101 u32 offchannel_flag;
103 bool scanning = false;
104 102
105 offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; 103 offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
104
106 if (local->scan_channel) { 105 if (local->scan_channel) {
107 chan = local->scan_channel; 106 chandef.chan = local->scan_channel;
108 /* If scanning on oper channel, use whatever channel-type 107 /* If scanning on oper channel, use whatever channel-type
109 * is currently in use. 108 * is currently in use.
110 */ 109 */
111 if (chan == local->_oper_channel) 110 if (chandef.chan == local->_oper_chandef.chan) {
112 channel_type = local->_oper_channel_type; 111 chandef = local->_oper_chandef;
113 else 112 } else {
114 channel_type = NL80211_CHAN_NO_HT; 113 chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
114 chandef.center_freq1 = chandef.chan->center_freq;
115 }
115 } else if (local->tmp_channel) { 116 } else if (local->tmp_channel) {
116 chan = local->tmp_channel; 117 chandef.chan = local->tmp_channel;
117 channel_type = NL80211_CHAN_NO_HT; 118 chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
118 } else { 119 chandef.center_freq1 = chandef.chan->center_freq;
119 chan = local->_oper_channel; 120 } else
120 channel_type = local->_oper_channel_type; 121 chandef = local->_oper_chandef;
121 } 122
122 123 WARN(!cfg80211_chandef_valid(&chandef),
123 if (chan != local->_oper_channel || 124 "control:%d MHz width:%d center: %d/%d MHz",
124 channel_type != local->_oper_channel_type) 125 chandef.chan->center_freq, chandef.width,
126 chandef.center_freq1, chandef.center_freq2);
127
128 if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef))
125 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; 129 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
126 else 130 else
127 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; 131 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
128 132
129 offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; 133 offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
130 134
131 if (offchannel_flag || chan != local->hw.conf.channel || 135 if (offchannel_flag ||
132 channel_type != local->hw.conf.channel_type) { 136 !cfg80211_chandef_identical(&local->hw.conf.chandef,
133 local->hw.conf.channel = chan; 137 &local->_oper_chandef)) {
134 local->hw.conf.channel_type = channel_type; 138 local->hw.conf.chandef = chandef;
135 changed |= IEEE80211_CONF_CHANGE_CHANNEL; 139 changed |= IEEE80211_CONF_CHANGE_CHANNEL;
136 } 140 }
137 141
@@ -147,10 +151,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
147 changed |= IEEE80211_CONF_CHANGE_SMPS; 151 changed |= IEEE80211_CONF_CHANGE_SMPS;
148 } 152 }
149 153
150 scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) || 154 power = chandef.chan->max_power;
151 test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning) ||
152 test_bit(SCAN_HW_SCANNING, &local->scanning);
153 power = chan->max_power;
154 155
155 rcu_read_lock(); 156 rcu_read_lock();
156 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 157 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
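This hunk swaps the old (channel, channel_type) pair for a full cfg80211_chan_def. The open-coded 20 MHz no-HT case in the scan/tmp-channel branches is what cfg80211_chandef_create() builds for NL80211_CHAN_NO_HT; a small sketch of the equivalence, assuming a valid channel pointer:

    #include <net/cfg80211.h>

    static void build_20mhz_noht(struct cfg80211_chan_def *chandef,
                                 struct ieee80211_channel *chan)
    {
            /* open-coded, as in the branches above ... */
            chandef->chan = chan;
            chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
            chandef->center_freq1 = chan->center_freq;
            chandef->center_freq2 = 0;

            /* ... equivalent to the helper used later in this patch:
             * cfg80211_chandef_create(chandef, chan, NL80211_CHAN_NO_HT);
             */
    }
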
@@ -226,8 +227,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
226static void ieee80211_tasklet_handler(unsigned long data) 227static void ieee80211_tasklet_handler(unsigned long data)
227{ 228{
228 struct ieee80211_local *local = (struct ieee80211_local *) data; 229 struct ieee80211_local *local = (struct ieee80211_local *) data;
229 struct sta_info *sta, *tmp;
230 struct skb_eosp_msg_data *eosp_data;
231 struct sk_buff *skb; 230 struct sk_buff *skb;
232 231
233 while ((skb = skb_dequeue(&local->skb_queue)) || 232 while ((skb = skb_dequeue(&local->skb_queue)) ||
@@ -243,18 +242,6 @@ static void ieee80211_tasklet_handler(unsigned long data)
243 skb->pkt_type = 0; 242 skb->pkt_type = 0;
244 ieee80211_tx_status(&local->hw, skb); 243 ieee80211_tx_status(&local->hw, skb);
245 break; 244 break;
246 case IEEE80211_EOSP_MSG:
247 eosp_data = (void *)skb->cb;
248 for_each_sta_info(local, eosp_data->sta, sta, tmp) {
249 /* skip wrong virtual interface */
250 if (memcmp(eosp_data->iface,
251 sta->sdata->vif.addr, ETH_ALEN))
252 continue;
253 clear_sta_flag(sta, WLAN_STA_SP);
254 break;
255 }
256 dev_kfree_skb(skb);
257 break;
258 default: 245 default:
259 WARN(1, "mac80211: Packet is of unknown type %d\n", 246 WARN(1, "mac80211: Packet is of unknown type %d\n",
260 skb->pkt_type); 247 skb->pkt_type);
@@ -295,8 +282,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
295 "Hardware restart was requested\n"); 282 "Hardware restart was requested\n");
296 283
297 /* use this reason, ieee80211_reconfig will unblock it */ 284 /* use this reason, ieee80211_reconfig will unblock it */
298 ieee80211_stop_queues_by_reason(hw, 285 ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
299 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 286 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
300 287
301 /* 288 /*
302 * Stop all Rx during the reconfig. We don't want state changes 289 * Stop all Rx during the reconfig. We don't want state changes
@@ -399,30 +386,6 @@ static int ieee80211_ifa6_changed(struct notifier_block *nb,
399} 386}
400#endif 387#endif
401 388
402static int ieee80211_napi_poll(struct napi_struct *napi, int budget)
403{
404 struct ieee80211_local *local =
405 container_of(napi, struct ieee80211_local, napi);
406
407 return local->ops->napi_poll(&local->hw, budget);
408}
409
410void ieee80211_napi_schedule(struct ieee80211_hw *hw)
411{
412 struct ieee80211_local *local = hw_to_local(hw);
413
414 napi_schedule(&local->napi);
415}
416EXPORT_SYMBOL(ieee80211_napi_schedule);
417
418void ieee80211_napi_complete(struct ieee80211_hw *hw)
419{
420 struct ieee80211_local *local = hw_to_local(hw);
421
422 napi_complete(&local->napi);
423}
424EXPORT_SYMBOL(ieee80211_napi_complete);
425
426/* There isn't a lot of sense in it, but you can transmit anything you like */ 389/* There isn't a lot of sense in it, but you can transmit anything you like */
427static const struct ieee80211_txrx_stypes 390static const struct ieee80211_txrx_stypes
428ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { 391ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
@@ -501,6 +464,27 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = {
501 }, 464 },
502}; 465};
503 466
467static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = {
468 .vht_cap_info =
469 cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC |
470 IEEE80211_VHT_CAP_SHORT_GI_80 |
471 IEEE80211_VHT_CAP_SHORT_GI_160 |
472 IEEE80211_VHT_CAP_RXSTBC_1 |
473 IEEE80211_VHT_CAP_RXSTBC_2 |
474 IEEE80211_VHT_CAP_RXSTBC_3 |
475 IEEE80211_VHT_CAP_RXSTBC_4 |
476 IEEE80211_VHT_CAP_TXSTBC |
477 IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
478 IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
479 IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN |
480 IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN |
481 IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK),
482 .supp_mcs = {
483 .rx_mcs_map = cpu_to_le16(~0),
484 .tx_mcs_map = cpu_to_le16(~0),
485 },
486};
487
504static const u8 extended_capabilities[] = { 488static const u8 extended_capabilities[] = {
505 0, 0, 0, 0, 0, 0, 0, 489 0, 0, 0, 0, 0, 0, 0,
506 WLAN_EXT_CAPA8_OPMODE_NOTIF, 490 WLAN_EXT_CAPA8_OPMODE_NOTIF,
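The mac80211_vht_capa_mod_mask added above plays the same role for VHT as the existing HT mask: it declares which capability bits userspace may override, and the all-ones MCS maps allow the advertised MCS sets to be overridden wholesale. An override under such a mask is a plain mask-merge, roughly (hypothetical helper, not the mac80211 code):

    #include <linux/types.h>

    /* hypothetical: only bits set in @mask may differ from @hw_cap */
    static u32 apply_capa_override(u32 hw_cap, u32 user_cap, u32 mask)
    {
            return (hw_cap & ~mask) | (user_cap & mask);
    }
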
@@ -572,7 +556,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
572 wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | 556 wiphy->features |= NL80211_FEATURE_SK_TX_STATUS |
573 NL80211_FEATURE_SAE | 557 NL80211_FEATURE_SAE |
574 NL80211_FEATURE_HT_IBSS | 558 NL80211_FEATURE_HT_IBSS |
575 NL80211_FEATURE_VIF_TXPOWER; 559 NL80211_FEATURE_VIF_TXPOWER |
560 NL80211_FEATURE_USERSPACE_MPM;
576 561
577 if (!ops->hw_scan) 562 if (!ops->hw_scan)
578 wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | 563 wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
@@ -607,8 +592,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
607 IEEE80211_RADIOTAP_MCS_HAVE_BW; 592 IEEE80211_RADIOTAP_MCS_HAVE_BW;
608 local->hw.radiotap_vht_details = IEEE80211_RADIOTAP_VHT_KNOWN_GI | 593 local->hw.radiotap_vht_details = IEEE80211_RADIOTAP_VHT_KNOWN_GI |
609 IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; 594 IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH;
595 local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES;
596 local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;
610 local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; 597 local->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
611 wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; 598 wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask;
599 wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask;
612 600
613 INIT_LIST_HEAD(&local->interfaces); 601 INIT_LIST_HEAD(&local->interfaces);
614 602
@@ -664,9 +652,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
664 skb_queue_head_init(&local->skb_queue); 652 skb_queue_head_init(&local->skb_queue);
665 skb_queue_head_init(&local->skb_queue_unreliable); 653 skb_queue_head_init(&local->skb_queue_unreliable);
666 654
667 /* init dummy netdev for use w/ NAPI */
668 init_dummy_netdev(&local->napi_dev);
669
670 ieee80211_led_names(local); 655 ieee80211_led_names(local);
671 656
672 ieee80211_roc_setup(local); 657 ieee80211_roc_setup(local);
@@ -683,6 +668,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
683 int channels, max_bitrates; 668 int channels, max_bitrates;
684 bool supp_ht, supp_vht; 669 bool supp_ht, supp_vht;
685 netdev_features_t feature_whitelist; 670 netdev_features_t feature_whitelist;
671 struct cfg80211_chan_def dflt_chandef = {};
686 static const u32 cipher_suites[] = { 672 static const u32 cipher_suites[] = {
687 /* keep WEP first, it may be removed below */ 673 /* keep WEP first, it may be removed below */
688 WLAN_CIPHER_SUITE_WEP40, 674 WLAN_CIPHER_SUITE_WEP40,
@@ -760,15 +746,19 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
760 sband = local->hw.wiphy->bands[band]; 746 sband = local->hw.wiphy->bands[band];
761 if (!sband) 747 if (!sband)
762 continue; 748 continue;
763 if (!local->use_chanctx && !local->_oper_channel) { 749
750 if (!dflt_chandef.chan) {
751 cfg80211_chandef_create(&dflt_chandef,
752 &sband->channels[0],
753 NL80211_CHAN_NO_HT);
764 /* init channel we're on */ 754 /* init channel we're on */
765 local->hw.conf.channel = 755 if (!local->use_chanctx && !local->_oper_chandef.chan) {
766 local->_oper_channel = &sband->channels[0]; 756 local->hw.conf.chandef = dflt_chandef;
767 local->hw.conf.channel_type = NL80211_CHAN_NO_HT; 757 local->_oper_chandef = dflt_chandef;
758 }
759 local->monitor_chandef = dflt_chandef;
768 } 760 }
769 cfg80211_chandef_create(&local->monitor_chandef, 761
770 &sband->channels[0],
771 NL80211_CHAN_NO_HT);
772 channels += sband->n_channels; 762 channels += sband->n_channels;
773 763
774 if (max_bitrates < sband->n_bitrates) 764 if (max_bitrates < sband->n_bitrates)
@@ -851,22 +841,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
851 if (supp_ht) 841 if (supp_ht)
852 local->scan_ies_len += 2 + sizeof(struct ieee80211_ht_cap); 842 local->scan_ies_len += 2 + sizeof(struct ieee80211_ht_cap);
853 843
854 if (supp_vht) { 844 if (supp_vht)
855 local->scan_ies_len += 845 local->scan_ies_len +=
856 2 + sizeof(struct ieee80211_vht_cap); 846 2 + sizeof(struct ieee80211_vht_cap);
857 847
858 /*
859 * (for now at least), drivers wanting to use VHT must
860 * support channel contexts, as they contain all the
861 * necessary VHT information and the global hw config
862 * doesn't (yet)
863 */
864 if (WARN_ON(!local->use_chanctx)) {
865 result = -EINVAL;
866 goto fail_wiphy_register;
867 }
868 }
869
870 if (!local->ops->hw_scan) { 848 if (!local->ops->hw_scan) {
871 /* For hw_scan, driver needs to set these up. */ 849 /* For hw_scan, driver needs to set these up. */
872 local->hw.wiphy->max_scan_ssids = 4; 850 local->hw.wiphy->max_scan_ssids = 4;
@@ -1021,9 +999,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
1021 goto fail_ifa6; 999 goto fail_ifa6;
1022#endif 1000#endif
1023 1001
1024 netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll,
1025 local->hw.napi_weight);
1026
1027 return 0; 1002 return 0;
1028 1003
1029#if IS_ENABLED(CONFIG_IPV6) 1004#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 4749b3858695..6952760881c8 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -13,10 +13,6 @@
13#include "ieee80211_i.h" 13#include "ieee80211_i.h"
14#include "mesh.h" 14#include "mesh.h"
15 15
16#define TMR_RUNNING_HK 0
17#define TMR_RUNNING_MP 1
18#define TMR_RUNNING_MPR 2
19
20static int mesh_allocated; 16static int mesh_allocated;
21static struct kmem_cache *rm_cache; 17static struct kmem_cache *rm_cache;
22 18
@@ -50,11 +46,6 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data)
50 46
51 set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); 47 set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
52 48
53 if (local->quiescing) {
54 set_bit(TMR_RUNNING_HK, &ifmsh->timers_running);
55 return;
56 }
57
58 ieee80211_queue_work(&local->hw, &sdata->work); 49 ieee80211_queue_work(&local->hw, &sdata->work);
59} 50}
60 51
@@ -165,7 +156,7 @@ void mesh_sta_cleanup(struct sta_info *sta)
165 * an update. 156 * an update.
166 */ 157 */
167 changed = mesh_accept_plinks_update(sdata); 158 changed = mesh_accept_plinks_update(sdata);
168 if (sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { 159 if (!sdata->u.mesh.user_mpm) {
169 changed |= mesh_plink_deactivate(sta); 160 changed |= mesh_plink_deactivate(sta);
170 del_timer_sync(&sta->plink_timer); 161 del_timer_sync(&sta->plink_timer);
171 } 162 }
@@ -479,15 +470,8 @@ static void ieee80211_mesh_path_timer(unsigned long data)
479{ 470{
480 struct ieee80211_sub_if_data *sdata = 471 struct ieee80211_sub_if_data *sdata =
481 (struct ieee80211_sub_if_data *) data; 472 (struct ieee80211_sub_if_data *) data;
482 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
483 struct ieee80211_local *local = sdata->local;
484
485 if (local->quiescing) {
486 set_bit(TMR_RUNNING_MP, &ifmsh->timers_running);
487 return;
488 }
489 473
490 ieee80211_queue_work(&local->hw, &sdata->work); 474 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
491} 475}
492 476
493static void ieee80211_mesh_path_root_timer(unsigned long data) 477static void ieee80211_mesh_path_root_timer(unsigned long data)
@@ -495,16 +479,10 @@ static void ieee80211_mesh_path_root_timer(unsigned long data)
495 struct ieee80211_sub_if_data *sdata = 479 struct ieee80211_sub_if_data *sdata =
496 (struct ieee80211_sub_if_data *) data; 480 (struct ieee80211_sub_if_data *) data;
497 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 481 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
498 struct ieee80211_local *local = sdata->local;
499 482
500 set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); 483 set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags);
501 484
502 if (local->quiescing) { 485 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
503 set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running);
504 return;
505 }
506
507 ieee80211_queue_work(&local->hw, &sdata->work);
508} 486}
509 487
510void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) 488void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh)
@@ -622,35 +600,6 @@ static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata)
622 round_jiffies(TU_TO_EXP_TIME(interval))); 600 round_jiffies(TU_TO_EXP_TIME(interval)));
623} 601}
624 602
625#ifdef CONFIG_PM
626void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata)
627{
628 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
629
630 /* use atomic bitops in case all timers fire at the same time */
631
632 if (del_timer_sync(&ifmsh->housekeeping_timer))
633 set_bit(TMR_RUNNING_HK, &ifmsh->timers_running);
634 if (del_timer_sync(&ifmsh->mesh_path_timer))
635 set_bit(TMR_RUNNING_MP, &ifmsh->timers_running);
636 if (del_timer_sync(&ifmsh->mesh_path_root_timer))
637 set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running);
638}
639
640void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata)
641{
642 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
643
644 if (test_and_clear_bit(TMR_RUNNING_HK, &ifmsh->timers_running))
645 add_timer(&ifmsh->housekeeping_timer);
646 if (test_and_clear_bit(TMR_RUNNING_MP, &ifmsh->timers_running))
647 add_timer(&ifmsh->mesh_path_timer);
648 if (test_and_clear_bit(TMR_RUNNING_MPR, &ifmsh->timers_running))
649 add_timer(&ifmsh->mesh_path_root_timer);
650 ieee80211_mesh_root_setup(ifmsh);
651}
652#endif
653
654static int 603static int
655ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) 604ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
656{ 605{
@@ -750,10 +699,8 @@ out_free:
750static int 699static int
751ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh) 700ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh)
752{ 701{
753 struct ieee80211_sub_if_data *sdata;
754 struct beacon_data *old_bcn; 702 struct beacon_data *old_bcn;
755 int ret; 703 int ret;
756 sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
757 704
758 mutex_lock(&ifmsh->mtx); 705 mutex_lock(&ifmsh->mtx);
759 706
@@ -871,8 +818,6 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
871 local->fif_other_bss--; 818 local->fif_other_bss--;
872 atomic_dec(&local->iff_allmultis); 819 atomic_dec(&local->iff_allmultis);
873 ieee80211_configure_filter(local); 820 ieee80211_configure_filter(local);
874
875 sdata->u.mesh.timers_running = 0;
876} 821}
877 822
878static void 823static void
@@ -886,15 +831,14 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,
886 struct ieee80211_mgmt *hdr; 831 struct ieee80211_mgmt *hdr;
887 struct ieee802_11_elems elems; 832 struct ieee802_11_elems elems;
888 size_t baselen; 833 size_t baselen;
889 u8 *pos, *end; 834 u8 *pos;
890 835
891 end = ((u8 *) mgmt) + len;
892 pos = mgmt->u.probe_req.variable; 836 pos = mgmt->u.probe_req.variable;
893 baselen = (u8 *) pos - (u8 *) mgmt; 837 baselen = (u8 *) pos - (u8 *) mgmt;
894 if (baselen > len) 838 if (baselen > len)
895 return; 839 return;
896 840
897 ieee802_11_parse_elems(pos, len - baselen, &elems); 841 ieee802_11_parse_elems(pos, len - baselen, false, &elems);
898 842
899 /* 802.11-2012 10.1.4.3.2 */ 843 /* 802.11-2012 10.1.4.3.2 */
900 if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) && 844 if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) &&
@@ -955,7 +899,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
955 return; 899 return;
956 900
957 ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, 901 ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
958 &elems); 902 false, &elems);
959 903
960 /* ignore non-mesh or secure / unsecure mismatch */ 904 /* ignore non-mesh or secure / unsecure mismatch */
961 if ((!elems.mesh_id || !elems.mesh_config) || 905 if ((!elems.mesh_id || !elems.mesh_config) ||
@@ -963,7 +907,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
963 (!elems.rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)) 907 (!elems.rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE))
964 return; 908 return;
965 909
966 if (elems.ds_params && elems.ds_params_len == 1) 910 if (elems.ds_params)
967 freq = ieee80211_channel_to_frequency(elems.ds_params[0], band); 911 freq = ieee80211_channel_to_frequency(elems.ds_params[0], band);
968 else 912 else
969 freq = rx_status->freq; 913 freq = rx_status->freq;
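Two parser-related changes recur in this file: ieee802_11_parse_elems() gains a boolean argument (false here for beacons and probe frames, true in mesh_plink.c for peering Action frames), which appears to let the parser treat Action-frame elements differently from beacon elements; and the explicit ds_params_len == 1 check is dropped, suggesting the parser now rejects elements with bad lengths itself.
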
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 336c88a16687..da158774eebb 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -275,7 +275,8 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop);
275void mesh_path_expire(struct ieee80211_sub_if_data *sdata); 275void mesh_path_expire(struct ieee80211_sub_if_data *sdata);
276void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, 276void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
277 struct ieee80211_mgmt *mgmt, size_t len); 277 struct ieee80211_mgmt *mgmt, size_t len);
278int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst); 278struct mesh_path *
279mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst);
279 280
280int mesh_path_add_gate(struct mesh_path *mpath); 281int mesh_path_add_gate(struct mesh_path *mpath);
281int mesh_path_send_to_gates(struct mesh_path *mpath); 282int mesh_path_send_to_gates(struct mesh_path *mpath);
@@ -313,8 +314,6 @@ void mesh_path_timer(unsigned long data);
313void mesh_path_flush_by_nexthop(struct sta_info *sta); 314void mesh_path_flush_by_nexthop(struct sta_info *sta);
314void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, 315void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
315 struct sk_buff *skb); 316 struct sk_buff *skb);
316void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata);
317void mesh_path_restart(struct ieee80211_sub_if_data *sdata);
318void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); 317void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata);
319 318
320bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); 319bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt);
@@ -359,22 +358,12 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
359 358
360void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); 359void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
361 360
362void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata);
363void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata);
364void mesh_plink_quiesce(struct sta_info *sta);
365void mesh_plink_restart(struct sta_info *sta);
366void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); 361void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
367void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); 362void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
368void ieee80211s_stop(void); 363void ieee80211s_stop(void);
369#else 364#else
370static inline void 365static inline void
371ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} 366ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
372static inline void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata)
373{}
374static inline void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata)
375{}
376static inline void mesh_plink_quiesce(struct sta_info *sta) {}
377static inline void mesh_plink_restart(struct sta_info *sta) {}
378static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) 367static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
379{ return false; } 368{ return false; }
380static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) 369static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index bdb8d3b14587..486819cd02cd 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -144,7 +144,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
144 *pos++ = WLAN_EID_PREQ; 144 *pos++ = WLAN_EID_PREQ;
145 break; 145 break;
146 case MPATH_PREP: 146 case MPATH_PREP:
147 mhwmp_dbg(sdata, "sending PREP to %pM\n", target); 147 mhwmp_dbg(sdata, "sending PREP to %pM\n", orig_addr);
148 ie_len = 31; 148 ie_len = 31;
149 pos = skb_put(skb, 2 + ie_len); 149 pos = skb_put(skb, 2 + ie_len);
150 *pos++ = WLAN_EID_PREP; 150 *pos++ = WLAN_EID_PREP;
@@ -445,9 +445,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
445 } 445 }
446 } 446 }
447 } else { 447 } else {
448 mesh_path_add(sdata, orig_addr); 448 mpath = mesh_path_add(sdata, orig_addr);
449 mpath = mesh_path_lookup(sdata, orig_addr); 449 if (IS_ERR(mpath)) {
450 if (!mpath) {
451 rcu_read_unlock(); 450 rcu_read_unlock();
452 return 0; 451 return 0;
453 } 452 }
@@ -486,9 +485,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
486 (last_hop_metric > mpath->metric))) 485 (last_hop_metric > mpath->metric)))
487 fresh_info = false; 486 fresh_info = false;
488 } else { 487 } else {
489 mesh_path_add(sdata, ta); 488 mpath = mesh_path_add(sdata, ta);
490 mpath = mesh_path_lookup(sdata, ta); 489 if (IS_ERR(mpath)) {
491 if (!mpath) {
492 rcu_read_unlock(); 490 rcu_read_unlock();
493 return 0; 491 return 0;
494 } 492 }
@@ -661,7 +659,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
661 u32 target_sn, orig_sn, lifetime; 659 u32 target_sn, orig_sn, lifetime;
662 660
663 mhwmp_dbg(sdata, "received PREP from %pM\n", 661 mhwmp_dbg(sdata, "received PREP from %pM\n",
664 PREP_IE_ORIG_ADDR(prep_elem)); 662 PREP_IE_TARGET_ADDR(prep_elem));
665 663
666 orig_addr = PREP_IE_ORIG_ADDR(prep_elem); 664 orig_addr = PREP_IE_ORIG_ADDR(prep_elem);
667 if (ether_addr_equal(orig_addr, sdata->vif.addr)) 665 if (ether_addr_equal(orig_addr, sdata->vif.addr))
@@ -804,9 +802,8 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,
804 802
805 mpath = mesh_path_lookup(sdata, orig_addr); 803 mpath = mesh_path_lookup(sdata, orig_addr);
806 if (!mpath) { 804 if (!mpath) {
807 mesh_path_add(sdata, orig_addr); 805 mpath = mesh_path_add(sdata, orig_addr);
808 mpath = mesh_path_lookup(sdata, orig_addr); 806 if (IS_ERR(mpath)) {
809 if (!mpath) {
810 rcu_read_unlock(); 807 rcu_read_unlock();
811 sdata->u.mesh.mshstats.dropped_frames_no_route++; 808 sdata->u.mesh.mshstats.dropped_frames_no_route++;
812 return; 809 return;
@@ -883,7 +880,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
883 880
884 baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; 881 baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt;
885 ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, 882 ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable,
886 len - baselen, &elems); 883 len - baselen, false, &elems);
887 884
888 if (elems.preq) { 885 if (elems.preq) {
889 if (elems.preq_len != 37) 886 if (elems.preq_len != 37)
@@ -1098,11 +1095,10 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,
1098 /* no nexthop found, start resolving */ 1095 /* no nexthop found, start resolving */
1099 mpath = mesh_path_lookup(sdata, target_addr); 1096 mpath = mesh_path_lookup(sdata, target_addr);
1100 if (!mpath) { 1097 if (!mpath) {
1101 mesh_path_add(sdata, target_addr); 1098 mpath = mesh_path_add(sdata, target_addr);
1102 mpath = mesh_path_lookup(sdata, target_addr); 1099 if (IS_ERR(mpath)) {
1103 if (!mpath) {
1104 mesh_path_discard_frame(sdata, skb); 1100 mesh_path_discard_frame(sdata, skb);
1105 err = -ENOSPC; 1101 err = PTR_ERR(mpath);
1106 goto endlookup; 1102 goto endlookup;
1107 } 1103 }
1108 } 1104 }
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index dc7c8df40c2c..89aacfd2756d 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -493,7 +493,8 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata)
493 * 493 *
494 * State: the initial state of the new path is set to 0 494 * State: the initial state of the new path is set to 0
495 */ 495 */
496int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) 496struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
497 const u8 *dst)
497{ 498{
498 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; 499 struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
499 struct ieee80211_local *local = sdata->local; 500 struct ieee80211_local *local = sdata->local;
@@ -502,18 +503,33 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
502 struct mpath_node *node, *new_node; 503 struct mpath_node *node, *new_node;
503 struct hlist_head *bucket; 504 struct hlist_head *bucket;
504 int grow = 0; 505 int grow = 0;
505 int err = 0; 506 int err;
506 u32 hash_idx; 507 u32 hash_idx;
507 508
508 if (ether_addr_equal(dst, sdata->vif.addr)) 509 if (ether_addr_equal(dst, sdata->vif.addr))
509 /* never add ourselves as neighbours */ 510 /* never add ourselves as neighbours */
510 return -ENOTSUPP; 511 return ERR_PTR(-ENOTSUPP);
511 512
512 if (is_multicast_ether_addr(dst)) 513 if (is_multicast_ether_addr(dst))
513 return -ENOTSUPP; 514 return ERR_PTR(-ENOTSUPP);
514 515
515 if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0) 516 if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0)
516 return -ENOSPC; 517 return ERR_PTR(-ENOSPC);
518
519 read_lock_bh(&pathtbl_resize_lock);
520 tbl = resize_dereference_mesh_paths();
521
522 hash_idx = mesh_table_hash(dst, sdata, tbl);
523 bucket = &tbl->hash_buckets[hash_idx];
524
525 spin_lock(&tbl->hashwlock[hash_idx]);
526
527 hlist_for_each_entry(node, bucket, list) {
528 mpath = node->mpath;
529 if (mpath->sdata == sdata &&
530 ether_addr_equal(dst, mpath->dst))
531 goto found;
532 }
517 533
518 err = -ENOMEM; 534 err = -ENOMEM;
519 new_mpath = kzalloc(sizeof(struct mesh_path), GFP_ATOMIC); 535 new_mpath = kzalloc(sizeof(struct mesh_path), GFP_ATOMIC);
@@ -524,7 +540,6 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
524 if (!new_node) 540 if (!new_node)
525 goto err_node_alloc; 541 goto err_node_alloc;
526 542
527 read_lock_bh(&pathtbl_resize_lock);
528 memcpy(new_mpath->dst, dst, ETH_ALEN); 543 memcpy(new_mpath->dst, dst, ETH_ALEN);
529 eth_broadcast_addr(new_mpath->rann_snd_addr); 544 eth_broadcast_addr(new_mpath->rann_snd_addr);
530 new_mpath->is_root = false; 545 new_mpath->is_root = false;
@@ -538,21 +553,6 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
538 spin_lock_init(&new_mpath->state_lock); 553 spin_lock_init(&new_mpath->state_lock);
539 init_timer(&new_mpath->timer); 554 init_timer(&new_mpath->timer);
540 555
541 tbl = resize_dereference_mesh_paths();
542
543 hash_idx = mesh_table_hash(dst, sdata, tbl);
544 bucket = &tbl->hash_buckets[hash_idx];
545
546 spin_lock(&tbl->hashwlock[hash_idx]);
547
548 err = -EEXIST;
549 hlist_for_each_entry(node, bucket, list) {
550 mpath = node->mpath;
551 if (mpath->sdata == sdata &&
552 ether_addr_equal(dst, mpath->dst))
553 goto err_exists;
554 }
555
556 hlist_add_head_rcu(&new_node->list, bucket); 556 hlist_add_head_rcu(&new_node->list, bucket);
557 if (atomic_inc_return(&tbl->entries) >= 557 if (atomic_inc_return(&tbl->entries) >=
558 tbl->mean_chain_len * (tbl->hash_mask + 1)) 558 tbl->mean_chain_len * (tbl->hash_mask + 1))
@@ -560,23 +560,23 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)
560 560
561 mesh_paths_generation++; 561 mesh_paths_generation++;
562 562
563 spin_unlock(&tbl->hashwlock[hash_idx]);
564 read_unlock_bh(&pathtbl_resize_lock);
565 if (grow) { 563 if (grow) {
566 set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags); 564 set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags);
567 ieee80211_queue_work(&local->hw, &sdata->work); 565 ieee80211_queue_work(&local->hw, &sdata->work);
568 } 566 }
569 return 0; 567 mpath = new_mpath;
570 568found:
571err_exists:
572 spin_unlock(&tbl->hashwlock[hash_idx]); 569 spin_unlock(&tbl->hashwlock[hash_idx]);
573 read_unlock_bh(&pathtbl_resize_lock); 570 read_unlock_bh(&pathtbl_resize_lock);
574 kfree(new_node); 571 return mpath;
572
575err_node_alloc: 573err_node_alloc:
576 kfree(new_mpath); 574 kfree(new_mpath);
577err_path_alloc: 575err_path_alloc:
578 atomic_dec(&sdata->u.mesh.mpaths); 576 atomic_dec(&sdata->u.mesh.mpaths);
579 return err; 577 spin_unlock(&tbl->hashwlock[hash_idx]);
578 read_unlock_bh(&pathtbl_resize_lock);
579 return ERR_PTR(err);
580} 580}
581 581
582static void mesh_table_free_rcu(struct rcu_head *rcu) 582static void mesh_table_free_rcu(struct rcu_head *rcu)
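mesh_path_add() now returns the path itself and reports failure through the kernel's ERR_PTR convention, which encodes a small negative errno inside an invalid pointer value so a single return carries both results. A minimal, generic illustration of the convention (not the mesh code):

    #include <linux/err.h>
    #include <linux/slab.h>

    struct thing {
            int id;
    };

    static struct thing *thing_create(int id)
    {
            struct thing *t;

            if (id < 0)
                    return ERR_PTR(-EINVAL);  /* errno hidden in the pointer */

            t = kzalloc(sizeof(*t), GFP_KERNEL);
            if (!t)
                    return ERR_PTR(-ENOMEM);

            t->id = id;
            return t;
    }

    static int thing_use(int id)
    {
            struct thing *t = thing_create(id);

            if (IS_ERR(t))
                    return PTR_ERR(t);        /* decode back to an errno */

            kfree(t);
            return 0;
    }

This is why the hwmp callers above could drop the add-then-lookup dance: the add either hands back a usable mpath (including a pre-existing one, via the new 'found' label) or an IS_ERR() pointer.
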
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 07d396d57079..09bebed99416 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -420,7 +420,6 @@ __mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr)
420 return NULL; 420 return NULL;
421 421
422 sta->plink_state = NL80211_PLINK_LISTEN; 422 sta->plink_state = NL80211_PLINK_LISTEN;
423 init_timer(&sta->plink_timer);
424 423
425 sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); 424 sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
426 sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); 425 sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
@@ -437,8 +436,9 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
437{ 436{
438 struct sta_info *sta = NULL; 437 struct sta_info *sta = NULL;
439 438
440 /* Userspace handles peer allocation when security is enabled */ 439 /* Userspace handles station allocation */
441 if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) 440 if (sdata->u.mesh.user_mpm ||
441 sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED)
442 cfg80211_notify_new_peer_candidate(sdata->dev, addr, 442 cfg80211_notify_new_peer_candidate(sdata->dev, addr,
443 elems->ie_start, 443 elems->ie_start,
444 elems->total_len, 444 elems->total_len,
@@ -534,10 +534,8 @@ static void mesh_plink_timer(unsigned long data)
534 */ 534 */
535 sta = (struct sta_info *) data; 535 sta = (struct sta_info *) data;
536 536
537 if (sta->sdata->local->quiescing) { 537 if (sta->sdata->local->quiescing)
538 sta->plink_timer_was_running = true;
539 return; 538 return;
540 }
541 539
542 spin_lock_bh(&sta->lock); 540 spin_lock_bh(&sta->lock);
543 if (sta->ignore_plink_timer) { 541 if (sta->ignore_plink_timer) {
@@ -546,8 +544,8 @@ static void mesh_plink_timer(unsigned long data)
546 return; 544 return;
547 } 545 }
548 mpl_dbg(sta->sdata, 546 mpl_dbg(sta->sdata,
549 "Mesh plink timer for %pM fired on state %d\n", 547 "Mesh plink timer for %pM fired on state %s\n",
550 sta->sta.addr, sta->plink_state); 548 sta->sta.addr, mplstates[sta->plink_state]);
551 reason = 0; 549 reason = 0;
552 llid = sta->llid; 550 llid = sta->llid;
553 plid = sta->plid; 551 plid = sta->plid;
@@ -598,29 +596,6 @@ static void mesh_plink_timer(unsigned long data)
598 } 596 }
599} 597}
600 598
601#ifdef CONFIG_PM
602void mesh_plink_quiesce(struct sta_info *sta)
603{
604 if (!ieee80211_vif_is_mesh(&sta->sdata->vif))
605 return;
606
607 /* no kernel mesh sta timers have been initialized */
608 if (sta->sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)
609 return;
610
611 if (del_timer_sync(&sta->plink_timer))
612 sta->plink_timer_was_running = true;
613}
614
615void mesh_plink_restart(struct sta_info *sta)
616{
617 if (sta->plink_timer_was_running) {
618 add_timer(&sta->plink_timer);
619 sta->plink_timer_was_running = false;
620 }
621}
622#endif
623
624static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) 599static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout)
625{ 600{
626 sta->plink_timer.expires = jiffies + (HZ * timeout / 1000); 601 sta->plink_timer.expires = jiffies + (HZ * timeout / 1000);
@@ -695,6 +670,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
695 if (len < IEEE80211_MIN_ACTION_SIZE + 3) 670 if (len < IEEE80211_MIN_ACTION_SIZE + 3)
696 return; 671 return;
697 672
673 if (sdata->u.mesh.user_mpm)
674 /* userspace must register for these */
675 return;
676
698 if (is_multicast_ether_addr(mgmt->da)) { 677 if (is_multicast_ether_addr(mgmt->da)) {
699 mpl_dbg(sdata, 678 mpl_dbg(sdata,
700 "Mesh plink: ignore frame from multicast address\n"); 679 "Mesh plink: ignore frame from multicast address\n");
@@ -708,7 +687,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
708 baseaddr += 4; 687 baseaddr += 4;
709 baselen += 4; 688 baselen += 4;
710 } 689 }
711 ieee802_11_parse_elems(baseaddr, len - baselen, &elems); 690 ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems);
712 691
713 if (!elems.peering) { 692 if (!elems.peering) {
714 mpl_dbg(sdata, 693 mpl_dbg(sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 82cc30318a86..29620bfc7a69 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -56,7 +56,10 @@ MODULE_PARM_DESC(max_probe_tries,
56 * probe on beacon miss before declaring the connection lost 56 * probe on beacon miss before declaring the connection lost
57 * default to what we want. 57 * default to what we want.
58 */ 58 */
59#define IEEE80211_BEACON_LOSS_COUNT 7 59static int beacon_loss_count = 7;
60module_param(beacon_loss_count, int, 0644);
61MODULE_PARM_DESC(beacon_loss_count,
62 "Number of beacon intervals before we decide beacon was lost.");
60 63
61/* 64/*
62 * Time the connection can be idle before we probe 65 * Time the connection can be idle before we probe
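Replacing the IEEE80211_BEACON_LOSS_COUNT constant with module_param(..., 0644) makes the threshold tunable at runtime: it appears as /sys/module/mac80211/parameters/beacon_loss_count, readable by anyone and writable by root, with no rebuild or module reload needed.
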
@@ -87,9 +90,6 @@ MODULE_PARM_DESC(probe_wait_ms,
87 */ 90 */
88#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4 91#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4
89 92
90#define TMR_RUNNING_TIMER 0
91#define TMR_RUNNING_CHANSW 1
92
93/* 93/*
94 * All cfg80211 functions have to be called outside a locked 94 * All cfg80211 functions have to be called outside a locked
95 * section so that they can acquire a lock themselves... This 95 * section so that they can acquire a lock themselves... This
@@ -289,6 +289,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
289 } else { 289 } else {
290 /* 40 MHz (and 80 MHz) must be supported for VHT */ 290 /* 40 MHz (and 80 MHz) must be supported for VHT */
291 ret = IEEE80211_STA_DISABLE_VHT; 291 ret = IEEE80211_STA_DISABLE_VHT;
292 /* also mark 40 MHz disabled */
293 ret |= IEEE80211_STA_DISABLE_40MHZ;
292 goto out; 294 goto out;
293 } 295 }
294 296
@@ -303,12 +305,6 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
303 channel->band); 305 channel->band);
304 vht_chandef.center_freq2 = 0; 306 vht_chandef.center_freq2 = 0;
305 307
306 if (vht_oper->center_freq_seg2_idx)
307 vht_chandef.center_freq2 =
308 ieee80211_channel_to_frequency(
309 vht_oper->center_freq_seg2_idx,
310 channel->band);
311
312 switch (vht_oper->chan_width) { 308 switch (vht_oper->chan_width) {
313 case IEEE80211_VHT_CHANWIDTH_USE_HT: 309 case IEEE80211_VHT_CHANWIDTH_USE_HT:
314 vht_chandef.width = chandef->width; 310 vht_chandef.width = chandef->width;
@@ -321,6 +317,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
321 break; 317 break;
322 case IEEE80211_VHT_CHANWIDTH_80P80MHZ: 318 case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
323 vht_chandef.width = NL80211_CHAN_WIDTH_80P80; 319 vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
320 vht_chandef.center_freq2 =
321 ieee80211_channel_to_frequency(
322 vht_oper->center_freq_seg2_idx,
323 channel->band);
324 break; 324 break;
325 default: 325 default:
326 if (verbose) 326 if (verbose)
@@ -604,11 +604,11 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
604 u8 *pos; 604 u8 *pos;
605 u32 cap; 605 u32 cap;
606 struct ieee80211_sta_vht_cap vht_cap; 606 struct ieee80211_sta_vht_cap vht_cap;
607 int i;
608 607
609 BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap)); 608 BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap));
610 609
611 memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); 610 memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap));
611 ieee80211_apply_vhtcap_overrides(sdata, &vht_cap);
612 612
613 /* determine capability flags */ 613 /* determine capability flags */
614 cap = vht_cap.cap; 614 cap = vht_cap.cap;
@@ -631,37 +631,6 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
631 cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE))) 631 cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)))
632 cap &= ~IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; 632 cap &= ~IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
633 633
634 if (!(ap_vht_cap->vht_cap_info &
635 cpu_to_le32(IEEE80211_VHT_CAP_TXSTBC)))
636 cap &= ~(IEEE80211_VHT_CAP_RXSTBC_1 |
637 IEEE80211_VHT_CAP_RXSTBC_3 |
638 IEEE80211_VHT_CAP_RXSTBC_4);
639
640 for (i = 0; i < 8; i++) {
641 int shift = i * 2;
642 u16 mask = IEEE80211_VHT_MCS_NOT_SUPPORTED << shift;
643 u16 ap_mcs, our_mcs;
644
645 ap_mcs = (le16_to_cpu(ap_vht_cap->supp_mcs.tx_mcs_map) &
646 mask) >> shift;
647 our_mcs = (le16_to_cpu(vht_cap.vht_mcs.rx_mcs_map) &
648 mask) >> shift;
649
650 if (our_mcs == IEEE80211_VHT_MCS_NOT_SUPPORTED)
651 continue;
652
653 switch (ap_mcs) {
654 default:
655 if (our_mcs <= ap_mcs)
656 break;
657 /* fall through */
658 case IEEE80211_VHT_MCS_NOT_SUPPORTED:
659 vht_cap.vht_mcs.rx_mcs_map &= cpu_to_le16(~mask);
660 vht_cap.vht_mcs.rx_mcs_map |=
661 cpu_to_le16(ap_mcs << shift);
662 }
663 }
664
665 /* reserve and fill IE */ 634 /* reserve and fill IE */
666 pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2); 635 pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
667 ieee80211_ie_build_vht_cap(pos, &vht_cap, cap); 636 ieee80211_ie_build_vht_cap(pos, &vht_cap, cap);
@@ -987,6 +956,7 @@ static void ieee80211_chswitch_work(struct work_struct *work)
987{ 956{
988 struct ieee80211_sub_if_data *sdata = 957 struct ieee80211_sub_if_data *sdata =
989 container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work); 958 container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work);
959 struct ieee80211_local *local = sdata->local;
990 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 960 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
991 961
992 if (!ieee80211_sdata_running(sdata)) 962 if (!ieee80211_sdata_running(sdata))
@@ -996,21 +966,22 @@ static void ieee80211_chswitch_work(struct work_struct *work)
996 if (!ifmgd->associated) 966 if (!ifmgd->associated)
997 goto out; 967 goto out;
998 968
999 sdata->local->_oper_channel = sdata->local->csa_channel; 969 local->_oper_chandef = local->csa_chandef;
1000 if (!sdata->local->ops->channel_switch) { 970
971 if (!local->ops->channel_switch) {
1001 /* call "hw_config" only if doing sw channel switch */ 972 /* call "hw_config" only if doing sw channel switch */
1002 ieee80211_hw_config(sdata->local, 973 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
1003 IEEE80211_CONF_CHANGE_CHANNEL);
1004 } else { 974 } else {
1005 /* update the device channel directly */ 975 /* update the device channel directly */
1006 sdata->local->hw.conf.channel = sdata->local->_oper_channel; 976 local->hw.conf.chandef = local->_oper_chandef;
1007 } 977 }
1008 978
1009 /* XXX: shouldn't really modify cfg80211-owned data! */ 979 /* XXX: shouldn't really modify cfg80211-owned data! */
1010 ifmgd->associated->channel = sdata->local->_oper_channel; 980 ifmgd->associated->channel = local->_oper_chandef.chan;
1011 981
1012 /* XXX: wait for a beacon first? */ 982 /* XXX: wait for a beacon first? */
1013 ieee80211_wake_queues_by_reason(&sdata->local->hw, 983 ieee80211_wake_queues_by_reason(&local->hw,
984 IEEE80211_MAX_QUEUE_MAP,
1014 IEEE80211_QUEUE_STOP_REASON_CSA); 985 IEEE80211_QUEUE_STOP_REASON_CSA);
1015 out: 986 out:
1016 ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; 987 ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
@@ -1038,66 +1009,197 @@ static void ieee80211_chswitch_timer(unsigned long data)
 {
 	struct ieee80211_sub_if_data *sdata =
 		(struct ieee80211_sub_if_data *) data;
-	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-
-	if (sdata->local->quiescing) {
-		set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running);
-		return;
-	}
 
-	ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
+	ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
 }
 
-void
+static void
 ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
-				 const struct ieee80211_channel_sw_ie *sw_elem,
-				 struct ieee80211_bss *bss, u64 timestamp)
+				 u64 timestamp, struct ieee802_11_elems *elems)
 {
-	struct cfg80211_bss *cbss =
-		container_of((void *)bss, struct cfg80211_bss, priv);
-	struct ieee80211_channel *new_ch;
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num,
-						      cbss->channel->band);
+	struct cfg80211_bss *cbss = ifmgd->associated;
+	struct ieee80211_bss *bss;
 	struct ieee80211_chanctx *chanctx;
+	enum ieee80211_band new_band;
+	int new_freq;
+	u8 new_chan_no;
+	u8 count;
+	u8 mode;
+	struct ieee80211_channel *new_chan;
+	struct cfg80211_chan_def new_chandef = {};
+	struct cfg80211_chan_def new_vht_chandef = {};
+	const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
+	const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
+	int secondary_channel_offset = -1;
 
 	ASSERT_MGD_MTX(ifmgd);
 
-	if (!ifmgd->associated)
+	if (!cbss)
 		return;
 
-	if (sdata->local->scanning)
+	if (local->scanning)
 		return;
 
-	/* Disregard subsequent beacons if we are already running a timer
-	   processing a CSA */
-
+	/* disregard subsequent announcements if we are already processing */
 	if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED)
 		return;
 
-	new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq);
-	if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) {
+	sec_chan_offs = elems->sec_chan_offs;
+	wide_bw_chansw_ie = elems->wide_bw_chansw_ie;
+
+	if (ifmgd->flags & (IEEE80211_STA_DISABLE_HT |
+			    IEEE80211_STA_DISABLE_40MHZ)) {
+		sec_chan_offs = NULL;
+		wide_bw_chansw_ie = NULL;
+	}
+
+	if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT)
+		wide_bw_chansw_ie = NULL;
+
+	if (elems->ext_chansw_ie) {
+		if (!ieee80211_operating_class_to_band(
+				elems->ext_chansw_ie->new_operating_class,
+				&new_band)) {
+			sdata_info(sdata,
+				   "cannot understand ECSA IE operating class %d, disconnecting\n",
+				   elems->ext_chansw_ie->new_operating_class);
+			ieee80211_queue_work(&local->hw,
+					     &ifmgd->csa_connection_drop_work);
+		}
+		new_chan_no = elems->ext_chansw_ie->new_ch_num;
+		count = elems->ext_chansw_ie->count;
+		mode = elems->ext_chansw_ie->mode;
+	} else if (elems->ch_switch_ie) {
+		new_band = cbss->channel->band;
+		new_chan_no = elems->ch_switch_ie->new_ch_num;
+		count = elems->ch_switch_ie->count;
+		mode = elems->ch_switch_ie->mode;
+	} else {
+		/* nothing here we understand */
+		return;
+	}
+
+	bss = (void *)cbss->priv;
+
+	new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band);
+	new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq);
+	if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) {
 		sdata_info(sdata,
 			   "AP %pM switches to unsupported channel (%d MHz), disconnecting\n",
 			   ifmgd->associated->bssid, new_freq);
-		ieee80211_queue_work(&sdata->local->hw,
+		ieee80211_queue_work(&local->hw,
+				     &ifmgd->csa_connection_drop_work);
+		return;
+	}
+
+	if (sec_chan_offs) {
+		secondary_channel_offset = sec_chan_offs->sec_chan_offs;
+	} else if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+		/* if HT is enabled and the IE not present, it's still HT */
+		secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE;
+	}
+
+	switch (secondary_channel_offset) {
+	default:
+		/* secondary_channel_offset was present but is invalid */
+	case IEEE80211_HT_PARAM_CHA_SEC_NONE:
+		cfg80211_chandef_create(&new_chandef, new_chan,
+					NL80211_CHAN_HT20);
+		break;
+	case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
+		cfg80211_chandef_create(&new_chandef, new_chan,
+					NL80211_CHAN_HT40PLUS);
+		break;
+	case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
+		cfg80211_chandef_create(&new_chandef, new_chan,
+					NL80211_CHAN_HT40MINUS);
+		break;
+	case -1:
+		cfg80211_chandef_create(&new_chandef, new_chan,
+					NL80211_CHAN_NO_HT);
+		break;
+	}
+
+	if (wide_bw_chansw_ie) {
+		new_vht_chandef.chan = new_chan;
+		new_vht_chandef.center_freq1 =
+			ieee80211_channel_to_frequency(
+				wide_bw_chansw_ie->new_center_freq_seg0,
+				new_band);
+
+		switch (wide_bw_chansw_ie->new_channel_width) {
+		default:
+			/* hmmm, ignore VHT and use HT if present */
+		case IEEE80211_VHT_CHANWIDTH_USE_HT:
+			new_vht_chandef.chan = NULL;
+			break;
+		case IEEE80211_VHT_CHANWIDTH_80MHZ:
+			new_vht_chandef.width = NL80211_CHAN_WIDTH_80;
+			break;
+		case IEEE80211_VHT_CHANWIDTH_160MHZ:
+			new_vht_chandef.width = NL80211_CHAN_WIDTH_160;
+			break;
+		case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+			/* field is otherwise reserved */
+			new_vht_chandef.center_freq2 =
+				ieee80211_channel_to_frequency(
+					wide_bw_chansw_ie->new_center_freq_seg1,
+					new_band);
+			new_vht_chandef.width = NL80211_CHAN_WIDTH_80P80;
+			break;
+		}
+		if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ &&
+		    new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80)
+			chandef_downgrade(&new_vht_chandef);
+		if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ &&
+		    new_vht_chandef.width == NL80211_CHAN_WIDTH_160)
+			chandef_downgrade(&new_vht_chandef);
+		if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ &&
+		    new_vht_chandef.width > NL80211_CHAN_WIDTH_20)
+			chandef_downgrade(&new_vht_chandef);
+	}
+
+	/* if VHT data is there validate & use it */
+	if (new_vht_chandef.chan) {
+		if (!cfg80211_chandef_compatible(&new_vht_chandef,
+						 &new_chandef)) {
+			sdata_info(sdata,
+				   "AP %pM CSA has inconsistent channel data, disconnecting\n",
+				   ifmgd->associated->bssid);
+			ieee80211_queue_work(&local->hw,
+					     &ifmgd->csa_connection_drop_work);
+			return;
+		}
+		new_chandef = new_vht_chandef;
+	}
+
+	if (!cfg80211_chandef_usable(local->hw.wiphy, &new_chandef,
+				     IEEE80211_CHAN_DISABLED)) {
+		sdata_info(sdata,
+			   "AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
+			   ifmgd->associated->bssid, new_freq,
+			   new_chandef.width, new_chandef.center_freq1,
+			   new_chandef.center_freq2);
+		ieee80211_queue_work(&local->hw,
 				     &ifmgd->csa_connection_drop_work);
 		return;
 	}
 
 	ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
 
-	if (sdata->local->use_chanctx) {
+	if (local->use_chanctx) {
 		sdata_info(sdata,
 			   "not handling channel switch with channel contexts\n");
-		ieee80211_queue_work(&sdata->local->hw,
+		ieee80211_queue_work(&local->hw,
 				     &ifmgd->csa_connection_drop_work);
 		return;
 	}
 
-	mutex_lock(&sdata->local->chanctx_mtx);
+	mutex_lock(&local->chanctx_mtx);
 	if (WARN_ON(!rcu_access_pointer(sdata->vif.chanctx_conf))) {
-		mutex_unlock(&sdata->local->chanctx_mtx);
+		mutex_unlock(&local->chanctx_mtx);
 		return;
 	}
 	chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf),
@@ -1105,39 +1207,39 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	if (chanctx->refcount > 1) {
 		sdata_info(sdata,
 			   "channel switch with multiple interfaces on the same channel, disconnecting\n");
-		ieee80211_queue_work(&sdata->local->hw,
+		ieee80211_queue_work(&local->hw,
 				     &ifmgd->csa_connection_drop_work);
-		mutex_unlock(&sdata->local->chanctx_mtx);
+		mutex_unlock(&local->chanctx_mtx);
 		return;
 	}
-	mutex_unlock(&sdata->local->chanctx_mtx);
+	mutex_unlock(&local->chanctx_mtx);
 
-	sdata->local->csa_channel = new_ch;
+	local->csa_chandef = new_chandef;
 
-	if (sw_elem->mode)
-		ieee80211_stop_queues_by_reason(&sdata->local->hw,
+	if (mode)
+		ieee80211_stop_queues_by_reason(&local->hw,
+						IEEE80211_MAX_QUEUE_MAP,
 						IEEE80211_QUEUE_STOP_REASON_CSA);
 
-	if (sdata->local->ops->channel_switch) {
+	if (local->ops->channel_switch) {
 		/* use driver's channel switch callback */
 		struct ieee80211_channel_switch ch_switch = {
 			.timestamp = timestamp,
-			.block_tx = sw_elem->mode,
-			.channel = new_ch,
-			.count = sw_elem->count,
+			.block_tx = mode,
+			.chandef = new_chandef,
+			.count = count,
 		};
 
-		drv_channel_switch(sdata->local, &ch_switch);
+		drv_channel_switch(local, &ch_switch);
 		return;
 	}
 
 	/* channel switch handled in software */
-	if (sw_elem->count <= 1)
-		ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
+	if (count <= 1)
+		ieee80211_queue_work(&local->hw, &ifmgd->chswitch_work);
 	else
 		mod_timer(&ifmgd->chswitch_timer,
-			  TU_TO_EXP_TIME(sw_elem->count *
-					 cbss->beacon_interval));
+			  TU_TO_EXP_TIME(count * cbss->beacon_interval));
 }
 
 static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
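The software-switch countdown above arms a timer `count` beacon intervals ahead, with intervals measured in TU (1 TU = 1024 us). A standalone sketch of the arithmetic TU_TO_EXP_TIME() performs, assuming HZ = 1000 purely for illustration:

#include <stdio.h>

#define HZ 1000u	/* assumed tick rate for the example */

static unsigned long tu_to_jiffies(unsigned long tu)
{
	unsigned long usec = tu * 1024;		/* 1 TU = 1024 us */
	return usec * HZ / 1000000;		/* usecs_to_jiffies() equivalent */
}

int main(void)
{
	unsigned int count = 5, beacon_int_tu = 100;	/* typical 102.4 ms beacons */
	printf("switch in ~%lu jiffies\n", tu_to_jiffies(count * beacon_int_tu));
	return 0;	/* 5 * 100 TU = 512000 us, i.e. 512 jiffies at HZ=1000 */
}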
@@ -1383,6 +1485,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work)
 	}
 
 	ieee80211_wake_queues_by_reason(&local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_PS);
 }
 
@@ -1435,16 +1538,14 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
 
 	if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
 	    !(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) {
-		netif_tx_stop_all_queues(sdata->dev);
-
-		if (drv_tx_frames_pending(local))
+		if (drv_tx_frames_pending(local)) {
 			mod_timer(&local->dynamic_ps_timer, jiffies +
 				  msecs_to_jiffies(
 				  local->hw.conf.dynamic_ps_timeout));
-		else {
+		} else {
 			ieee80211_send_nullfunc(local, sdata, 1);
 			/* Flush to get the tx status of nullfunc frame */
-			drv_flush(local, false);
+			ieee80211_flush_queues(local, sdata);
 		}
 	}
 
@@ -1455,9 +1556,6 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
 		local->hw.conf.flags |= IEEE80211_CONF_PS;
 		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
 	}
-
-	if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
-		netif_tx_wake_all_queues(sdata->dev);
 }
 
 void ieee80211_dynamic_ps_timer(unsigned long data)
@@ -1563,6 +1661,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,
 		params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4);
 		params.cw_min = ecw2cw(pos[1] & 0x0f);
 		params.txop = get_unaligned_le16(pos + 2);
+		params.acm = acm;
 		params.uapsd = uapsd;
 
 		mlme_dbg(sdata,
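The ECW fields decoded here are exponents; mac80211's ecw2cw() helper expands them as cw = 2^ecw - 1. A quick standalone check of the nibble decoding used above (low nibble = ECWmin, high nibble = ECWmax):

#include <stdio.h>
#include <stdint.h>

static uint16_t ecw2cw(uint16_t ecw)
{
	return (1 << ecw) - 1;
}

int main(void)
{
	uint8_t byte = 0xa4;	/* ECWmin = 4, ECWmax = 10 */
	printf("cw_min=%u cw_max=%u\n",
	       ecw2cw(byte & 0x0f), ecw2cw((byte & 0xf0) >> 4));
	return 0;	/* prints cw_min=15 cw_max=1023 */
}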
@@ -1650,7 +1749,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 		 bss_conf->assoc_capability, bss->has_erp_value, bss->erp_value);
 
 	sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec(
-		IEEE80211_BEACON_LOSS_COUNT * bss_conf->beacon_int));
+		beacon_loss_count * bss_conf->beacon_int));
 
 	sdata->u.mgd.associated = cbss;
 	memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN);
@@ -1663,18 +1762,17 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	rcu_read_lock();
 	ies = rcu_dereference(cbss->ies);
 	if (ies) {
-		u8 noa[2];
 		int ret;
 
 		ret = cfg80211_get_p2p_attr(
 					ies->data, ies->len,
 					IEEE80211_P2P_ATTR_ABSENCE_NOTICE,
-					noa, sizeof(noa));
+					(u8 *) &bss_conf->p2p_noa_attr,
+					sizeof(bss_conf->p2p_noa_attr));
 		if (ret >= 2) {
-			bss_conf->p2p_oppps = noa[1] & 0x80;
-			bss_conf->p2p_ctwindow = noa[1] & 0x7f;
+			sdata->u.mgd.p2p_noa_index =
+				bss_conf->p2p_noa_attr.index;
 			bss_info_changed |= BSS_CHANGED_P2P_PS;
-			sdata->u.mgd.p2p_noa_index = noa[0];
 		}
 	}
 	rcu_read_unlock();
@@ -1718,7 +1816,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	ieee80211_recalc_smps(sdata);
 	ieee80211_recalc_ps_vif(sdata);
 
-	netif_tx_start_all_queues(sdata->dev);
 	netif_carrier_on(sdata->dev);
 }
 
@@ -1741,22 +1838,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	ieee80211_stop_poll(sdata);
 
 	ifmgd->associated = NULL;
-
-	/*
-	 * we need to commit the associated = NULL change because the
-	 * scan code uses that to determine whether this iface should
-	 * go to/wake up from powersave or not -- and could otherwise
-	 * wake the queues erroneously.
-	 */
-	smp_mb();
-
-	/*
-	 * Thus, we can only afterwards stop the queues -- to account
-	 * for the case where another CPU is finishing a scan at this
-	 * time -- we don't want the scan code to enable queues.
-	 */
-
-	netif_tx_stop_all_queues(sdata->dev);
 	netif_carrier_off(sdata->dev);
 
 	/*
@@ -1775,7 +1856,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	/* flush out any pending frame (e.g. DELBA) before deauth/disassoc */
 	if (tx)
-		drv_flush(local, false);
+		ieee80211_flush_queues(local, sdata);
 
 	/* deauthenticate/disassociate now */
 	if (tx || frame_buf)
@@ -1784,7 +1865,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	/* flush out frame */
 	if (tx)
-		drv_flush(local, false);
+		ieee80211_flush_queues(local, sdata);
 
 	/* clear bssid only after building the needed mgmt frames */
 	memset(ifmgd->bssid, 0, ETH_ALEN);
@@ -1799,12 +1880,15 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	changed |= BSS_CHANGED_ASSOC;
 	sdata->vif.bss_conf.assoc = false;
 
-	sdata->vif.bss_conf.p2p_ctwindow = 0;
-	sdata->vif.bss_conf.p2p_oppps = false;
+	ifmgd->p2p_noa_index = -1;
+	memset(&sdata->vif.bss_conf.p2p_noa_attr, 0,
+	       sizeof(sdata->vif.bss_conf.p2p_noa_attr));
 
-	/* on the next assoc, re-program HT parameters */
+	/* on the next assoc, re-program HT/VHT parameters */
 	memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa));
 	memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask));
+	memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa));
+	memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask));
 
 	sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
 
@@ -1830,8 +1914,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	del_timer_sync(&sdata->u.mgd.timer);
 	del_timer_sync(&sdata->u.mgd.chswitch_timer);
 
-	sdata->u.mgd.timers_running = 0;
-
 	sdata->vif.bss_conf.dtim_period = 0;
 
 	ifmgd->flags = 0;
@@ -1956,7 +2038,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
 	ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);
 	run_again(ifmgd, ifmgd->probe_timeout);
 	if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
-		drv_flush(sdata->local, false);
+		ieee80211_flush_queues(sdata->local, sdata);
 }
 
 static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
@@ -1980,12 +2062,15 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
 		goto out;
 	}
 
-	if (beacon)
+	if (beacon) {
 		mlme_dbg_ratelimited(sdata,
-				     "detected beacon loss from AP - probing\n");
+				     "detected beacon loss from AP (missed %d beacons) - probing\n",
+				     beacon_loss_count);
 
 		ieee80211_cqm_rssi_notify(&sdata->vif,
-			NL80211_CQM_RSSI_BEACON_LOSS_EVENT, GFP_KERNEL);
+					  NL80211_CQM_RSSI_BEACON_LOSS_EVENT,
+					  GFP_KERNEL);
+	}
 
 	/*
 	 * The driver/our work has already reported this event or the
@@ -2079,6 +2164,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 			       true, frame_buf);
 	ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
 	ieee80211_wake_queues_by_reason(&sdata->local->hw,
+					IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_CSA);
 	mutex_unlock(&ifmgd->mtx);
 
@@ -2130,7 +2216,6 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif)
 
 	trace_api_beacon_loss(sdata);
 
-	WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR);
 	sdata->u.mgd.connection_loss = false;
 	ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
 }
@@ -2180,7 +2265,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
 	u32 tx_flags = 0;
 
 	pos = mgmt->u.auth.variable;
-	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
 	if (!elems.challenge)
 		return;
 	auth_data->expected_transaction = 4;
@@ -2445,7 +2530,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	}
 
 	pos = mgmt->u.assoc_resp.variable;
-	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
 
 	if (!elems.supp_rates) {
 		sdata_info(sdata, "no SuppRates element in AssocResp\n");
@@ -2614,13 +2699,13 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 		    capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
 
 	pos = mgmt->u.assoc_resp.variable;
-	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
+	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
 
 	if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
-	    elems.timeout_int && elems.timeout_int_len == 5 &&
-	    elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) {
+	    elems.timeout_int &&
+	    elems.timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) {
 		u32 tu, ms;
-		tu = get_unaligned_le32(elems.timeout_int + 1);
+		tu = le32_to_cpu(elems.timeout_int->value);
 		ms = tu * 1024 / 1000;
 		sdata_info(sdata,
 			   "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n",
@@ -2669,6 +2754,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_channel *channel;
 	bool need_ps = false;
 
+	lockdep_assert_held(&sdata->u.mgd.mtx);
+
 	if ((sdata->u.mgd.associated &&
 	     ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) ||
 	    (sdata->u.mgd.assoc_data &&
@@ -2683,7 +2770,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	if (elems->ds_params && elems->ds_params_len == 1)
+	if (elems->ds_params)
 		freq = ieee80211_channel_to_frequency(elems->ds_params[0],
 						      rx_status->band);
 	else
@@ -2699,7 +2786,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 	if (bss)
 		ieee80211_rx_bss_put(local, bss);
 
-	if (!sdata->u.mgd.associated)
+	if (!sdata->u.mgd.associated ||
+	    !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid))
 		return;
 
 	if (need_ps) {
@@ -2708,10 +2796,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		mutex_unlock(&local->iflist_mtx);
 	}
 
-	if (elems->ch_switch_ie &&
-	    memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0)
-		ieee80211_sta_process_chanswitch(sdata, elems->ch_switch_ie,
-						 bss, rx_status->mactime);
+	ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems);
+
 }
 
 
@@ -2736,7 +2822,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 		return;
 
 	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
-			       &elems);
+			       false, &elems);
 
 	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
 
@@ -2819,7 +2905,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
 	    ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
 		ieee802_11_parse_elems(mgmt->u.beacon.variable,
-				       len - baselen, &elems);
+				       len - baselen, false, &elems);
 
 		ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);
 		ifmgd->assoc_data->have_beacon = true;
@@ -2929,7 +3015,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
 	ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
 	ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable,
-					  len - baselen, &elems,
+					  len - baselen, false, &elems,
 					  care_about_ies, ncrc);
 
 	if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) {
@@ -2961,22 +3047,30 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (sdata->vif.p2p) {
-		u8 noa[2];
+		struct ieee80211_p2p_noa_attr noa = {};
 		int ret;
 
 		ret = cfg80211_get_p2p_attr(mgmt->u.beacon.variable,
 					    len - baselen,
 					    IEEE80211_P2P_ATTR_ABSENCE_NOTICE,
-					    noa, sizeof(noa));
-		if (ret >= 2 && sdata->u.mgd.p2p_noa_index != noa[0]) {
-			bss_conf->p2p_oppps = noa[1] & 0x80;
-			bss_conf->p2p_ctwindow = noa[1] & 0x7f;
+					    (u8 *) &noa, sizeof(noa));
+		if (ret >= 2) {
+			if (sdata->u.mgd.p2p_noa_index != noa.index) {
+				/* valid noa_attr and index changed */
+				sdata->u.mgd.p2p_noa_index = noa.index;
+				memcpy(&bss_conf->p2p_noa_attr, &noa, sizeof(noa));
+				changed |= BSS_CHANGED_P2P_PS;
+				/*
+				 * make sure we update all information, the CRC
+				 * mechanism doesn't look at P2P attributes.
+				 */
+				ifmgd->beacon_crc_valid = false;
+			}
+		} else if (sdata->u.mgd.p2p_noa_index != -1) {
+			/* noa_attr not found and we had valid noa_attr before */
+			sdata->u.mgd.p2p_noa_index = -1;
+			memset(&bss_conf->p2p_noa_attr, 0, sizeof(bss_conf->p2p_noa_attr));
 			changed |= BSS_CHANGED_P2P_PS;
-			sdata->u.mgd.p2p_noa_index = noa[0];
-			/*
-			 * make sure we update all information, the CRC
-			 * mechanism doesn't look at P2P attributes.
-			 */
 			ifmgd->beacon_crc_valid = false;
 		}
 	}
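The NoA handling above treats the attribute's index field as a version number: an update is pushed only when the index changes or the attribute disappears. A hedged userspace model of that bookkeeping (struct layout illustrative, not the 802.11 wire format):

#include <stdbool.h>
#include <string.h>

struct noa_attr { int index; /* ... schedule fields elided ... */ };

struct noa_state {
	int last_index;			/* -1 means "no attribute seen" */
	struct noa_attr current;
};

/* returns true when listeners must be notified of a P2P PS change */
static bool update_noa(struct noa_state *s, const struct noa_attr *found)
{
	if (found) {
		if (s->last_index == found->index)
			return false;	/* same version, nothing to do */
		s->last_index = found->index;
		s->current = *found;
		return true;
	}
	if (s->last_index != -1) {	/* attribute vanished */
		s->last_index = -1;
		memset(&s->current, 0, sizeof(s->current));
		return true;
	}
	return false;
}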
@@ -3018,7 +3112,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		changed |= BSS_CHANGED_DTIM_PERIOD;
 	}
 
-	if (elems.erp_info && elems.erp_info_len >= 1) {
+	if (elems.erp_info) {
 		erp_valid = true;
 		erp_value = elems.erp_info[0];
 	} else {
@@ -3068,6 +3162,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 	enum rx_mgmt_action rma = RX_MGMT_NONE;
 	u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];
 	u16 fc;
+	struct ieee802_11_elems elems;
+	int ies_len;
 
 	rx_status = (struct ieee80211_rx_status *) skb->cb;
 	mgmt = (struct ieee80211_mgmt *) skb->data;
@@ -3097,14 +3193,48 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 		rma = ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, &bss);
 		break;
 	case IEEE80211_STYPE_ACTION:
-		switch (mgmt->u.action.category) {
-		case WLAN_CATEGORY_SPECTRUM_MGMT:
+		if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) {
+			ies_len = skb->len -
+				  offsetof(struct ieee80211_mgmt,
+					   u.action.u.chan_switch.variable);
+
+			if (ies_len < 0)
+				break;
+
+			ieee802_11_parse_elems(
+				mgmt->u.action.u.chan_switch.variable,
+				ies_len, true, &elems);
+
+			if (elems.parse_error)
+				break;
+
 			ieee80211_sta_process_chanswitch(sdata,
-					&mgmt->u.action.u.chan_switch.sw_elem,
-					(void *)ifmgd->associated->priv,
-					rx_status->mactime);
-			break;
+							 rx_status->mactime,
+							 &elems);
+		} else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) {
+			ies_len = skb->len -
+				  offsetof(struct ieee80211_mgmt,
+					   u.action.u.ext_chan_switch.variable);
+
+			if (ies_len < 0)
+				break;
+
+			ieee802_11_parse_elems(
+				mgmt->u.action.u.ext_chan_switch.variable,
+				ies_len, true, &elems);
+
+			if (elems.parse_error)
+				break;
+
+			/* for the handling code pretend this was also an IE */
+			elems.ext_chansw_ie =
+				&mgmt->u.action.u.ext_chan_switch.data;
+
+			ieee80211_sta_process_chanswitch(sdata,
+							 rx_status->mactime,
+							 &elems);
 		}
+		break;
 	}
 	mutex_unlock(&ifmgd->mtx);
 
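Both action-frame branches above compute the IE buffer length as the frame length minus offsetof() of the variable part, and drop truncated frames when the result goes negative. A self-contained illustration of that guard:

#include <stddef.h>
#include <stdio.h>

struct action_frame {
	unsigned char category;
	unsigned char action_code;
	unsigned char variable[];	/* information elements start here */
};

int main(void)
{
	size_t frame_len = 1;		/* shorter than the fixed header */
	long ies_len = (long)frame_len -
		       (long)offsetof(struct action_frame, variable);

	if (ies_len < 0)
		printf("truncated frame, refusing to parse IEs\n");
	return 0;
}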
@@ -3140,15 +3270,8 @@ static void ieee80211_sta_timer(unsigned long data)
 {
 	struct ieee80211_sub_if_data *sdata =
 		(struct ieee80211_sub_if_data *) data;
-	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-	struct ieee80211_local *local = sdata->local;
 
-	if (local->quiescing) {
-		set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
-		return;
-	}
-
-	ieee80211_queue_work(&local->hw, &sdata->work);
+	ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 }
 
 static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
@@ -3500,72 +3623,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
 	}
 }
 
-#ifdef CONFIG_PM
-void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
-{
-	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-
-	/*
-	 * Stop timers before deleting work items, as timers
-	 * could race and re-add the work-items. They will be
-	 * re-established on connection.
-	 */
-	del_timer_sync(&ifmgd->conn_mon_timer);
-	del_timer_sync(&ifmgd->bcn_mon_timer);
-
-	/*
-	 * we need to use atomic bitops for the running bits
-	 * only because both timers might fire at the same
-	 * time -- the code here is properly synchronised.
-	 */
-
-	cancel_work_sync(&ifmgd->request_smps_work);
-
-	cancel_work_sync(&ifmgd->monitor_work);
-	cancel_work_sync(&ifmgd->beacon_connection_loss_work);
-	cancel_work_sync(&ifmgd->csa_connection_drop_work);
-	if (del_timer_sync(&ifmgd->timer))
-		set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
-
-	if (del_timer_sync(&ifmgd->chswitch_timer))
-		set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running);
-	cancel_work_sync(&ifmgd->chswitch_work);
-}
-
-void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
-{
-	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
-
-	mutex_lock(&ifmgd->mtx);
-	if (!ifmgd->associated) {
-		mutex_unlock(&ifmgd->mtx);
-		return;
-	}
-
-	if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) {
-		sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME;
-		mlme_dbg(sdata, "driver requested disconnect after resume\n");
-		ieee80211_sta_connection_lost(sdata,
-					      ifmgd->associated->bssid,
-					      WLAN_REASON_UNSPECIFIED,
-					      true);
-		mutex_unlock(&ifmgd->mtx);
-		return;
-	}
-	mutex_unlock(&ifmgd->mtx);
-
-	if (test_and_clear_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running))
-		add_timer(&ifmgd->timer);
-	if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running))
-		add_timer(&ifmgd->chswitch_timer);
-	ieee80211_sta_reset_beacon_monitor(sdata);
-
-	mutex_lock(&sdata->local->mtx);
-	ieee80211_restart_sta_timer(sdata);
-	mutex_unlock(&sdata->local->mtx);
-}
-#endif
-
 /* interface setup */
 void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
 {
@@ -3590,8 +3647,9 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
 
 	ifmgd->flags = 0;
 	ifmgd->powersave = sdata->wdev.ps;
-	ifmgd->uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES;
-	ifmgd->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;
+	ifmgd->uapsd_queues = sdata->local->hw.uapsd_queues;
+	ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len;
+	ifmgd->p2p_noa_index = -1;
 
 	mutex_init(&ifmgd->mtx);
 
@@ -3964,8 +4022,16 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
 	/* prep auth_data so we don't go into idle on disassoc */
 	ifmgd->auth_data = auth_data;
 
-	if (ifmgd->associated)
-		ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
+	if (ifmgd->associated) {
+		u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
+
+		ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
+				       WLAN_REASON_UNSPECIFIED,
+				       false, frame_buf);
+
+		__cfg80211_send_deauth(sdata->dev, frame_buf,
+				       sizeof(frame_buf));
+	}
 
 	sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid);
 
@@ -4025,8 +4091,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 
 	mutex_lock(&ifmgd->mtx);
 
-	if (ifmgd->associated)
-		ieee80211_set_disassoc(sdata, 0, 0, false, NULL);
+	if (ifmgd->associated) {
+		u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
+
+		ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
+				       WLAN_REASON_UNSPECIFIED,
+				       false, frame_buf);
+
+		__cfg80211_send_deauth(sdata->dev, frame_buf,
+				       sizeof(frame_buf));
+	}
 
 	if (ifmgd->auth_data && !ifmgd->auth_data->done) {
 		err = -EBUSY;
@@ -4073,6 +4147,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
 	}
 
+	if (req->flags & ASSOC_REQ_DISABLE_VHT)
+		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+
 	/* Also disable HT if we don't support it or the AP doesn't use WMM */
 	sband = local->hw.wiphy->bands[req->bss->channel->band];
 	if (!sband->ht_cap.ht_supported ||
@@ -4096,6 +4173,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask,
 	       sizeof(ifmgd->ht_capa_mask));
 
+	memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa));
+	memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask,
+	       sizeof(ifmgd->vht_capa_mask));
+
 	if (req->ie && req->ie_len) {
 		memcpy(assoc_data->ie, req->ie, req->ie_len);
 		assoc_data->ie_len = req->ie_len;
@@ -4133,7 +4214,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	rcu_read_unlock();
 
 	if (bss->wmm_used && bss->uapsd_supported &&
-	    (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) {
+	    (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) &&
+	    sdata->wmm_acm != 0xff) {
 		assoc_data->uapsd = true;
 		ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED;
 	} else {
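The added sdata->wmm_acm != 0xff test reads as: if admission control is mandatory on every access category, do not enable U-APSD up front. That reading is an assumption here; a minimal model of the combined condition:

#include <stdbool.h>
#include <stdint.h>

/* Assumption: wmm_acm is a bitmap of traffic classes that require
 * WMM admission control, so 0xff means "all classes gated". */
static bool may_enable_uapsd(bool wmm_used, bool ap_uapsd,
			     bool hw_uapsd, uint8_t wmm_acm)
{
	return wmm_used && ap_uapsd && hw_uapsd && wmm_acm != 0xff;
}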
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 430bd254e496..acd1f71adc03 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -118,9 +118,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)
 	 * Stop queues and transmit all frames queued by the driver
 	 * before sending nullfunc to enable powersave at the AP.
 	 */
-	ieee80211_stop_queues_by_reason(&local->hw,
-		IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
-	drv_flush(local, false);
+	ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
+	ieee80211_flush_queues(local, NULL);
 
 	mutex_lock(&local->iflist_mtx);
 	list_for_each_entry(sdata, &local->interfaces, list) {
@@ -181,7 +181,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)
 	}
 	mutex_unlock(&local->iflist_mtx);
 
-	ieee80211_wake_queues_by_reason(&local->hw,
+	ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);
 }
 
@@ -277,7 +277,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)
 		duration = 10;
 
 	ret = drv_remain_on_channel(local, roc->sdata, roc->chan,
-				    duration);
+				    duration, roc->type);
 
 	roc->started = true;
 
@@ -382,7 +382,7 @@ void ieee80211_sw_roc_work(struct work_struct *work)
 	ieee80211_roc_notify_destroy(roc, !roc->abort);
 
 	if (started) {
-		drv_flush(local, false);
+		ieee80211_flush_queues(local, NULL);
 
 		local->tmp_channel = NULL;
 		ieee80211_hw_config(local, 0);
@@ -445,15 +445,15 @@ void ieee80211_roc_setup(struct ieee80211_local *local)
 	INIT_LIST_HEAD(&local->roc_list);
 }
 
-void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata)
+void ieee80211_roc_purge(struct ieee80211_local *local,
+			 struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_roc_work *roc, *tmp;
 	LIST_HEAD(tmp_list);
 
 	mutex_lock(&local->mtx);
 	list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
-		if (roc->sdata != sdata)
+		if (sdata && roc->sdata != sdata)
 			continue;
 
 		if (roc->started && local->ops->remain_on_channel) {
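With the new signature, a NULL sdata purges every interface's pending remain-on-channel items rather than one interface's, which is exactly what the suspend path in pm.c below relies on. A tiny userspace sketch of the NULL-matches-all filter (names illustrative):

#include <stdio.h>

struct item { int owner; };

static int matches(const struct item *it, const int *owner_filter)
{
	return !owner_filter || it->owner == *owner_filter;
}

int main(void)
{
	struct item items[] = { {1}, {2}, {1} };
	int one = 1;

	for (unsigned i = 0; i < 3; i++)
		if (matches(&items[i], NULL))	/* NULL: purge everything */
			printf("purging item %u (owner %d)\n", i, items[i].owner);
	(void)one;	/* pass &one instead to purge a single owner's items */
	return 0;
}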
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d0275f34bf70..7fc5d0d8149a 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -6,32 +6,11 @@
 #include "driver-ops.h"
 #include "led.h"
 
-/* return value indicates whether the driver should be further notified */
-static void ieee80211_quiesce(struct ieee80211_sub_if_data *sdata)
-{
-	switch (sdata->vif.type) {
-	case NL80211_IFTYPE_STATION:
-		ieee80211_sta_quiesce(sdata);
-		break;
-	case NL80211_IFTYPE_ADHOC:
-		ieee80211_ibss_quiesce(sdata);
-		break;
-	case NL80211_IFTYPE_MESH_POINT:
-		ieee80211_mesh_quiesce(sdata);
-		break;
-	default:
-		break;
-	}
-
-	cancel_work_sync(&sdata->work);
-}
-
 int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
 	struct sta_info *sta;
-	struct ieee80211_chanctx *ctx;
 
 	if (!local->open_count)
 		goto suspend;
@@ -40,6 +19,10 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 
 	ieee80211_dfs_cac_cancel(local);
 
+	ieee80211_roc_purge(local, NULL);
+
+	ieee80211_del_virtual_monitor(local);
+
 	if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
 		mutex_lock(&local->sta_mtx);
 		list_for_each_entry(sta, &local->sta_list, list) {
@@ -51,12 +34,14 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 	}
 
 	ieee80211_stop_queues_by_reason(hw,
+					IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_SUSPEND);
 
-	/* flush out all packets */
+	/* flush out all packets and station cleanup call_rcu()s */
 	synchronize_net();
+	rcu_barrier();
 
-	drv_flush(local, false);
+	ieee80211_flush_queues(local, NULL);
 
 	local->quiescing = true;
 	/* make quiescing visible to timers everywhere */
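The added rcu_barrier() matters because stopping new work is not enough on its own: station cleanup may already have queued call_rcu() callbacks that free memory, and those must all have run before interfaces are torn down. A plain-C stand-in for that ordering (not kernel RCU, just the shape of the guarantee):

#include <stdio.h>

#define MAX_CB 8
static void (*pending[MAX_CB])(void);
static int n_pending;

static void call_deferred(void (*cb)(void)) { pending[n_pending++] = cb; }

static void barrier_deferred(void)	/* plays the role of rcu_barrier() */
{
	while (n_pending)
		pending[--n_pending]();
}

static void free_station(void) { printf("station freed\n"); }

int main(void)
{
	call_deferred(free_station);	/* queued by earlier cleanup work */
	/* ... stop queues, synchronize in-flight paths ... */
	barrier_deferred();		/* must complete before teardown */
	printf("safe to remove interfaces\n");
	return 0;
}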
@@ -89,23 +74,17 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 			mutex_unlock(&local->sta_mtx);
 		}
 		ieee80211_wake_queues_by_reason(hw,
+						IEEE80211_MAX_QUEUE_MAP,
 						IEEE80211_QUEUE_STOP_REASON_SUSPEND);
 		return err;
 	} else if (err > 0) {
 		WARN_ON(err != 1);
-		local->wowlan = false;
+		return err;
 	} else {
-		list_for_each_entry(sdata, &local->interfaces, list)
-			if (ieee80211_sdata_running(sdata))
-				ieee80211_quiesce(sdata);
 		goto suspend;
 	}
 	}
 
-	/* disable keys */
-	list_for_each_entry(sdata, &local->interfaces, list)
-		ieee80211_disable_keys(sdata);
-
 	/* tear down aggregation sessions and remove STAs */
 	mutex_lock(&local->sta_mtx);
 	list_for_each_entry(sta, &local->sta_list, list) {
@@ -117,100 +96,21 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
 			WARN_ON(drv_sta_state(local, sta->sdata, sta,
 					      state, state - 1));
 		}
-
-		mesh_plink_quiesce(sta);
 	}
 	mutex_unlock(&local->sta_mtx);
 
 	/* remove all interfaces */
 	list_for_each_entry(sdata, &local->interfaces, list) {
-		static u8 zero_addr[ETH_ALEN] = {};
-		u32 changed = 0;
-
 		if (!ieee80211_sdata_running(sdata))
 			continue;
-
-		switch (sdata->vif.type) {
-		case NL80211_IFTYPE_AP_VLAN:
-		case NL80211_IFTYPE_MONITOR:
-			/* skip these */
-			continue;
-		case NL80211_IFTYPE_STATION:
-			if (sdata->vif.bss_conf.assoc)
-				changed = BSS_CHANGED_ASSOC |
-					  BSS_CHANGED_BSSID |
-					  BSS_CHANGED_IDLE;
-			break;
-		case NL80211_IFTYPE_AP:
-		case NL80211_IFTYPE_ADHOC:
-		case NL80211_IFTYPE_MESH_POINT:
-			if (sdata->vif.bss_conf.enable_beacon)
-				changed = BSS_CHANGED_BEACON_ENABLED;
-			break;
-		default:
-			break;
-		}
-
-		ieee80211_quiesce(sdata);
-
-		sdata->suspend_bss_conf = sdata->vif.bss_conf;
-		memset(&sdata->vif.bss_conf, 0, sizeof(sdata->vif.bss_conf));
-		sdata->vif.bss_conf.idle = true;
-		if (sdata->suspend_bss_conf.bssid)
-			sdata->vif.bss_conf.bssid = zero_addr;
-
-		/* disable beaconing or remove association */
-		ieee80211_bss_info_change_notify(sdata, changed);
-
-		if (sdata->vif.type == NL80211_IFTYPE_AP &&
-		    rcu_access_pointer(sdata->u.ap.beacon))
-			drv_stop_ap(local, sdata);
-
-		if (local->use_chanctx) {
-			struct ieee80211_chanctx_conf *conf;
-
-			mutex_lock(&local->chanctx_mtx);
-			conf = rcu_dereference_protected(
-					sdata->vif.chanctx_conf,
-					lockdep_is_held(&local->chanctx_mtx));
-			if (conf) {
-				ctx = container_of(conf,
-						   struct ieee80211_chanctx,
-						   conf);
-				drv_unassign_vif_chanctx(local, sdata, ctx);
-			}
-
-			mutex_unlock(&local->chanctx_mtx);
-		}
-		drv_remove_interface(local, sdata);
-	}
-
-	sdata = rtnl_dereference(local->monitor_sdata);
-	if (sdata) {
-		if (local->use_chanctx) {
-			struct ieee80211_chanctx_conf *conf;
-
-			mutex_lock(&local->chanctx_mtx);
-			conf = rcu_dereference_protected(
-					sdata->vif.chanctx_conf,
-					lockdep_is_held(&local->chanctx_mtx));
-			if (conf) {
-				ctx = container_of(conf,
-						   struct ieee80211_chanctx,
-						   conf);
-				drv_unassign_vif_chanctx(local, sdata, ctx);
-			}
-
-			mutex_unlock(&local->chanctx_mtx);
-		}
-
 		drv_remove_interface(local, sdata);
 	}
 
-	mutex_lock(&local->chanctx_mtx);
-	list_for_each_entry(ctx, &local->chanctx_list, list)
-		drv_remove_chanctx(local, ctx);
-	mutex_unlock(&local->chanctx_mtx);
+	/*
+	 * We disconnected on all interfaces before suspend, all channel
+	 * contexts should be released.
+	 */
+	WARN_ON(!list_empty(&local->chanctx_list));
 
 	/* stop hardware - this must stop RX */
 	if (local->open_count)
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index dd88381c53b7..0d51877efdb7 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -252,6 +252,25 @@ rate_lowest_non_cck_index(struct ieee80211_supported_band *sband,
 	return 0;
 }
 
+static void __rate_control_send_low(struct ieee80211_hw *hw,
+				    struct ieee80211_supported_band *sband,
+				    struct ieee80211_sta *sta,
+				    struct ieee80211_tx_info *info)
+{
+	if ((sband->band != IEEE80211_BAND_2GHZ) ||
+	    !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
+		info->control.rates[0].idx = rate_lowest_index(sband, sta);
+	else
+		info->control.rates[0].idx =
+			rate_lowest_non_cck_index(sband, sta);
+
+	info->control.rates[0].count =
+		(info->flags & IEEE80211_TX_CTL_NO_ACK) ?
+		1 : hw->max_rate_tries;
+
+	info->control.skip_table = 1;
+}
+
 
 bool rate_control_send_low(struct ieee80211_sta *sta,
 			   void *priv_sta,
@@ -262,16 +281,8 @@ bool rate_control_send_low(struct ieee80211_sta *sta,
 	int mcast_rate;
 
 	if (!sta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) {
-		if ((sband->band != IEEE80211_BAND_2GHZ) ||
-		    !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE))
-			info->control.rates[0].idx =
-				rate_lowest_index(txrc->sband, sta);
-		else
-			info->control.rates[0].idx =
-				rate_lowest_non_cck_index(txrc->sband, sta);
-		info->control.rates[0].count =
-			(info->flags & IEEE80211_TX_CTL_NO_ACK) ?
-			1 : txrc->hw->max_rate_tries;
+		__rate_control_send_low(txrc->hw, sband, sta, info);
+
 		if (!sta && txrc->bss) {
 			mcast_rate = txrc->bss_conf->mcast_rate[sband->band];
 			if (mcast_rate > 0) {
@@ -355,7 +366,8 @@ static bool rate_idx_match_mcs_mask(struct ieee80211_tx_rate *rate,
 
 
 static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
-				struct ieee80211_tx_rate_control *txrc,
+				struct ieee80211_supported_band *sband,
+				enum nl80211_chan_width chan_width,
 				u32 mask,
 				u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN])
 {
@@ -375,27 +387,17 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
 					  IEEE80211_TX_RC_USE_SHORT_PREAMBLE);
 		alt_rate.count = rate->count;
 		if (rate_idx_match_legacy_mask(&alt_rate,
-					       txrc->sband->n_bitrates,
-					       mask)) {
+					       sband->n_bitrates, mask)) {
 			*rate = alt_rate;
 			return;
 		}
 	} else {
-		struct sk_buff *skb = txrc->skb;
-		struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
-		__le16 fc;
-
 		/* handle legacy rates */
-		if (rate_idx_match_legacy_mask(rate, txrc->sband->n_bitrates,
-					       mask))
+		if (rate_idx_match_legacy_mask(rate, sband->n_bitrates, mask))
 			return;
 
 		/* if HT BSS, and we handle a data frame, also try HT rates */
-		if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT)
-			return;
-
-		fc = hdr->frame_control;
-		if (!ieee80211_is_data(fc))
+		if (chan_width == NL80211_CHAN_WIDTH_20_NOHT)
 			return;
 
 		alt_rate.idx = 0;
@@ -408,7 +410,7 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
 
 		alt_rate.flags |= IEEE80211_TX_RC_MCS;
 
-		if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_40)
+		if (chan_width == NL80211_CHAN_WIDTH_40)
 			alt_rate.flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
 
 		if (rate_idx_match_mcs_mask(&alt_rate, mcs_mask)) {
@@ -426,6 +428,228 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
426 */ 428 */
427} 429}
428 430
431static void rate_fixup_ratelist(struct ieee80211_vif *vif,
432 struct ieee80211_supported_band *sband,
433 struct ieee80211_tx_info *info,
434 struct ieee80211_tx_rate *rates,
435 int max_rates)
436{
437 struct ieee80211_rate *rate;
438 bool inval = false;
439 int i;
440
441 /*
442 * Set up the RTS/CTS rate as the fastest basic rate
443 * that is not faster than the data rate unless there
444 * is no basic rate slower than the data rate, in which
445 * case we pick the slowest basic rate
446 *
447 * XXX: Should this check all retry rates?
448 */
449 if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) {
450 u32 basic_rates = vif->bss_conf.basic_rates;
451 s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0;
452
453 rate = &sband->bitrates[rates[0].idx];
454
455 for (i = 0; i < sband->n_bitrates; i++) {
456 /* must be a basic rate */
457 if (!(basic_rates & BIT(i)))
458 continue;
459 /* must not be faster than the data rate */
460 if (sband->bitrates[i].bitrate > rate->bitrate)
461 continue;
462 /* maximum */
463 if (sband->bitrates[baserate].bitrate <
464 sband->bitrates[i].bitrate)
465 baserate = i;
466 }
467
468 info->control.rts_cts_rate_idx = baserate;
469 }
470
471 for (i = 0; i < max_rates; i++) {
472 /*
473 * make sure there's no valid rate following
474 * an invalid one, just in case drivers don't
475 * take the API seriously to stop at -1.
476 */
477 if (inval) {
478 rates[i].idx = -1;
479 continue;
480 }
481 if (rates[i].idx < 0) {
482 inval = true;
483 continue;
484 }
485
486 /*
487 * For now assume MCS is already set up correctly, this
488 * needs to be fixed.
489 */
490 if (rates[i].flags & IEEE80211_TX_RC_MCS) {
491 WARN_ON(rates[i].idx > 76);
492
493 if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) &&
494 info->control.use_cts_prot)
495 rates[i].flags |=
496 IEEE80211_TX_RC_USE_CTS_PROTECT;
497 continue;
498 }
499
500 if (rates[i].flags & IEEE80211_TX_RC_VHT_MCS) {
501 WARN_ON(ieee80211_rate_get_vht_mcs(&rates[i]) > 9);
502 continue;
503 }
504
505 /* set up RTS protection if desired */
506 if (info->control.use_rts) {
507 rates[i].flags |= IEEE80211_TX_RC_USE_RTS_CTS;
508 info->control.use_cts_prot = false;
509 }
510
511 /* RC is busted */
512 if (WARN_ON_ONCE(rates[i].idx >= sband->n_bitrates)) {
513 rates[i].idx = -1;
514 continue;
515 }
516
517 rate = &sband->bitrates[rates[i].idx];
518
519 /* set up short preamble */
520 if (info->control.short_preamble &&
521 rate->flags & IEEE80211_RATE_SHORT_PREAMBLE)
522 rates[i].flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE;
523
524 /* set up G protection */
525 if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) &&
526 info->control.use_cts_prot &&
527 rate->flags & IEEE80211_RATE_ERP_G)
528 rates[i].flags |= IEEE80211_TX_RC_USE_CTS_PROTECT;
529 }
530}
531
532
533static void rate_control_fill_sta_table(struct ieee80211_sta *sta,
534 struct ieee80211_tx_info *info,
535 struct ieee80211_tx_rate *rates,
536 int max_rates)
537{
538 struct ieee80211_sta_rates *ratetbl = NULL;
539 int i;
540
541 if (sta && !info->control.skip_table)
542 ratetbl = rcu_dereference(sta->rates);
543
544 /* Fill remaining rate slots with data from the sta rate table. */
545 max_rates = min_t(int, max_rates, IEEE80211_TX_RATE_TABLE_SIZE);
546 for (i = 0; i < max_rates; i++) {
547 if (i < ARRAY_SIZE(info->control.rates) &&
548 info->control.rates[i].idx >= 0 &&
549 info->control.rates[i].count) {
550 if (rates != info->control.rates)
551 rates[i] = info->control.rates[i];
552 } else if (ratetbl) {
553 rates[i].idx = ratetbl->rate[i].idx;
554 rates[i].flags = ratetbl->rate[i].flags;
555 if (info->control.use_rts)
556 rates[i].count = ratetbl->rate[i].count_rts;
557 else if (info->control.use_cts_prot)
558 rates[i].count = ratetbl->rate[i].count_cts;
559 else
560 rates[i].count = ratetbl->rate[i].count;
561 } else {
562 rates[i].idx = -1;
563 rates[i].count = 0;
564 }
565
566 if (rates[i].idx < 0 || !rates[i].count)
567 break;
568 }
569}
570
571static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
572 struct ieee80211_sta *sta,
573 struct ieee80211_supported_band *sband,
574 struct ieee80211_tx_info *info,
575 struct ieee80211_tx_rate *rates,
576 int max_rates)
577{
578 enum nl80211_chan_width chan_width;
579 u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN];
580 bool has_mcs_mask;
581 u32 mask;
582 int i;
583
584 /*
585 * Try to enforce the rateidx mask the user wanted. skip this if the
586 * default mask (allow all rates) is used to save some processing for
587 * the common case.
588 */
589 mask = sdata->rc_rateidx_mask[info->band];
590 has_mcs_mask = sdata->rc_has_mcs_mask[info->band];
591 if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask)
592 return;
593
594 if (has_mcs_mask)
595 memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band],
596 sizeof(mcs_mask));
597 else
598 memset(mcs_mask, 0xff, sizeof(mcs_mask));
599
600 if (sta) {
601 /* Filter out rates that the STA does not support */
602 mask &= sta->supp_rates[info->band];
603 for (i = 0; i < sizeof(mcs_mask); i++)
604 mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i];
605 }
606
607 /*
608 * Make sure the rate index selected for each TX rate is
609 * included in the configured mask and change the rate indexes
610 * if needed.
611 */
612 chan_width = sdata->vif.bss_conf.chandef.width;
613 for (i = 0; i < max_rates; i++) {
614 /* Skip invalid rates */
615 if (rates[i].idx < 0)
616 break;
617
618 rate_idx_match_mask(&rates[i], sband, mask, chan_width,
619 mcs_mask);
620 }
621}
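A note on the mask semantics above: rc_rateidx_mask[] is a per-band bitmap over sband->bitrates, so with eight legacy rates the "allow everything" value is (1 << 8) - 1 == 0xff and the function returns early. A user-restricted mask (set, for example, through iw's set bitrates command) clears bits; keeping only the fourth and fifth rates would leave mask == 0x18, and rate_idx_match_mask() then remaps every selected TX rate index into that set.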
622
623void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
624 struct ieee80211_sta *sta,
625 struct sk_buff *skb,
626 struct ieee80211_tx_rate *dest,
627 int max_rates)
628{
629 struct ieee80211_sub_if_data *sdata;
630 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
631 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
632 struct ieee80211_supported_band *sband;
633
634 rate_control_fill_sta_table(sta, info, dest, max_rates);
635
636 if (!vif)
637 return;
638
639 sdata = vif_to_sdata(vif);
640 sband = sdata->local->hw.wiphy->bands[info->band];
641
642 if (ieee80211_is_data(hdr->frame_control))
643 rate_control_apply_mask(sdata, sta, sband, info, dest, max_rates);
644
645 if (dest[0].idx < 0)
646 __rate_control_send_low(&sdata->local->hw, sband, sta, info);
647
648 if (sta)
649 rate_fixup_ratelist(vif, sband, info, dest, max_rates);
650}
651EXPORT_SYMBOL(ieee80211_get_tx_rates);
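The export above is the driver-facing half of the new API: a driver that advertises IEEE80211_HW_SUPPORTS_RC_TABLE can skip the per-packet rate array and ask for rates while building its TX descriptors. A minimal sketch of such a call site follows; the mydrv_* name is hypothetical, not part of mac80211:

	static void mydrv_fill_tx_rates(struct ieee80211_hw *hw,
					struct ieee80211_vif *vif,
					struct ieee80211_sta *sta,
					struct sk_buff *skb)
	{
		struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES];

		/* merges info->control.rates with the RCU-protected
		 * per-station table filled by rate_control_set_rates() */
		ieee80211_get_tx_rates(vif, sta, skb, rates,
				       ARRAY_SIZE(rates));

		/* ... program rates[] into the hardware descriptor ... */
	}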
652
429void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, 653void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
430 struct sta_info *sta, 654 struct sta_info *sta,
431 struct ieee80211_tx_rate_control *txrc) 655 struct ieee80211_tx_rate_control *txrc)
@@ -435,8 +659,6 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
435 struct ieee80211_sta *ista = NULL; 659 struct ieee80211_sta *ista = NULL;
436 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); 660 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
437 int i; 661 int i;
438 u32 mask;
439 u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN];
440 662
441 if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) { 663 if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) {
442 ista = &sta->sta; 664 ista = &sta->sta;
@@ -454,37 +676,27 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
454 676
455 ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); 677 ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
456 678
457 /* 679 if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE)
458 * Try to enforce the rateidx mask the user wanted. skip this if the 680 return;
459 * default mask (allow all rates) is used to save some processing for 681
460 * the common case. 682 ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb,
461 */ 683 info->control.rates,
462 mask = sdata->rc_rateidx_mask[info->band]; 684 ARRAY_SIZE(info->control.rates));
463 memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band], 685}
464 sizeof(mcs_mask));
465 if (mask != (1 << txrc->sband->n_bitrates) - 1) {
466 if (sta) {
467 /* Filter out rates that the STA does not support */
468 mask &= sta->sta.supp_rates[info->band];
469 for (i = 0; i < sizeof(mcs_mask); i++)
470 mcs_mask[i] &= sta->sta.ht_cap.mcs.rx_mask[i];
471 }
472 /*
473 * Make sure the rate index selected for each TX rate is
474 * included in the configured mask and change the rate indexes
475 * if needed.
476 */
477 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
478 /* Skip invalid rates */
479 if (info->control.rates[i].idx < 0)
480 break;
481 rate_idx_match_mask(&info->control.rates[i], txrc,
482 mask, mcs_mask);
483 }
484 }
485 686
486 BUG_ON(info->control.rates[0].idx < 0); 687int rate_control_set_rates(struct ieee80211_hw *hw,
688 struct ieee80211_sta *pubsta,
689 struct ieee80211_sta_rates *rates)
690{
691 struct ieee80211_sta_rates *old = rcu_dereference(pubsta->rates);
692
693 rcu_assign_pointer(pubsta->rates, rates);
694 if (old)
695 kfree_rcu(old, rcu_head);
696
697 return 0;
487} 698}
699EXPORT_SYMBOL(rate_control_set_rates);
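rate_control_set_rates() is the writer side of a classic RCU publish pattern: the new table becomes visible via rcu_assign_pointer() and the old one is reclaimed with kfree_rcu() once all readers have finished. Readers such as rate_control_fill_sta_table() above must dereference sta->rates inside an RCU read-side critical section. A reader sketch, with an illustrative helper name:

	static s8 example_first_rate_idx(struct ieee80211_sta *pubsta)
	{
		struct ieee80211_sta_rates *tbl;
		s8 idx = -1;

		rcu_read_lock();
		tbl = rcu_dereference(pubsta->rates);
		if (tbl)
			idx = tbl->rate[0].idx;	/* valid until unlock */
		rcu_read_unlock();

		return idx;
	}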
488 700
489int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, 701int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
490 const char *name) 702 const char *name)
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index eea45a2c7c35..ac7ef5414bde 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -55,7 +55,6 @@
55#include "rate.h" 55#include "rate.h"
56#include "rc80211_minstrel.h" 56#include "rc80211_minstrel.h"
57 57
58#define SAMPLE_COLUMNS 10
59#define SAMPLE_TBL(_mi, _idx, _col) \ 58#define SAMPLE_TBL(_mi, _idx, _col) \
60 _mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col] 59 _mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col]
61 60
@@ -70,16 +69,75 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix)
70 return i; 69 return i;
71} 70}
72 71
72/* find & sort topmost throughput rates */
73static inline void
74minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list)
75{
76 int j = MAX_THR_RATES;
77
78 while (j > 0 && mi->r[i].cur_tp > mi->r[tp_list[j - 1]].cur_tp)
79 j--;
80 if (j < MAX_THR_RATES - 1)
81 memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1));
82 if (j < MAX_THR_RATES)
83 tp_list[j] = i;
84}
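minstrel_sort_best_tp_rates() keeps tp_list[] holding the MAX_THR_RATES best-throughput rate indexes in descending order, inserting rate i one position at a time. A user-space analogue with plain throughput values, purely illustrative:

	#include <string.h>

	#define MAX_THR_RATES 4

	/* insert index i into tp_list, ordered by descending tp[] */
	static void sort_best(const int *tp, int i, unsigned char *tp_list)
	{
		int j = MAX_THR_RATES;

		while (j > 0 && tp[i] > tp[tp_list[j - 1]])
			j--;
		if (j < MAX_THR_RATES - 1)
			memmove(&tp_list[j + 1], &tp_list[j],
				MAX_THR_RATES - (j + 1));
		if (j < MAX_THR_RATES)
			tp_list[j] = i;
	}

	/* feeding tp = {10, 40, 20, 30} for i = 0..3 (list preset to 0)
	 * yields tp_list = {1, 3, 2, 0}: indexes by falling throughput */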
85
86static void
87minstrel_set_rate(struct minstrel_sta_info *mi, struct ieee80211_sta_rates *ratetbl,
88 int offset, int idx)
89{
90 struct minstrel_rate *r = &mi->r[idx];
91
92 ratetbl->rate[offset].idx = r->rix;
93 ratetbl->rate[offset].count = r->adjusted_retry_count;
94 ratetbl->rate[offset].count_cts = r->retry_count_cts;
95 ratetbl->rate[offset].count_rts = r->retry_count_rtscts;
96}
97
98static void
99minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
100{
101 struct ieee80211_sta_rates *ratetbl;
102 int i = 0;
103
104 ratetbl = kzalloc(sizeof(*ratetbl), GFP_ATOMIC);
105 if (!ratetbl)
106 return;
107
108 /* Start with max_tp_rate */
109 minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[0]);
110
111 if (mp->hw->max_rates >= 3) {
112 /* At least 3 tx rates supported, use max_tp_rate2 next */
113 minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[1]);
114 }
115
116 if (mp->hw->max_rates >= 2) {
117 /* At least 2 tx rates supported, use max_prob_rate next */
118 minstrel_set_rate(mi, ratetbl, i++, mi->max_prob_rate);
119 }
120
121 /* Use lowest rate last */
122 ratetbl->rate[i].idx = mi->lowest_rix;
123 ratetbl->rate[i].count = mp->max_retry;
124 ratetbl->rate[i].count_cts = mp->max_retry;
125 ratetbl->rate[i].count_rts = mp->max_retry;
126
127 rate_control_set_rates(mp->hw, mi->sta, ratetbl);
128}
129
73static void 130static void
74minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) 131minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
75{ 132{
76 u32 max_tp = 0, index_max_tp = 0, index_max_tp2 = 0; 133 u8 tmp_tp_rate[MAX_THR_RATES];
77 u32 max_prob = 0, index_max_prob = 0; 134 u8 tmp_prob_rate = 0;
78 u32 usecs; 135 u32 usecs;
79 u32 p;
80 int i; 136 int i;
81 137
 82 mi->stats_update = jiffies; 138 for (i = 0; i < MAX_THR_RATES; i++)
139 tmp_tp_rate[i] = 0;
140
83 for (i = 0; i < mi->n_rates; i++) { 141 for (i = 0; i < mi->n_rates; i++) {
84 struct minstrel_rate *mr = &mi->r[i]; 142 struct minstrel_rate *mr = &mi->r[i];
85 143
@@ -87,27 +145,32 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
87 if (!usecs) 145 if (!usecs)
88 usecs = 1000000; 146 usecs = 1000000;
89 147
90 /* To avoid rounding issues, probabilities scale from 0 (0%) 148 if (unlikely(mr->attempts > 0)) {
91 * to 18000 (100%) */ 149 mr->sample_skipped = 0;
92 if (mr->attempts) { 150 mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts);
93 p = (mr->success * 18000) / mr->attempts;
94 mr->succ_hist += mr->success; 151 mr->succ_hist += mr->success;
95 mr->att_hist += mr->attempts; 152 mr->att_hist += mr->attempts;
96 mr->cur_prob = p; 153 mr->probability = minstrel_ewma(mr->probability,
97 p = ((p * (100 - mp->ewma_level)) + (mr->probability * 154 mr->cur_prob,
98 mp->ewma_level)) / 100; 155 EWMA_LEVEL);
99 mr->probability = p; 156 } else
100 mr->cur_tp = p * (1000000 / usecs); 157 mr->sample_skipped++;
101 }
102 158
103 mr->last_success = mr->success; 159 mr->last_success = mr->success;
104 mr->last_attempts = mr->attempts; 160 mr->last_attempts = mr->attempts;
105 mr->success = 0; 161 mr->success = 0;
106 mr->attempts = 0; 162 mr->attempts = 0;
107 163
164 /* Update throughput per rate, reset thr. below 10% success */
165 if (mr->probability < MINSTREL_FRAC(10, 100))
166 mr->cur_tp = 0;
167 else
168 mr->cur_tp = mr->probability * (1000000 / usecs);
169
108 /* Sample less often below the 10% chance of success. 170 /* Sample less often below the 10% chance of success.
109 * Sample less often above the 95% chance of success. */ 171 * Sample less often above the 95% chance of success. */
110 if ((mr->probability > 17100) || (mr->probability < 1800)) { 172 if (mr->probability > MINSTREL_FRAC(95, 100) ||
173 mr->probability < MINSTREL_FRAC(10, 100)) {
111 mr->adjusted_retry_count = mr->retry_count >> 1; 174 mr->adjusted_retry_count = mr->retry_count >> 1;
112 if (mr->adjusted_retry_count > 2) 175 if (mr->adjusted_retry_count > 2)
113 mr->adjusted_retry_count = 2; 176 mr->adjusted_retry_count = 2;
@@ -118,35 +181,32 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
118 } 181 }
119 if (!mr->adjusted_retry_count) 182 if (!mr->adjusted_retry_count)
120 mr->adjusted_retry_count = 2; 183 mr->adjusted_retry_count = 2;
121 }
122 184
123 for (i = 0; i < mi->n_rates; i++) { 185 minstrel_sort_best_tp_rates(mi, i, tmp_tp_rate);
124 struct minstrel_rate *mr = &mi->r[i]; 186
125 if (max_tp < mr->cur_tp) { 187 /* To determine the most robust rate (max_prob_rate) used at
 126 index_max_tp = i; 188 * the 3rd MRR stage we distinguish between two cases:
 127 max_tp = mr->cur_tp; 189 * (1) if any success probability >= 95%, out of those rates
 128 } 190 * choose the maximum throughput rate as max_prob_rate
 129 if (max_prob < mr->probability) { 191 * (2) if all success probabilities < 95%, the rate with the
 130 index_max_prob = i; 192 * highest success probability is chosen as max_prob_rate */
 131 max_prob = mr->probability; 193 if (mr->probability >= MINSTREL_FRAC(95, 100)) {
194 if (mr->cur_tp >= mi->r[tmp_prob_rate].cur_tp)
195 tmp_prob_rate = i;
196 } else {
197 if (mr->probability >= mi->r[tmp_prob_rate].probability)
198 tmp_prob_rate = i;
132 } 199 }
133 } 200 }
134 201
135 max_tp = 0; 202 /* Assign the new rate set */
136 for (i = 0; i < mi->n_rates; i++) { 203 memcpy(mi->max_tp_rate, tmp_tp_rate, sizeof(mi->max_tp_rate));
137 struct minstrel_rate *mr = &mi->r[i]; 204 mi->max_prob_rate = tmp_prob_rate;
138 205
139 if (i == index_max_tp) 206 /* Reset update timer */
140 continue; 207 mi->stats_update = jiffies;
141 208
142 if (max_tp < mr->cur_tp) { 209 minstrel_update_rates(mp, mi);
143 index_max_tp2 = i;
144 max_tp = mr->cur_tp;
145 }
146 }
147 mi->max_tp_rate = index_max_tp;
148 mi->max_tp_rate2 = index_max_tp2;
149 mi->max_prob_rate = index_max_prob;
150} 210}
151 211
152static void 212static void
@@ -195,9 +255,9 @@ minstrel_get_retry_count(struct minstrel_rate *mr,
195{ 255{
196 unsigned int retry = mr->adjusted_retry_count; 256 unsigned int retry = mr->adjusted_retry_count;
197 257
198 if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) 258 if (info->control.use_rts)
199 retry = max(2U, min(mr->retry_count_rtscts, retry)); 259 retry = max(2U, min(mr->retry_count_rtscts, retry));
200 else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) 260 else if (info->control.use_cts_prot)
201 retry = max(2U, min(mr->retry_count_cts, retry)); 261 retry = max(2U, min(mr->retry_count_cts, retry));
202 return retry; 262 return retry;
203} 263}
@@ -207,10 +267,10 @@ static int
207minstrel_get_next_sample(struct minstrel_sta_info *mi) 267minstrel_get_next_sample(struct minstrel_sta_info *mi)
208{ 268{
209 unsigned int sample_ndx; 269 unsigned int sample_ndx;
210 sample_ndx = SAMPLE_TBL(mi, mi->sample_idx, mi->sample_column); 270 sample_ndx = SAMPLE_TBL(mi, mi->sample_row, mi->sample_column);
211 mi->sample_idx++; 271 mi->sample_row++;
212 if ((int) mi->sample_idx > (mi->n_rates - 2)) { 272 if ((int) mi->sample_row >= mi->n_rates) {
213 mi->sample_idx = 0; 273 mi->sample_row = 0;
214 mi->sample_column++; 274 mi->sample_column++;
215 if (mi->sample_column >= SAMPLE_COLUMNS) 275 if (mi->sample_column >= SAMPLE_COLUMNS)
216 mi->sample_column = 0; 276 mi->sample_column = 0;
@@ -226,111 +286,96 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
226 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 286 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
227 struct minstrel_sta_info *mi = priv_sta; 287 struct minstrel_sta_info *mi = priv_sta;
228 struct minstrel_priv *mp = priv; 288 struct minstrel_priv *mp = priv;
229 struct ieee80211_tx_rate *ar = info->control.rates; 289 struct ieee80211_tx_rate *rate = &info->control.rates[0];
230 unsigned int ndx, sample_ndx = 0; 290 struct minstrel_rate *msr, *mr;
231 bool mrr; 291 unsigned int ndx;
232 bool sample_slower = false; 292 bool mrr_capable;
233 bool sample = false; 293 bool prev_sample = mi->prev_sample;
234 int i, delta; 294 int delta;
235 int mrr_ndx[3]; 295 int sampling_ratio;
236 int sample_rate; 296
237 297 /* management/no-ack frames do not use rate control */
238 if (rate_control_send_low(sta, priv_sta, txrc)) 298 if (rate_control_send_low(sta, priv_sta, txrc))
239 return; 299 return;
240 300
241 mrr = mp->has_mrr && !txrc->rts && !txrc->bss_conf->use_cts_prot; 301 /* check multi-rate-retry capabilities & adjust lookaround_rate */
242 302 mrr_capable = mp->has_mrr &&
243 ndx = mi->max_tp_rate; 303 !txrc->rts &&
244 304 !txrc->bss_conf->use_cts_prot;
245 if (mrr) 305 if (mrr_capable)
246 sample_rate = mp->lookaround_rate_mrr; 306 sampling_ratio = mp->lookaround_rate_mrr;
247 else 307 else
248 sample_rate = mp->lookaround_rate; 308 sampling_ratio = mp->lookaround_rate;
249 309
310 /* increase sum packet counter */
250 mi->packet_count++; 311 mi->packet_count++;
251 delta = (mi->packet_count * sample_rate / 100) - 312
313 delta = (mi->packet_count * sampling_ratio / 100) -
252 (mi->sample_count + mi->sample_deferred / 2); 314 (mi->sample_count + mi->sample_deferred / 2);
253 315
254 /* delta > 0: sampling required */ 316 /* delta < 0: no sampling required */
255 if ((delta > 0) && (mrr || !mi->prev_sample)) { 317 mi->prev_sample = false;
256 struct minstrel_rate *msr; 318 if (delta < 0 || (!mrr_capable && prev_sample))
257 if (mi->packet_count >= 10000) { 319 return;
258 mi->sample_deferred = 0;
259 mi->sample_count = 0;
260 mi->packet_count = 0;
261 } else if (delta > mi->n_rates * 2) {
262 /* With multi-rate retry, not every planned sample
263 * attempt actually gets used, due to the way the retry
264 * chain is set up - [max_tp,sample,prob,lowest] for
265 * sample_rate < max_tp.
266 *
267 * If there's too much sampling backlog and the link
268 * starts getting worse, minstrel would start bursting
269 * out lots of sampling frames, which would result
270 * in a large throughput loss. */
271 mi->sample_count += (delta - mi->n_rates * 2);
272 }
273 320
274 sample_ndx = minstrel_get_next_sample(mi); 321 if (mi->packet_count >= 10000) {
275 msr = &mi->r[sample_ndx]; 322 mi->sample_deferred = 0;
276 sample = true; 323 mi->sample_count = 0;
277 sample_slower = mrr && (msr->perfect_tx_time > 324 mi->packet_count = 0;
278 mi->r[ndx].perfect_tx_time); 325 } else if (delta > mi->n_rates * 2) {
279 326 /* With multi-rate retry, not every planned sample
280 if (!sample_slower) { 327 * attempt actually gets used, due to the way the retry
281 if (msr->sample_limit != 0) { 328 * chain is set up - [max_tp,sample,prob,lowest] for
282 ndx = sample_ndx; 329 * sample_rate < max_tp.
283 mi->sample_count++; 330 *
284 if (msr->sample_limit > 0) 331 * If there's too much sampling backlog and the link
285 msr->sample_limit--; 332 * starts getting worse, minstrel would start bursting
286 } else { 333 * out lots of sampling frames, which would result
287 sample = false; 334 * in a large throughput loss. */
288 } 335 mi->sample_count += (delta - mi->n_rates * 2);
289 } else { 336 }
290 /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark 337
291 * packets that have the sampling rate deferred to the 338 /* get next random rate sample */
292 * second MRR stage. Increase the sample counter only 339 ndx = minstrel_get_next_sample(mi);
293 * if the deferred sample rate was actually used. 340 msr = &mi->r[ndx];
294 * Use the sample_deferred counter to make sure that 341 mr = &mi->r[mi->max_tp_rate[0]];
295 * the sampling is not done in large bursts */ 342
 296 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; 343 /* Decide whether the direct (1st MRR stage) or indirect (2nd MRR
 297 mi->sample_deferred++; 344 * stage) rate sampling method should be used. Rates that have not
 298 } 345 * been sampled for 20 iterations get a direct sample attempt.
346 */
347 if (mrr_capable &&
348 msr->perfect_tx_time > mr->perfect_tx_time &&
349 msr->sample_skipped < 20) {
350 /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
351 * packets that have the sampling rate deferred to the
352 * second MRR stage. Increase the sample counter only
353 * if the deferred sample rate was actually used.
354 * Use the sample_deferred counter to make sure that
355 * the sampling is not done in large bursts */
356 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
357 rate++;
358 mi->sample_deferred++;
359 } else {
 360 if (!msr->sample_limit)
361 return;
362
363 mi->sample_count++;
364 if (msr->sample_limit > 0)
365 msr->sample_limit--;
299 } 366 }
300 mi->prev_sample = sample;
301 367
302 /* If we're not using MRR and the sampling rate already 368 /* If we're not using MRR and the sampling rate already
303 * has a probability of >95%, we shouldn't be attempting 369 * has a probability of >95%, we shouldn't be attempting
304 * to use it, as this only wastes precious airtime */ 370 * to use it, as this only wastes precious airtime */
305 if (!mrr && sample && (mi->r[ndx].probability > 17100)) 371 if (!mrr_capable &&
306 ndx = mi->max_tp_rate; 372 (mi->r[ndx].probability > MINSTREL_FRAC(95, 100)))
307
308 ar[0].idx = mi->r[ndx].rix;
309 ar[0].count = minstrel_get_retry_count(&mi->r[ndx], info);
310
311 if (!mrr) {
312 if (!sample)
313 ar[0].count = mp->max_retry;
314 ar[1].idx = mi->lowest_rix;
315 ar[1].count = mp->max_retry;
316 return; 373 return;
317 }
318 374
319 /* MRR setup */ 375 mi->prev_sample = true;
320 if (sample) { 376
321 if (sample_slower) 377 rate->idx = mi->r[ndx].rix;
322 mrr_ndx[0] = sample_ndx; 378 rate->count = minstrel_get_retry_count(&mi->r[ndx], info);
323 else
324 mrr_ndx[0] = mi->max_tp_rate;
325 } else {
326 mrr_ndx[0] = mi->max_tp_rate2;
327 }
328 mrr_ndx[1] = mi->max_prob_rate;
329 mrr_ndx[2] = 0;
330 for (i = 1; i < 4; i++) {
331 ar[i].idx = mi->r[mrr_ndx[i - 1]].rix;
332 ar[i].count = mi->r[mrr_ndx[i - 1]].adjusted_retry_count;
333 }
334} 379}
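A worked example of the sampling budget computed above, using the defaults set in minstrel_alloc() below (lookaround_rate_mrr = 10, i.e. roughly one in ten packets is a lookaround candidate):

	packet_count = 200, sample_count = 15, sample_deferred = 6
	delta = 200 * 10 / 100 - (15 + 6 / 2) = 20 - 18 = 2

A positive delta means up to two more sampling frames are owed; a negative delta skips sampling for this packet entirely.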
335 380
336 381
@@ -351,26 +396,21 @@ static void
351init_sample_table(struct minstrel_sta_info *mi) 396init_sample_table(struct minstrel_sta_info *mi)
352{ 397{
353 unsigned int i, col, new_idx; 398 unsigned int i, col, new_idx;
354 unsigned int n_srates = mi->n_rates - 1;
355 u8 rnd[8]; 399 u8 rnd[8];
356 400
357 mi->sample_column = 0; 401 mi->sample_column = 0;
358 mi->sample_idx = 0; 402 mi->sample_row = 0;
359 memset(mi->sample_table, 0, SAMPLE_COLUMNS * mi->n_rates); 403 memset(mi->sample_table, 0xff, SAMPLE_COLUMNS * mi->n_rates);
360 404
361 for (col = 0; col < SAMPLE_COLUMNS; col++) { 405 for (col = 0; col < SAMPLE_COLUMNS; col++) {
362 for (i = 0; i < n_srates; i++) { 406 for (i = 0; i < mi->n_rates; i++) {
363 get_random_bytes(rnd, sizeof(rnd)); 407 get_random_bytes(rnd, sizeof(rnd));
364 new_idx = (i + rnd[i & 7]) % n_srates; 408 new_idx = (i + rnd[i & 7]) % mi->n_rates;
365 409
366 while (SAMPLE_TBL(mi, new_idx, col) != 0) 410 while (SAMPLE_TBL(mi, new_idx, col) != 0xff)
367 new_idx = (new_idx + 1) % n_srates; 411 new_idx = (new_idx + 1) % mi->n_rates;
368 412
369 /* Don't sample the slowest rate (i.e. slowest base 413 SAMPLE_TBL(mi, new_idx, col) = i;
370 * rate). We must presume that the slowest rate works
371 * fine, or else other management frames will also be
372 * failing and the link will break */
373 SAMPLE_TBL(mi, new_idx, col) = i + 1;
374 } 414 }
375 } 415 }
376} 416}
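Each column of the rewritten sample table is now a random permutation of 0..n_rates-1, with 0xff marking still-empty slots during construction; this is what lets the lowest rate take part in sampling again. A user-space analogue of the construction, with random() standing in for get_random_bytes():

	#include <stdlib.h>
	#include <string.h>

	static void fill_column(unsigned char *col, unsigned int n_rates)
	{
		unsigned int i, new_idx;

		memset(col, 0xff, n_rates);	/* 0xff == empty slot */
		for (i = 0; i < n_rates; i++) {
			new_idx = (i + (unsigned int)random()) % n_rates;
			while (col[new_idx] != 0xff)	/* linear probe */
				new_idx = (new_idx + 1) % n_rates;
			col[new_idx] = i;
		}
	}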
@@ -385,12 +425,16 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
385 unsigned int i, n = 0; 425 unsigned int i, n = 0;
386 unsigned int t_slot = 9; /* FIXME: get real slot time */ 426 unsigned int t_slot = 9; /* FIXME: get real slot time */
387 427
428 mi->sta = sta;
388 mi->lowest_rix = rate_lowest_index(sband, sta); 429 mi->lowest_rix = rate_lowest_index(sband, sta);
389 ctl_rate = &sband->bitrates[mi->lowest_rix]; 430 ctl_rate = &sband->bitrates[mi->lowest_rix];
390 mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10, 431 mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10,
391 ctl_rate->bitrate, 432 ctl_rate->bitrate,
392 !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1); 433 !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1);
393 434
435 memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate));
436 mi->max_prob_rate = 0;
437
394 for (i = 0; i < sband->n_bitrates; i++) { 438 for (i = 0; i < sband->n_bitrates; i++) {
395 struct minstrel_rate *mr = &mi->r[n]; 439 struct minstrel_rate *mr = &mi->r[n];
396 unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0; 440 unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0;
@@ -433,6 +477,8 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
433 } while ((tx_time < mp->segment_size) && 477 } while ((tx_time < mp->segment_size) &&
434 (++mr->retry_count < mp->max_retry)); 478 (++mr->retry_count < mp->max_retry));
435 mr->adjusted_retry_count = mr->retry_count; 479 mr->adjusted_retry_count = mr->retry_count;
480 if (!(sband->bitrates[i].flags & IEEE80211_RATE_ERP_G))
481 mr->retry_count_cts = mr->retry_count;
436 } 482 }
437 483
438 for (i = n; i < sband->n_bitrates; i++) { 484 for (i = n; i < sband->n_bitrates; i++) {
@@ -444,6 +490,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
444 mi->stats_update = jiffies; 490 mi->stats_update = jiffies;
445 491
446 init_sample_table(mi); 492 init_sample_table(mi);
493 minstrel_update_rates(mp, mi);
447} 494}
448 495
449static void * 496static void *
@@ -542,9 +589,6 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
542 mp->lookaround_rate = 5; 589 mp->lookaround_rate = 5;
543 mp->lookaround_rate_mrr = 10; 590 mp->lookaround_rate_mrr = 10;
544 591
545 /* moving average weight for EWMA */
546 mp->ewma_level = 75;
547
548 /* maximum time that the hw is allowed to stay in one MRR segment */ 592 /* maximum time that the hw is allowed to stay in one MRR segment */
549 mp->segment_size = 6000; 593 mp->segment_size = 6000;
550 594
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 5ecf757817f2..f4301f4b2e41 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -9,6 +9,29 @@
9#ifndef __RC_MINSTREL_H 9#ifndef __RC_MINSTREL_H
10#define __RC_MINSTREL_H 10#define __RC_MINSTREL_H
11 11
12#define EWMA_LEVEL 96 /* ewma weighting factor [/EWMA_DIV] */
13#define EWMA_DIV 128
14#define SAMPLE_COLUMNS 10 /* number of columns in sample table */
15
16
17/* scaled fraction values */
18#define MINSTREL_SCALE 16
19#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
20#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
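These macros are plain Q16 fixed-point arithmetic: 1.0 is represented as 1 << 16 = 65536. A quick user-space check, illustrative only; note that the division truncates rather than rounds, as the 95 -> 94 round trip shows:

	#include <stdio.h>

	#define MINSTREL_SCALE 16
	#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
	#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)

	int main(void)
	{
		/* 95% -> (95 << 16) / 100 = 62259 (0.95 * 65536, truncated) */
		printf("95%% = %d\n", MINSTREL_FRAC(95, 100));
		/* back to a percentage: (62259 * 100) >> 16 = 94 */
		printf("round trip = %d%%\n",
		       MINSTREL_TRUNC(MINSTREL_FRAC(95, 100) * 100));
		return 0;
	}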
21
22/* number of highest throughput rates to consider */
23#define MAX_THR_RATES 4
24
25/*
26 * Perform EWMA (Exponentially Weighted Moving Average) calculation
27 */
28static inline int
29minstrel_ewma(int old, int new, int weight)
30{
31 return (new * (EWMA_DIV - weight) + old * weight) / EWMA_DIV;
32}
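With EWMA_LEVEL = 96 and EWMA_DIV = 128, each update keeps 96/128 = 75% of the old average and mixes in 25% of the new sample. A user-space check of the convergence (illustrative only):

	#include <stdio.h>

	#define EWMA_LEVEL 96
	#define EWMA_DIV 128

	static int ewma(int old, int new, int weight)
	{
		return (new * (EWMA_DIV - weight) + old * weight) / EWMA_DIV;
	}

	int main(void)
	{
		int avg = 0, i;

		/* feed a constant 100% sample (65536 in Q16); prints
		 * 16384, 28672, 37888, converging toward 65536 */
		for (i = 0; i < 3; i++) {
			avg = ewma(avg, 65536, EWMA_LEVEL);
			printf("step %d: %d\n", i + 1, avg);
		}
		return 0;
	}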
33
34
12struct minstrel_rate { 35struct minstrel_rate {
13 int bitrate; 36 int bitrate;
14 int rix; 37 int rix;
@@ -26,6 +49,7 @@ struct minstrel_rate {
26 u32 attempts; 49 u32 attempts;
27 u32 last_attempts; 50 u32 last_attempts;
28 u32 last_success; 51 u32 last_success;
52 u8 sample_skipped;
29 53
30 /* parts per thousand */ 54 /* parts per thousand */
31 u32 cur_prob; 55 u32 cur_prob;
@@ -39,20 +63,21 @@ struct minstrel_rate {
39}; 63};
40 64
41struct minstrel_sta_info { 65struct minstrel_sta_info {
66 struct ieee80211_sta *sta;
67
42 unsigned long stats_update; 68 unsigned long stats_update;
43 unsigned int sp_ack_dur; 69 unsigned int sp_ack_dur;
44 unsigned int rate_avg; 70 unsigned int rate_avg;
45 71
46 unsigned int lowest_rix; 72 unsigned int lowest_rix;
47 73
48 unsigned int max_tp_rate; 74 u8 max_tp_rate[MAX_THR_RATES];
49 unsigned int max_tp_rate2; 75 u8 max_prob_rate;
50 unsigned int max_prob_rate;
51 unsigned int packet_count; 76 unsigned int packet_count;
52 unsigned int sample_count; 77 unsigned int sample_count;
53 int sample_deferred; 78 int sample_deferred;
54 79
55 unsigned int sample_idx; 80 unsigned int sample_row;
56 unsigned int sample_column; 81 unsigned int sample_column;
57 82
58 int n_rates; 83 int n_rates;
@@ -73,7 +98,6 @@ struct minstrel_priv {
73 unsigned int cw_min; 98 unsigned int cw_min;
74 unsigned int cw_max; 99 unsigned int cw_max;
75 unsigned int max_retry; 100 unsigned int max_retry;
76 unsigned int ewma_level;
77 unsigned int segment_size; 101 unsigned int segment_size;
78 unsigned int update_interval; 102 unsigned int update_interval;
79 unsigned int lookaround_rate; 103 unsigned int lookaround_rate;
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index d5a56226e675..fd0b9ca1570e 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -68,23 +68,25 @@ minstrel_stats_open(struct inode *inode, struct file *file)
68 68
69 file->private_data = ms; 69 file->private_data = ms;
70 p = ms->buf; 70 p = ms->buf;
71 p += sprintf(p, "rate throughput ewma prob this prob " 71 p += sprintf(p, "rate throughput ewma prob this prob "
72 "this succ/attempt success attempts\n"); 72 "this succ/attempt success attempts\n");
73 for (i = 0; i < mi->n_rates; i++) { 73 for (i = 0; i < mi->n_rates; i++) {
74 struct minstrel_rate *mr = &mi->r[i]; 74 struct minstrel_rate *mr = &mi->r[i];
75 75
76 *(p++) = (i == mi->max_tp_rate) ? 'T' : ' '; 76 *(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' ';
77 *(p++) = (i == mi->max_tp_rate2) ? 't' : ' '; 77 *(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' ';
78 *(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' ';
79 *(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' ';
78 *(p++) = (i == mi->max_prob_rate) ? 'P' : ' '; 80 *(p++) = (i == mi->max_prob_rate) ? 'P' : ' ';
79 p += sprintf(p, "%3u%s", mr->bitrate / 2, 81 p += sprintf(p, "%3u%s", mr->bitrate / 2,
80 (mr->bitrate & 1 ? ".5" : " ")); 82 (mr->bitrate & 1 ? ".5" : " "));
81 83
82 tp = mr->cur_tp / ((18000 << 10) / 96); 84 tp = MINSTREL_TRUNC(mr->cur_tp / 10);
83 prob = mr->cur_prob / 18; 85 prob = MINSTREL_TRUNC(mr->cur_prob * 1000);
84 eprob = mr->probability / 18; 86 eprob = MINSTREL_TRUNC(mr->probability * 1000);
85 87
86 p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " 88 p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u "
87 "%3u(%3u) %8llu %8llu\n", 89 " %3u(%3u) %8llu %8llu\n",
88 tp / 10, tp % 10, 90 tp / 10, tp % 10,
89 eprob / 10, eprob % 10, 91 eprob / 10, eprob % 10,
90 prob / 10, prob % 10, 92 prob / 10, prob % 10,
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 3af141c69712..5b2d3012b983 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -17,8 +17,6 @@
17#include "rc80211_minstrel_ht.h" 17#include "rc80211_minstrel_ht.h"
18 18
19#define AVG_PKT_SIZE 1200 19#define AVG_PKT_SIZE 1200
20#define SAMPLE_COLUMNS 10
21#define EWMA_LEVEL 75
22 20
23/* Number of bits for an average sized packet */ 21/* Number of bits for an average sized packet */
24#define MCS_NBITS (AVG_PKT_SIZE << 3) 22#define MCS_NBITS (AVG_PKT_SIZE << 3)
@@ -26,11 +24,11 @@
26/* Number of symbols for a packet with (bps) bits per symbol */ 24/* Number of symbols for a packet with (bps) bits per symbol */
27#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) 25#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps))
28 26
29/* Transmission time for a packet containing (syms) symbols */ 27/* Transmission time (nanoseconds) for a packet containing (syms) symbols */
30#define MCS_SYMBOL_TIME(sgi, syms) \ 28#define MCS_SYMBOL_TIME(sgi, syms) \
31 (sgi ? \ 29 (sgi ? \
32 ((syms) * 18 + 4) / 5 : /* syms * 3.6 us */ \ 30 ((syms) * 18000 + 4000) / 5 : /* syms * 3.6 us */ \
33 (syms) << 2 /* syms * 4 us */ \ 31 ((syms) * 1000) << 2 /* syms * 4 us */ \
34 ) 32 )
35 33
36/* Transmit duration for the raw data part of an average sized packet */ 34/* Transmit duration for the raw data part of an average sized packet */
@@ -64,9 +62,9 @@
64} 62}
65 63
66#define CCK_DURATION(_bitrate, _short, _len) \ 64#define CCK_DURATION(_bitrate, _short, _len) \
67 (10 /* SIFS */ + \ 65 (1000 * (10 /* SIFS */ + \
68 (_short ? 72 + 24 : 144 + 48 ) + \ 66 (_short ? 72 + 24 : 144 + 48 ) + \
69 (8 * (_len + 4) * 10) / (_bitrate)) 67 (8 * (_len + 4) * 10) / (_bitrate)))
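Sanity check of the new nanosecond scale, assuming _bitrate is in 100 kb/s units as in the ACK macro below: CCK_DURATION(10, false, 60) = 1000 * (10 + 192 + (8 * 64 * 10) / 10) = 714000 ns for a 60-byte frame at 1 Mb/s, i.e. exactly the old microsecond result times 1000.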
70 68
71#define CCK_ACK_DURATION(_bitrate, _short) \ 69#define CCK_ACK_DURATION(_bitrate, _short) \
72 (CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) + \ 70 (CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) + \
@@ -128,14 +126,8 @@ const struct mcs_group minstrel_mcs_groups[] = {
128 126
129static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES]; 127static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES];
130 128
131/* 129static void
132 * Perform EWMA (Exponentially Weighted Moving Average) calculation 130minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi);
133 */
134static int
135minstrel_ewma(int old, int new, int weight)
136{
137 return (new * (100 - weight) + old * weight) / 100;
138}
139 131
140/* 132/*
141 * Look up an MCS group index based on mac80211 rate information 133 * Look up an MCS group index based on mac80211 rate information
@@ -211,20 +203,32 @@ static void
211minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) 203minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate)
212{ 204{
213 struct minstrel_rate_stats *mr; 205 struct minstrel_rate_stats *mr;
214 unsigned int usecs = 0; 206 unsigned int nsecs = 0;
207 unsigned int tp;
208 unsigned int prob;
215 209
216 mr = &mi->groups[group].rates[rate]; 210 mr = &mi->groups[group].rates[rate];
211 prob = mr->probability;
217 212
218 if (mr->probability < MINSTREL_FRAC(1, 10)) { 213 if (prob < MINSTREL_FRAC(1, 10)) {
219 mr->cur_tp = 0; 214 mr->cur_tp = 0;
220 return; 215 return;
221 } 216 }
222 217
218 /*
 219 * For the throughput calculation, limit the probability value to 90% to
 220 * account for collision-related packet error rate fluctuations
221 */
222 if (prob > MINSTREL_FRAC(9, 10))
223 prob = MINSTREL_FRAC(9, 10);
224
223 if (group != MINSTREL_CCK_GROUP) 225 if (group != MINSTREL_CCK_GROUP)
224 usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); 226 nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
225 227
226 usecs += minstrel_mcs_groups[group].duration[rate]; 228 nsecs += minstrel_mcs_groups[group].duration[rate];
227 mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability); 229 tp = 1000000 * ((mr->probability * 1000) / nsecs);
230
231 mr->cur_tp = MINSTREL_TRUNC(tp);
228} 232}
229 233
230/* 234/*
@@ -243,6 +247,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
243 struct minstrel_rate_stats *mr; 247 struct minstrel_rate_stats *mr;
244 int cur_prob, cur_prob_tp, cur_tp, cur_tp2; 248 int cur_prob, cur_prob_tp, cur_tp, cur_tp2;
245 int group, i, index; 249 int group, i, index;
250 bool mi_rates_valid = false;
246 251
247 if (mi->ampdu_packets > 0) { 252 if (mi->ampdu_packets > 0) {
248 mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, 253 mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len,
@@ -253,11 +258,10 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
253 258
254 mi->sample_slow = 0; 259 mi->sample_slow = 0;
255 mi->sample_count = 0; 260 mi->sample_count = 0;
256 mi->max_tp_rate = 0;
257 mi->max_tp_rate2 = 0;
258 mi->max_prob_rate = 0;
259 261
260 for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { 262 for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
263 bool mg_rates_valid = false;
264
261 cur_prob = 0; 265 cur_prob = 0;
262 cur_prob_tp = 0; 266 cur_prob_tp = 0;
263 cur_tp = 0; 267 cur_tp = 0;
@@ -267,15 +271,24 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
267 if (!mg->supported) 271 if (!mg->supported)
268 continue; 272 continue;
269 273
270 mg->max_tp_rate = 0;
271 mg->max_tp_rate2 = 0;
272 mg->max_prob_rate = 0;
273 mi->sample_count++; 274 mi->sample_count++;
274 275
275 for (i = 0; i < MCS_GROUP_RATES; i++) { 276 for (i = 0; i < MCS_GROUP_RATES; i++) {
276 if (!(mg->supported & BIT(i))) 277 if (!(mg->supported & BIT(i)))
277 continue; 278 continue;
278 279
 280 /* initialize the rate-selection starting indexes */
281 if (!mg_rates_valid) {
282 mg->max_tp_rate = mg->max_tp_rate2 =
283 mg->max_prob_rate = i;
284 if (!mi_rates_valid) {
285 mi->max_tp_rate = mi->max_tp_rate2 =
286 mi->max_prob_rate = i;
287 mi_rates_valid = true;
288 }
289 mg_rates_valid = true;
290 }
291
279 mr = &mg->rates[i]; 292 mr = &mg->rates[i];
280 mr->retry_updated = false; 293 mr->retry_updated = false;
281 index = MCS_GROUP_RATES * group + i; 294 index = MCS_GROUP_RATES * group + i;
@@ -308,8 +321,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
308 } 321 }
309 } 322 }
310 323
311 /* try to sample up to half of the available rates during each interval */ 324 /* try to sample all available rates during each interval */
312 mi->sample_count *= 4; 325 mi->sample_count *= 8;
313 326
314 cur_prob = 0; 327 cur_prob = 0;
315 cur_prob_tp = 0; 328 cur_prob_tp = 0;
@@ -320,20 +333,13 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
320 if (!mg->supported) 333 if (!mg->supported)
321 continue; 334 continue;
322 335
323 mr = minstrel_get_ratestats(mi, mg->max_prob_rate);
324 if (cur_prob_tp < mr->cur_tp &&
325 minstrel_mcs_groups[group].streams == 1) {
326 mi->max_prob_rate = mg->max_prob_rate;
327 cur_prob = mr->cur_prob;
328 cur_prob_tp = mr->cur_tp;
329 }
330
331 mr = minstrel_get_ratestats(mi, mg->max_tp_rate); 336 mr = minstrel_get_ratestats(mi, mg->max_tp_rate);
332 if (cur_tp < mr->cur_tp) { 337 if (cur_tp < mr->cur_tp) {
333 mi->max_tp_rate2 = mi->max_tp_rate; 338 mi->max_tp_rate2 = mi->max_tp_rate;
334 cur_tp2 = cur_tp; 339 cur_tp2 = cur_tp;
335 mi->max_tp_rate = mg->max_tp_rate; 340 mi->max_tp_rate = mg->max_tp_rate;
336 cur_tp = mr->cur_tp; 341 cur_tp = mr->cur_tp;
342 mi->max_prob_streams = minstrel_mcs_groups[group].streams - 1;
337 } 343 }
338 344
339 mr = minstrel_get_ratestats(mi, mg->max_tp_rate2); 345 mr = minstrel_get_ratestats(mi, mg->max_tp_rate2);
@@ -343,6 +349,23 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
343 } 349 }
344 } 350 }
345 351
352 if (mi->max_prob_streams < 1)
353 mi->max_prob_streams = 1;
354
355 for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
356 mg = &mi->groups[group];
357 if (!mg->supported)
358 continue;
359 mr = minstrel_get_ratestats(mi, mg->max_prob_rate);
360 if (cur_prob_tp < mr->cur_tp &&
361 minstrel_mcs_groups[group].streams <= mi->max_prob_streams) {
362 mi->max_prob_rate = mg->max_prob_rate;
363 cur_prob = mr->cur_prob;
364 cur_prob_tp = mr->cur_tp;
365 }
366 }
367
368
346 mi->stats_update = jiffies; 369 mi->stats_update = jiffies;
347} 370}
348 371
@@ -445,7 +468,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
445 struct ieee80211_tx_rate *ar = info->status.rates; 468 struct ieee80211_tx_rate *ar = info->status.rates;
446 struct minstrel_rate_stats *rate, *rate2; 469 struct minstrel_rate_stats *rate, *rate2;
447 struct minstrel_priv *mp = priv; 470 struct minstrel_priv *mp = priv;
448 bool last; 471 bool last, update = false;
449 int i; 472 int i;
450 473
451 if (!msp->is_ht) 474 if (!msp->is_ht)
@@ -467,7 +490,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
467 490
468 if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { 491 if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) {
469 mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); 492 mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len);
470 mi->sample_tries = 2; 493 mi->sample_tries = 1;
471 mi->sample_count--; 494 mi->sample_count--;
472 } 495 }
473 496
@@ -494,21 +517,29 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
494 rate = minstrel_get_ratestats(mi, mi->max_tp_rate); 517 rate = minstrel_get_ratestats(mi, mi->max_tp_rate);
495 if (rate->attempts > 30 && 518 if (rate->attempts > 30 &&
496 MINSTREL_FRAC(rate->success, rate->attempts) < 519 MINSTREL_FRAC(rate->success, rate->attempts) <
497 MINSTREL_FRAC(20, 100)) 520 MINSTREL_FRAC(20, 100)) {
498 minstrel_downgrade_rate(mi, &mi->max_tp_rate, true); 521 minstrel_downgrade_rate(mi, &mi->max_tp_rate, true);
522 update = true;
523 }
499 524
500 rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2); 525 rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2);
501 if (rate2->attempts > 30 && 526 if (rate2->attempts > 30 &&
502 MINSTREL_FRAC(rate2->success, rate2->attempts) < 527 MINSTREL_FRAC(rate2->success, rate2->attempts) <
503 MINSTREL_FRAC(20, 100)) 528 MINSTREL_FRAC(20, 100)) {
504 minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false); 529 minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false);
530 update = true;
531 }
505 532
506 if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { 533 if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) {
534 update = true;
507 minstrel_ht_update_stats(mp, mi); 535 minstrel_ht_update_stats(mp, mi);
508 if (!(info->flags & IEEE80211_TX_CTL_AMPDU) && 536 if (!(info->flags & IEEE80211_TX_CTL_AMPDU) &&
509 mi->max_prob_rate / MCS_GROUP_RATES != MINSTREL_CCK_GROUP) 537 mi->max_prob_rate / MCS_GROUP_RATES != MINSTREL_CCK_GROUP)
510 minstrel_aggr_check(sta, skb); 538 minstrel_aggr_check(sta, skb);
511 } 539 }
540
541 if (update)
542 minstrel_ht_update_rates(mp, mi);
512} 543}
513 544
514static void 545static void
@@ -536,7 +567,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
536 mr->retry_updated = true; 567 mr->retry_updated = true;
537 568
538 group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; 569 group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
539 tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len; 570 tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000;
540 571
541 /* Contention time for first 2 tries */ 572 /* Contention time for first 2 tries */
542 ctime = (t_slot * cw) >> 1; 573 ctime = (t_slot * cw) >> 1;
@@ -572,36 +603,71 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
572 603
573static void 604static void
574minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, 605minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
575 struct ieee80211_tx_rate *rate, int index, 606 struct ieee80211_sta_rates *ratetbl, int offset, int index)
576 bool sample, bool rtscts)
577{ 607{
578 const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; 608 const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
579 struct minstrel_rate_stats *mr; 609 struct minstrel_rate_stats *mr;
610 u8 idx;
611 u16 flags;
580 612
581 mr = minstrel_get_ratestats(mi, index); 613 mr = minstrel_get_ratestats(mi, index);
582 if (!mr->retry_updated) 614 if (!mr->retry_updated)
583 minstrel_calc_retransmit(mp, mi, index); 615 minstrel_calc_retransmit(mp, mi, index);
584 616
585 if (sample) 617 if (mr->probability < MINSTREL_FRAC(20, 100) || !mr->retry_count) {
586 rate->count = 1; 618 ratetbl->rate[offset].count = 2;
587 else if (mr->probability < MINSTREL_FRAC(20, 100)) 619 ratetbl->rate[offset].count_rts = 2;
588 rate->count = 2; 620 ratetbl->rate[offset].count_cts = 2;
589 else if (rtscts) 621 } else {
590 rate->count = mr->retry_count_rtscts; 622 ratetbl->rate[offset].count = mr->retry_count;
591 else 623 ratetbl->rate[offset].count_cts = mr->retry_count;
592 rate->count = mr->retry_count; 624 ratetbl->rate[offset].count_rts = mr->retry_count_rtscts;
593 625 }
594 rate->flags = 0;
595 if (rtscts)
596 rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS;
597 626
598 if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { 627 if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) {
599 rate->idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)]; 628 idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)];
629 flags = 0;
630 } else {
631 idx = index % MCS_GROUP_RATES +
632 (group->streams - 1) * MCS_GROUP_RATES;
633 flags = IEEE80211_TX_RC_MCS | group->flags;
634 }
635
636 if (offset > 0) {
637 ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
638 flags |= IEEE80211_TX_RC_USE_RTS_CTS;
639 }
640
641 ratetbl->rate[offset].idx = idx;
642 ratetbl->rate[offset].flags = flags;
643}
644
645static void
646minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
647{
648 struct ieee80211_sta_rates *rates;
649 int i = 0;
650
651 rates = kzalloc(sizeof(*rates), GFP_ATOMIC);
652 if (!rates)
600 return; 653 return;
654
655 /* Start with max_tp_rate */
656 minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate);
657
658 if (mp->hw->max_rates >= 3) {
659 /* At least 3 tx rates supported, use max_tp_rate2 next */
660 minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate2);
601 } 661 }
602 662
603 rate->flags |= IEEE80211_TX_RC_MCS | group->flags; 663 if (mp->hw->max_rates >= 2) {
604 rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES; 664 /*
665 * At least 2 tx rates supported, use max_prob_rate next */
666 minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate);
667 }
668
669 rates->rate[i].idx = -1;
670 rate_control_set_rates(mp->hw, mi->sta, rates);
605} 671}
606 672
607static inline int 673static inline int
@@ -616,6 +682,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
616{ 682{
617 struct minstrel_rate_stats *mr; 683 struct minstrel_rate_stats *mr;
618 struct minstrel_mcs_group_data *mg; 684 struct minstrel_mcs_group_data *mg;
685 unsigned int sample_dur, sample_group;
619 int sample_idx = 0; 686 int sample_idx = 0;
620 687
621 if (mi->sample_wait > 0) { 688 if (mi->sample_wait > 0) {
@@ -626,39 +693,46 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
626 if (!mi->sample_tries) 693 if (!mi->sample_tries)
627 return -1; 694 return -1;
628 695
629 mi->sample_tries--;
630 mg = &mi->groups[mi->sample_group]; 696 mg = &mi->groups[mi->sample_group];
631 sample_idx = sample_table[mg->column][mg->index]; 697 sample_idx = sample_table[mg->column][mg->index];
632 mr = &mg->rates[sample_idx]; 698 mr = &mg->rates[sample_idx];
633 sample_idx += mi->sample_group * MCS_GROUP_RATES; 699 sample_group = mi->sample_group;
700 sample_idx += sample_group * MCS_GROUP_RATES;
634 minstrel_next_sample_idx(mi); 701 minstrel_next_sample_idx(mi);
635 702
636 /* 703 /*
637 * Sampling might add some overhead (RTS, no aggregation) 704 * Sampling might add some overhead (RTS, no aggregation)
638 * to the frame. Hence, don't use sampling for the currently 705 * to the frame. Hence, don't use sampling for the currently
639 * used max TP rate. 706 * used rates.
640 */ 707 */
641 if (sample_idx == mi->max_tp_rate) 708 if (sample_idx == mi->max_tp_rate ||
709 sample_idx == mi->max_tp_rate2 ||
710 sample_idx == mi->max_prob_rate)
642 return -1; 711 return -1;
712
643 /* 713 /*
644 * When not using MRR, do not sample if the probability is already 714 * Do not sample if the probability is already higher than 95%
645 * higher than 95% to avoid wasting airtime 715 * to avoid wasting airtime.
646 */ 716 */
647 if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) 717 if (mr->probability > MINSTREL_FRAC(95, 100))
648 return -1; 718 return -1;
649 719
650 /* 720 /*
651 * Make sure that lower rates get sampled only occasionally, 721 * Make sure that lower rates get sampled only occasionally,
652 * if the link is working perfectly. 722 * if the link is working perfectly.
653 */ 723 */
654 if (minstrel_get_duration(sample_idx) > 724 sample_dur = minstrel_get_duration(sample_idx);
655 minstrel_get_duration(mi->max_tp_rate)) { 725 if (sample_dur >= minstrel_get_duration(mi->max_tp_rate2) &&
726 (mi->max_prob_streams <
727 minstrel_mcs_groups[sample_group].streams ||
728 sample_dur >= minstrel_get_duration(mi->max_prob_rate))) {
656 if (mr->sample_skipped < 20) 729 if (mr->sample_skipped < 20)
657 return -1; 730 return -1;
658 731
659 if (mi->sample_slow++ > 2) 732 if (mi->sample_slow++ > 2)
660 return -1; 733 return -1;
661 } 734 }
735 mi->sample_tries--;
662 736
663 return sample_idx; 737 return sample_idx;
664} 738}
@@ -683,13 +757,13 @@ static void
683minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, 757minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
684 struct ieee80211_tx_rate_control *txrc) 758 struct ieee80211_tx_rate_control *txrc)
685{ 759{
760 const struct mcs_group *sample_group;
686 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); 761 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
687 struct ieee80211_tx_rate *ar = info->status.rates; 762 struct ieee80211_tx_rate *rate = &info->status.rates[0];
688 struct minstrel_ht_sta_priv *msp = priv_sta; 763 struct minstrel_ht_sta_priv *msp = priv_sta;
689 struct minstrel_ht_sta *mi = &msp->ht; 764 struct minstrel_ht_sta *mi = &msp->ht;
690 struct minstrel_priv *mp = priv; 765 struct minstrel_priv *mp = priv;
691 int sample_idx; 766 int sample_idx;
692 bool sample = false;
693 767
694 if (rate_control_send_low(sta, priv_sta, txrc)) 768 if (rate_control_send_low(sta, priv_sta, txrc))
695 return; 769 return;
@@ -717,51 +791,6 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
717 } 791 }
718#endif 792#endif
719 793
720 if (sample_idx >= 0) {
721 sample = true;
722 minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx,
723 true, false);
724 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
725 } else {
726 minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate,
727 false, false);
728 }
729
730 if (mp->hw->max_rates >= 3) {
731 /*
732 * At least 3 tx rates supported, use
733 * sample_rate -> max_tp_rate -> max_prob_rate for sampling and
734 * max_tp_rate -> max_tp_rate2 -> max_prob_rate by default.
735 */
736 if (sample_idx >= 0)
737 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate,
738 false, false);
739 else
740 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2,
741 false, true);
742
743 minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate,
744 false, !sample);
745
746 ar[3].count = 0;
747 ar[3].idx = -1;
748 } else if (mp->hw->max_rates == 2) {
749 /*
750 * Only 2 tx rates supported, use
751 * sample_rate -> max_prob_rate for sampling and
752 * max_tp_rate -> max_prob_rate by default.
753 */
754 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_prob_rate,
755 false, !sample);
756
757 ar[2].count = 0;
758 ar[2].idx = -1;
759 } else {
760 /* Not using MRR, only use the first rate */
761 ar[1].count = 0;
762 ar[1].idx = -1;
763 }
764
765 mi->total_packets++; 794 mi->total_packets++;
766 795
767 /* wraparound */ 796 /* wraparound */
@@ -769,6 +798,16 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
769 mi->total_packets = 0; 798 mi->total_packets = 0;
770 mi->sample_packets = 0; 799 mi->sample_packets = 0;
771 } 800 }
801
802 if (sample_idx < 0)
803 return;
804
805 sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES];
806 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
807 rate->idx = sample_idx % MCS_GROUP_RATES +
808 (sample_group->streams - 1) * MCS_GROUP_RATES;
809 rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags;
810 rate->count = 1;
772} 811}
773 812
774static void 813static void
@@ -818,6 +857,8 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
818 857
819 msp->is_ht = true; 858 msp->is_ht = true;
820 memset(mi, 0, sizeof(*mi)); 859 memset(mi, 0, sizeof(*mi));
860
861 mi->sta = sta;
821 mi->stats_update = jiffies; 862 mi->stats_update = jiffies;
822 863
823 ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1); 864 ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1);
@@ -879,6 +920,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
879 if (!n_supported) 920 if (!n_supported)
880 goto use_legacy; 921 goto use_legacy;
881 922
923 /* create an initial rate table with the lowest supported rates */
924 minstrel_ht_update_stats(mp, mi);
925 minstrel_ht_update_rates(mp, mi);
926
882 return; 927 return;
883 928
884use_legacy: 929use_legacy:
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
index 302dbd52180d..d655586773ac 100644
--- a/net/mac80211/rc80211_minstrel_ht.h
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -16,11 +16,6 @@
16#define MINSTREL_MAX_STREAMS 3 16#define MINSTREL_MAX_STREAMS 3
17#define MINSTREL_STREAM_GROUPS 4 17#define MINSTREL_STREAM_GROUPS 4
18 18
19/* scaled fraction values */
20#define MINSTREL_SCALE 16
21#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div)
22#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
23
24#define MCS_GROUP_RATES 8 19#define MCS_GROUP_RATES 8
25 20
26struct mcs_group { 21struct mcs_group {
@@ -70,6 +65,8 @@ struct minstrel_mcs_group_data {
70}; 65};
71 66
72struct minstrel_ht_sta { 67struct minstrel_ht_sta {
68 struct ieee80211_sta *sta;
69
73 /* ampdu length (average, per sampling interval) */ 70 /* ampdu length (average, per sampling interval) */
74 unsigned int ampdu_len; 71 unsigned int ampdu_len;
75 unsigned int ampdu_packets; 72 unsigned int ampdu_packets;
@@ -85,6 +82,7 @@ struct minstrel_ht_sta {
85 82
86 /* best probability rate */ 83 /* best probability rate */
87 unsigned int max_prob_rate; 84 unsigned int max_prob_rate;
85 unsigned int max_prob_streams;
88 86
89 /* time of last status update */ 87 /* time of last status update */
90 unsigned long stats_update; 88 unsigned long stats_update;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c6844ad080be..c8447af76ead 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -648,24 +648,6 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
648 return RX_CONTINUE; 648 return RX_CONTINUE;
649} 649}
650 650
651#define SEQ_MODULO 0x1000
652#define SEQ_MASK 0xfff
653
654static inline int seq_less(u16 sq1, u16 sq2)
655{
656 return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1);
657}
658
659static inline u16 seq_inc(u16 sq)
660{
661 return (sq + 1) & SEQ_MASK;
662}
663
664static inline u16 seq_sub(u16 sq1, u16 sq2)
665{
666 return (sq1 - sq2) & SEQ_MASK;
667}
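These helpers are removed in favor of the ieee80211_sn_*() functions used by the new code below (the same modulo-4096 arithmetic, now shared outside rx.c). A user-space sketch of the wraparound corner case the masking handles:

	#include <assert.h>

	#define SEQ_MODULO 0x1000
	#define SEQ_MASK 0xfff

	static int sn_less(unsigned int a, unsigned int b)
	{
		return ((a - b) & SEQ_MASK) > (SEQ_MODULO >> 1);
	}

	int main(void)
	{
		assert(sn_less(4090, 5));	/* 4090 wraps "before" 5 */
		assert(!sn_less(5, 4090));
		assert(((5 - 4090) & SEQ_MASK) == 11);	/* sn_sub(5, 4090) */
		return 0;
	}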
668
669static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, 651static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
670 struct tid_ampdu_rx *tid_agg_rx, 652 struct tid_ampdu_rx *tid_agg_rx,
671 int index, 653 int index,
@@ -687,7 +669,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
687 __skb_queue_tail(frames, skb); 669 __skb_queue_tail(frames, skb);
688 670
689no_frame: 671no_frame:
690 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); 672 tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
691} 673}
692 674
693static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata, 675static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata,
@@ -699,8 +681,9 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata
699 681
700 lockdep_assert_held(&tid_agg_rx->reorder_lock); 682 lockdep_assert_held(&tid_agg_rx->reorder_lock);
701 683
702 while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { 684 while (ieee80211_sn_less(tid_agg_rx->head_seq_num, head_seq_num)) {
703 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % 685 index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
686 tid_agg_rx->ssn) %
704 tid_agg_rx->buf_size; 687 tid_agg_rx->buf_size;
705 ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, 688 ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,
706 frames); 689 frames);
@@ -727,8 +710,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
727 lockdep_assert_held(&tid_agg_rx->reorder_lock); 710 lockdep_assert_held(&tid_agg_rx->reorder_lock);
728 711
729 /* release the buffer until next missing frame */ 712 /* release the buffer until next missing frame */
730 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % 713 index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
731 tid_agg_rx->buf_size; 714 tid_agg_rx->ssn) % tid_agg_rx->buf_size;
732 if (!tid_agg_rx->reorder_buf[index] && 715 if (!tid_agg_rx->reorder_buf[index] &&
733 tid_agg_rx->stored_mpdu_num) { 716 tid_agg_rx->stored_mpdu_num) {
734 /* 717 /*
@@ -756,19 +739,22 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
756 * Increment the head seq# also for the skipped slots. 739 * Increment the head seq# also for the skipped slots.
757 */ 740 */
758 tid_agg_rx->head_seq_num = 741 tid_agg_rx->head_seq_num =
759 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK; 742 (tid_agg_rx->head_seq_num +
743 skipped) & IEEE80211_SN_MASK;
760 skipped = 0; 744 skipped = 0;
761 } 745 }
762 } else while (tid_agg_rx->reorder_buf[index]) { 746 } else while (tid_agg_rx->reorder_buf[index]) {
763 ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, 747 ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,
764 frames); 748 frames);
765 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % 749 index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
750 tid_agg_rx->ssn) %
766 tid_agg_rx->buf_size; 751 tid_agg_rx->buf_size;
767 } 752 }
768 753
769 if (tid_agg_rx->stored_mpdu_num) { 754 if (tid_agg_rx->stored_mpdu_num) {
770 j = index = seq_sub(tid_agg_rx->head_seq_num, 755 j = index = ieee80211_sn_sub(tid_agg_rx->head_seq_num,
771 tid_agg_rx->ssn) % tid_agg_rx->buf_size; 756 tid_agg_rx->ssn) %
757 tid_agg_rx->buf_size;
772 758
773 for (; j != (index - 1) % tid_agg_rx->buf_size; 759 for (; j != (index - 1) % tid_agg_rx->buf_size;
774 j = (j + 1) % tid_agg_rx->buf_size) { 760 j = (j + 1) % tid_agg_rx->buf_size) {
@@ -809,7 +795,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
809 head_seq_num = tid_agg_rx->head_seq_num; 795 head_seq_num = tid_agg_rx->head_seq_num;
810 796
811 /* frame with out of date sequence number */ 797 /* frame with out of date sequence number */
812 if (seq_less(mpdu_seq_num, head_seq_num)) { 798 if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
813 dev_kfree_skb(skb); 799 dev_kfree_skb(skb);
814 goto out; 800 goto out;
815 } 801 }
@@ -818,8 +804,9 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
 818 * If the frame's sequence number exceeds our buffering window 804 * If the frame's sequence number exceeds our buffering window
 819 * size, release some previous frames to make room for this one. 805 * size, release some previous frames to make room for this one.
820 */ 806 */
821 if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) { 807 if (!ieee80211_sn_less(mpdu_seq_num, head_seq_num + buf_size)) {
822 head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size)); 808 head_seq_num = ieee80211_sn_inc(
809 ieee80211_sn_sub(mpdu_seq_num, buf_size));
823 /* release stored frames up to new head to stack */ 810 /* release stored frames up to new head to stack */
824 ieee80211_release_reorder_frames(sdata, tid_agg_rx, 811 ieee80211_release_reorder_frames(sdata, tid_agg_rx,
825 head_seq_num, frames); 812 head_seq_num, frames);
@@ -827,7 +814,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
827 814
828 /* Now the new frame is always in the range of the reordering buffer */ 815 /* Now the new frame is always in the range of the reordering buffer */
829 816
830 index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; 817 index = ieee80211_sn_sub(mpdu_seq_num,
818 tid_agg_rx->ssn) % tid_agg_rx->buf_size;
831 819
832 /* check if we already stored this frame */ 820 /* check if we already stored this frame */
833 if (tid_agg_rx->reorder_buf[index]) { 821 if (tid_agg_rx->reorder_buf[index]) {
@@ -843,7 +831,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
 	 */
 	if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
 	    tid_agg_rx->stored_mpdu_num == 0) {
-		tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
+		tid_agg_rx->head_seq_num =
+			ieee80211_sn_inc(tid_agg_rx->head_seq_num);
 		ret = false;
 		goto out;
 	}
@@ -1894,8 +1883,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1894 * 'align' will only take the values 0 or 2 here 1883 * 'align' will only take the values 0 or 2 here
1895 * since all frames are required to be aligned 1884 * since all frames are required to be aligned
1896 * to 2-byte boundaries when being passed to 1885 * to 2-byte boundaries when being passed to
1897 * mac80211. That also explains the __skb_push() 1886 * mac80211; the code here works just as well if
1898 * below. 1887 * that isn't true, but mac80211 assumes it can
1888 * access fields as 2-byte aligned (e.g. for
1889 * compare_ether_addr)
1899 */ 1890 */
1900 align = ((unsigned long)(skb->data + sizeof(struct ethhdr))) & 3; 1891 align = ((unsigned long)(skb->data + sizeof(struct ethhdr))) & 3;
1901 if (align) { 1892 if (align) {
@@ -2094,6 +2085,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
 	}
 
 	fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data;
+	fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);
 	info = IEEE80211_SKB_CB(fwd_skb);
 	memset(info, 0, sizeof(*info));
 	info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
@@ -2432,6 +2424,22 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 		}
 
 		break;
+	case WLAN_CATEGORY_PUBLIC:
+		if (len < IEEE80211_MIN_ACTION_SIZE + 1)
+			goto invalid;
+		if (sdata->vif.type != NL80211_IFTYPE_STATION)
+			break;
+		if (!rx->sta)
+			break;
+		if (!ether_addr_equal(mgmt->bssid, sdata->u.mgd.bssid))
+			break;
+		if (mgmt->u.action.u.ext_chan_switch.action_code !=
+		    WLAN_PUB_ACTION_EXT_CHANSW_ANN)
+			break;
+		if (len < offsetof(struct ieee80211_mgmt,
+				   u.action.u.ext_chan_switch.variable))
+			goto invalid;
+		goto queue;
 	case WLAN_CATEGORY_VHT:
 		if (sdata->vif.type != NL80211_IFTYPE_STATION &&
 		    sdata->vif.type != NL80211_IFTYPE_MESH_POINT &&
@@ -2515,10 +2523,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			ieee80211_process_measurement_req(sdata, mgmt, len);
 			goto handled;
 		case WLAN_ACTION_SPCT_CHL_SWITCH:
-			if (len < (IEEE80211_MIN_ACTION_SIZE +
-				   sizeof(mgmt->u.action.u.chan_switch)))
-				break;
-
 			if (sdata->vif.type != NL80211_IFTYPE_STATION)
 				break;
 
@@ -2552,7 +2556,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 	case WLAN_SP_MESH_PEERING_CONFIRM:
 		if (!ieee80211_vif_is_mesh(&sdata->vif))
 			goto invalid;
-		if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE)
+		if (sdata->u.mesh.user_mpm)
 			/* userspace handles this frame */
 			break;
 		goto queue;
@@ -3051,7 +3055,8 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
 		    !ieee80211_is_probe_resp(hdr->frame_control) &&
 		    !ieee80211_is_beacon(hdr->frame_control))
 			return 0;
-		if (!ether_addr_equal(sdata->vif.addr, hdr->addr1))
+		if (!ether_addr_equal(sdata->vif.addr, hdr->addr1) &&
+		    !multicast)
 			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
 		break;
 	default:
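
The rx.c hunks above replace mac80211's private seq_less/seq_inc/seq_sub helpers with shared ieee80211_sn_*() ones; the underlying operation is ordinary modular arithmetic on the 12-bit 802.11 sequence-number space. A minimal standalone sketch, assuming the conventional 0xfff mask (helper names here are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SN_MASK   0xfff          /* 802.11 sequence numbers are 12 bits */
#define SN_MODULO (SN_MASK + 1)

static uint16_t sn_sub(uint16_t a, uint16_t b) { return (a - b) & SN_MASK; }
static uint16_t sn_inc(uint16_t sn)            { return (sn + 1) & SN_MASK; }

/* "less than" on the circular 12-bit space: true if a is behind b */
static bool sn_less(uint16_t a, uint16_t b)
{
	return sn_sub(a, b) > (SN_MODULO >> 1);
}

int main(void)
{
	printf("%u\n", sn_inc(0xfff));          /* 0: wraps around */
	printf("%d\n", sn_less(0xffe, 0x001));  /* 1: still behind after wrap */
	printf("%u\n", sn_sub(0x001, 0xffe));   /* 3: distance across the wrap */
	return 0;
}
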
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 43a45cf00e06..99b103921a4b 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -98,9 +98,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 	}
 
 	/* save the ERP value so that it is available at association time */
-	if (elems->erp_info && elems->erp_info_len >= 1 &&
-	    (!elems->parse_error ||
-	     !(bss->valid_data & IEEE80211_BSS_VALID_ERP))) {
+	if (elems->erp_info && (!elems->parse_error ||
+				!(bss->valid_data & IEEE80211_BSS_VALID_ERP))) {
 		bss->erp_value = elems->erp_info[0];
 		bss->has_erp_value = true;
 		if (!elems->parse_error)
@@ -153,7 +152,6 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
 	u8 *elements;
 	struct ieee80211_channel *channel;
 	size_t baselen;
-	bool beacon;
 	struct ieee802_11_elems elems;
 
 	if (skb->len < 24 ||
@@ -175,17 +173,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
 
 		elements = mgmt->u.probe_resp.variable;
 		baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable);
-		beacon = false;
 	} else {
 		baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable);
 		elements = mgmt->u.beacon.variable;
-		beacon = true;
 	}
 
 	if (baselen > skb->len)
 		return;
 
-	ieee802_11_parse_elems(elements, skb->len - baselen, &elems);
+	ieee802_11_parse_elems(elements, skb->len - baselen, false, &elems);
 
 	channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq);
 
@@ -335,7 +331,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
 	ieee80211_offchannel_stop_vifs(local);
 
 	/* ensure nullfunc is transmitted before leaving operating channel */
-	drv_flush(local, false);
+	ieee80211_flush_queues(local, NULL);
 
 	ieee80211_configure_filter(local);
 
@@ -387,7 +383,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
 {
 	int i;
 	struct ieee80211_sub_if_data *sdata;
-	enum ieee80211_band band = local->hw.conf.channel->band;
+	enum ieee80211_band band = local->hw.conf.chandef.chan->band;
 	u32 tx_flags;
 
 	tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK;
@@ -404,7 +400,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,
 			local->scan_req->ssids[i].ssid_len,
 			local->scan_req->ie, local->scan_req->ie_len,
 			local->scan_req->rates[band], false,
-			tx_flags, local->hw.conf.channel, true);
+			tx_flags, local->hw.conf.chandef.chan, true);
 
 	/*
 	 * After sending probe requests, wait for probe responses
@@ -470,7 +466,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 	if (local->ops->hw_scan) {
 		__set_bit(SCAN_HW_SCANNING, &local->scanning);
 	} else if ((req->n_channels == 1) &&
-		   (req->channels[0] == local->_oper_channel)) {
+		   (req->channels[0] == local->_oper_chandef.chan)) {
 		/*
 		 * If we are scanning only on the operating channel
 		 * then we do not need to stop normal activities
@@ -671,7 +667,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local,
 	ieee80211_offchannel_stop_vifs(local);
 
 	if (local->ops->flush) {
-		drv_flush(local, false);
+		ieee80211_flush_queues(local, NULL);
 		*next_delay = 0;
 	} else
 		*next_delay = HZ / 10;
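
The scan.c change drops the erp_info_len check because the element parser (see the util.c hunks below) now only publishes elems->erp_info when the element carries at least one byte. A standalone sketch of that contract, with illustrative struct and function names:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct parsed_elems {
	const uint8_t *erp_info; /* NULL unless a valid ERP element was seen */
	bool parse_error;
};

/* parser side: only publish the pointer when the length is sane */
static void parse_erp(struct parsed_elems *elems,
		      const uint8_t *data, size_t elen)
{
	if (elen >= 1)
		elems->erp_info = data;
	else
		elems->parse_error = true;
}

int main(void)
{
	struct parsed_elems elems = { 0 };
	uint8_t erp[1] = { 0x04 };

	parse_erp(&elems, erp, sizeof(erp));
	/* consumer side: a non-NULL pointer now implies at least one byte */
	if (elems.erp_info)
		printf("erp value: 0x%02x\n", elems.erp_info[0]);
	return 0;
}
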
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 238a0cca320e..11216bc13b27 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -342,6 +342,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	INIT_WORK(&sta->drv_unblock_wk, sta_unblock);
 	INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
 	mutex_init(&sta->ampdu_mlme.mtx);
+#ifdef CONFIG_MAC80211_MESH
+	if (ieee80211_vif_is_mesh(&sdata->vif) &&
+	    !sdata->u.mesh.user_mpm)
+		init_timer(&sta->plink_timer);
+#endif
 
 	memcpy(sta->sta.addr, addr, ETH_ALEN);
 	sta->local = local;
@@ -551,6 +556,15 @@ static inline void __bss_tim_clear(u8 *tim, u16 id)
 	tim[id / 8] &= ~(1 << (id % 8));
 }
 
+static inline bool __bss_tim_get(u8 *tim, u16 id)
+{
+	/*
+	 * This format has been mandated by the IEEE specifications,
+	 * so this line may not be changed to use the test_bit() format.
+	 */
+	return tim[id / 8] & (1 << (id % 8));
+}
+
 static unsigned long ieee80211_tids_for_ac(int ac)
 {
 	/* If we ever support TIDs > 7, this obviously needs to be adjusted */
@@ -631,6 +645,9 @@ void sta_info_recalc_tim(struct sta_info *sta)
  done:
 	spin_lock_bh(&local->tim_lock);
 
+	if (indicate_tim == __bss_tim_get(ps->tim, id))
+		goto out_unlock;
+
 	if (indicate_tim)
 		__bss_tim_set(ps->tim, id);
 	else
@@ -642,6 +659,7 @@ void sta_info_recalc_tim(struct sta_info *sta)
 		local->tim_in_locked_section = false;
 	}
 
+out_unlock:
 	spin_unlock_bh(&local->tim_lock);
 }
 
@@ -765,8 +783,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
 {
 	struct ieee80211_local *local;
 	struct ieee80211_sub_if_data *sdata;
-	int ret, i;
-	bool have_key = false;
+	int ret;
 
 	might_sleep();
 
@@ -793,19 +810,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
 
 	list_del_rcu(&sta->list);
 
-	mutex_lock(&local->key_mtx);
-	for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
-		__ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i]));
-		have_key = true;
-	}
-	if (sta->ptk) {
-		__ieee80211_key_free(key_mtx_dereference(local, sta->ptk));
-		have_key = true;
-	}
-	mutex_unlock(&local->key_mtx);
-
-	if (!have_key)
-		synchronize_net();
+	/* this always calls synchronize_net() */
+	ieee80211_free_sta_keys(local, sta);
 
 	sta->dead = true;
 
@@ -1391,30 +1397,16 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL(ieee80211_sta_block_awake);
 
-void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta)
+void ieee80211_sta_eosp(struct ieee80211_sta *pubsta)
 {
 	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
 	struct ieee80211_local *local = sta->local;
-	struct sk_buff *skb;
-	struct skb_eosp_msg_data *data;
 
 	trace_api_eosp(local, pubsta);
 
-	skb = alloc_skb(0, GFP_ATOMIC);
-	if (!skb) {
-		/* too bad ... but race is better than loss */
-		clear_sta_flag(sta, WLAN_STA_SP);
-		return;
-	}
-
-	data = (void *)skb->cb;
-	memcpy(data->sta, pubsta->addr, ETH_ALEN);
-	memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN);
-	skb->pkt_type = IEEE80211_EOSP_MSG;
-	skb_queue_tail(&local->skb_queue, skb);
-	tasklet_schedule(&local->tasklet);
+	clear_sta_flag(sta, WLAN_STA_SP);
 }
-EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe);
+EXPORT_SYMBOL(ieee80211_sta_eosp);
 
 void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
 				u8 tid, bool buffered)
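
The new __bss_tim_get() above completes the byte-array bitmap helpers for the TIM element, whose layout (bit id % 8 of byte id / 8) is fixed by the IEEE specification. A standalone sketch of the trio plus the early-exit test that sta_info_recalc_tim() now performs (array size and names illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* TIM partial virtual bitmap: bit (id % 8) of byte (id / 8), per IEEE */
static void tim_set(uint8_t *tim, uint16_t id)   { tim[id / 8] |= 1 << (id % 8); }
static void tim_clear(uint8_t *tim, uint16_t id) { tim[id / 8] &= ~(1 << (id % 8)); }
static bool tim_get(const uint8_t *tim, uint16_t id)
{
	return tim[id / 8] & (1 << (id % 8));
}

int main(void)
{
	uint8_t tim[251] = { 0 };  /* illustrative bitmap size */
	uint16_t aid = 42;
	bool indicate = true;

	/* the recalc path can bail out early when nothing would change */
	if (indicate != tim_get(tim, aid)) {
		if (indicate)
			tim_set(tim, aid);
		else
			tim_clear(tim, aid);
	}
	printf("bit %u is %d\n", aid, tim_get(tim, aid));
	return 0;
}
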
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 4947341a2a82..adc30045f99e 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -281,7 +281,6 @@ struct sta_ampdu_mlme {
  * @plink_state: peer link state
  * @plink_timeout: timeout of peer link
  * @plink_timer: peer link watch timer
- * @plink_timer_was_running: used by suspend/resume to restore timers
  * @t_offset: timing offset relative to this host
  * @t_offset_setpoint: reference timing offset of this sta to be used when
  *	calculating clockdrift
@@ -334,7 +333,8 @@ struct sta_info {
 	unsigned long driver_buffered_tids;
 
 	/* Updated from RX path only, no locking requirements */
-	unsigned long rx_packets, rx_bytes;
+	unsigned long rx_packets;
+	u64 rx_bytes;
 	unsigned long wep_weak_iv_count;
 	unsigned long last_rx;
 	long last_connected;
@@ -354,9 +354,9 @@ struct sta_info {
 	unsigned int fail_avg;
 
 	/* Updated from TX path only, no locking requirements */
-	unsigned long tx_packets;
-	unsigned long tx_bytes;
-	unsigned long tx_fragments;
+	u32 tx_fragments;
+	u64 tx_packets[IEEE80211_NUM_ACS];
+	u64 tx_bytes[IEEE80211_NUM_ACS];
 	struct ieee80211_tx_rate last_tx_rate;
 	int last_rx_rate_idx;
 	u32 last_rx_rate_flag;
@@ -379,7 +379,6 @@ struct sta_info {
 	__le16 reason;
 	u8 plink_retries;
 	bool ignore_plink_timer;
-	bool plink_timer_was_running;
 	enum nl80211_plink_state plink_state;
 	u32 plink_timeout;
 	struct timer_list plink_timer;
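
With tx_packets and tx_bytes split into per-access-category arrays in struct sta_info, cumulative totals have to be summed over the ACs wherever they are reported. A standalone sketch of that accounting, assuming the usual four 802.11 ACs (struct shape illustrative):

#include <stdint.h>
#include <stdio.h>

#define NUM_ACS 4  /* VO, VI, BE, BK */

struct sta_tx_stats {
	uint64_t tx_packets[NUM_ACS];
	uint64_t tx_bytes[NUM_ACS];
};

/* a cumulative total is now the sum over all access categories */
static uint64_t total_tx_packets(const struct sta_tx_stats *s)
{
	uint64_t sum = 0;
	for (int ac = 0; ac < NUM_ACS; ac++)
		sum += s->tx_packets[ac];
	return sum;
}

int main(void)
{
	struct sta_tx_stats s = { .tx_packets = { 1, 2, 3, 4 } };
	printf("%llu\n", (unsigned long long)total_tx_packets(&s)); /* 10 */
	return 0;
}
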
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3d7cd2a0582f..c215fafd7a2f 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -28,27 +28,27 @@
 #define VIF_PR_FMT	" vif:%s(%d%s)"
 #define VIF_PR_ARG	__get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
 
 #define CHANDEF_ENTRY	__field(u32, control_freq)			\
 			__field(u32, chan_width)			\
 			__field(u32, center_freq1)			\
 			__field(u32, center_freq2)
 #define CHANDEF_ASSIGN(c)						\
-			__entry->control_freq = (c)->chan->center_freq;	\
+			__entry->control_freq = (c)->chan ? (c)->chan->center_freq : 0;	\
 			__entry->chan_width = (c)->width;		\
 			__entry->center_freq1 = (c)->center_freq1;	\
 			__entry->center_freq2 = (c)->center_freq2;
 #define CHANDEF_PR_FMT	" control:%d MHz width:%d center: %d/%d MHz"
 #define CHANDEF_PR_ARG	__entry->control_freq, __entry->chan_width,	\
 			__entry->center_freq1, __entry->center_freq2
 
 #define CHANCTX_ENTRY	CHANDEF_ENTRY					\
 			__field(u8, rx_chains_static)			\
 			__field(u8, rx_chains_dynamic)
 #define CHANCTX_ASSIGN	CHANDEF_ASSIGN(&ctx->conf.def)			\
 			__entry->rx_chains_static = ctx->conf.rx_chains_static; \
 			__entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic
 #define CHANCTX_PR_FMT	CHANDEF_PR_FMT " chains:%d/%d"
 #define CHANCTX_PR_ARG	CHANDEF_PR_ARG,					\
 			__entry->rx_chains_static, __entry->rx_chains_dynamic
 
 
@@ -286,8 +286,7 @@ TRACE_EVENT(drv_config,
 		__field(u16, listen_interval)
 		__field(u8, long_frame_max_tx_count)
 		__field(u8, short_frame_max_tx_count)
-		__field(int, center_freq)
-		__field(int, channel_type)
+		CHANDEF_ENTRY
 		__field(int, smps)
 	),
 
@@ -303,15 +302,13 @@ TRACE_EVENT(drv_config,
 			local->hw.conf.long_frame_max_tx_count;
 		__entry->short_frame_max_tx_count =
 			local->hw.conf.short_frame_max_tx_count;
-		__entry->center_freq = local->hw.conf.channel ?
-					local->hw.conf.channel->center_freq : 0;
-		__entry->channel_type = local->hw.conf.channel_type;
+		CHANDEF_ASSIGN(&local->hw.conf.chandef)
 		__entry->smps = local->hw.conf.smps_mode;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT " ch:%#x freq:%d",
-		LOCAL_PR_ARG, __entry->changed, __entry->center_freq
+		LOCAL_PR_FMT " ch:%#x" CHANDEF_PR_FMT,
+		LOCAL_PR_ARG, __entry->changed, CHANDEF_PR_ARG
 	)
 );
 
@@ -359,8 +356,7 @@ TRACE_EVENT(drv_bss_info_changed,
 		__dynamic_array(u8, ssid, info->ssid_len);
 		__field(bool, hidden_ssid);
 		__field(int, txpower)
-		__field(u8, p2p_ctwindow)
-		__field(bool, p2p_oppps)
+		__field(u8, p2p_oppps_ctwindow)
 	),
 
 	TP_fast_assign(
@@ -400,8 +396,7 @@ TRACE_EVENT(drv_bss_info_changed,
 		memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len);
 		__entry->hidden_ssid = info->hidden_ssid;
 		__entry->txpower = info->txpower;
-		__entry->p2p_ctwindow = info->p2p_ctwindow;
-		__entry->p2p_oppps = info->p2p_oppps;
+		__entry->p2p_oppps_ctwindow = info->p2p_noa_attr.oppps_ctwindow;
 	),
 
 	TP_printk(
@@ -431,6 +426,30 @@ TRACE_EVENT(drv_prepare_multicast,
 	)
 );
 
+TRACE_EVENT(drv_set_multicast_list,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata, int mc_count),
+
+	TP_ARGS(local, sdata, mc_count),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(bool, allmulti)
+		__field(int, mc_count)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI;
+		__entry->mc_count = mc_count;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d",
+		LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti
+	)
+);
+
 TRACE_EVENT(drv_configure_filter,
 	TP_PROTO(struct ieee80211_local *local,
 		 unsigned int changed_flags,
@@ -940,23 +959,26 @@ TRACE_EVENT(drv_get_survey,
 );
 
 TRACE_EVENT(drv_flush,
-	TP_PROTO(struct ieee80211_local *local, bool drop),
+	TP_PROTO(struct ieee80211_local *local,
+		 u32 queues, bool drop),
 
-	TP_ARGS(local, drop),
+	TP_ARGS(local, queues, drop),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
 		__field(bool, drop)
+		__field(u32, queues)
 	),
 
 	TP_fast_assign(
 		LOCAL_ASSIGN;
 		__entry->drop = drop;
+		__entry->queues = queues;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT " drop:%d",
-		LOCAL_PR_ARG, __entry->drop
+		LOCAL_PR_FMT " queues:0x%x drop:%d",
+		LOCAL_PR_ARG, __entry->queues, __entry->drop
 	)
 );
 
@@ -968,23 +990,23 @@ TRACE_EVENT(drv_channel_switch,
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
+		CHANDEF_ENTRY
 		__field(u64, timestamp)
 		__field(bool, block_tx)
-		__field(u16, freq)
 		__field(u8, count)
 	),
 
 	TP_fast_assign(
 		LOCAL_ASSIGN;
+		CHANDEF_ASSIGN(&ch_switch->chandef)
 		__entry->timestamp = ch_switch->timestamp;
 		__entry->block_tx = ch_switch->block_tx;
-		__entry->freq = ch_switch->channel->center_freq;
 		__entry->count = ch_switch->count;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT " new freq:%u count:%d",
-		LOCAL_PR_ARG, __entry->freq, __entry->count
+		LOCAL_PR_FMT " new " CHANDEF_PR_FMT " count:%d",
+		LOCAL_PR_ARG, CHANDEF_PR_ARG, __entry->count
 	)
 );
 
@@ -1042,15 +1064,17 @@ TRACE_EVENT(drv_remain_on_channel,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
 		 struct ieee80211_channel *chan,
-		 unsigned int duration),
+		 unsigned int duration,
+		 enum ieee80211_roc_type type),
 
-	TP_ARGS(local, sdata, chan, duration),
+	TP_ARGS(local, sdata, chan, duration, type),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
 		VIF_ENTRY
 		__field(int, center_freq)
 		__field(unsigned int, duration)
+		__field(u32, type)
 	),
 
 	TP_fast_assign(
@@ -1058,12 +1082,13 @@ TRACE_EVENT(drv_remain_on_channel,
 		VIF_ASSIGN;
 		__entry->center_freq = chan->center_freq;
 		__entry->duration = duration;
+		__entry->type = type;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms",
+		LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms type=%d",
 		LOCAL_PR_ARG, VIF_PR_ARG,
-		__entry->center_freq, __entry->duration
+		__entry->center_freq, __entry->duration, __entry->type
 	)
 );
 
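
The only behavioural change in the CHANDEF trace macros is the NULL guard on (c)->chan, so tracing no longer dereferences a chandef that momentarily has no control channel. The pattern, reduced to a standalone sketch (types illustrative):

#include <stdint.h>
#include <stdio.h>

struct channel { uint32_t center_freq; };
struct chandef { const struct channel *chan; uint32_t width; };

/* record 0 rather than dereferencing a missing control channel */
static uint32_t control_freq(const struct chandef *c)
{
	return c->chan ? c->chan->center_freq : 0;
}

int main(void)
{
	struct channel ch = { 2412 };
	struct chandef with = { &ch, 20 }, without = { NULL, 20 };
	printf("%u %u\n", control_freq(&with), control_freq(&without));
	return 0;
}
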
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8914d2d2881a..9972e07a2f96 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -48,15 +48,15 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
 	/* assume HW handles this */
-	if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS)
+	if (tx->rate.flags & IEEE80211_TX_RC_MCS)
 		return 0;
 
 	/* uh huh? */
-	if (WARN_ON_ONCE(info->control.rates[0].idx < 0))
+	if (WARN_ON_ONCE(tx->rate.idx < 0))
 		return 0;
 
 	sband = local->hw.wiphy->bands[info->band];
-	txrate = &sband->bitrates[info->control.rates[0].idx];
+	txrate = &sband->bitrates[tx->rate.idx];
 
 	erp = txrate->flags & IEEE80211_RATE_ERP_G;
 
@@ -233,6 +233,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)
 
 	if (local->hw.conf.flags & IEEE80211_CONF_PS) {
 		ieee80211_stop_queues_by_reason(&local->hw,
+						IEEE80211_MAX_QUEUE_MAP,
 						IEEE80211_QUEUE_STOP_REASON_PS);
 		ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
 		ieee80211_queue_work(&local->hw,
@@ -616,11 +617,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	struct ieee80211_hdr *hdr = (void *)tx->skb->data;
 	struct ieee80211_supported_band *sband;
-	struct ieee80211_rate *rate;
-	int i;
 	u32 len;
-	bool inval = false, rts = false, short_preamble = false;
 	struct ieee80211_tx_rate_control txrc;
+	struct ieee80211_sta_rates *ratetbl = NULL;
 	bool assoc = false;
 
 	memset(&txrc, 0, sizeof(txrc));
@@ -641,18 +640,23 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 		txrc.max_rate_idx = -1;
 	else
 		txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
-	memcpy(txrc.rate_idx_mcs_mask,
-	       tx->sdata->rc_rateidx_mcs_mask[info->band],
-	       sizeof(txrc.rate_idx_mcs_mask));
+
+	if (tx->sdata->rc_has_mcs_mask[info->band])
+		txrc.rate_idx_mcs_mask =
+			tx->sdata->rc_rateidx_mcs_mask[info->band];
+
 	txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
 		    tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
 		    tx->sdata->vif.type == NL80211_IFTYPE_ADHOC);
 
 	/* set up RTS protection if desired */
 	if (len > tx->local->hw.wiphy->rts_threshold) {
-		txrc.rts = rts = true;
+		txrc.rts = true;
 	}
 
+	info->control.use_rts = txrc.rts;
+	info->control.use_cts_prot = tx->sdata->vif.bss_conf.use_cts_prot;
+
 	/*
 	 * Use short preamble if the BSS can handle it, but not for
 	 * management frames unless we know the receiver can handle
@@ -662,7 +666,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	if (tx->sdata->vif.bss_conf.use_short_preamble &&
 	    (ieee80211_is_data(hdr->frame_control) ||
 	     (tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE))))
-		txrc.short_preamble = short_preamble = true;
+		txrc.short_preamble = true;
+
+	info->control.short_preamble = txrc.short_preamble;
 
 	if (tx->sta)
 		assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC);
@@ -686,16 +692,38 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	 */
 	rate_control_get_rate(tx->sdata, tx->sta, &txrc);
 
-	if (unlikely(info->control.rates[0].idx < 0))
-		return TX_DROP;
+	if (tx->sta && !info->control.skip_table)
+		ratetbl = rcu_dereference(tx->sta->sta.rates);
+
+	if (unlikely(info->control.rates[0].idx < 0)) {
+		if (ratetbl) {
+			struct ieee80211_tx_rate rate = {
+				.idx = ratetbl->rate[0].idx,
+				.flags = ratetbl->rate[0].flags,
+				.count = ratetbl->rate[0].count
+			};
+
+			if (ratetbl->rate[0].idx < 0)
+				return TX_DROP;
+
+			tx->rate = rate;
+		} else {
+			return TX_DROP;
+		}
+	} else {
+		tx->rate = info->control.rates[0];
+	}
 
 	if (txrc.reported_rate.idx < 0) {
-		txrc.reported_rate = info->control.rates[0];
+		txrc.reported_rate = tx->rate;
 		if (tx->sta && ieee80211_is_data(hdr->frame_control))
 			tx->sta->last_tx_rate = txrc.reported_rate;
 	} else if (tx->sta)
 		tx->sta->last_tx_rate = txrc.reported_rate;
 
+	if (ratetbl)
+		return TX_CONTINUE;
+
 	if (unlikely(!info->control.rates[0].count))
 		info->control.rates[0].count = 1;
 
@@ -703,91 +731,6 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 		     (info->flags & IEEE80211_TX_CTL_NO_ACK)))
 		info->control.rates[0].count = 1;
 
-	if (is_multicast_ether_addr(hdr->addr1)) {
-		/*
-		 * XXX: verify the rate is in the basic rateset
-		 */
-		return TX_CONTINUE;
-	}
-
-	/*
-	 * set up the RTS/CTS rate as the fastest basic rate
-	 * that is not faster than the data rate
-	 *
-	 * XXX: Should this check all retry rates?
-	 */
-	if (!(info->control.rates[0].flags & IEEE80211_TX_RC_MCS)) {
-		s8 baserate = 0;
-
-		rate = &sband->bitrates[info->control.rates[0].idx];
-
-		for (i = 0; i < sband->n_bitrates; i++) {
-			/* must be a basic rate */
-			if (!(tx->sdata->vif.bss_conf.basic_rates & BIT(i)))
-				continue;
-			/* must not be faster than the data rate */
-			if (sband->bitrates[i].bitrate > rate->bitrate)
-				continue;
-			/* maximum */
-			if (sband->bitrates[baserate].bitrate <
-			    sband->bitrates[i].bitrate)
-				baserate = i;
-		}
-
-		info->control.rts_cts_rate_idx = baserate;
-	}
-
-	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
-		/*
-		 * make sure there's no valid rate following
-		 * an invalid one, just in case drivers don't
-		 * take the API seriously to stop at -1.
-		 */
-		if (inval) {
-			info->control.rates[i].idx = -1;
-			continue;
-		}
-		if (info->control.rates[i].idx < 0) {
-			inval = true;
-			continue;
-		}
-
-		/*
-		 * For now assume MCS is already set up correctly, this
-		 * needs to be fixed.
-		 */
-		if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) {
-			WARN_ON(info->control.rates[i].idx > 76);
-			continue;
-		}
-
-		/* set up RTS protection if desired */
-		if (rts)
-			info->control.rates[i].flags |=
-				IEEE80211_TX_RC_USE_RTS_CTS;
-
-		/* RC is busted */
-		if (WARN_ON_ONCE(info->control.rates[i].idx >=
-				 sband->n_bitrates)) {
-			info->control.rates[i].idx = -1;
-			continue;
-		}
-
-		rate = &sband->bitrates[info->control.rates[i].idx];
-
-		/* set up short preamble */
-		if (short_preamble &&
-		    rate->flags & IEEE80211_RATE_SHORT_PREAMBLE)
-			info->control.rates[i].flags |=
-				IEEE80211_TX_RC_USE_SHORT_PREAMBLE;
-
-		/* set up G protection */
-		if (!rts && tx->sdata->vif.bss_conf.use_cts_prot &&
-		    rate->flags & IEEE80211_RATE_ERP_G)
-			info->control.rates[i].flags |=
-				IEEE80211_TX_RC_USE_CTS_PROTECT;
-	}
-
 	return TX_CONTINUE;
 }
 
@@ -991,15 +934,18 @@ static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 {
 	struct sk_buff *skb;
+	int ac = -1;
 
 	if (!tx->sta)
 		return TX_CONTINUE;
 
-	tx->sta->tx_packets++;
 	skb_queue_walk(&tx->skbs, skb) {
+		ac = skb_get_queue_mapping(skb);
 		tx->sta->tx_fragments++;
-		tx->sta->tx_bytes += skb->len;
+		tx->sta->tx_bytes[ac] += skb->len;
 	}
+	if (ac >= 0)
+		tx->sta->tx_packets[ac]++;
 
 	return TX_CONTINUE;
 }
@@ -1705,7 +1651,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
 	if (chanctx_conf)
 		chan = chanctx_conf->def.chan;
 	else if (!local->use_chanctx)
-		chan = local->_oper_channel;
+		chan = local->_oper_chandef.chan;
 	else
 		goto fail_rcu;
 
@@ -1839,7 +1785,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 		 * This is the exception! WDS style interfaces are prohibited
 		 * when channel contexts are in used so this must be valid
 		 */
-		band = local->hw.conf.channel->band;
+		band = local->hw.conf.chandef.chan->band;
 		break;
 #ifdef CONFIG_MAC80211_MESH
 	case NL80211_IFTYPE_MESH_POINT:
@@ -2085,7 +2031,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 		encaps_data = bridge_tunnel_header;
 		encaps_len = sizeof(bridge_tunnel_header);
 		skip_header_bytes -= 2;
-	} else if (ethertype >= 0x600) {
+	} else if (ethertype >= ETH_P_802_3_MIN) {
 		encaps_data = rfc1042_header;
 		encaps_len = sizeof(rfc1042_header);
 		skip_header_bytes -= 2;
@@ -2438,14 +2384,17 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 	} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
 		struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
 		struct ieee80211_hdr *hdr;
-		struct sk_buff *presp = rcu_dereference(ifibss->presp);
+		struct beacon_data *presp = rcu_dereference(ifibss->presp);
 
 		if (!presp)
 			goto out;
 
-		skb = skb_copy(presp, GFP_ATOMIC);
+		skb = dev_alloc_skb(local->tx_headroom + presp->head_len);
 		if (!skb)
 			goto out;
+		skb_reserve(skb, local->tx_headroom);
+		memcpy(skb_put(skb, presp->head_len), presp->head,
+		       presp->head_len);
 
 		hdr = (struct ieee80211_hdr *) skb->data;
 		hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
@@ -2495,8 +2444,6 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 		txrc.max_rate_idx = -1;
 	else
 		txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1;
-	memcpy(txrc.rate_idx_mcs_mask, sdata->rc_rateidx_mcs_mask[band],
-	       sizeof(txrc.rate_idx_mcs_mask));
 	txrc.bss = true;
 	rate_control_get_rate(sdata, NULL, &txrc);
 
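
The reworked ieee80211_tx_h_rate_ctrl() above falls back to the first entry of the station's rate table when rate control leaves the per-packet rate array unset, and drops the frame only when neither source yields a valid index. That selection logic, reduced to a standalone sketch (struct shapes illustrative):

#include <stdio.h>

struct tx_rate { int idx; };           /* idx < 0 means "no rate" */
struct rate_table { struct tx_rate rate[4]; };

/* returns 0 on success, -1 when the frame should be dropped */
static int pick_rate(const struct tx_rate *per_pkt,
		     const struct rate_table *tbl, struct tx_rate *out)
{
	if (per_pkt->idx >= 0) {
		*out = *per_pkt;          /* rate control filled it in */
		return 0;
	}
	if (tbl && tbl->rate[0].idx >= 0) {
		*out = tbl->rate[0];      /* fall back to the sta rate table */
		return 0;
	}
	return -1;                        /* no usable rate: TX_DROP */
}

int main(void)
{
	struct tx_rate none = { -1 }, chosen;
	struct rate_table tbl = { .rate = { { 7 } } };

	printf("%d idx=%d\n", pick_rate(&none, &tbl, &chosen), chosen.idx);
	return 0;
}
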
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0f38f43ac62e..3f87fa468b1f 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -453,7 +453,8 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,
 }
 
 void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
+				     unsigned long queues,
 				     enum queue_stop_reason reason)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 	unsigned long flags;
@@ -461,7 +462,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
 
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 
-	for (i = 0; i < hw->queues; i++)
+	for_each_set_bit(i, &queues, hw->queues)
 		__ieee80211_stop_queue(hw, i, reason);
 
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -469,7 +470,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
 
 void ieee80211_stop_queues(struct ieee80211_hw *hw)
 {
-	ieee80211_stop_queues_by_reason(hw,
+	ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_DRIVER);
 }
 EXPORT_SYMBOL(ieee80211_stop_queues);
@@ -484,13 +485,15 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue)
 		return true;
 
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
-	ret = !!local->queue_stop_reasons[queue];
+	ret = test_bit(IEEE80211_QUEUE_STOP_REASON_DRIVER,
+		       &local->queue_stop_reasons[queue]);
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(ieee80211_queue_stopped);
 
 void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
+				     unsigned long queues,
 				     enum queue_stop_reason reason)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
@@ -499,7 +502,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
 
 	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 
-	for (i = 0; i < hw->queues; i++)
+	for_each_set_bit(i, &queues, hw->queues)
 		__ieee80211_wake_queue(hw, i, reason);
 
 	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -507,10 +510,42 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
 
 void ieee80211_wake_queues(struct ieee80211_hw *hw)
 {
-	ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_DRIVER);
+	ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_DRIVER);
 }
 EXPORT_SYMBOL(ieee80211_wake_queues);
 
+void ieee80211_flush_queues(struct ieee80211_local *local,
+			    struct ieee80211_sub_if_data *sdata)
+{
+	u32 queues;
+
+	if (!local->ops->flush)
+		return;
+
+	if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) {
+		int ac;
+
+		queues = 0;
+
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+			queues |= BIT(sdata->vif.hw_queue[ac]);
+		if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE)
+			queues |= BIT(sdata->vif.cab_queue);
+	} else {
+		/* all queues */
+		queues = BIT(local->hw.queues) - 1;
+	}
+
+	ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_FLUSH);
+
+	drv_flush(local, queues, false);
+
+	ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
+					IEEE80211_QUEUE_STOP_REASON_FLUSH);
+}
+
 void ieee80211_iterate_active_interfaces(
 	struct ieee80211_hw *hw, u32 iter_flags,
 	void (*iterator)(void *data, u8 *mac,
@@ -626,7 +661,7 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL(ieee80211_queue_delayed_work);
 
-u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
+u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, bool action,
 			       struct ieee802_11_elems *elems,
 			       u64 filter, u32 crc)
 {
@@ -634,6 +669,7 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 	u8 *pos = start;
 	bool calc_crc = filter != 0;
 	DECLARE_BITMAP(seen_elems, 256);
+	const u8 *ie;
 
 	bitmap_zero(seen_elems, 256);
 	memset(elems, 0, sizeof(*elems));
@@ -681,6 +717,12 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 	case WLAN_EID_COUNTRY:
 	case WLAN_EID_PWR_CONSTRAINT:
 	case WLAN_EID_TIMEOUT_INTERVAL:
+	case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
+	case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
+	/*
+	 * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible
+	 * that if the content gets bigger it might be needed more than once
+	 */
 		if (test_bit(id, seen_elems)) {
 			elems->parse_error = true;
 			left -= elen;
@@ -704,17 +746,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 		elems->supp_rates = pos;
 		elems->supp_rates_len = elen;
 		break;
-	case WLAN_EID_FH_PARAMS:
-		elems->fh_params = pos;
-		elems->fh_params_len = elen;
-		break;
 	case WLAN_EID_DS_PARAMS:
-		elems->ds_params = pos;
-		elems->ds_params_len = elen;
-		break;
-	case WLAN_EID_CF_PARAMS:
-		elems->cf_params = pos;
-		elems->cf_params_len = elen;
+		if (elen >= 1)
+			elems->ds_params = pos;
+		else
+			elem_parse_failed = true;
 		break;
 	case WLAN_EID_TIM:
 		if (elen >= sizeof(struct ieee80211_tim_ie)) {
@@ -723,10 +759,6 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 		} else
 			elem_parse_failed = true;
 		break;
-	case WLAN_EID_IBSS_PARAMS:
-		elems->ibss_params = pos;
-		elems->ibss_params_len = elen;
-		break;
 	case WLAN_EID_CHALLENGE:
 		elems->challenge = pos;
 		elems->challenge_len = elen;
@@ -756,8 +788,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 		elems->rsn_len = elen;
 		break;
 	case WLAN_EID_ERP_INFO:
-		elems->erp_info = pos;
-		elems->erp_info_len = elen;
+		if (elen >= 1)
+			elems->erp_info = pos;
+		else
+			elem_parse_failed = true;
 		break;
 	case WLAN_EID_EXT_SUPP_RATES:
 		elems->ext_supp_rates = pos;
@@ -836,12 +870,47 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 		}
 		elems->ch_switch_ie = (void *)pos;
 		break;
-	case WLAN_EID_QUIET:
-		if (!elems->quiet_elem) {
-			elems->quiet_elem = pos;
-			elems->quiet_elem_len = elen;
+	case WLAN_EID_EXT_CHANSWITCH_ANN:
+		if (elen != sizeof(struct ieee80211_ext_chansw_ie)) {
+			elem_parse_failed = true;
+			break;
+		}
+		elems->ext_chansw_ie = (void *)pos;
+		break;
+	case WLAN_EID_SECONDARY_CHANNEL_OFFSET:
+		if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) {
+			elem_parse_failed = true;
+			break;
+		}
+		elems->sec_chan_offs = (void *)pos;
+		break;
+	case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
+		if (!action ||
+		    elen != sizeof(*elems->wide_bw_chansw_ie)) {
+			elem_parse_failed = true;
+			break;
+		}
+		elems->wide_bw_chansw_ie = (void *)pos;
+		break;
+	case WLAN_EID_CHANNEL_SWITCH_WRAPPER:
+		if (action) {
+			elem_parse_failed = true;
+			break;
+		}
+		/*
+		 * This is a bit tricky, but as we only care about
+		 * the wide bandwidth channel switch element, so
+		 * just parse it out manually.
+		 */
+		ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
+				      pos, elen);
+		if (ie) {
+			if (ie[1] == sizeof(*elems->wide_bw_chansw_ie))
+				elems->wide_bw_chansw_ie =
+						(void *)(ie + 2);
+			else
+				elem_parse_failed = true;
 		}
-		elems->num_of_quiet_elem++;
 		break;
 	case WLAN_EID_COUNTRY:
 		elems->country_elem = pos;
@@ -855,8 +924,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 		elems->pwr_constr_elem = pos;
 		break;
 	case WLAN_EID_TIMEOUT_INTERVAL:
-		elems->timeout_int = pos;
-		elems->timeout_int_len = elen;
+		if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
+			elems->timeout_int = (void *)pos;
+		else
+			elem_parse_failed = true;
 		break;
 	default:
 		break;
@@ -877,12 +948,6 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,
 	return crc;
 }
 
-void ieee802_11_parse_elems(u8 *start, size_t len,
-			    struct ieee802_11_elems *elems)
-{
-	ieee802_11_parse_elems_crc(start, len, elems, 0, 0);
-}
-
 void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,
 			       bool bss_notify)
 {
@@ -1357,6 +1422,25 @@ void ieee80211_stop_device(struct ieee80211_local *local)
 	drv_stop(local);
 }
 
+static void ieee80211_assign_chanctx(struct ieee80211_local *local,
+				     struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_chanctx_conf *conf;
+	struct ieee80211_chanctx *ctx;
+
+	if (!local->use_chanctx)
+		return;
+
+	mutex_lock(&local->chanctx_mtx);
+	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+					 lockdep_is_held(&local->chanctx_mtx));
+	if (conf) {
+		ctx = container_of(conf, struct ieee80211_chanctx, conf);
+		drv_assign_vif_chanctx(local, sdata, ctx);
+	}
+	mutex_unlock(&local->chanctx_mtx);
+}
+
 int ieee80211_reconfig(struct ieee80211_local *local)
 {
 	struct ieee80211_hw *hw = &local->hw;
@@ -1421,6 +1505,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	/* add interfaces */
 	sdata = rtnl_dereference(local->monitor_sdata);
 	if (sdata) {
+		/* in HW restart it exists already */
+		WARN_ON(local->resuming);
 		res = drv_add_interface(local, sdata);
 		if (WARN_ON(res)) {
 			rcu_assign_pointer(local->monitor_sdata, NULL);
@@ -1445,36 +1531,14 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	}
 
 	list_for_each_entry(sdata, &local->interfaces, list) {
-		struct ieee80211_chanctx_conf *ctx_conf;
-
 		if (!ieee80211_sdata_running(sdata))
 			continue;
-
-		mutex_lock(&local->chanctx_mtx);
-		ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
-						     lockdep_is_held(&local->chanctx_mtx));
-		if (ctx_conf) {
-			ctx = container_of(ctx_conf, struct ieee80211_chanctx,
-					   conf);
-			drv_assign_vif_chanctx(local, sdata, ctx);
-		}
-		mutex_unlock(&local->chanctx_mtx);
+		ieee80211_assign_chanctx(local, sdata);
 	}
 
 	sdata = rtnl_dereference(local->monitor_sdata);
-	if (sdata && local->use_chanctx && ieee80211_sdata_running(sdata)) {
-		struct ieee80211_chanctx_conf *ctx_conf;
-
-		mutex_lock(&local->chanctx_mtx);
-		ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
-						     lockdep_is_held(&local->chanctx_mtx));
-		if (ctx_conf) {
-			ctx = container_of(ctx_conf, struct ieee80211_chanctx,
-					   conf);
-			drv_assign_vif_chanctx(local, sdata, ctx);
-		}
-		mutex_unlock(&local->chanctx_mtx);
-	}
+	if (sdata && ieee80211_sdata_running(sdata))
+		ieee80211_assign_chanctx(local, sdata);
 
 	/* add STAs back */
 	mutex_lock(&local->sta_mtx);
@@ -1534,11 +1598,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 			  BSS_CHANGED_IDLE |
 			  BSS_CHANGED_TXPOWER;
 
-#ifdef CONFIG_PM
-		if (local->resuming && !reconfig_due_to_wowlan)
-			sdata->vif.bss_conf = sdata->suspend_bss_conf;
-#endif
-
 		switch (sdata->vif.type) {
 		case NL80211_IFTYPE_STATION:
 			changed |= BSS_CHANGED_ASSOC |
@@ -1637,6 +1696,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	local->in_reconfig = false;
 	barrier();
 
+	if (local->monitors == local->open_count && local->monitors > 0)
+		ieee80211_add_virtual_monitor(local);
+
 	/*
 	 * Clear the WLAN_STA_BLOCK_BA flag so new aggregation
 	 * sessions can be established after a resume.
@@ -1659,8 +1721,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 		mutex_unlock(&local->sta_mtx);
 	}
 
-	ieee80211_wake_queues_by_reason(hw,
+	ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
 					IEEE80211_QUEUE_STOP_REASON_SUSPEND);
 
 	/*
 	 * If this is for hw restart things are still running.
@@ -1678,28 +1740,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 	mb();
 	local->resuming = false;
 
-	list_for_each_entry(sdata, &local->interfaces, list) {
-		switch(sdata->vif.type) {
-		case NL80211_IFTYPE_STATION:
-			ieee80211_sta_restart(sdata);
-			break;
-		case NL80211_IFTYPE_ADHOC:
-			ieee80211_ibss_restart(sdata);
-			break;
-		case NL80211_IFTYPE_MESH_POINT:
-			ieee80211_mesh_restart(sdata);
-			break;
-		default:
-			break;
-		}
-	}
-
 	mod_timer(&local->sta_cleanup, jiffies + 1);
-
-	mutex_lock(&local->sta_mtx);
-	list_for_each_entry(sta, &local->sta_list, list)
-		mesh_plink_restart(sta);
-	mutex_unlock(&local->sta_mtx);
 #else
 	WARN_ON(1);
 #endif
@@ -2051,7 +2092,7 @@ int ieee80211_ave_rssi(struct ieee80211_vif *vif)
 		/* non-managed type inferfaces */
 		return 0;
 	}
-	return ifmgd->ave_beacon_signal;
+	return ifmgd->ave_beacon_signal / 16;
 }
 EXPORT_SYMBOL_GPL(ieee80211_ave_rssi);
 
@@ -2166,8 +2207,7 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work)
 		/* currently not handled */
 		WARN_ON(1);
 	else {
-		cfg80211_chandef_create(&chandef, local->hw.conf.channel,
-					local->hw.conf.channel_type);
+		chandef = local->hw.conf.chandef;
 		cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL);
 	}
 }
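
The new ieee80211_flush_queues() above chains the bitmap-based primitives: build a mask of the vif's hardware queues (or of all queues), stop them, have the driver flush, then wake them again. A standalone sketch of the mask construction (constants and parameter names illustrative):

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1u << (n))
#define NUM_ACS 4

/* all queues when no vif is selected: BIT(n_queues) - 1 */
static uint32_t all_queues(unsigned int n_queues)
{
	return BIT(n_queues) - 1;
}

static uint32_t vif_queues(const int hw_queue[NUM_ACS], int cab_queue)
{
	uint32_t queues = 0;

	for (int ac = 0; ac < NUM_ACS; ac++)
		queues |= BIT(hw_queue[ac]);
	if (cab_queue >= 0)               /* content-after-beacon queue */
		queues |= BIT(cab_queue);
	return queues;
}

int main(void)
{
	int hwq[NUM_ACS] = { 0, 1, 2, 3 };

	printf("0x%x 0x%x\n", all_queues(4), vif_queues(hwq, 4)); /* 0xf 0x1f */
	return 0;
}
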
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index a2c2258bc84e..171344d4eb7c 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -13,6 +13,104 @@
 #include "rate.h"
 
 
+static void __check_vhtcap_disable(struct ieee80211_sub_if_data *sdata,
+				   struct ieee80211_sta_vht_cap *vht_cap,
+				   u32 flag)
+{
+	__le32 le_flag = cpu_to_le32(flag);
+
+	if (sdata->u.mgd.vht_capa_mask.vht_cap_info & le_flag &&
+	    !(sdata->u.mgd.vht_capa.vht_cap_info & le_flag))
+		vht_cap->cap &= ~flag;
+}
+
+void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
+				      struct ieee80211_sta_vht_cap *vht_cap)
+{
+	int i;
+	u16 rxmcs_mask, rxmcs_cap, rxmcs_n, txmcs_mask, txmcs_cap, txmcs_n;
+
+	if (!vht_cap->vht_supported)
+		return;
+
+	if (sdata->vif.type != NL80211_IFTYPE_STATION)
+		return;
+
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_RXLDPC);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_SHORT_GI_80);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_SHORT_GI_160);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_TXSTBC);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN);
+	__check_vhtcap_disable(sdata, vht_cap,
+			       IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN);
+
+	/* Allow user to decrease AMPDU length exponent */
+	if (sdata->u.mgd.vht_capa_mask.vht_cap_info &
+	    cpu_to_le32(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK)) {
+		u32 cap, n;
+
+		n = le32_to_cpu(sdata->u.mgd.vht_capa.vht_cap_info) &
+			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+		n >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT;
+		cap = vht_cap->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+		cap >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT;
+
+		if (n < cap) {
+			vht_cap->cap &=
+				~IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+			vht_cap->cap |=
+				n << IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT;
+		}
+	}
+
+	/* Allow the user to decrease MCSes */
+	rxmcs_mask =
+		le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.rx_mcs_map);
+	rxmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.rx_mcs_map);
+	rxmcs_n &= rxmcs_mask;
+	rxmcs_cap = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map);
+
+	txmcs_mask =
+		le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.tx_mcs_map);
+	txmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.tx_mcs_map);
+	txmcs_n &= txmcs_mask;
+	txmcs_cap = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map);
+	for (i = 0; i < 8; i++) {
+		u8 m, n, c;
+
+		m = (rxmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+		n = (rxmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+		c = (rxmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+		if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) ||
+			  n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) {
+			rxmcs_cap &= ~(3 << 2*i);
+			rxmcs_cap |= (rxmcs_n & (3 << 2*i));
+		}
+
+		m = (txmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+		n = (txmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+		c = (txmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+
+		if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) ||
+			  n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) {
+			txmcs_cap &= ~(3 << 2*i);
+			txmcs_cap |= (txmcs_n & (3 << 2*i));
+		}
+	}
+	vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(rxmcs_cap);
+	vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(txmcs_cap);
+}
+
 void
 ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
 				    struct ieee80211_supported_band *sband,
@@ -20,6 +118,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
20 struct sta_info *sta) 118 struct sta_info *sta)
21{ 119{
22 struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; 120 struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap;
121 struct ieee80211_sta_vht_cap own_cap;
122 u32 cap_info, i;
23 123
24 memset(vht_cap, 0, sizeof(*vht_cap)); 124 memset(vht_cap, 0, sizeof(*vht_cap));
25 125
@@ -35,12 +135,122 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
35 135
36 vht_cap->vht_supported = true; 136 vht_cap->vht_supported = true;
37 137
38 vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info); 138 own_cap = sband->vht_cap;
139 /*
140 * If user has specified capability overrides, take care
141 * of that if the station we're setting up is the AP that
142 * we advertised a restricted capability set to. Override
143 * our own capabilities and then use those below.
144 */
145 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
146 !test_sta_flag(sta, WLAN_STA_TDLS_PEER))
147 ieee80211_apply_vhtcap_overrides(sdata, &own_cap);
148
149 /* take some capabilities as-is */
150 cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info);
151 vht_cap->cap = cap_info;
152 vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 |
153 IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
154 IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
155 IEEE80211_VHT_CAP_RXLDPC |
156 IEEE80211_VHT_CAP_VHT_TXOP_PS |
157 IEEE80211_VHT_CAP_HTC_VHT |
158 IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK |
159 IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB |
160 IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB |
161 IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN |
162 IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN;
163
164 /* and some based on our own capabilities */
165 switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
166 case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
167 vht_cap->cap |= cap_info &
168 IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ;
169 break;
170 case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
171 vht_cap->cap |= cap_info &
172 IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
173 break;
174 default:
175 /* nothing */
176 break;
177 }
178
179 /* symmetric capabilities */
180 vht_cap->cap |= cap_info & own_cap.cap &
181 (IEEE80211_VHT_CAP_SHORT_GI_80 |
182 IEEE80211_VHT_CAP_SHORT_GI_160);
183
184 /* remaining ones */
185 if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) {
186 vht_cap->cap |= cap_info &
187 (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
188 IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX |
189 IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX);
190 }
191
192 if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)
193 vht_cap->cap |= cap_info &
194 IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
195
196 if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)
197 vht_cap->cap |= cap_info &
198 IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
199
200 if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE)
201 vht_cap->cap |= cap_info &
202 IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE;
203
204 if (own_cap.cap & IEEE80211_VHT_CAP_TXSTBC)
205 vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_RXSTBC_MASK;
206
207 if (own_cap.cap & IEEE80211_VHT_CAP_RXSTBC_MASK)
208 vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_TXSTBC;
39 209
40 /* Copy peer MCS info, the driver might need them. */ 210 /* Copy peer MCS info, the driver might need them. */
41 memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, 211 memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs,
42 sizeof(struct ieee80211_vht_mcs_info)); 212 sizeof(struct ieee80211_vht_mcs_info));
43 213
214 /* but also restrict MCSes */
215 for (i = 0; i < 8; i++) {
216 u16 own_rx, own_tx, peer_rx, peer_tx;
217
218 own_rx = le16_to_cpu(own_cap.vht_mcs.rx_mcs_map);
219 own_rx = (own_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
220
221 own_tx = le16_to_cpu(own_cap.vht_mcs.tx_mcs_map);
222 own_tx = (own_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
223
224 peer_rx = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map);
225 peer_rx = (peer_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
226
227 peer_tx = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map);
228 peer_tx = (peer_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
229
230 if (peer_tx != IEEE80211_VHT_MCS_NOT_SUPPORTED) {
231 if (own_rx == IEEE80211_VHT_MCS_NOT_SUPPORTED)
232 peer_tx = IEEE80211_VHT_MCS_NOT_SUPPORTED;
233 else if (own_rx < peer_tx)
234 peer_tx = own_rx;
235 }
236
237 if (peer_rx != IEEE80211_VHT_MCS_NOT_SUPPORTED) {
238 if (own_tx == IEEE80211_VHT_MCS_NOT_SUPPORTED)
239 peer_rx = IEEE80211_VHT_MCS_NOT_SUPPORTED;
240 else if (own_tx < peer_rx)
241 peer_rx = own_tx;
242 }
243
244 vht_cap->vht_mcs.rx_mcs_map &=
245 ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2);
246 vht_cap->vht_mcs.rx_mcs_map |= cpu_to_le16(peer_rx << i * 2);
247
248 vht_cap->vht_mcs.tx_mcs_map &=
249 ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2);
250 vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2);
251 }
252
253 /* finally set up the bandwidth */
44 switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { 254 switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
45 case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: 255 case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
46 case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: 256 case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ:
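The new MCS-restriction loops above all work on the same encoding: a 16-bit map carries one 2-bit field per spatial stream, and the value 3 (IEEE80211_VHT_MCS_NOT_SUPPORTED) disables a stream. A standalone sketch of the intersection those loops perform, e.g. when clamping a peer's TX map against our own RX support:

#include <stdint.h>
#include <stdio.h>

#define VHT_MCS_NOT_SUPPORTED 3

static uint16_t mcs_map_intersect(uint16_t own, uint16_t peer)
{
        uint16_t out = 0;

        for (int i = 0; i < 8; i++) {
                uint16_t o = (own  >> i * 2) & 3;
                uint16_t p = (peer >> i * 2) & 3;
                uint16_t r;

                if (o == VHT_MCS_NOT_SUPPORTED ||
                    p == VHT_MCS_NOT_SUPPORTED)
                        r = VHT_MCS_NOT_SUPPORTED;
                else
                        r = o < p ? o : p;      /* lower MCS set wins */
                out |= r << i * 2;
        }
        return out;
}

int main(void)
{
        /* 0xfffa: streams 1-2 usable (value 2), rest unsupported;
         * 0xfffe: only stream 1 usable -> intersection drops stream 2 */
        printf("0x%04x\n", mcs_map_intersect(0xfffa, 0xfffe));
        return 0;
}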
diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h
index a4dcaf1dd4b6..d48422e27110 100644
--- a/net/mac802154/mac802154.h
+++ b/net/mac802154/mac802154.h
@@ -88,9 +88,7 @@ struct mac802154_sub_if_data {
88 88
89#define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw) 89#define mac802154_to_priv(_hw) container_of(_hw, struct mac802154_priv, hw)
90 90
91#define MAC802154_MAX_XMIT_ATTEMPTS 3 91#define MAC802154_CHAN_NONE 0xff /* No channel is assigned */
92
93#define MAC802154_CHAN_NONE (~(u8)0) /* No channel is assigned */
94 92
95extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced; 93extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced;
96extern struct ieee802154_mlme_ops mac802154_mlme_wpan; 94extern struct ieee802154_mlme_ops mac802154_mlme_wpan;
@@ -114,5 +112,6 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev);
114u16 mac802154_dev_get_pan_id(const struct net_device *dev); 112u16 mac802154_dev_get_pan_id(const struct net_device *dev);
115void mac802154_dev_set_pan_id(struct net_device *dev, u16 val); 113void mac802154_dev_set_pan_id(struct net_device *dev, u16 val);
116void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); 114void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan);
115u8 mac802154_dev_get_dsn(const struct net_device *dev);
117 116
118#endif /* MAC802154_H */ 117#endif /* MAC802154_H */
diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c
index d8d277006089..a99910d4d52f 100644
--- a/net/mac802154/mac_cmd.c
+++ b/net/mac802154/mac_cmd.c
@@ -73,4 +73,5 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = {
73 .start_req = mac802154_mlme_start_req, 73 .start_req = mac802154_mlme_start_req,
74 .get_pan_id = mac802154_dev_get_pan_id, 74 .get_pan_id = mac802154_dev_get_pan_id,
75 .get_short_addr = mac802154_dev_get_short_addr, 75 .get_short_addr = mac802154_dev_get_short_addr,
76 .get_dsn = mac802154_dev_get_dsn,
76}; 77};
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index f47781ab0ccc..8ded97cf1c33 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -159,6 +159,15 @@ void mac802154_dev_set_pan_id(struct net_device *dev, u16 val)
159 } 159 }
160} 160}
161 161
162u8 mac802154_dev_get_dsn(const struct net_device *dev)
163{
164 struct mac802154_sub_if_data *priv = netdev_priv(dev);
165
166 BUG_ON(dev->type != ARPHRD_IEEE802154);
167
168 return priv->dsn++;
169}
170
162static void phy_chan_notify(struct work_struct *work) 171static void phy_chan_notify(struct work_struct *work)
163{ 172{
164 struct phy_chan_notify_work *nw = container_of(work, 173 struct phy_chan_notify_work *nw = container_of(work,
@@ -167,9 +176,15 @@ static void phy_chan_notify(struct work_struct *work)
167 struct mac802154_sub_if_data *priv = netdev_priv(nw->dev); 176 struct mac802154_sub_if_data *priv = netdev_priv(nw->dev);
168 int res; 177 int res;
169 178
179 mutex_lock(&priv->hw->phy->pib_lock);
170 res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan); 180 res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan);
171 if (res) 181 if (res)
172 pr_debug("set_channel failed\n"); 182 pr_debug("set_channel failed\n");
183 else {
184 priv->hw->phy->current_channel = priv->chan;
185 priv->hw->phy->current_page = priv->page;
186 }
187 mutex_unlock(&priv->hw->phy->pib_lock);
173 188
174 kfree(nw); 189 kfree(nw);
175} 190}
@@ -186,8 +201,11 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
186 priv->chan = chan; 201 priv->chan = chan;
187 spin_unlock_bh(&priv->mib_lock); 202 spin_unlock_bh(&priv->mib_lock);
188 203
204 mutex_lock(&priv->hw->phy->pib_lock);
189 if (priv->hw->phy->current_channel != priv->chan || 205 if (priv->hw->phy->current_channel != priv->chan ||
190 priv->hw->phy->current_page != priv->page) { 206 priv->hw->phy->current_page != priv->page) {
207 mutex_unlock(&priv->hw->phy->pib_lock);
208
191 work = kzalloc(sizeof(*work), GFP_ATOMIC); 209 work = kzalloc(sizeof(*work), GFP_ATOMIC);
192 if (!work) 210 if (!work)
193 return; 211 return;
@@ -195,5 +213,6 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)
195 INIT_WORK(&work->work, phy_chan_notify); 213 INIT_WORK(&work->work, phy_chan_notify);
196 work->dev = dev; 214 work->dev = dev;
197 queue_work(priv->hw->dev_workqueue, &work->work); 215 queue_work(priv->hw->dev_workqueue, &work->work);
198 } 216 } else
217 mutex_unlock(&priv->hw->phy->pib_lock);
199} 218}
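mac802154_dev_get_dsn() above hands out the current data sequence number and post-increments it; because dsn is a u8, it wraps at 255 with no explicit masking. A tiny standalone illustration of the same arithmetic (the struct here is hypothetical):

#include <stdint.h>
#include <stdio.h>

struct frame_state {
        uint8_t dsn;            /* data sequence number */
};

static uint8_t get_dsn(struct frame_state *s)
{
        return s->dsn++;        /* returns old value, wraps mod 256 */
}

int main(void)
{
        struct frame_state s = { .dsn = 254 };

        for (int i = 0; i < 4; i++)
                printf("%u\n", get_dsn(&s));    /* prints 254 255 0 1 */
        return 0;
}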
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 4e09d070995a..6d1647399d4f 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -25,6 +25,7 @@
25#include <linux/if_arp.h> 25#include <linux/if_arp.h>
26#include <linux/crc-ccitt.h> 26#include <linux/crc-ccitt.h>
27 27
28#include <net/ieee802154_netdev.h>
28#include <net/mac802154.h> 29#include <net/mac802154.h>
29#include <net/wpan-phy.h> 30#include <net/wpan-phy.h>
30 31
@@ -39,12 +40,12 @@ struct xmit_work {
39 struct mac802154_priv *priv; 40 struct mac802154_priv *priv;
40 u8 chan; 41 u8 chan;
41 u8 page; 42 u8 page;
42 u8 xmit_attempts;
43}; 43};
44 44
45static void mac802154_xmit_worker(struct work_struct *work) 45static void mac802154_xmit_worker(struct work_struct *work)
46{ 46{
47 struct xmit_work *xw = container_of(work, struct xmit_work, work); 47 struct xmit_work *xw = container_of(work, struct xmit_work, work);
48 struct mac802154_sub_if_data *sdata;
48 int res; 49 int res;
49 50
50 mutex_lock(&xw->priv->phy->pib_lock); 51 mutex_lock(&xw->priv->phy->pib_lock);
@@ -57,21 +58,23 @@ static void mac802154_xmit_worker(struct work_struct *work)
57 pr_debug("set_channel failed\n"); 58 pr_debug("set_channel failed\n");
58 goto out; 59 goto out;
59 } 60 }
61
62 xw->priv->phy->current_channel = xw->chan;
63 xw->priv->phy->current_page = xw->page;
60 } 64 }
61 65
62 res = xw->priv->ops->xmit(&xw->priv->hw, xw->skb); 66 res = xw->priv->ops->xmit(&xw->priv->hw, xw->skb);
67 if (res)
68 pr_debug("transmission failed\n");
63 69
64out: 70out:
65 mutex_unlock(&xw->priv->phy->pib_lock); 71 mutex_unlock(&xw->priv->phy->pib_lock);
66 72
67 if (res) { 73 /* Restart the netif queue on each sub_if_data object. */
68 if (xw->xmit_attempts++ < MAC802154_MAX_XMIT_ATTEMPTS) { 74 rcu_read_lock();
69 queue_work(xw->priv->dev_workqueue, &xw->work); 75 list_for_each_entry_rcu(sdata, &xw->priv->slaves, list)
70 return; 76 netif_wake_queue(sdata->dev);
71 } else 77 rcu_read_unlock();
72 pr_debug("transmission failed for %d times",
73 MAC802154_MAX_XMIT_ATTEMPTS);
74 }
75 78
76 dev_kfree_skb(xw->skb); 79 dev_kfree_skb(xw->skb);
77 80
@@ -82,6 +85,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
82 u8 page, u8 chan) 85 u8 page, u8 chan)
83{ 86{
84 struct xmit_work *work; 87 struct xmit_work *work;
88 struct mac802154_sub_if_data *sdata;
85 89
86 if (!(priv->phy->channels_supported[page] & (1 << chan))) { 90 if (!(priv->phy->channels_supported[page] & (1 << chan))) {
87 WARN_ON(1); 91 WARN_ON(1);
@@ -109,12 +113,17 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,
109 return NETDEV_TX_BUSY; 113 return NETDEV_TX_BUSY;
110 } 114 }
111 115
116 /* Stop the netif queue on each sub_if_data object. */
117 rcu_read_lock();
118 list_for_each_entry_rcu(sdata, &priv->slaves, list)
119 netif_stop_queue(sdata->dev);
120 rcu_read_unlock();
121
112 INIT_WORK(&work->work, mac802154_xmit_worker); 122 INIT_WORK(&work->work, mac802154_xmit_worker);
113 work->skb = skb; 123 work->skb = skb;
114 work->priv = priv; 124 work->priv = priv;
115 work->page = page; 125 work->page = page;
116 work->chan = chan; 126 work->chan = chan;
117 work->xmit_attempts = 0;
118 127
119 queue_work(priv->dev_workqueue, &work->work); 128 queue_work(priv->dev_workqueue, &work->work);
120 129
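The tx.c rework above drops the bounded retry loop in favour of flow control: the hot path stops every slave's netif queue before deferring the frame to the worker, and the worker wakes the queues again once the transmit attempt is done. A minimal userspace sketch of that stop/transmit/wake shape, with stubs standing in for the netif and workqueue APIs (assumptions: one slave, worker run synchronously):

#include <stdio.h>

static int queue_stopped;

static void netif_stop_queue(void) { queue_stopped = 1; }
static void netif_wake_queue(void) { queue_stopped = 0; }
static int  hw_xmit(const char *f) { printf("tx: %s\n", f); return 0; }

static void xmit_worker(const char *frame)
{
        if (hw_xmit(frame))
                printf("transmission failed\n");
        /* restart the queue whether or not the frame made it out */
        netif_wake_queue();
}

int main(void)
{
        netif_stop_queue();     /* mac802154_tx() side: stop before queueing */
        xmit_worker("frame 1"); /* normally deferred via a workqueue */
        printf("queue stopped: %d\n", queue_stopped);
        return 0;
}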
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index d20c6d3c247d..2ca2f4dceab7 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -145,6 +145,8 @@ static int mac802154_header_create(struct sk_buff *skb,
145 145
146 head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ 146 head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */
147 fc = mac_cb_type(skb); 147 fc = mac_cb_type(skb);
148 if (mac_cb_is_ackreq(skb))
149 fc |= IEEE802154_FC_ACK_REQ;
148 150
149 if (!saddr) { 151 if (!saddr) {
150 spin_lock_bh(&priv->mib_lock); 152 spin_lock_bh(&priv->mib_lock);
@@ -358,7 +360,7 @@ void mac802154_wpan_setup(struct net_device *dev)
358 dev->header_ops = &mac802154_header_ops; 360 dev->header_ops = &mac802154_header_ops;
359 dev->needed_tailroom = 2; /* FCS */ 361 dev->needed_tailroom = 2; /* FCS */
360 dev->mtu = IEEE802154_MTU; 362 dev->mtu = IEEE802154_MTU;
361 dev->tx_queue_len = 10; 363 dev->tx_queue_len = 300;
362 dev->type = ARPHRD_IEEE802154; 364 dev->type = ARPHRD_IEEE802154;
363 dev->flags = IFF_NOARP | IFF_BROADCAST; 365 dev->flags = IFF_NOARP | IFF_BROADCAST;
364 dev->watchdog_timeo = 0; 366 dev->watchdog_timeo = 0;
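The header_create hunk above ORs the ACK-request bit into the 802.15.4 frame-control word when the control block asks for it. A standalone sketch of that composition; the bit values here are assumptions for illustration, the real ones come from the 802.15.4 header definitions:

#include <stdint.h>
#include <stdio.h>

#define FC_TYPE_DATA 0x0001     /* assumption for illustration */
#define FC_ACK_REQ   0x0020     /* assumption for illustration */

static uint16_t build_fc(uint16_t type, int want_ack)
{
        uint16_t fc = type;

        if (want_ack)
                fc |= FC_ACK_REQ;       /* mirrors the hunk above */
        return fc;
}

int main(void)
{
        printf("fc = 0x%04x\n", build_fc(FC_TYPE_DATA, 1));
        return 0;
}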
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a9c488b6c50d..07c865a31a3d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -5,6 +5,7 @@
5 * way. 5 * way.
6 * 6 *
7 * Rusty Russell (C)2000 -- This code is GPL. 7 * Rusty Russell (C)2000 -- This code is GPL.
8 * Patrick McHardy (c) 2006-2012
8 */ 9 */
9#include <linux/kernel.h> 10#include <linux/kernel.h>
10#include <linux/netfilter.h> 11#include <linux/netfilter.h>
@@ -276,10 +277,30 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
276EXPORT_SYMBOL(nf_nat_decode_session_hook); 277EXPORT_SYMBOL(nf_nat_decode_session_hook);
277#endif 278#endif
278 279
280static int __net_init netfilter_net_init(struct net *net)
281{
279#ifdef CONFIG_PROC_FS 282#ifdef CONFIG_PROC_FS
280struct proc_dir_entry *proc_net_netfilter; 283 net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
281EXPORT_SYMBOL(proc_net_netfilter); 284 net->proc_net);
285 if (!net->nf.proc_netfilter) {
286 if (!net_eq(net, &init_net))
287 pr_err("cannot create netfilter proc entry");
288
289 return -ENOMEM;
290 }
282#endif 291#endif
292 return 0;
293}
294
295static void __net_exit netfilter_net_exit(struct net *net)
296{
297 remove_proc_entry("netfilter", net->proc_net);
298}
299
300static struct pernet_operations netfilter_net_ops = {
301 .init = netfilter_net_init,
302 .exit = netfilter_net_exit,
303};
283 304
284void __init netfilter_init(void) 305void __init netfilter_init(void)
285{ 306{
@@ -289,11 +310,8 @@ void __init netfilter_init(void)
289 INIT_LIST_HEAD(&nf_hooks[i][h]); 310 INIT_LIST_HEAD(&nf_hooks[i][h]);
290 } 311 }
291 312
292#ifdef CONFIG_PROC_FS 313 if (register_pernet_subsys(&netfilter_net_ops) < 0)
293 proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net);
294 if (!proc_net_netfilter)
295 panic("cannot create netfilter proc entry"); 314 panic("cannot create netfilter proc entry");
296#endif
297 315
298 if (netfilter_log_init() < 0) 316 if (netfilter_log_init() < 0)
299 panic("cannot initialize nf_log"); 317 panic("cannot initialize nf_log");
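netfilter/core.c above converts the global /proc/net/netfilter directory to pernet_operations, so every network namespace gets its own init/exit pair instead of a single init_net-only proc_mkdir(). A plain-C stand-in for that ops pattern; struct net and the proc calls are stubbed, this only sketches the registration shape:

#include <stdio.h>
#include <stdlib.h>

struct net {
        void *proc_netfilter;
};

struct pernet_ops {
        int  (*init)(struct net *net);
        void (*exit)(struct net *net);
};

static int nf_net_init(struct net *net)
{
        net->proc_netfilter = malloc(1);   /* stands in for proc_net_mkdir() */
        return net->proc_netfilter ? 0 : -1;
}

static void nf_net_exit(struct net *net)
{
        free(net->proc_netfilter);         /* stands in for remove_proc_entry() */
        net->proc_netfilter = NULL;
}

static const struct pernet_ops netfilter_net_ops = {
        .init = nf_net_init,
        .exit = nf_net_exit,
};

int main(void)
{
        struct net ns = { 0 };

        /* the core would run .init for each new namespace */
        if (netfilter_net_ops.init(&ns))
                return 1;
        puts("namespace initialised");
        netfilter_net_ops.exit(&ns);
        return 0;
}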
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
new file mode 100644
index 000000000000..25243379b887
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -0,0 +1,277 @@
1/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8#ifndef __IP_SET_BITMAP_IP_GEN_H
9#define __IP_SET_BITMAP_IP_GEN_H
10
11#define CONCAT(a, b) a##b
12#define TOKEN(a, b) CONCAT(a, b)
13
14#define mtype_do_test TOKEN(MTYPE, _do_test)
15#define mtype_gc_test TOKEN(MTYPE, _gc_test)
16#define mtype_is_filled TOKEN(MTYPE, _is_filled)
17#define mtype_do_add TOKEN(MTYPE, _do_add)
18#define mtype_do_del TOKEN(MTYPE, _do_del)
19#define mtype_do_list TOKEN(MTYPE, _do_list)
20#define mtype_do_head TOKEN(MTYPE, _do_head)
21#define mtype_adt_elem TOKEN(MTYPE, _adt_elem)
22#define mtype_add_timeout TOKEN(MTYPE, _add_timeout)
23#define mtype_gc_init TOKEN(MTYPE, _gc_init)
24#define mtype_kadt TOKEN(MTYPE, _kadt)
25#define mtype_uadt TOKEN(MTYPE, _uadt)
26#define mtype_destroy TOKEN(MTYPE, _destroy)
27#define mtype_flush TOKEN(MTYPE, _flush)
28#define mtype_head TOKEN(MTYPE, _head)
29#define mtype_same_set TOKEN(MTYPE, _same_set)
30#define mtype_elem TOKEN(MTYPE, _elem)
31#define mtype_test TOKEN(MTYPE, _test)
32#define mtype_add TOKEN(MTYPE, _add)
33#define mtype_del TOKEN(MTYPE, _del)
34#define mtype_list TOKEN(MTYPE, _list)
35#define mtype_gc TOKEN(MTYPE, _gc)
36#define mtype MTYPE
37
38#define ext_timeout(e, m) \
39 (unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
40#define ext_counter(e, m) \
41 (struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER])
42#define get_ext(map, id) ((map)->extensions + (map)->dsize * (id))
43
44static void
45mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
46{
47 struct mtype *map = set->data;
48
49 init_timer(&map->gc);
50 map->gc.data = (unsigned long) set;
51 map->gc.function = gc;
52 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
53 add_timer(&map->gc);
54}
55
56static void
57mtype_destroy(struct ip_set *set)
58{
59 struct mtype *map = set->data;
60
61 if (SET_WITH_TIMEOUT(set))
62 del_timer_sync(&map->gc);
63
64 ip_set_free(map->members);
65 if (map->dsize)
66 ip_set_free(map->extensions);
67 kfree(map);
68
69 set->data = NULL;
70}
71
72static void
73mtype_flush(struct ip_set *set)
74{
75 struct mtype *map = set->data;
76
77 memset(map->members, 0, map->memsize);
78}
79
80static int
81mtype_head(struct ip_set *set, struct sk_buff *skb)
82{
83 const struct mtype *map = set->data;
84 struct nlattr *nested;
85
86 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
87 if (!nested)
88 goto nla_put_failure;
89 if (mtype_do_head(skb, map) ||
90 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
91 nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
92 htonl(sizeof(*map) +
93 map->memsize +
94 map->dsize * map->elements)) ||
95 (SET_WITH_TIMEOUT(set) &&
96 nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
97 (SET_WITH_COUNTER(set) &&
98 nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
99 htonl(IPSET_FLAG_WITH_COUNTERS))))
100 goto nla_put_failure;
101 ipset_nest_end(skb, nested);
102
103 return 0;
104nla_put_failure:
105 return -EMSGSIZE;
106}
107
108static int
109mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
110 struct ip_set_ext *mext, u32 flags)
111{
112 struct mtype *map = set->data;
113 const struct mtype_adt_elem *e = value;
114 void *x = get_ext(map, e->id);
115 int ret = mtype_do_test(e, map);
116
117 if (ret <= 0)
118 return ret;
119 if (SET_WITH_TIMEOUT(set) &&
120 ip_set_timeout_expired(ext_timeout(x, map)))
121 return 0;
122 if (SET_WITH_COUNTER(set))
123 ip_set_update_counter(ext_counter(x, map), ext, mext, flags);
124 return 1;
125}
126
127static int
128mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
129 struct ip_set_ext *mext, u32 flags)
130{
131 struct mtype *map = set->data;
132 const struct mtype_adt_elem *e = value;
133 void *x = get_ext(map, e->id);
134 int ret = mtype_do_add(e, map, flags);
135
136 if (ret == IPSET_ADD_FAILED) {
137 if (SET_WITH_TIMEOUT(set) &&
138 ip_set_timeout_expired(ext_timeout(x, map)))
139 ret = 0;
140 else if (!(flags & IPSET_FLAG_EXIST))
141 return -IPSET_ERR_EXIST;
142 }
143
144 if (SET_WITH_TIMEOUT(set))
145#ifdef IP_SET_BITMAP_STORED_TIMEOUT
146 mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret);
147#else
148 ip_set_timeout_set(ext_timeout(x, map), ext->timeout);
149#endif
150
151 if (SET_WITH_COUNTER(set))
152 ip_set_init_counter(ext_counter(x, map), ext);
153 return 0;
154}
155
156static int
157mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
158 struct ip_set_ext *mext, u32 flags)
159{
160 struct mtype *map = set->data;
161 const struct mtype_adt_elem *e = value;
162 const void *x = get_ext(map, e->id);
163
164 if (mtype_do_del(e, map) ||
165 (SET_WITH_TIMEOUT(set) &&
166 ip_set_timeout_expired(ext_timeout(x, map))))
167 return -IPSET_ERR_EXIST;
168
169 return 0;
170}
171
172static int
173mtype_list(const struct ip_set *set,
174 struct sk_buff *skb, struct netlink_callback *cb)
175{
176 struct mtype *map = set->data;
177 struct nlattr *adt, *nested;
178 void *x;
179 u32 id, first = cb->args[2];
180
181 adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
182 if (!adt)
183 return -EMSGSIZE;
184 for (; cb->args[2] < map->elements; cb->args[2]++) {
185 id = cb->args[2];
186 x = get_ext(map, id);
187 if (!test_bit(id, map->members) ||
188 (SET_WITH_TIMEOUT(set) &&
189#ifdef IP_SET_BITMAP_STORED_TIMEOUT
190 mtype_is_filled((const struct mtype_elem *) x) &&
191#endif
192 ip_set_timeout_expired(ext_timeout(x, map))))
193 continue;
194 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
195 if (!nested) {
196 if (id == first) {
197 nla_nest_cancel(skb, adt);
198 return -EMSGSIZE;
199 } else
200 goto nla_put_failure;
201 }
202 if (mtype_do_list(skb, map, id))
203 goto nla_put_failure;
204 if (SET_WITH_TIMEOUT(set)) {
205#ifdef IP_SET_BITMAP_STORED_TIMEOUT
206 if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
207 htonl(ip_set_timeout_stored(map, id,
208 ext_timeout(x, map)))))
209 goto nla_put_failure;
210#else
211 if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
212 htonl(ip_set_timeout_get(
213 ext_timeout(x, map)))))
214 goto nla_put_failure;
215#endif
216 }
217 if (SET_WITH_COUNTER(set) &&
218 ip_set_put_counter(skb, ext_counter(x, map)))
219 goto nla_put_failure;
220 ipset_nest_end(skb, nested);
221 }
222 ipset_nest_end(skb, adt);
223
224 /* Set listing finished */
225 cb->args[2] = 0;
226
227 return 0;
228
229nla_put_failure:
230 nla_nest_cancel(skb, nested);
231 ipset_nest_end(skb, adt);
232 if (unlikely(id == first)) {
233 cb->args[2] = 0;
234 return -EMSGSIZE;
235 }
236 return 0;
237}
238
239static void
240mtype_gc(unsigned long ul_set)
241{
242 struct ip_set *set = (struct ip_set *) ul_set;
243 struct mtype *map = set->data;
244 const void *x;
245 u32 id;
246
247 /* We run parallel with other readers (test element)
248 * but adding/deleting new entries is locked out */
249 read_lock_bh(&set->lock);
250 for (id = 0; id < map->elements; id++)
251 if (mtype_gc_test(id, map)) {
252 x = get_ext(map, id);
253 if (ip_set_timeout_expired(ext_timeout(x, map)))
254 clear_bit(id, map->members);
255 }
256 read_unlock_bh(&set->lock);
257
258 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
259 add_timer(&map->gc);
260}
261
262static const struct ip_set_type_variant mtype = {
263 .kadt = mtype_kadt,
264 .uadt = mtype_uadt,
265 .adt = {
266 [IPSET_ADD] = mtype_add,
267 [IPSET_DEL] = mtype_del,
268 [IPSET_TEST] = mtype_test,
269 },
270 .destroy = mtype_destroy,
271 .flush = mtype_flush,
272 .head = mtype_head,
273 .list = mtype_list,
274 .same_set = mtype_same_set,
275};
276
277#endif /* __IP_SET_BITMAP_IP_GEN_H */
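ip_set_bitmap_gen.h builds every per-type function name by token pasting: each bitmap variant defines MTYPE before including the header, and the mtype_* macros expand into bitmap_ip_add(), bitmap_ipmac_test(), and so on, without copy-pasted bodies. A minimal standalone version of the same trick with a hypothetical demo_set type:

#include <stdio.h>

#define CONCAT(a, b) a##b
#define TOKEN(a, b)  CONCAT(a, b)

#define mtype_add  TOKEN(MTYPE, _add)
#define mtype_test TOKEN(MTYPE, _test)

/* the "generic" body below is included once per MTYPE in the real code */
#define MTYPE demo_set

static int members;

static void mtype_add(int id)  { members |= 1 << id; }
static int  mtype_test(int id) { return !!(members & (1 << id)); }

int main(void)
{
        demo_set_add(3);                /* the pasted names */
        printf("%d %d\n", demo_set_test(3), demo_set_test(4));
        return 0;
}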
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 4a92fd47bd4c..f1a8128bef01 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -1,6 +1,6 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> 1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de> 2 * Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 3 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -24,31 +24,37 @@
24#include <linux/netfilter/ipset/pfxlen.h> 24#include <linux/netfilter/ipset/pfxlen.h>
25#include <linux/netfilter/ipset/ip_set.h> 25#include <linux/netfilter/ipset/ip_set.h>
26#include <linux/netfilter/ipset/ip_set_bitmap.h> 26#include <linux/netfilter/ipset/ip_set_bitmap.h>
27#define IP_SET_BITMAP_TIMEOUT
28#include <linux/netfilter/ipset/ip_set_timeout.h>
29 27
30#define REVISION_MIN 0 28#define REVISION_MIN 0
31#define REVISION_MAX 0 29#define REVISION_MAX 1 /* Counter support added */
32 30
33MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
34MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 32MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
35IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX); 33IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX);
36MODULE_ALIAS("ip_set_bitmap:ip"); 34MODULE_ALIAS("ip_set_bitmap:ip");
37 35
36#define MTYPE bitmap_ip
37
38/* Type structure */ 38/* Type structure */
39struct bitmap_ip { 39struct bitmap_ip {
40 void *members; /* the set members */ 40 void *members; /* the set members */
41 void *extensions; /* data extensions */
41 u32 first_ip; /* host byte order, included in range */ 42 u32 first_ip; /* host byte order, included in range */
42 u32 last_ip; /* host byte order, included in range */ 43 u32 last_ip; /* host byte order, included in range */
43 u32 elements; /* number of max elements in the set */ 44 u32 elements; /* number of max elements in the set */
44 u32 hosts; /* number of hosts in a subnet */ 45 u32 hosts; /* number of hosts in a subnet */
45 size_t memsize; /* members size */ 46 size_t memsize; /* members size */
47 size_t dsize; /* extensions struct size */
48 size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
46 u8 netmask; /* subnet netmask */ 49 u8 netmask; /* subnet netmask */
47 u32 timeout; /* timeout parameter */ 50 u32 timeout; /* timeout parameter */
48 struct timer_list gc; /* garbage collection */ 51 struct timer_list gc; /* garbage collection */
49}; 52};
50 53
51/* Base variant */ 54/* ADT structure for generic function args */
55struct bitmap_ip_adt_elem {
56 u16 id;
57};
52 58
53static inline u32 59static inline u32
54ip_to_id(const struct bitmap_ip *m, u32 ip) 60ip_to_id(const struct bitmap_ip *m, u32 ip)
@@ -56,188 +62,67 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
56 return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts; 62 return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;
57} 63}
58 64
59static int 65/* Common functions */
60bitmap_ip_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
61{
62 const struct bitmap_ip *map = set->data;
63 u16 id = *(u16 *)value;
64
65 return !!test_bit(id, map->members);
66}
67 66
68static int 67static inline int
69bitmap_ip_add(struct ip_set *set, void *value, u32 timeout, u32 flags) 68bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
70{ 69{
71 struct bitmap_ip *map = set->data; 70 return !!test_bit(e->id, map->members);
72 u16 id = *(u16 *)value;
73
74 if (test_and_set_bit(id, map->members))
75 return -IPSET_ERR_EXIST;
76
77 return 0;
78} 71}
79 72
80static int 73static inline int
81bitmap_ip_del(struct ip_set *set, void *value, u32 timeout, u32 flags) 74bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map)
82{ 75{
83 struct bitmap_ip *map = set->data; 76 return !!test_bit(id, map->members);
84 u16 id = *(u16 *)value;
85
86 if (!test_and_clear_bit(id, map->members))
87 return -IPSET_ERR_EXIST;
88
89 return 0;
90}
91
92static int
93bitmap_ip_list(const struct ip_set *set,
94 struct sk_buff *skb, struct netlink_callback *cb)
95{
96 const struct bitmap_ip *map = set->data;
97 struct nlattr *atd, *nested;
98 u32 id, first = cb->args[2];
99
100 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
101 if (!atd)
102 return -EMSGSIZE;
103 for (; cb->args[2] < map->elements; cb->args[2]++) {
104 id = cb->args[2];
105 if (!test_bit(id, map->members))
106 continue;
107 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
108 if (!nested) {
109 if (id == first) {
110 nla_nest_cancel(skb, atd);
111 return -EMSGSIZE;
112 } else
113 goto nla_put_failure;
114 }
115 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP,
116 htonl(map->first_ip + id * map->hosts)))
117 goto nla_put_failure;
118 ipset_nest_end(skb, nested);
119 }
120 ipset_nest_end(skb, atd);
121 /* Set listing finished */
122 cb->args[2] = 0;
123 return 0;
124
125nla_put_failure:
126 nla_nest_cancel(skb, nested);
127 ipset_nest_end(skb, atd);
128 if (unlikely(id == first)) {
129 cb->args[2] = 0;
130 return -EMSGSIZE;
131 }
132 return 0;
133} 77}
134 78
135/* Timeout variant */ 79static inline int
136 80bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
137static int 81 u32 flags)
138bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
139{ 82{
140 const struct bitmap_ip *map = set->data; 83 return !!test_and_set_bit(e->id, map->members);
141 const unsigned long *members = map->members;
142 u16 id = *(u16 *)value;
143
144 return ip_set_timeout_test(members[id]);
145} 84}
146 85
147static int 86static inline int
148bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) 87bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
149{ 88{
150 struct bitmap_ip *map = set->data; 89 return !test_and_clear_bit(e->id, map->members);
151 unsigned long *members = map->members;
152 u16 id = *(u16 *)value;
153
154 if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST))
155 return -IPSET_ERR_EXIST;
156
157 members[id] = ip_set_timeout_set(timeout);
158
159 return 0;
160} 90}
161 91
162static int 92static inline int
163bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) 93bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id)
164{ 94{
165 struct bitmap_ip *map = set->data; 95 return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
166 unsigned long *members = map->members; 96 htonl(map->first_ip + id * map->hosts));
167 u16 id = *(u16 *)value;
168 int ret = -IPSET_ERR_EXIST;
169
170 if (ip_set_timeout_test(members[id]))
171 ret = 0;
172
173 members[id] = IPSET_ELEM_UNSET;
174 return ret;
175} 97}
176 98
177static int 99static inline int
178bitmap_ip_tlist(const struct ip_set *set, 100bitmap_ip_do_head(struct sk_buff *skb, const struct bitmap_ip *map)
179 struct sk_buff *skb, struct netlink_callback *cb)
180{ 101{
181 const struct bitmap_ip *map = set->data; 102 return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
182 struct nlattr *adt, *nested; 103 nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) ||
183 u32 id, first = cb->args[2]; 104 (map->netmask != 32 &&
184 const unsigned long *members = map->members; 105 nla_put_u8(skb, IPSET_ATTR_NETMASK, map->netmask));
185
186 adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
187 if (!adt)
188 return -EMSGSIZE;
189 for (; cb->args[2] < map->elements; cb->args[2]++) {
190 id = cb->args[2];
191 if (!ip_set_timeout_test(members[id]))
192 continue;
193 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
194 if (!nested) {
195 if (id == first) {
196 nla_nest_cancel(skb, adt);
197 return -EMSGSIZE;
198 } else
199 goto nla_put_failure;
200 }
201 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP,
202 htonl(map->first_ip + id * map->hosts)) ||
203 nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
204 htonl(ip_set_timeout_get(members[id]))))
205 goto nla_put_failure;
206 ipset_nest_end(skb, nested);
207 }
208 ipset_nest_end(skb, adt);
209
210 /* Set listing finished */
211 cb->args[2] = 0;
212
213 return 0;
214
215nla_put_failure:
216 nla_nest_cancel(skb, nested);
217 ipset_nest_end(skb, adt);
218 if (unlikely(id == first)) {
219 cb->args[2] = 0;
220 return -EMSGSIZE;
221 }
222 return 0;
223} 106}
224 107
225static int 108static int
226bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb, 109bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,
227 const struct xt_action_param *par, 110 const struct xt_action_param *par,
228 enum ipset_adt adt, const struct ip_set_adt_opt *opt) 111 enum ipset_adt adt, struct ip_set_adt_opt *opt)
229{ 112{
230 struct bitmap_ip *map = set->data; 113 struct bitmap_ip *map = set->data;
231 ipset_adtfn adtfn = set->variant->adt[adt]; 114 ipset_adtfn adtfn = set->variant->adt[adt];
115 struct bitmap_ip_adt_elem e = { };
116 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
232 u32 ip; 117 u32 ip;
233 118
234 ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); 119 ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
235 if (ip < map->first_ip || ip > map->last_ip) 120 if (ip < map->first_ip || ip > map->last_ip)
236 return -IPSET_ERR_BITMAP_RANGE; 121 return -IPSET_ERR_BITMAP_RANGE;
237 122
238 ip = ip_to_id(map, ip); 123 e.id = ip_to_id(map, ip);
239 124
240 return adtfn(set, &ip, opt_timeout(opt, map), opt->cmdflags); 125 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
241} 126}
242 127
243static int 128static int
@@ -246,33 +131,31 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
246{ 131{
247 struct bitmap_ip *map = set->data; 132 struct bitmap_ip *map = set->data;
248 ipset_adtfn adtfn = set->variant->adt[adt]; 133 ipset_adtfn adtfn = set->variant->adt[adt];
249 u32 timeout = map->timeout; 134 u32 ip, ip_to;
250 u32 ip, ip_to, id; 135 struct bitmap_ip_adt_elem e = { };
136 struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
251 int ret = 0; 137 int ret = 0;
252 138
253 if (unlikely(!tb[IPSET_ATTR_IP] || 139 if (unlikely(!tb[IPSET_ATTR_IP] ||
254 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) 140 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
141 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
142 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
255 return -IPSET_ERR_PROTOCOL; 143 return -IPSET_ERR_PROTOCOL;
256 144
257 if (tb[IPSET_ATTR_LINENO]) 145 if (tb[IPSET_ATTR_LINENO])
258 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 146 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
259 147
260 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); 148 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
149 ip_set_get_extensions(set, tb, &ext);
261 if (ret) 150 if (ret)
262 return ret; 151 return ret;
263 152
264 if (ip < map->first_ip || ip > map->last_ip) 153 if (ip < map->first_ip || ip > map->last_ip)
265 return -IPSET_ERR_BITMAP_RANGE; 154 return -IPSET_ERR_BITMAP_RANGE;
266 155
267 if (tb[IPSET_ATTR_TIMEOUT]) {
268 if (!with_timeout(map->timeout))
269 return -IPSET_ERR_TIMEOUT;
270 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
271 }
272
273 if (adt == IPSET_TEST) { 156 if (adt == IPSET_TEST) {
274 id = ip_to_id(map, ip); 157 e.id = ip_to_id(map, ip);
275 return adtfn(set, &id, timeout, flags); 158 return adtfn(set, &e, &ext, &ext, flags);
276 } 159 }
277 160
278 if (tb[IPSET_ATTR_IP_TO]) { 161 if (tb[IPSET_ATTR_IP_TO]) {
@@ -297,8 +180,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
297 return -IPSET_ERR_BITMAP_RANGE; 180 return -IPSET_ERR_BITMAP_RANGE;
298 181
299 for (; !before(ip_to, ip); ip += map->hosts) { 182 for (; !before(ip_to, ip); ip += map->hosts) {
300 id = ip_to_id(map, ip); 183 e.id = ip_to_id(map, ip);
301 ret = adtfn(set, &id, timeout, flags); 184 ret = adtfn(set, &e, &ext, &ext, flags);
302 185
303 if (ret && !ip_set_eexist(ret, flags)) 186 if (ret && !ip_set_eexist(ret, flags))
304 return ret; 187 return ret;
@@ -308,54 +191,6 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
308 return ret; 191 return ret;
309} 192}
310 193
311static void
312bitmap_ip_destroy(struct ip_set *set)
313{
314 struct bitmap_ip *map = set->data;
315
316 if (with_timeout(map->timeout))
317 del_timer_sync(&map->gc);
318
319 ip_set_free(map->members);
320 kfree(map);
321
322 set->data = NULL;
323}
324
325static void
326bitmap_ip_flush(struct ip_set *set)
327{
328 struct bitmap_ip *map = set->data;
329
330 memset(map->members, 0, map->memsize);
331}
332
333static int
334bitmap_ip_head(struct ip_set *set, struct sk_buff *skb)
335{
336 const struct bitmap_ip *map = set->data;
337 struct nlattr *nested;
338
339 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
340 if (!nested)
341 goto nla_put_failure;
342 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
343 nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) ||
344 (map->netmask != 32 &&
345 nla_put_u8(skb, IPSET_ATTR_NETMASK, map->netmask)) ||
346 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
347 nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
348 htonl(sizeof(*map) + map->memsize)) ||
349 (with_timeout(map->timeout) &&
350 nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))))
351 goto nla_put_failure;
352 ipset_nest_end(skb, nested);
353
354 return 0;
355nla_put_failure:
356 return -EMSGSIZE;
357}
358
359static bool 194static bool
360bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b) 195bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
361{ 196{
@@ -365,70 +200,35 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)
365 return x->first_ip == y->first_ip && 200 return x->first_ip == y->first_ip &&
366 x->last_ip == y->last_ip && 201 x->last_ip == y->last_ip &&
367 x->netmask == y->netmask && 202 x->netmask == y->netmask &&
368 x->timeout == y->timeout; 203 x->timeout == y->timeout &&
204 a->extensions == b->extensions;
369} 205}
370 206
371static const struct ip_set_type_variant bitmap_ip = { 207/* Plain variant */
372 .kadt = bitmap_ip_kadt, 208
373 .uadt = bitmap_ip_uadt, 209struct bitmap_ip_elem {
374 .adt = {
375 [IPSET_ADD] = bitmap_ip_add,
376 [IPSET_DEL] = bitmap_ip_del,
377 [IPSET_TEST] = bitmap_ip_test,
378 },
379 .destroy = bitmap_ip_destroy,
380 .flush = bitmap_ip_flush,
381 .head = bitmap_ip_head,
382 .list = bitmap_ip_list,
383 .same_set = bitmap_ip_same_set,
384}; 210};
385 211
386static const struct ip_set_type_variant bitmap_tip = { 212/* Timeout variant */
387 .kadt = bitmap_ip_kadt, 213
388 .uadt = bitmap_ip_uadt, 214struct bitmap_ipt_elem {
389 .adt = { 215 unsigned long timeout;
390 [IPSET_ADD] = bitmap_ip_tadd,
391 [IPSET_DEL] = bitmap_ip_tdel,
392 [IPSET_TEST] = bitmap_ip_ttest,
393 },
394 .destroy = bitmap_ip_destroy,
395 .flush = bitmap_ip_flush,
396 .head = bitmap_ip_head,
397 .list = bitmap_ip_tlist,
398 .same_set = bitmap_ip_same_set,
399}; 216};
400 217
401static void 218/* Plain variant with counter */
402bitmap_ip_gc(unsigned long ul_set)
403{
404 struct ip_set *set = (struct ip_set *) ul_set;
405 struct bitmap_ip *map = set->data;
406 unsigned long *table = map->members;
407 u32 id;
408
409 /* We run parallel with other readers (test element)
410 * but adding/deleting new entries is locked out */
411 read_lock_bh(&set->lock);
412 for (id = 0; id < map->elements; id++)
413 if (ip_set_timeout_expired(table[id]))
414 table[id] = IPSET_ELEM_UNSET;
415 read_unlock_bh(&set->lock);
416
417 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
418 add_timer(&map->gc);
419}
420 219
421static void 220struct bitmap_ipc_elem {
422bitmap_ip_gc_init(struct ip_set *set) 221 struct ip_set_counter counter;
423{ 222};
424 struct bitmap_ip *map = set->data;
425 223
426 init_timer(&map->gc); 224/* Timeout variant with counter */
427 map->gc.data = (unsigned long) set; 225
428 map->gc.function = bitmap_ip_gc; 226struct bitmap_ipct_elem {
429 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; 227 unsigned long timeout;
430 add_timer(&map->gc); 228 struct ip_set_counter counter;
431} 229};
230
231#include "ip_set_bitmap_gen.h"
432 232
433/* Create bitmap:ip type of sets */ 233/* Create bitmap:ip type of sets */
434 234
@@ -440,6 +240,13 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
440 map->members = ip_set_alloc(map->memsize); 240 map->members = ip_set_alloc(map->memsize);
441 if (!map->members) 241 if (!map->members)
442 return false; 242 return false;
243 if (map->dsize) {
244 map->extensions = ip_set_alloc(map->dsize * elements);
245 if (!map->extensions) {
246 kfree(map->members);
247 return false;
248 }
249 }
443 map->first_ip = first_ip; 250 map->first_ip = first_ip;
444 map->last_ip = last_ip; 251 map->last_ip = last_ip;
445 map->elements = elements; 252 map->elements = elements;
@@ -457,13 +264,14 @@ static int
457bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) 264bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
458{ 265{
459 struct bitmap_ip *map; 266 struct bitmap_ip *map;
460 u32 first_ip, last_ip, hosts; 267 u32 first_ip, last_ip, hosts, cadt_flags = 0;
461 u64 elements; 268 u64 elements;
462 u8 netmask = 32; 269 u8 netmask = 32;
463 int ret; 270 int ret;
464 271
465 if (unlikely(!tb[IPSET_ATTR_IP] || 272 if (unlikely(!tb[IPSET_ATTR_IP] ||
466 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) 273 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
274 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
467 return -IPSET_ERR_PROTOCOL; 275 return -IPSET_ERR_PROTOCOL;
468 276
469 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); 277 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
@@ -526,8 +334,45 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
526 if (!map) 334 if (!map)
527 return -ENOMEM; 335 return -ENOMEM;
528 336
529 if (tb[IPSET_ATTR_TIMEOUT]) { 337 map->memsize = bitmap_bytes(0, elements - 1);
530 map->memsize = elements * sizeof(unsigned long); 338 set->variant = &bitmap_ip;
339 if (tb[IPSET_ATTR_CADT_FLAGS])
340 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
341 if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
342 set->extensions |= IPSET_EXT_COUNTER;
343 if (tb[IPSET_ATTR_TIMEOUT]) {
344 map->dsize = sizeof(struct bitmap_ipct_elem);
345 map->offset[IPSET_OFFSET_TIMEOUT] =
346 offsetof(struct bitmap_ipct_elem, timeout);
347 map->offset[IPSET_OFFSET_COUNTER] =
348 offsetof(struct bitmap_ipct_elem, counter);
349
350 if (!init_map_ip(set, map, first_ip, last_ip,
351 elements, hosts, netmask)) {
352 kfree(map);
353 return -ENOMEM;
354 }
355
356 map->timeout = ip_set_timeout_uget(
357 tb[IPSET_ATTR_TIMEOUT]);
358 set->extensions |= IPSET_EXT_TIMEOUT;
359
360 bitmap_ip_gc_init(set, bitmap_ip_gc);
361 } else {
362 map->dsize = sizeof(struct bitmap_ipc_elem);
363 map->offset[IPSET_OFFSET_COUNTER] =
364 offsetof(struct bitmap_ipc_elem, counter);
365
366 if (!init_map_ip(set, map, first_ip, last_ip,
367 elements, hosts, netmask)) {
368 kfree(map);
369 return -ENOMEM;
370 }
371 }
372 } else if (tb[IPSET_ATTR_TIMEOUT]) {
373 map->dsize = sizeof(struct bitmap_ipt_elem);
374 map->offset[IPSET_OFFSET_TIMEOUT] =
375 offsetof(struct bitmap_ipt_elem, timeout);
531 376
532 if (!init_map_ip(set, map, first_ip, last_ip, 377 if (!init_map_ip(set, map, first_ip, last_ip,
533 elements, hosts, netmask)) { 378 elements, hosts, netmask)) {
@@ -536,19 +381,16 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
536 } 381 }
537 382
538 map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); 383 map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
539 set->variant = &bitmap_tip; 384 set->extensions |= IPSET_EXT_TIMEOUT;
540 385
541 bitmap_ip_gc_init(set); 386 bitmap_ip_gc_init(set, bitmap_ip_gc);
542 } else { 387 } else {
543 map->memsize = bitmap_bytes(0, elements - 1); 388 map->dsize = 0;
544
545 if (!init_map_ip(set, map, first_ip, last_ip, 389 if (!init_map_ip(set, map, first_ip, last_ip,
546 elements, hosts, netmask)) { 390 elements, hosts, netmask)) {
547 kfree(map); 391 kfree(map);
548 return -ENOMEM; 392 return -ENOMEM;
549 } 393 }
550
551 set->variant = &bitmap_ip;
552 } 394 }
553 return 0; 395 return 0;
554} 396}
@@ -568,6 +410,7 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
568 [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, 410 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
569 [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, 411 [IPSET_ATTR_NETMASK] = { .type = NLA_U8 },
570 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 412 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
413 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
571 }, 414 },
572 .adt_policy = { 415 .adt_policy = {
573 [IPSET_ATTR_IP] = { .type = NLA_NESTED }, 416 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -575,6 +418,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {
575 [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, 418 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
576 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 419 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
577 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 420 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
421 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
422 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
578 }, 423 },
579 .me = THIS_MODULE, 424 .me = THIS_MODULE,
580}; 425};
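The rewritten bitmap:ip keeps its extensions (timeout, counter) out of line: one flat blob holds dsize bytes per element, and map->offset[] records where each optional extension lives inside a slot, which is exactly what the ext_timeout()/get_ext() macros in the generic header index into. A standalone sketch of that layout with a hypothetical two-extension element:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

enum { OFF_TIMEOUT, OFF_COUNTER, OFF_MAX };

struct elem_tc {                        /* timeout + counter variant */
        unsigned long timeout;
        unsigned long counter;
};

struct map {
        void   *extensions;             /* flat per-element blob */
        size_t  dsize;                  /* bytes per element */
        size_t  offset[OFF_MAX];        /* offsets to extensions */
};

#define ext_timeout(e, m) \
        ((unsigned long *)((char *)(e) + (m)->offset[OFF_TIMEOUT]))
#define get_ext(m, id) ((char *)(m)->extensions + (m)->dsize * (id))

int main(void)
{
        struct map m = {
                .dsize  = sizeof(struct elem_tc),
                .offset = { offsetof(struct elem_tc, timeout),
                            offsetof(struct elem_tc, counter) },
        };

        m.extensions = calloc(8, m.dsize);      /* 8 elements */
        if (!m.extensions)
                return 1;
        *ext_timeout(get_ext(&m, 3), &m) = 42;
        printf("%lu\n", *ext_timeout(get_ext(&m, 3), &m));
        free(m.extensions);
        return 0;
}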
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 0f92dc24cb89..3b30e0bef890 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -1,7 +1,7 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> 1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de> 2 * Patrick Schaaf <bof@bof.de>
3 * Martin Josefsson <gandalf@wlug.westbo.se> 3 * Martin Josefsson <gandalf@wlug.westbo.se>
4 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 4 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -23,340 +23,208 @@
23 23
24#include <linux/netfilter/ipset/pfxlen.h> 24#include <linux/netfilter/ipset/pfxlen.h>
25#include <linux/netfilter/ipset/ip_set.h> 25#include <linux/netfilter/ipset/ip_set.h>
26#include <linux/netfilter/ipset/ip_set_timeout.h>
27#include <linux/netfilter/ipset/ip_set_bitmap.h> 26#include <linux/netfilter/ipset/ip_set_bitmap.h>
28 27
29#define REVISION_MIN 0 28#define REVISION_MIN 0
30#define REVISION_MAX 0 29#define REVISION_MAX 1 /* Counter support added */
31 30
32MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
33MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 32MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
34IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX); 33IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX);
35MODULE_ALIAS("ip_set_bitmap:ip,mac"); 34MODULE_ALIAS("ip_set_bitmap:ip,mac");
36 35
36#define MTYPE bitmap_ipmac
37#define IP_SET_BITMAP_STORED_TIMEOUT
38
37enum { 39enum {
38 MAC_EMPTY, /* element is not set */
39 MAC_FILLED, /* element is set with MAC */
40 MAC_UNSET, /* element is set, without MAC */ 40 MAC_UNSET, /* element is set, without MAC */
41 MAC_FILLED, /* element is set with MAC */
41}; 42};
42 43
43/* Type structure */ 44/* Type structure */
44struct bitmap_ipmac { 45struct bitmap_ipmac {
45 void *members; /* the set members */ 46 void *members; /* the set members */
47 void *extensions; /* MAC + data extensions */
46 u32 first_ip; /* host byte order, included in range */ 48 u32 first_ip; /* host byte order, included in range */
47 u32 last_ip; /* host byte order, included in range */ 49 u32 last_ip; /* host byte order, included in range */
50 u32 elements; /* number of max elements in the set */
48 u32 timeout; /* timeout value */ 51 u32 timeout; /* timeout value */
49 struct timer_list gc; /* garbage collector */ 52 struct timer_list gc; /* garbage collector */
53 size_t memsize; /* members size */
50 size_t dsize; /* size of element */ 54 size_t dsize; /* size of element */
55 size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
51}; 56};
52 57
53/* ADT structure for generic function args */ 58/* ADT structure for generic function args */
54struct ipmac { 59struct bitmap_ipmac_adt_elem {
55 u32 id; /* id in array */ 60 u16 id;
56 unsigned char *ether; /* ethernet address */ 61 unsigned char *ether;
57}; 62};
58 63
59/* Member element without and with timeout */ 64struct bitmap_ipmac_elem {
60
61struct ipmac_elem {
62 unsigned char ether[ETH_ALEN]; 65 unsigned char ether[ETH_ALEN];
63 unsigned char match; 66 unsigned char filled;
64} __attribute__ ((aligned)); 67} __attribute__ ((aligned));
65 68
66struct ipmac_telem { 69static inline u32
67 unsigned char ether[ETH_ALEN]; 70ip_to_id(const struct bitmap_ipmac *m, u32 ip)
68 unsigned char match;
69 unsigned long timeout;
70} __attribute__ ((aligned));
71
72static inline void *
73bitmap_ipmac_elem(const struct bitmap_ipmac *map, u32 id)
74{ 71{
75 return (void *)((char *)map->members + id * map->dsize); 72 return ip - m->first_ip;
76} 73}
77 74
78static inline bool 75static inline struct bitmap_ipmac_elem *
79bitmap_timeout(const struct bitmap_ipmac *map, u32 id) 76get_elem(void *extensions, u16 id, size_t dsize)
80{ 77{
81 const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id); 78 return (struct bitmap_ipmac_elem *)(extensions + id * dsize);
82
83 return ip_set_timeout_test(elem->timeout);
84} 79}
85 80
86static inline bool 81/* Common functions */
87bitmap_expired(const struct bitmap_ipmac *map, u32 id)
88{
89 const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id);
90
91 return ip_set_timeout_expired(elem->timeout);
92}
93 82
94static inline int 83static inline int
95bitmap_ipmac_exist(const struct ipmac_telem *elem) 84bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
85 const struct bitmap_ipmac *map)
96{ 86{
97 return elem->match == MAC_UNSET || 87 const struct bitmap_ipmac_elem *elem;
98 (elem->match == MAC_FILLED &&
99 !ip_set_timeout_expired(elem->timeout));
100}
101 88
102/* Base variant */ 89 if (!test_bit(e->id, map->members))
103 90 return 0;
104static int 91 elem = get_elem(map->extensions, e->id, map->dsize);
105bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout, u32 flags) 92 if (elem->filled == MAC_FILLED)
106{ 93 return e->ether == NULL ||
107 const struct bitmap_ipmac *map = set->data; 94 ether_addr_equal(e->ether, elem->ether);
108 const struct ipmac *data = value; 95 /* Trigger kernel to fill out the ethernet address */
109 const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); 96 return -EAGAIN;
110
111 switch (elem->match) {
112 case MAC_UNSET:
113 /* Trigger kernel to fill out the ethernet address */
114 return -EAGAIN;
115 case MAC_FILLED:
116 return data->ether == NULL ||
117 ether_addr_equal(data->ether, elem->ether);
118 }
119 return 0;
120}
121
122static int
123bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
124{
125 struct bitmap_ipmac *map = set->data;
126 const struct ipmac *data = value;
127 struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
128
129 switch (elem->match) {
130 case MAC_UNSET:
131 if (!data->ether)
132 /* Already added without ethernet address */
133 return -IPSET_ERR_EXIST;
134 /* Fill the MAC address */
135 memcpy(elem->ether, data->ether, ETH_ALEN);
136 elem->match = MAC_FILLED;
137 break;
138 case MAC_FILLED:
139 return -IPSET_ERR_EXIST;
140 case MAC_EMPTY:
141 if (data->ether) {
142 memcpy(elem->ether, data->ether, ETH_ALEN);
143 elem->match = MAC_FILLED;
144 } else
145 elem->match = MAC_UNSET;
146 }
147
148 return 0;
149}
150
151static int
152bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
153{
154 struct bitmap_ipmac *map = set->data;
155 const struct ipmac *data = value;
156 struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
157
158 if (elem->match == MAC_EMPTY)
159 return -IPSET_ERR_EXIST;
160
161 elem->match = MAC_EMPTY;
162
163 return 0;
164} 97}
165 98
166static int 99static inline int
167bitmap_ipmac_list(const struct ip_set *set, 100bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map)
168 struct sk_buff *skb, struct netlink_callback *cb)
169{ 101{
170 const struct bitmap_ipmac *map = set->data; 102 const struct bitmap_ipmac_elem *elem;
171 const struct ipmac_elem *elem;
172 struct nlattr *atd, *nested;
173 u32 id, first = cb->args[2];
174 u32 last = map->last_ip - map->first_ip;
175
176 atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
177 if (!atd)
178 return -EMSGSIZE;
179 for (; cb->args[2] <= last; cb->args[2]++) {
180 id = cb->args[2];
181 elem = bitmap_ipmac_elem(map, id);
182 if (elem->match == MAC_EMPTY)
183 continue;
184 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
185 if (!nested) {
186 if (id == first) {
187 nla_nest_cancel(skb, atd);
188 return -EMSGSIZE;
189 } else
190 goto nla_put_failure;
191 }
192 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP,
193 htonl(map->first_ip + id)) ||
194 (elem->match == MAC_FILLED &&
195 nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN,
196 elem->ether)))
197 goto nla_put_failure;
198 ipset_nest_end(skb, nested);
199 }
200 ipset_nest_end(skb, atd);
201 /* Set listing finished */
202 cb->args[2] = 0;
203
204 return 0;
205 103
206nla_put_failure: 104 if (!test_bit(id, map->members))
207 nla_nest_cancel(skb, nested); 105 return 0;
208 ipset_nest_end(skb, atd); 106 elem = get_elem(map->extensions, id, map->dsize);
209 if (unlikely(id == first)) { 107 /* Timer not started for the incomplete elements */
210 cb->args[2] = 0; 108 return elem->filled == MAC_FILLED;
211 return -EMSGSIZE;
212 }
213 return 0;
214} 109}
215
216/* Timeout variant */
217
218static int
219bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
220{
221	const struct bitmap_ipmac *map = set->data;
222	const struct ipmac *data = value;
223	const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id);
224
225	switch (elem->match) {
226	case MAC_UNSET:
227		/* Trigger kernel to fill out the ethernet address */
228		return -EAGAIN;
229	case MAC_FILLED:
230		return (data->ether == NULL ||
231			ether_addr_equal(data->ether, elem->ether)) &&
232		       !bitmap_expired(map, data->id);
233	}
234	return 0;
235}
110
111static inline int
112bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
113{
114	return elem->filled == MAC_FILLED;
115}
236
237static int
238bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
239{
240	struct bitmap_ipmac *map = set->data;
241	const struct ipmac *data = value;
242	struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
243	bool flag_exist = flags & IPSET_FLAG_EXIST;
244
245	switch (elem->match) {
246	case MAC_UNSET:
247		if (!(data->ether || flag_exist))
248			/* Already added without ethernet address */
249			return -IPSET_ERR_EXIST;
250		/* Fill the MAC address and activate the timer */
251		memcpy(elem->ether, data->ether, ETH_ALEN);
252		elem->match = MAC_FILLED;
253		if (timeout == map->timeout)
254			/* Timeout was not specified, get stored one */
255			timeout = elem->timeout;
256		elem->timeout = ip_set_timeout_set(timeout);
257		break;
258	case MAC_FILLED:
259		if (!(bitmap_expired(map, data->id) || flag_exist))
260			return -IPSET_ERR_EXIST;
261		/* Fall through */
262	case MAC_EMPTY:
263		if (data->ether) {
264			memcpy(elem->ether, data->ether, ETH_ALEN);
265			elem->match = MAC_FILLED;
266		} else
267			elem->match = MAC_UNSET;
268		/* If MAC is unset yet, we store plain timeout value
269		 * because the timer is not activated yet
270		 * and we can reuse it later when MAC is filled out,
271		 * possibly by the kernel */
272		elem->timeout = data->ether ? ip_set_timeout_set(timeout)
273					    : timeout;
274		break;
275	}
276
277	return 0;
278}
116
117static inline int
118bitmap_ipmac_add_timeout(unsigned long *timeout,
119			 const struct bitmap_ipmac_adt_elem *e,
120			 const struct ip_set_ext *ext,
121			 struct bitmap_ipmac *map, int mode)
122{
123	u32 t = ext->timeout;
124
125	if (mode == IPSET_ADD_START_STORED_TIMEOUT) {
126		if (t == map->timeout)
127			/* Timeout was not specified, get stored one */
128			t = *timeout;
129		ip_set_timeout_set(timeout, t);
130	} else {
131		/* If MAC is unset yet, we store plain timeout value
132		 * because the timer is not activated yet
133		 * and we can reuse it later when MAC is filled out,
134		 * possibly by the kernel */
135		if (e->ether)
136			ip_set_timeout_set(timeout, t);
137		else
138			*timeout = t;
139	}
140	return 0;
141}
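The helper above carries the old tadd trick over into the new generic framework: a single unsigned long slot holds either a plain timeout value (while the MAC is still unknown and no timer runs for the element) or an encoded expiry time (once the MAC is filled in). A minimal sketch of that dual use, for illustration only; the names elem_sketch, store_timeout and mac_learned are not part of the kernel API:

#include <stdbool.h>

struct elem_sketch {
	bool mac_known;
	unsigned long t;	/* plain seconds OR absolute expiry */
};

static void store_timeout(struct elem_sketch *e, unsigned long now,
			  unsigned long seconds)
{
	if (e->mac_known)
		e->t = now + seconds;	/* timer running: absolute expiry */
	else
		e->t = seconds;		/* timer idle: keep the plain value */
}

static void mac_learned(struct elem_sketch *e, unsigned long now)
{
	e->mac_known = true;
	e->t = now + e->t;	/* reuse the stored plain value */
}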
279
280static int
281bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
282{
283	struct bitmap_ipmac *map = set->data;
284	const struct ipmac *data = value;
285	struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id);
286
287	if (elem->match == MAC_EMPTY || bitmap_expired(map, data->id))
288		return -IPSET_ERR_EXIST;
289
290	elem->match = MAC_EMPTY;
291
292	return 0;
293}
294
295static int
296bitmap_ipmac_tlist(const struct ip_set *set,
297		   struct sk_buff *skb, struct netlink_callback *cb)
298{
299	const struct bitmap_ipmac *map = set->data;
300	const struct ipmac_telem *elem;
301	struct nlattr *atd, *nested;
302	u32 id, first = cb->args[2];
303	u32 timeout, last = map->last_ip - map->first_ip;
304
305	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
306	if (!atd)
307		return -EMSGSIZE;
308	for (; cb->args[2] <= last; cb->args[2]++) {
309		id = cb->args[2];
310		elem = bitmap_ipmac_elem(map, id);
311		if (!bitmap_ipmac_exist(elem))
312			continue;
313		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
314		if (!nested) {
315			if (id == first) {
316				nla_nest_cancel(skb, atd);
317				return -EMSGSIZE;
318			} else
319				goto nla_put_failure;
320		}
321		if (nla_put_ipaddr4(skb, IPSET_ATTR_IP,
322				    htonl(map->first_ip + id)) ||
323		    (elem->match == MAC_FILLED &&
324		     nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN,
325			     elem->ether)))
326			goto nla_put_failure;
327		timeout = elem->match == MAC_UNSET ? elem->timeout
328				: ip_set_timeout_get(elem->timeout);
329		if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout)))
330			goto nla_put_failure;
331		ipset_nest_end(skb, nested);
332	}
333	ipset_nest_end(skb, atd);
334	/* Set listing finished */
335	cb->args[2] = 0;
336
337	return 0;
338
339nla_put_failure:
340	nla_nest_cancel(skb, nested);
341	ipset_nest_end(skb, atd);
342	return -EMSGSIZE;
343}
142
143static inline int
144bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
145		    struct bitmap_ipmac *map, u32 flags)
146{
147	struct bitmap_ipmac_elem *elem;
148
149	elem = get_elem(map->extensions, e->id, map->dsize);
150	if (test_and_set_bit(e->id, map->members)) {
151		if (elem->filled == MAC_FILLED) {
152			if (e->ether && (flags & IPSET_FLAG_EXIST))
153				memcpy(elem->ether, e->ether, ETH_ALEN);
154			return IPSET_ADD_FAILED;
155		} else if (!e->ether)
156			/* Already added without ethernet address */
157			return IPSET_ADD_FAILED;
158		/* Fill the MAC address and trigger the timer activation */
159		memcpy(elem->ether, e->ether, ETH_ALEN);
160		elem->filled = MAC_FILLED;
161		return IPSET_ADD_START_STORED_TIMEOUT;
162	} else if (e->ether) {
163		/* We can store MAC too */
164		memcpy(elem->ether, e->ether, ETH_ALEN);
165		elem->filled = MAC_FILLED;
166		return 0;
167	} else {
168		elem->filled = MAC_UNSET;
169		/* MAC is not stored yet, don't start timer */
170		return IPSET_ADD_STORE_PLAIN_TIMEOUT;
171	}
172}
173
174static inline int
175bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
176		    struct bitmap_ipmac *map)
177{
178	return !test_and_clear_bit(e->id, map->members);
179}
180
181static inline unsigned long
182ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout)
183{
184	const struct bitmap_ipmac_elem *elem =
185		get_elem(map->extensions, id, map->dsize);
186
187	return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) :
188		*timeout;
189}
190
191static inline int
192bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
193		     u32 id)
194{
195	const struct bitmap_ipmac_elem *elem =
196		get_elem(map->extensions, id, map->dsize);
197
198	return nla_put_ipaddr4(skb, IPSET_ATTR_IP,
199			       htonl(map->first_ip + id)) ||
200	       (elem->filled == MAC_FILLED &&
201		nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether));
202}
203
204static inline int
205bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map)
206{
207	return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
208	       nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));
209}
344 210
345static int 211static int
346bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, 212bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
347 const struct xt_action_param *par, 213 const struct xt_action_param *par,
348 enum ipset_adt adt, const struct ip_set_adt_opt *opt) 214 enum ipset_adt adt, struct ip_set_adt_opt *opt)
349{ 215{
350 struct bitmap_ipmac *map = set->data; 216 struct bitmap_ipmac *map = set->data;
351 ipset_adtfn adtfn = set->variant->adt[adt]; 217 ipset_adtfn adtfn = set->variant->adt[adt];
352 struct ipmac data; 218 struct bitmap_ipmac_adt_elem e = {};
219 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
220 u32 ip;
353 221
354 /* MAC can be src only */ 222 /* MAC can be src only */
355 if (!(opt->flags & IPSET_DIM_TWO_SRC)) 223 if (!(opt->flags & IPSET_DIM_TWO_SRC))
356 return 0; 224 return 0;
357 225
358 data.id = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); 226 ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
359 if (data.id < map->first_ip || data.id > map->last_ip) 227 if (ip < map->first_ip || ip > map->last_ip)
360 return -IPSET_ERR_BITMAP_RANGE; 228 return -IPSET_ERR_BITMAP_RANGE;
361 229
362 /* Backward compatibility: we don't check the second flag */ 230 /* Backward compatibility: we don't check the second flag */
@@ -364,10 +232,10 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
364 (skb_mac_header(skb) + ETH_HLEN) > skb->data) 232 (skb_mac_header(skb) + ETH_HLEN) > skb->data)
365 return -EINVAL; 233 return -EINVAL;
366 234
367 data.id -= map->first_ip; 235 e.id = ip_to_id(map, ip);
368 data.ether = eth_hdr(skb)->h_source; 236 e.ether = eth_hdr(skb)->h_source;
369 237
370 return adtfn(set, &data, opt_timeout(opt, map), opt->cmdflags); 238 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
371} 239}
372 240
373static int 241static int
@@ -376,91 +244,39 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],
376{ 244{
377 const struct bitmap_ipmac *map = set->data; 245 const struct bitmap_ipmac *map = set->data;
378 ipset_adtfn adtfn = set->variant->adt[adt]; 246 ipset_adtfn adtfn = set->variant->adt[adt];
379 struct ipmac data; 247 struct bitmap_ipmac_adt_elem e = {};
380 u32 timeout = map->timeout; 248 struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
249 u32 ip;
381 int ret = 0; 250 int ret = 0;
382 251
383 if (unlikely(!tb[IPSET_ATTR_IP] || 252 if (unlikely(!tb[IPSET_ATTR_IP] ||
384 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) 253 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
254 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
255 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
385 return -IPSET_ERR_PROTOCOL; 256 return -IPSET_ERR_PROTOCOL;
386 257
387 if (tb[IPSET_ATTR_LINENO]) 258 if (tb[IPSET_ATTR_LINENO])
388 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 259 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
389 260
390 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &data.id); 261 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
262 ip_set_get_extensions(set, tb, &ext);
391 if (ret) 263 if (ret)
392 return ret; 264 return ret;
393 265
394 if (data.id < map->first_ip || data.id > map->last_ip) 266 if (ip < map->first_ip || ip > map->last_ip)
395 return -IPSET_ERR_BITMAP_RANGE; 267 return -IPSET_ERR_BITMAP_RANGE;
396 268
269 e.id = ip_to_id(map, ip);
397 if (tb[IPSET_ATTR_ETHER]) 270 if (tb[IPSET_ATTR_ETHER])
398 data.ether = nla_data(tb[IPSET_ATTR_ETHER]); 271 e.ether = nla_data(tb[IPSET_ATTR_ETHER]);
399 else 272 else
400 data.ether = NULL; 273 e.ether = NULL;
401
402 if (tb[IPSET_ATTR_TIMEOUT]) {
403 if (!with_timeout(map->timeout))
404 return -IPSET_ERR_TIMEOUT;
405 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
406 }
407 274
408 data.id -= map->first_ip; 275 ret = adtfn(set, &e, &ext, &ext, flags);
409
410 ret = adtfn(set, &data, timeout, flags);
411 276
412 return ip_set_eexist(ret, flags) ? 0 : ret; 277 return ip_set_eexist(ret, flags) ? 0 : ret;
413} 278}
414 279
415static void
416bitmap_ipmac_destroy(struct ip_set *set)
417{
418 struct bitmap_ipmac *map = set->data;
419
420 if (with_timeout(map->timeout))
421 del_timer_sync(&map->gc);
422
423 ip_set_free(map->members);
424 kfree(map);
425
426 set->data = NULL;
427}
428
429static void
430bitmap_ipmac_flush(struct ip_set *set)
431{
432 struct bitmap_ipmac *map = set->data;
433
434 memset(map->members, 0,
435 (map->last_ip - map->first_ip + 1) * map->dsize);
436}
437
438static int
439bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb)
440{
441 const struct bitmap_ipmac *map = set->data;
442 struct nlattr *nested;
443
444 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
445 if (!nested)
446 goto nla_put_failure;
447 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
448 nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) ||
449 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
450 nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
451 htonl(sizeof(*map) +
452 ((map->last_ip - map->first_ip + 1) *
453 map->dsize))) ||
454 (with_timeout(map->timeout) &&
455 nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))))
456 goto nla_put_failure;
457 ipset_nest_end(skb, nested);
458
459 return 0;
460nla_put_failure:
461 return -EMSGSIZE;
462}
463
464static bool 280static bool
465bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b) 281bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
466{ 282{
@@ -469,85 +285,64 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)
469 285
470	return x->first_ip == y->first_ip &&
471	       x->last_ip == y->last_ip &&
472	       x->timeout == y->timeout;
473}
474
475static const struct ip_set_type_variant bitmap_ipmac = {
476	.kadt	= bitmap_ipmac_kadt,
477	.uadt	= bitmap_ipmac_uadt,
478	.adt	= {
479		[IPSET_ADD] = bitmap_ipmac_add,
480		[IPSET_DEL] = bitmap_ipmac_del,
481		[IPSET_TEST] = bitmap_ipmac_test,
482	},
483	.destroy = bitmap_ipmac_destroy,
484	.flush	= bitmap_ipmac_flush,
485	.head	= bitmap_ipmac_head,
486	.list	= bitmap_ipmac_list,
487	.same_set = bitmap_ipmac_same_set,
488};
489
490static const struct ip_set_type_variant bitmap_tipmac = {
491	.kadt	= bitmap_ipmac_kadt,
492	.uadt	= bitmap_ipmac_uadt,
493	.adt	= {
494		[IPSET_ADD] = bitmap_ipmac_tadd,
495		[IPSET_DEL] = bitmap_ipmac_tdel,
496		[IPSET_TEST] = bitmap_ipmac_ttest,
497	},
498	.destroy = bitmap_ipmac_destroy,
499	.flush	= bitmap_ipmac_flush,
500	.head	= bitmap_ipmac_head,
501	.list	= bitmap_ipmac_tlist,
502	.same_set = bitmap_ipmac_same_set,
503};
504
505static void
506bitmap_ipmac_gc(unsigned long ul_set)
507{
508	struct ip_set *set = (struct ip_set *) ul_set;
509	struct bitmap_ipmac *map = set->data;
510	struct ipmac_telem *elem;
511	u32 id, last = map->last_ip - map->first_ip;
512
513	/* We run parallel with other readers (test element)
514	 * but adding/deleting new entries is locked out */
515	read_lock_bh(&set->lock);
516	for (id = 0; id <= last; id++) {
517		elem = bitmap_ipmac_elem(map, id);
518		if (elem->match == MAC_FILLED &&
519		    ip_set_timeout_expired(elem->timeout))
520			elem->match = MAC_EMPTY;
521	}
522	read_unlock_bh(&set->lock);
523
524	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
525	add_timer(&map->gc);
526}
527
528static void
529bitmap_ipmac_gc_init(struct ip_set *set)
530{
531	struct bitmap_ipmac *map = set->data;
532
533	init_timer(&map->gc);
534	map->gc.data = (unsigned long) set;
535	map->gc.function = bitmap_ipmac_gc;
536	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
537	add_timer(&map->gc);
538}
286	return x->first_ip == y->first_ip &&
287	       x->last_ip == y->last_ip &&
288	       x->timeout == y->timeout &&
289	       a->extensions == b->extensions;
290}
291
292/* Plain variant */
293
294/* Timeout variant */
295
296struct bitmap_ipmact_elem {
297	struct {
298		unsigned char ether[ETH_ALEN];
299		unsigned char filled;
300	} __attribute__ ((aligned));
301	unsigned long timeout;
302};
303
304/* Plain variant with counter */
305
306struct bitmap_ipmacc_elem {
307	struct {
308		unsigned char ether[ETH_ALEN];
309		unsigned char filled;
310	} __attribute__ ((aligned));
311	struct ip_set_counter counter;
312};
313
314/* Timeout variant with counter */
315
316struct bitmap_ipmacct_elem {
317	struct {
318		unsigned char ether[ETH_ALEN];
319		unsigned char filled;
320	} __attribute__ ((aligned));
321	unsigned long timeout;
322	struct ip_set_counter counter;
323};
324
325#include "ip_set_bitmap_gen.h"
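The #include at the end is the structural heart of this rework: ip_set_bitmap_gen.h (added elsewhere in this merge, alongside the ip_set_hash_gen.h shown further below) presumably instantiates the shared add/del/test/list/destroy logic once per MTYPE, calling back into the small bitmap_ipmac_do_* helpers above and using map->dsize and map->offset[] to locate the per-element timeout and counter extensions.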
539 326
540/* Create bitmap:ip,mac type of sets */ 327/* Create bitmap:ip,mac type of sets */
541 328
542static bool 329static bool
543init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, 330init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
544 u32 first_ip, u32 last_ip) 331 u32 first_ip, u32 last_ip, u32 elements)
545{ 332{
546 map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize); 333 map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize);
547 if (!map->members) 334 if (!map->members)
548 return false; 335 return false;
336 if (map->dsize) {
337 map->extensions = ip_set_alloc(map->dsize * elements);
338 if (!map->extensions) {
339 kfree(map->members);
340 return false;
341 }
342 }
549 map->first_ip = first_ip; 343 map->first_ip = first_ip;
550 map->last_ip = last_ip; 344 map->last_ip = last_ip;
345 map->elements = elements;
551 map->timeout = IPSET_NO_TIMEOUT; 346 map->timeout = IPSET_NO_TIMEOUT;
552 347
553 set->data = map; 348 set->data = map;
@@ -560,13 +355,14 @@ static int
560bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[], 355bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
561 u32 flags) 356 u32 flags)
562{ 357{
563 u32 first_ip, last_ip; 358 u32 first_ip, last_ip, cadt_flags = 0;
564 u64 elements; 359 u64 elements;
565 struct bitmap_ipmac *map; 360 struct bitmap_ipmac *map;
566 int ret; 361 int ret;
567 362
568 if (unlikely(!tb[IPSET_ATTR_IP] || 363 if (unlikely(!tb[IPSET_ATTR_IP] ||
569 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) 364 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
365 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
570 return -IPSET_ERR_PROTOCOL; 366 return -IPSET_ERR_PROTOCOL;
571 367
572 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); 368 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip);
@@ -601,28 +397,59 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],
601 if (!map) 397 if (!map)
602 return -ENOMEM; 398 return -ENOMEM;
603 399
604	if (tb[IPSET_ATTR_TIMEOUT]) {
605		map->dsize = sizeof(struct ipmac_telem);
606
607		if (!init_map_ipmac(set, map, first_ip, last_ip)) {
608			kfree(map);
609			return -ENOMEM;
610		}
611
612		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
613
614		set->variant = &bitmap_tipmac;
615
616		bitmap_ipmac_gc_init(set);
617	} else {
618		map->dsize = sizeof(struct ipmac_elem);
619
620		if (!init_map_ipmac(set, map, first_ip, last_ip)) {
621			kfree(map);
622			return -ENOMEM;
623		}
624		set->variant = &bitmap_ipmac;
625
626	}
627	return 0;
628}
400	map->memsize = bitmap_bytes(0, elements - 1);
401	set->variant = &bitmap_ipmac;
402	if (tb[IPSET_ATTR_CADT_FLAGS])
403		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
404	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
405		set->extensions |= IPSET_EXT_COUNTER;
406		if (tb[IPSET_ATTR_TIMEOUT]) {
407			map->dsize = sizeof(struct bitmap_ipmacct_elem);
408			map->offset[IPSET_OFFSET_TIMEOUT] =
409				offsetof(struct bitmap_ipmacct_elem, timeout);
410			map->offset[IPSET_OFFSET_COUNTER] =
411				offsetof(struct bitmap_ipmacct_elem, counter);
412
413			if (!init_map_ipmac(set, map, first_ip, last_ip,
414					    elements)) {
415				kfree(map);
416				return -ENOMEM;
417			}
418			map->timeout = ip_set_timeout_uget(
419				tb[IPSET_ATTR_TIMEOUT]);
420			set->extensions |= IPSET_EXT_TIMEOUT;
421			bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
422		} else {
423			map->dsize = sizeof(struct bitmap_ipmacc_elem);
424			map->offset[IPSET_OFFSET_COUNTER] =
425				offsetof(struct bitmap_ipmacc_elem, counter);
426
427			if (!init_map_ipmac(set, map, first_ip, last_ip,
428					    elements)) {
429				kfree(map);
430				return -ENOMEM;
431			}
432		}
433	} else if (tb[IPSET_ATTR_TIMEOUT]) {
434		map->dsize = sizeof(struct bitmap_ipmact_elem);
435		map->offset[IPSET_OFFSET_TIMEOUT] =
436			offsetof(struct bitmap_ipmact_elem, timeout);
437
438		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
439			kfree(map);
440			return -ENOMEM;
441		}
442		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
443		set->extensions |= IPSET_EXT_TIMEOUT;
444		bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);
445	} else {
446		map->dsize = sizeof(struct bitmap_ipmac_elem);
447
448		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {
449			kfree(map);
450			return -ENOMEM;
451		}
452		set->variant = &bitmap_ipmac;
453	}
454	return 0;
455}
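Each branch above only picks an element layout and records field offsets; everything else reaches the extensions through those offsets. A sketch of that access pattern, under the assumption of a flat per-element extension array; ext_at, counter_sketch and elem_tc are illustrative names, not the kernel API:

#include <stddef.h>

struct counter_sketch { unsigned long bytes, packets; };

struct elem_tc {			/* mirrors bitmap_ipmacct_elem */
	unsigned char ether[6];
	unsigned char filled;
	unsigned long timeout;
	struct counter_sketch counter;
};

static void *ext_at(void *extensions, unsigned int id,
		    size_t dsize, size_t off)
{
	/* extensions is a flat array of dsize-byte element records */
	return (char *)extensions + (size_t)id * dsize + off;
}

/* e.g. the timeout slot of element id:
 *	ext_at(extensions, id, sizeof(struct elem_tc),
 *	       offsetof(struct elem_tc, timeout))
 */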
@@ -641,6 +468,7 @@ static struct ip_set_type bitmap_ipmac_type = {
641 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, 468 [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
642 [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, 469 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
643 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 470 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
471 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
644 }, 472 },
645 .adt_policy = { 473 .adt_policy = {
646 [IPSET_ATTR_IP] = { .type = NLA_NESTED }, 474 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -648,6 +476,8 @@ static struct ip_set_type bitmap_ipmac_type = {
648 .len = ETH_ALEN }, 476 .len = ETH_ALEN },
649 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 477 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
650 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 478 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
479 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
480 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
651 }, 481 },
652 .me = THIS_MODULE, 482 .me = THIS_MODULE,
653}; 483};
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index e6b2db76f4c3..8207d1fda528 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 1/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 * 2 *
3 * This program is free software; you can redistribute it and/or modify 3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as 4 * it under the terms of the GNU General Public License version 2 as
@@ -19,205 +19,94 @@
19#include <linux/netfilter/ipset/ip_set.h> 19#include <linux/netfilter/ipset/ip_set.h>
20#include <linux/netfilter/ipset/ip_set_bitmap.h> 20#include <linux/netfilter/ipset/ip_set_bitmap.h>
21#include <linux/netfilter/ipset/ip_set_getport.h> 21#include <linux/netfilter/ipset/ip_set_getport.h>
22#define IP_SET_BITMAP_TIMEOUT
23#include <linux/netfilter/ipset/ip_set_timeout.h>
24 22
25#define REVISION_MIN 0 23#define REVISION_MIN 0
26#define REVISION_MAX 0 24#define REVISION_MAX 1 /* Counter support added */
27 25
28MODULE_LICENSE("GPL"); 26MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 27MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
30IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX); 28IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX);
31MODULE_ALIAS("ip_set_bitmap:port"); 29MODULE_ALIAS("ip_set_bitmap:port");
32 30
31#define MTYPE bitmap_port
32
33/* Type structure */ 33/* Type structure */
34struct bitmap_port { 34struct bitmap_port {
35 void *members; /* the set members */ 35 void *members; /* the set members */
36 void *extensions; /* data extensions */
36 u16 first_port; /* host byte order, included in range */ 37 u16 first_port; /* host byte order, included in range */
37 u16 last_port; /* host byte order, included in range */ 38 u16 last_port; /* host byte order, included in range */
39 u32 elements; /* number of max elements in the set */
38 size_t memsize; /* members size */ 40 size_t memsize; /* members size */
41 size_t dsize; /* extensions struct size */
42 size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
39 u32 timeout; /* timeout parameter */ 43 u32 timeout; /* timeout parameter */
40 struct timer_list gc; /* garbage collection */ 44 struct timer_list gc; /* garbage collection */
41}; 45};
42 46
43/* Base variant */
44
45static int
46bitmap_port_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
47{
48	const struct bitmap_port *map = set->data;
49	u16 id = *(u16 *)value;
50
51	return !!test_bit(id, map->members);
52}
53
54static int
55bitmap_port_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
56{
57	struct bitmap_port *map = set->data;
58	u16 id = *(u16 *)value;
59
60	if (test_and_set_bit(id, map->members))
61		return -IPSET_ERR_EXIST;
62
63	return 0;
64}
65
66static int
67bitmap_port_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
68{
69	struct bitmap_port *map = set->data;
70	u16 id = *(u16 *)value;
71
72	if (!test_and_clear_bit(id, map->members))
73		return -IPSET_ERR_EXIST;
74
75	return 0;
76}
 47/* ADT structure for generic function args */
 48struct bitmap_port_adt_elem {
 49	u16 id;
 50};
 51
 52static inline u16
 53port_to_id(const struct bitmap_port *m, u16 port)
 54{
 55	return port - m->first_port;
 56}
 57
 58/* Common functions */
 59
 60static inline int
 61bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
 62		    const struct bitmap_port *map)
 63{
 64	return !!test_bit(e->id, map->members);
 65}
77 66
78static int
79bitmap_port_list(const struct ip_set *set,
80		 struct sk_buff *skb, struct netlink_callback *cb)
81{
82	const struct bitmap_port *map = set->data;
83	struct nlattr *atd, *nested;
84	u16 id, first = cb->args[2];
85	u16 last = map->last_port - map->first_port;
86
87	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
88	if (!atd)
89		return -EMSGSIZE;
90	for (; cb->args[2] <= last; cb->args[2]++) {
91		id = cb->args[2];
92		if (!test_bit(id, map->members))
93			continue;
94		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
95		if (!nested) {
96			if (id == first) {
97				nla_nest_cancel(skb, atd);
98				return -EMSGSIZE;
99			} else
100				goto nla_put_failure;
101		}
102		if (nla_put_net16(skb, IPSET_ATTR_PORT,
103				  htons(map->first_port + id)))
104			goto nla_put_failure;
105		ipset_nest_end(skb, nested);
106	}
107	ipset_nest_end(skb, atd);
108	/* Set listing finished */
109	cb->args[2] = 0;
110
111	return 0;
112
113nla_put_failure:
114	nla_nest_cancel(skb, nested);
115	ipset_nest_end(skb, atd);
116	if (unlikely(id == first)) {
117		cb->args[2] = 0;
118		return -EMSGSIZE;
119	}
120	return 0;
121}
 67static inline int
 68bitmap_port_gc_test(u16 id, const struct bitmap_port *map)
 69{
 70	return !!test_bit(id, map->members);
 71}
122 72
123/* Timeout variant */
124
125static int
126bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
127{
128	const struct bitmap_port *map = set->data;
129	const unsigned long *members = map->members;
130	u16 id = *(u16 *)value;
131
132	return ip_set_timeout_test(members[id]);
133}
134
135static int
136bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
137{
138	struct bitmap_port *map = set->data;
139	unsigned long *members = map->members;
140	u16 id = *(u16 *)value;
141
142	if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST))
143		return -IPSET_ERR_EXIST;
144
145	members[id] = ip_set_timeout_set(timeout);
146
147	return 0;
148}
149
150static int
151bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
152{
153	struct bitmap_port *map = set->data;
154	unsigned long *members = map->members;
155	u16 id = *(u16 *)value;
156	int ret = -IPSET_ERR_EXIST;
157
158	if (ip_set_timeout_test(members[id]))
159		ret = 0;
160
161	members[id] = IPSET_ELEM_UNSET;
162	return ret;
163}
164
165static int
166bitmap_port_tlist(const struct ip_set *set,
167		  struct sk_buff *skb, struct netlink_callback *cb)
168{
169	const struct bitmap_port *map = set->data;
170	struct nlattr *adt, *nested;
171	u16 id, first = cb->args[2];
172	u16 last = map->last_port - map->first_port;
173	const unsigned long *members = map->members;
174
175	adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
176	if (!adt)
177		return -EMSGSIZE;
178	for (; cb->args[2] <= last; cb->args[2]++) {
179		id = cb->args[2];
180		if (!ip_set_timeout_test(members[id]))
181			continue;
182		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
183		if (!nested) {
184			if (id == first) {
185				nla_nest_cancel(skb, adt);
186				return -EMSGSIZE;
187			} else
188				goto nla_put_failure;
189		}
190		if (nla_put_net16(skb, IPSET_ATTR_PORT,
191				  htons(map->first_port + id)) ||
192		    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
193				  htonl(ip_set_timeout_get(members[id]))))
194			goto nla_put_failure;
195		ipset_nest_end(skb, nested);
196	}
197	ipset_nest_end(skb, adt);
198
199	/* Set listing finished */
200	cb->args[2] = 0;
201
202	return 0;
203
204nla_put_failure:
205	nla_nest_cancel(skb, nested);
206	ipset_nest_end(skb, adt);
207	if (unlikely(id == first)) {
208		cb->args[2] = 0;
209		return -EMSGSIZE;
210	}
211	return 0;
212}
 73static inline int
 74bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
 75		   struct bitmap_port *map, u32 flags)
 76{
 77	return !!test_and_set_bit(e->id, map->members);
 78}
 79
 80static inline int
 81bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
 82		   struct bitmap_port *map)
 83{
 84	return !test_and_clear_bit(e->id, map->members);
 85}
 86
 87static inline int
 88bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id)
 89{
 90	return nla_put_net16(skb, IPSET_ATTR_PORT,
 91			     htons(map->first_port + id));
 92}
 93
 94static inline int
 95bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map)
 96{
 97	return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) ||
 98	       nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
 99}
213 100
214static int 101static int
215bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb, 102bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
216 const struct xt_action_param *par, 103 const struct xt_action_param *par,
217 enum ipset_adt adt, const struct ip_set_adt_opt *opt) 104 enum ipset_adt adt, struct ip_set_adt_opt *opt)
218{ 105{
219 struct bitmap_port *map = set->data; 106 struct bitmap_port *map = set->data;
220 ipset_adtfn adtfn = set->variant->adt[adt]; 107 ipset_adtfn adtfn = set->variant->adt[adt];
108 struct bitmap_port_adt_elem e = {};
109 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
221 __be16 __port; 110 __be16 __port;
222 u16 port = 0; 111 u16 port = 0;
223 112
@@ -230,9 +119,9 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
230 if (port < map->first_port || port > map->last_port) 119 if (port < map->first_port || port > map->last_port)
231 return -IPSET_ERR_BITMAP_RANGE; 120 return -IPSET_ERR_BITMAP_RANGE;
232 121
233 port -= map->first_port; 122 e.id = port_to_id(map, port);
234 123
235 return adtfn(set, &port, opt_timeout(opt, map), opt->cmdflags); 124 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
236} 125}
237 126
238static int 127static int
@@ -241,14 +130,17 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
241{ 130{
242 struct bitmap_port *map = set->data; 131 struct bitmap_port *map = set->data;
243 ipset_adtfn adtfn = set->variant->adt[adt]; 132 ipset_adtfn adtfn = set->variant->adt[adt];
244 u32 timeout = map->timeout; 133 struct bitmap_port_adt_elem e = {};
134 struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
245 u32 port; /* wraparound */ 135 u32 port; /* wraparound */
246 u16 id, port_to; 136 u16 port_to;
247 int ret = 0; 137 int ret = 0;
248 138
249 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 139 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
250 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || 140 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
251 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) 141 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
142 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
143 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
252 return -IPSET_ERR_PROTOCOL; 144 return -IPSET_ERR_PROTOCOL;
253 145
254 if (tb[IPSET_ATTR_LINENO]) 146 if (tb[IPSET_ATTR_LINENO])
@@ -257,16 +149,13 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
257 port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); 149 port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
258 if (port < map->first_port || port > map->last_port) 150 if (port < map->first_port || port > map->last_port)
259 return -IPSET_ERR_BITMAP_RANGE; 151 return -IPSET_ERR_BITMAP_RANGE;
260 152 ret = ip_set_get_extensions(set, tb, &ext);
261 if (tb[IPSET_ATTR_TIMEOUT]) { 153 if (ret)
262 if (!with_timeout(map->timeout)) 154 return ret;
263 return -IPSET_ERR_TIMEOUT;
264 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
265 }
266 155
267 if (adt == IPSET_TEST) { 156 if (adt == IPSET_TEST) {
268 id = port - map->first_port; 157 e.id = port_to_id(map, port);
269 return adtfn(set, &id, timeout, flags); 158 return adtfn(set, &e, &ext, &ext, flags);
270 } 159 }
271 160
272 if (tb[IPSET_ATTR_PORT_TO]) { 161 if (tb[IPSET_ATTR_PORT_TO]) {
@@ -283,8 +172,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
283 return -IPSET_ERR_BITMAP_RANGE; 172 return -IPSET_ERR_BITMAP_RANGE;
284 173
285 for (; port <= port_to; port++) { 174 for (; port <= port_to; port++) {
286 id = port - map->first_port; 175 e.id = port_to_id(map, port);
287 ret = adtfn(set, &id, timeout, flags); 176 ret = adtfn(set, &e, &ext, &ext, flags);
288 177
289 if (ret && !ip_set_eexist(ret, flags)) 178 if (ret && !ip_set_eexist(ret, flags))
290 return ret; 179 return ret;
@@ -294,52 +183,6 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
294 return ret; 183 return ret;
295} 184}
296 185
297static void
298bitmap_port_destroy(struct ip_set *set)
299{
300 struct bitmap_port *map = set->data;
301
302 if (with_timeout(map->timeout))
303 del_timer_sync(&map->gc);
304
305 ip_set_free(map->members);
306 kfree(map);
307
308 set->data = NULL;
309}
310
311static void
312bitmap_port_flush(struct ip_set *set)
313{
314 struct bitmap_port *map = set->data;
315
316 memset(map->members, 0, map->memsize);
317}
318
319static int
320bitmap_port_head(struct ip_set *set, struct sk_buff *skb)
321{
322 const struct bitmap_port *map = set->data;
323 struct nlattr *nested;
324
325 nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
326 if (!nested)
327 goto nla_put_failure;
328 if (nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) ||
329 nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)) ||
330 nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
331 nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
332 htonl(sizeof(*map) + map->memsize)) ||
333 (with_timeout(map->timeout) &&
334 nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))))
335 goto nla_put_failure;
336 ipset_nest_end(skb, nested);
337
338 return 0;
339nla_put_failure:
340 return -EMSGSIZE;
341}
342
343static bool 186static bool
344bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b) 187bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
345{ 188{
@@ -348,71 +191,35 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)
348 191
349 return x->first_port == y->first_port && 192 return x->first_port == y->first_port &&
350 x->last_port == y->last_port && 193 x->last_port == y->last_port &&
351 x->timeout == y->timeout; 194 x->timeout == y->timeout &&
195 a->extensions == b->extensions;
352} 196}
353 197
354static const struct ip_set_type_variant bitmap_port = {
355	.kadt	= bitmap_port_kadt,
356	.uadt	= bitmap_port_uadt,
357	.adt	= {
358		[IPSET_ADD] = bitmap_port_add,
359		[IPSET_DEL] = bitmap_port_del,
360		[IPSET_TEST] = bitmap_port_test,
361	},
362	.destroy = bitmap_port_destroy,
363	.flush	= bitmap_port_flush,
364	.head	= bitmap_port_head,
365	.list	= bitmap_port_list,
366	.same_set = bitmap_port_same_set,
367};
368
369static const struct ip_set_type_variant bitmap_tport = {
370	.kadt	= bitmap_port_kadt,
371	.uadt	= bitmap_port_uadt,
372	.adt	= {
373		[IPSET_ADD] = bitmap_port_tadd,
374		[IPSET_DEL] = bitmap_port_tdel,
375		[IPSET_TEST] = bitmap_port_ttest,
376	},
377	.destroy = bitmap_port_destroy,
378	.flush	= bitmap_port_flush,
379	.head	= bitmap_port_head,
380	.list	= bitmap_port_tlist,
381	.same_set = bitmap_port_same_set,
382};
383
384static void
385bitmap_port_gc(unsigned long ul_set)
386{
387	struct ip_set *set = (struct ip_set *) ul_set;
388	struct bitmap_port *map = set->data;
389	unsigned long *table = map->members;
390	u32 id; /* wraparound */
391	u16 last = map->last_port - map->first_port;
392
393	/* We run parallel with other readers (test element)
394	 * but adding/deleting new entries is locked out */
395	read_lock_bh(&set->lock);
396	for (id = 0; id <= last; id++)
397		if (ip_set_timeout_expired(table[id]))
398			table[id] = IPSET_ELEM_UNSET;
399	read_unlock_bh(&set->lock);
400
401	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
402	add_timer(&map->gc);
403}
404
405static void
406bitmap_port_gc_init(struct ip_set *set)
407{
408	struct bitmap_port *map = set->data;
409
410	init_timer(&map->gc);
411	map->gc.data = (unsigned long) set;
412	map->gc.function = bitmap_port_gc;
413	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
414	add_timer(&map->gc);
415}
198/* Plain variant */
199
200struct bitmap_port_elem {
201};
202
203/* Timeout variant */
204
205struct bitmap_portt_elem {
206	unsigned long timeout;
207};
208
209/* Plain variant with counter */
210
211struct bitmap_portc_elem {
212	struct ip_set_counter counter;
213};
214
215/* Timeout variant with counter */
216
217struct bitmap_portct_elem {
218	unsigned long timeout;
219	struct ip_set_counter counter;
220};
221
222#include "ip_set_bitmap_gen.h"
416 223
417/* Create bitmap:ip type of sets */ 224/* Create bitmap:ip type of sets */
418 225
@@ -423,6 +230,13 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
423 map->members = ip_set_alloc(map->memsize); 230 map->members = ip_set_alloc(map->memsize);
424 if (!map->members) 231 if (!map->members)
425 return false; 232 return false;
233 if (map->dsize) {
234 map->extensions = ip_set_alloc(map->dsize * map->elements);
235 if (!map->extensions) {
236 kfree(map->members);
237 return false;
238 }
239 }
426 map->first_port = first_port; 240 map->first_port = first_port;
427 map->last_port = last_port; 241 map->last_port = last_port;
428 map->timeout = IPSET_NO_TIMEOUT; 242 map->timeout = IPSET_NO_TIMEOUT;
@@ -434,15 +248,16 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
434} 248}
435 249
436static int 250static int
437bitmap_port_create(struct ip_set *set, struct nlattr *tb[], 251bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
438 u32 flags)
439{ 252{
440 struct bitmap_port *map; 253 struct bitmap_port *map;
441 u16 first_port, last_port; 254 u16 first_port, last_port;
255 u32 cadt_flags = 0;
442 256
443 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 257 if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
444 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || 258 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) ||
445 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) 259 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
260 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
446 return -IPSET_ERR_PROTOCOL; 261 return -IPSET_ERR_PROTOCOL;
447 262
448 first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); 263 first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);
@@ -458,28 +273,56 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[],
458 if (!map) 273 if (!map)
459 return -ENOMEM; 274 return -ENOMEM;
460 275
461	if (tb[IPSET_ATTR_TIMEOUT]) {
462		map->memsize = (last_port - first_port + 1)
463			       * sizeof(unsigned long);
464
465		if (!init_map_port(set, map, first_port, last_port)) {
466			kfree(map);
467			return -ENOMEM;
468		}
469
470		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
471		set->variant = &bitmap_tport;
472
473		bitmap_port_gc_init(set);
474	} else {
475		map->memsize = bitmap_bytes(0, last_port - first_port);
476		pr_debug("memsize: %zu\n", map->memsize);
477		if (!init_map_port(set, map, first_port, last_port)) {
478			kfree(map);
479			return -ENOMEM;
480		}
481
482		set->variant = &bitmap_port;
483	}
484	return 0;
485}
276	map->elements = last_port - first_port + 1;
277	map->memsize = map->elements * sizeof(unsigned long);
278	set->variant = &bitmap_port;
279	if (tb[IPSET_ATTR_CADT_FLAGS])
280		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
281	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
282		set->extensions |= IPSET_EXT_COUNTER;
283		if (tb[IPSET_ATTR_TIMEOUT]) {
284			map->dsize = sizeof(struct bitmap_portct_elem);
285			map->offset[IPSET_OFFSET_TIMEOUT] =
286				offsetof(struct bitmap_portct_elem, timeout);
287			map->offset[IPSET_OFFSET_COUNTER] =
288				offsetof(struct bitmap_portct_elem, counter);
289			if (!init_map_port(set, map, first_port, last_port)) {
290				kfree(map);
291				return -ENOMEM;
292			}
293
294			map->timeout =
295				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
296			set->extensions |= IPSET_EXT_TIMEOUT;
297			bitmap_port_gc_init(set, bitmap_port_gc);
298		} else {
299			map->dsize = sizeof(struct bitmap_portc_elem);
300			map->offset[IPSET_OFFSET_COUNTER] =
301				offsetof(struct bitmap_portc_elem, counter);
302			if (!init_map_port(set, map, first_port, last_port)) {
303				kfree(map);
304				return -ENOMEM;
305			}
306		}
307	} else if (tb[IPSET_ATTR_TIMEOUT]) {
308		map->dsize = sizeof(struct bitmap_portt_elem);
309		map->offset[IPSET_OFFSET_TIMEOUT] =
310			offsetof(struct bitmap_portt_elem, timeout);
311		if (!init_map_port(set, map, first_port, last_port)) {
312			kfree(map);
313			return -ENOMEM;
314		}
315
316		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
317		set->extensions |= IPSET_EXT_TIMEOUT;
318		bitmap_port_gc_init(set, bitmap_port_gc);
319	} else {
320		map->dsize = 0;
321		if (!init_map_port(set, map, first_port, last_port)) {
322			kfree(map);
323			return -ENOMEM;
324		}
325	}
326	return 0;
327}
@@ -497,12 +340,15 @@ static struct ip_set_type bitmap_port_type = {
497 [IPSET_ATTR_PORT] = { .type = NLA_U16 }, 340 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
498 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, 341 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
499 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 342 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
343 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
500 }, 344 },
501 .adt_policy = { 345 .adt_policy = {
502 [IPSET_ATTR_PORT] = { .type = NLA_U16 }, 346 [IPSET_ATTR_PORT] = { .type = NLA_U16 },
503 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, 347 [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 },
504 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 348 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
505 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 349 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
350 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
351 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
506 }, 352 },
507 .me = THIS_MODULE, 353 .me = THIS_MODULE,
508}; 354};
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 1ba9dbc0e107..f77139007983 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1,6 +1,6 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> 1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de> 2 * Patrick Schaaf <bof@bof.de>
3 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 3 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -15,7 +15,6 @@
15#include <linux/ip.h> 15#include <linux/ip.h>
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <linux/spinlock.h> 17#include <linux/spinlock.h>
18#include <linux/netlink.h>
19#include <linux/rculist.h> 18#include <linux/rculist.h>
20#include <net/netlink.h> 19#include <net/netlink.h>
21 20
@@ -316,6 +315,29 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
316} 315}
317EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); 316EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
318 317
318int
319ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
320 struct ip_set_ext *ext)
321{
322 if (tb[IPSET_ATTR_TIMEOUT]) {
323 if (!(set->extensions & IPSET_EXT_TIMEOUT))
324 return -IPSET_ERR_TIMEOUT;
325 ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
326 }
327 if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
328 if (!(set->extensions & IPSET_EXT_COUNTER))
329 return -IPSET_ERR_COUNTER;
330 if (tb[IPSET_ATTR_BYTES])
331 ext->bytes = be64_to_cpu(nla_get_be64(
332 tb[IPSET_ATTR_BYTES]));
333 if (tb[IPSET_ATTR_PACKETS])
334 ext->packets = be64_to_cpu(nla_get_be64(
335 tb[IPSET_ATTR_PACKETS]));
336 }
337 return 0;
338}
339EXPORT_SYMBOL_GPL(ip_set_get_extensions);
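In effect, a request carrying IPSET_ATTR_TIMEOUT against a set created without timeout support now fails early with -IPSET_ERR_TIMEOUT, and byte/packet counter attributes likewise require IPSET_EXT_COUNTER, replacing the per-type with_timeout() checks the old code performed in each uadt handler.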
340
319/* 341/*
320 * Creating/destroying/renaming/swapping affect the existence and 342 * Creating/destroying/renaming/swapping affect the existence and
321 * the properties of a set. All of these can be executed from userspace 343 * the properties of a set. All of these can be executed from userspace
@@ -366,8 +388,7 @@ ip_set_rcu_get(ip_set_id_t index)
366 388
367int 389int
368ip_set_test(ip_set_id_t index, const struct sk_buff *skb, 390ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
369 const struct xt_action_param *par, 391 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
370 const struct ip_set_adt_opt *opt)
371{ 392{
372 struct ip_set *set = ip_set_rcu_get(index); 393 struct ip_set *set = ip_set_rcu_get(index);
373 int ret = 0; 394 int ret = 0;
@@ -392,7 +413,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
392 ret = 1; 413 ret = 1;
393 } else { 414 } else {
394 /* --return-nomatch: invert matched element */ 415 /* --return-nomatch: invert matched element */
395 if ((opt->flags & IPSET_RETURN_NOMATCH) && 416 if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
396 (set->type->features & IPSET_TYPE_NOMATCH) && 417 (set->type->features & IPSET_TYPE_NOMATCH) &&
397 (ret > 0 || ret == -ENOTEMPTY)) 418 (ret > 0 || ret == -ENOTEMPTY))
398 ret = -ret; 419 ret = -ret;
@@ -405,8 +426,7 @@ EXPORT_SYMBOL_GPL(ip_set_test);
405 426
406int 427int
407ip_set_add(ip_set_id_t index, const struct sk_buff *skb, 428ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
408 const struct xt_action_param *par, 429 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
409 const struct ip_set_adt_opt *opt)
410{ 430{
411 struct ip_set *set = ip_set_rcu_get(index); 431 struct ip_set *set = ip_set_rcu_get(index);
412 int ret; 432 int ret;
@@ -428,8 +448,7 @@ EXPORT_SYMBOL_GPL(ip_set_add);
428 448
429int 449int
430ip_set_del(ip_set_id_t index, const struct sk_buff *skb, 450ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
431 const struct xt_action_param *par, 451 const struct xt_action_param *par, struct ip_set_adt_opt *opt)
432 const struct ip_set_adt_opt *opt)
433{ 452{
434 struct ip_set *set = ip_set_rcu_get(index); 453 struct ip_set *set = ip_set_rcu_get(index);
435 int ret = 0; 454 int ret = 0;
@@ -1085,7 +1104,7 @@ static int
1085dump_init(struct netlink_callback *cb) 1104dump_init(struct netlink_callback *cb)
1086{ 1105{
1087 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); 1106 struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
1088 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); 1107 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1089 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; 1108 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1090 struct nlattr *attr = (void *)nlh + min_len; 1109 struct nlattr *attr = (void *)nlh + min_len;
1091 u32 dump_type; 1110 u32 dump_type;
@@ -1301,7 +1320,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
1301 struct sk_buff *skb2; 1320 struct sk_buff *skb2;
1302 struct nlmsgerr *errmsg; 1321 struct nlmsgerr *errmsg;
1303 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); 1322 size_t payload = sizeof(*errmsg) + nlmsg_len(nlh);
1304 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); 1323 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
1305 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; 1324 struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
1306 struct nlattr *cmdattr; 1325 struct nlattr *cmdattr;
1307 u32 *errline; 1326 u32 *errline;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
new file mode 100644
index 000000000000..57beb1762b2d
--- /dev/null
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -0,0 +1,1100 @@
1/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
6 */
7
8#ifndef _IP_SET_HASH_GEN_H
9#define _IP_SET_HASH_GEN_H
10
11#include <linux/rcupdate.h>
12#include <linux/jhash.h>
13#include <linux/netfilter/ipset/ip_set_timeout.h>
14#ifndef rcu_dereference_bh
15#define rcu_dereference_bh(p) rcu_dereference(p)
16#endif
17
18#define CONCAT(a, b) a##b
19#define TOKEN(a, b) CONCAT(a, b)
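The two-level CONCAT/TOKEN indirection is the standard preprocessor idiom: the extra expansion step makes sure the arguments are macro-expanded before pasting, so with MTYPE defined as, say, bitmap_port, TOKEN(MTYPE, _gc) yields bitmap_port_gc rather than MTYPE_gc.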
20
21/* Hashing which uses arrays to resolve clashing. The hash table is resized
22 * (doubled) when searching becomes too long.
23 * Internally jhash is used with the assumption that the size of the
24 * stored data is a multiple of sizeof(u32). If storage supports timeout,
25 * the timeout field must be the last one in the data structure - that field
26 * is ignored when computing the hash key.
27 *
28 * Readers and resizing
29 *
30 * Resizing can be triggered by userspace command only, and those
31 * are serialized by the nfnl mutex. During resizing the set is
32 * read-locked, so the only possible concurrent operations are
33 * the kernel side readers. Those must be protected by proper RCU locking.
34 */
35
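A condensed sketch of the clash resolution described above, using simplified stand-in types (the real hbucket and element definitions follow below); keys that hash to the same bucket sit side by side in a small growable array:

struct bucket_sketch {
	unsigned int *value;	/* growable array of stored keys */
	unsigned char size;	/* allocated slots */
	unsigned char pos;	/* first free slot */
};

static int bucket_find(const struct bucket_sketch *n, unsigned int key)
{
	unsigned char i;

	/* lookup is a short linear scan inside one bucket */
	for (i = 0; i < n->pos; i++)
		if (n->value[i] == key)
			return i;
	return -1;
}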
36/* Number of elements to store in an initial array block */
37#define AHASH_INIT_SIZE 4
38/* Max number of elements to store in an array block */
39#define AHASH_MAX_SIZE (3*AHASH_INIT_SIZE)
40
41/* Max number of elements can be tuned */
42#ifdef IP_SET_HASH_WITH_MULTI
43#define AHASH_MAX(h) ((h)->ahash_max)
44
45static inline u8
46tune_ahash_max(u8 curr, u32 multi)
47{
48 u32 n;
49
50 if (multi < curr)
51 return curr;
52
53 n = curr + AHASH_INIT_SIZE;
54 /* Currently, at listing one hash bucket must fit into a message.
55 * Therefore we have a hard limit here.
56 */
57 return n > curr && n <= 64 ? n : curr;
58}
59#define TUNE_AHASH_MAX(h, multi) \
60 ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
61#else
62#define AHASH_MAX(h) AHASH_MAX_SIZE
63#define TUNE_AHASH_MAX(h, multi)
64#endif
65
66/* A hash bucket */
67struct hbucket {
68 void *value; /* the array of the values */
69 u8 size; /* size of the array */
70 u8 pos; /* position of the first free entry */
71};
72
73/* The hash table: the table size stored here in order to make resizing easy */
74struct htable {
75 u8 htable_bits; /* size of hash table == 2^htable_bits */
76 struct hbucket bucket[0]; /* hashtable buckets */
77};
78
79#define hbucket(h, i) (&((h)->bucket[i]))
80
81/* Book-keeping of the prefixes added to the set */
82struct net_prefixes {
83 u8 cidr; /* the different cidr values in the set */
84 u32 nets; /* number of elements per cidr */
85};
86
87/* Compute the hash table size */
88static size_t
89htable_size(u8 hbits)
90{
91 size_t hsize;
92
93 /* We must fit both into u32 in jhash and size_t */
94 if (hbits > 31)
95 return 0;
96 hsize = jhash_size(hbits);
97 if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket)
98 < hsize)
99 return 0;
100
101 return hsize * sizeof(struct hbucket) + sizeof(struct htable);
102}
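For example, hbits = 10 gives jhash_size(10) = 1024 buckets and an allocation of sizeof(struct htable) + 1024 * sizeof(struct hbucket) bytes; for hbits > 31, or when the bucket array would overflow size_t, the function signals failure by returning 0.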
103
104/* Compute htable_bits from the user input parameter hashsize */
105static u8
106htable_bits(u32 hashsize)
107{
108 /* Assume that hashsize == 2^htable_bits */
109 u8 bits = fls(hashsize - 1);
110 if (jhash_size(bits) != hashsize)
111 /* Round up to the first 2^n value */
112 bits = fls(hashsize);
113
114 return bits;
115}
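So a requested hashsize of 1024 yields bits = 10 (fls(1023) = 10 and jhash_size(10) == 1024), while 1025 is rounded up to the next power of two: fls(1024) = 11, jhash_size(11) = 2048 != 1025, hence bits = fls(1025) = 11.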
116
117/* Destroy the hashtable part of the set */
118static void
119ahash_destroy(struct htable *t)
120{
121 struct hbucket *n;
122 u32 i;
123
124 for (i = 0; i < jhash_size(t->htable_bits); i++) {
125 n = hbucket(t, i);
126 if (n->size)
127 /* FIXME: use slab cache */
128 kfree(n->value);
129 }
130
131 ip_set_free(t);
132}
133
134static int
135hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize)
136{
137 if (n->pos >= n->size) {
138 void *tmp;
139
140 if (n->size >= ahash_max)
141 /* Trigger rehashing */
142 return -EAGAIN;
143
144 tmp = kzalloc((n->size + AHASH_INIT_SIZE) * dsize,
145 GFP_ATOMIC);
146 if (!tmp)
147 return -ENOMEM;
148 if (n->size) {
149 memcpy(tmp, n->value, n->size * dsize);
150 kfree(n->value);
151 }
152 n->value = tmp;
153 n->size += AHASH_INIT_SIZE;
154 }
155 return 0;
156}
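Buckets therefore grow in AHASH_INIT_SIZE (4) element steps up to AHASH_MAX (AHASH_MAX_SIZE = 12 unless tuned via IP_SET_HASH_WITH_MULTI); once a full bucket cannot grow any further, the -EAGAIN return propagates up so the caller resizes (doubles) the whole table instead of the bucket.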
157
158#ifdef IP_SET_HASH_WITH_NETS
159#ifdef IP_SET_HASH_WITH_NETS_PACKED
160/* When cidr is packed with nomatch, cidr - 1 is stored in the entry */
161#define CIDR(cidr) (cidr + 1)
162#else
163#define CIDR(cidr) (cidr)
164#endif
165
166#define SET_HOST_MASK(family) (family == AF_INET ? 32 : 128)
167
168#ifdef IP_SET_HASH_WITH_MULTI
169#define NETS_LENGTH(family) (SET_HOST_MASK(family) + 1)
170#else
171#define NETS_LENGTH(family) SET_HOST_MASK(family)
172#endif
173
174#else
175#define NETS_LENGTH(family) 0
176#endif /* IP_SET_HASH_WITH_NETS */
177
178#define ext_timeout(e, h) \
179(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT])
180#define ext_counter(e, h) \
181(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER])
182
183#endif /* _IP_SET_HASH_GEN_H */
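The ext_timeout()/ext_counter() macros above are the hash-type counterpart of the bitmap offset[] scheme: given an element pointer e, they add the offset recorded at create time and cast, so ext_timeout(data, h) evaluates to (unsigned long *)((void *)data + h->offset[IPSET_OFFSET_TIMEOUT]).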
184
185/* Family dependent templates */
186
187#undef ahash_data
188#undef mtype_data_equal
189#undef mtype_do_data_match
190#undef mtype_data_set_flags
191#undef mtype_data_reset_flags
192#undef mtype_data_netmask
193#undef mtype_data_list
194#undef mtype_data_next
195#undef mtype_elem
196
197#undef mtype_add_cidr
198#undef mtype_del_cidr
199#undef mtype_ahash_memsize
200#undef mtype_flush
201#undef mtype_destroy
202#undef mtype_gc_init
203#undef mtype_same_set
204#undef mtype_kadt
205#undef mtype_uadt
206#undef mtype
207
208#undef mtype_add
209#undef mtype_del
210#undef mtype_test_cidrs
211#undef mtype_test
212#undef mtype_expire
213#undef mtype_resize
214#undef mtype_head
215#undef mtype_list
216#undef mtype_gc
217#undef mtype_gc_init
218#undef mtype_variant
219#undef mtype_data_match
220
221#undef HKEY
222
223#define mtype_data_equal TOKEN(MTYPE, _data_equal)
224#ifdef IP_SET_HASH_WITH_NETS
225#define mtype_do_data_match TOKEN(MTYPE, _do_data_match)
226#else
227#define mtype_do_data_match(d) 1
228#endif
229#define mtype_data_set_flags TOKEN(MTYPE, _data_set_flags)
230#define mtype_data_reset_flags TOKEN(MTYPE, _data_reset_flags)
231#define mtype_data_netmask TOKEN(MTYPE, _data_netmask)
232#define mtype_data_list TOKEN(MTYPE, _data_list)
233#define mtype_data_next TOKEN(MTYPE, _data_next)
234#define mtype_elem TOKEN(MTYPE, _elem)
235#define mtype_add_cidr TOKEN(MTYPE, _add_cidr)
236#define mtype_del_cidr TOKEN(MTYPE, _del_cidr)
237#define mtype_ahash_memsize TOKEN(MTYPE, _ahash_memsize)
238#define mtype_flush TOKEN(MTYPE, _flush)
239#define mtype_destroy TOKEN(MTYPE, _destroy)
240#define mtype_gc_init TOKEN(MTYPE, _gc_init)
241#define mtype_same_set TOKEN(MTYPE, _same_set)
242#define mtype_kadt TOKEN(MTYPE, _kadt)
243#define mtype_uadt TOKEN(MTYPE, _uadt)
244#define mtype MTYPE
245
246#define mtype_elem TOKEN(MTYPE, _elem)
247#define mtype_add TOKEN(MTYPE, _add)
248#define mtype_del TOKEN(MTYPE, _del)
249#define mtype_test_cidrs TOKEN(MTYPE, _test_cidrs)
250#define mtype_test TOKEN(MTYPE, _test)
251#define mtype_expire TOKEN(MTYPE, _expire)
252#define mtype_resize TOKEN(MTYPE, _resize)
253#define mtype_head TOKEN(MTYPE, _head)
254#define mtype_list TOKEN(MTYPE, _list)
255#define mtype_gc TOKEN(MTYPE, _gc)
256#define mtype_variant TOKEN(MTYPE, _variant)
257#define mtype_data_match TOKEN(MTYPE, _data_match)
258
259#ifndef HKEY_DATALEN
260#define HKEY_DATALEN sizeof(struct mtype_elem)
261#endif
262
263#define HKEY(data, initval, htable_bits) \
264(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval) \
265 & jhash_mask(htable_bits))
266
267#ifndef htype
268#define htype HTYPE
269
270/* The generic hash structure */
271struct htype {
272 struct htable *table; /* the hash table */
273 u32 maxelem; /* max elements in the hash */
274 u32 elements; /* current element (vs timeout) */
275 u32 initval; /* random jhash init value */
276 u32 timeout; /* timeout value, if enabled */
277 size_t dsize; /* data struct size */
278 size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
279 struct timer_list gc; /* garbage collection when timeout enabled */
280 struct mtype_elem next; /* temporary storage for uadd */
281#ifdef IP_SET_HASH_WITH_MULTI
282 u8 ahash_max; /* max elements in an array block */
283#endif
284#ifdef IP_SET_HASH_WITH_NETMASK
285 u8 netmask; /* netmask value for subnets to store */
286#endif
287#ifdef IP_SET_HASH_WITH_RBTREE
288 struct rb_root rbtree;
289#endif
290#ifdef IP_SET_HASH_WITH_NETS
291 struct net_prefixes nets[0]; /* book-keeping of prefixes */
292#endif
293};
294#endif
295
296#ifdef IP_SET_HASH_WITH_NETS
297/* Network cidr size book keeping when the hash stores different
298 * sized networks */
299static void
300mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length)
301{
302 int i, j;
303
304 /* Add in increasing prefix order, so larger cidr first */
305 for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) {
306 if (j != -1)
307 continue;
308 else if (h->nets[i].cidr < cidr)
309 j = i;
310 else if (h->nets[i].cidr == cidr) {
311 h->nets[i].nets++;
312 return;
313 }
314 }
315 if (j != -1) {
316 for (; i > j; i--) {
317 h->nets[i].cidr = h->nets[i - 1].cidr;
318 h->nets[i].nets = h->nets[i - 1].nets;
319 }
320 }
321 h->nets[i].cidr = cidr;
322 h->nets[i].nets = 1;
323}

static void
mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
{
	u8 i, j;

	for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++)
		;
	h->nets[i].nets--;

	if (h->nets[i].nets != 0)
		return;

	for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) {
		h->nets[j].cidr = h->nets[j + 1].cidr;
		h->nets[j].nets = h->nets[j + 1].nets;
	}
}
#endif

/* Calculate the actual memory size of the set data */
static size_t
mtype_ahash_memsize(const struct htype *h, u8 nets_length)
{
	u32 i;
	struct htable *t = h->table;
	size_t memsize = sizeof(*h)
			 + sizeof(*t)
#ifdef IP_SET_HASH_WITH_NETS
			 + sizeof(struct net_prefixes) * nets_length
#endif
			 + jhash_size(t->htable_bits) * sizeof(struct hbucket);

	for (i = 0; i < jhash_size(t->htable_bits); i++)
		memsize += t->bucket[i].size * h->dsize;

	return memsize;
}
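/* Rough arithmetic: an empty IPv4 set with htable_bits = 10 accounts for
 * sizeof(*h) + sizeof(*t) + 1024 * sizeof(struct hbucket); every stored
 * element then adds h->dsize bytes to its bucket's array.
 */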

/* Flush a hash type of set: destroy all elements */
static void
mtype_flush(struct ip_set *set)
{
	struct htype *h = set->data;
	struct htable *t = h->table;
	struct hbucket *n;
	u32 i;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = hbucket(t, i);
		if (n->size) {
			n->size = n->pos = 0;
			/* FIXME: use slab cache */
			kfree(n->value);
		}
	}
#ifdef IP_SET_HASH_WITH_NETS
	memset(h->nets, 0, sizeof(struct net_prefixes)
			   * NETS_LENGTH(set->family));
#endif
	h->elements = 0;
}

/* Destroy a hash type of set */
static void
mtype_destroy(struct ip_set *set)
{
	struct htype *h = set->data;

	if (set->extensions & IPSET_EXT_TIMEOUT)
		del_timer_sync(&h->gc);

	ahash_destroy(h->table);
#ifdef IP_SET_HASH_WITH_RBTREE
	rbtree_destroy(&h->rbtree);
#endif
	kfree(h);

	set->data = NULL;
}

static void
mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
{
	struct htype *h = set->data;

	init_timer(&h->gc);
	h->gc.data = (unsigned long) set;
	h->gc.function = gc;
	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
	add_timer(&h->gc);
	pr_debug("gc initialized, run in every %u\n",
		 IPSET_GC_PERIOD(h->timeout));
}

static bool
mtype_same_set(const struct ip_set *a, const struct ip_set *b)
{
	const struct htype *x = a->data;
	const struct htype *y = b->data;

	/* Resizing changes htable_bits, so we ignore it */
	return x->maxelem == y->maxelem &&
	       x->timeout == y->timeout &&
#ifdef IP_SET_HASH_WITH_NETMASK
	       x->netmask == y->netmask &&
#endif
	       a->extensions == b->extensions;
}

/* Get the ith element from the array block n */
#define ahash_data(n, i, dsize)	\
	((struct mtype_elem *)((n)->value + ((i) * (dsize))))

/* Delete expired elements from the hashtable */
static void
mtype_expire(struct htype *h, u8 nets_length, size_t dsize)
{
	struct htable *t = h->table;
	struct hbucket *n;
	struct mtype_elem *data;
	u32 i;
	int j;

	for (i = 0; i < jhash_size(t->htable_bits); i++) {
		n = hbucket(t, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_data(n, j, dsize);
			if (ip_set_timeout_expired(ext_timeout(data, h))) {
				pr_debug("expired %u/%u\n", i, j);
#ifdef IP_SET_HASH_WITH_NETS
				mtype_del_cidr(h, CIDR(data->cidr),
					       nets_length);
#endif
				if (j != n->pos - 1)
					/* Not last one */
					memcpy(data,
					       ahash_data(n, n->pos - 1, dsize),
					       dsize);
				n->pos--;
				h->elements--;
			}
		}
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * dsize,
					    GFP_ATOMIC);
			if (!tmp)
				/* Still try to delete expired elements */
				continue;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value, n->size * dsize);
			kfree(n->value);
			n->value = tmp;
		}
	}
}
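/* Note on the kzalloc/memcpy above: expiry opportunistically shrinks a
 * bucket's array by AHASH_INIT_SIZE slots once enough entries are gone;
 * if the smaller allocation fails the bucket just keeps its old array,
 * as the expired elements have already been removed.
 */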

static void
mtype_gc(unsigned long ul_set)
{
	struct ip_set *set = (struct ip_set *) ul_set;
	struct htype *h = set->data;

	pr_debug("called\n");
	write_lock_bh(&set->lock);
	mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
	write_unlock_bh(&set->lock);

	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
	add_timer(&h->gc);
}

/* Resize a hash: create a new hash table with doubling the hashsize
 * and inserting the elements to it. Repeat until we succeed or
 * fail due to memory pressures. */
static int
mtype_resize(struct ip_set *set, bool retried)
{
	struct htype *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
#ifdef IP_SET_HASH_WITH_NETS
	u8 flags;
#endif
	struct mtype_elem *data;
	struct mtype_elem *d;
	struct hbucket *n, *m;
	u32 i, j;
	int ret;

	/* Try to cleanup once */
	if (SET_WITH_TIMEOUT(set) && !retried) {
		i = h->elements;
		write_lock_bh(&set->lock);
		mtype_expire(set->data, NETS_LENGTH(set->family),
			     h->dsize);
		write_unlock_bh(&set->lock);
		if (h->elements < i)
			return 0;
	}

retry:
	ret = 0;
	htable_bits++;
	pr_debug("attempt to resize set %s from %u to %u, t %p\n",
		 set->name, orig->htable_bits, htable_bits, orig);
	if (!htable_bits) {
		/* In case we have plenty of memory :-) */
		pr_warning("Cannot increase the hashsize of set %s further\n",
			   set->name);
		return -IPSET_ERR_HASH_FULL;
	}
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_data(n, j, h->dsize);
#ifdef IP_SET_HASH_WITH_NETS
			flags = 0;
			mtype_data_reset_flags(data, &flags);
#endif
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize);
			if (ret < 0) {
#ifdef IP_SET_HASH_WITH_NETS
				mtype_data_reset_flags(data, &flags);
#endif
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					goto retry;
				return ret;
			}
			d = ahash_data(m, m->pos++, h->dsize);
			memcpy(d, data, h->dsize);
#ifdef IP_SET_HASH_WITH_NETS
			mtype_data_reset_flags(d, &flags);
#endif
		}
	}

	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
		 orig->htable_bits, orig, t->htable_bits, t);
	ahash_destroy(orig);

	return 0;
}
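/* Concurrency sketch, as visible in this file: the grown table is
 * published with rcu_assign_pointer() and the old one is destroyed only
 * after synchronize_rcu_bh(), so RCU readers never dereference a freed
 * table; -EAGAIN from an over-full bucket simply restarts the loop with
 * one more htable bit.
 */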

/* Add an element to a hash and update the internal counters when succeeded,
 * otherwise report the proper error code. */
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t;
	const struct mtype_elem *d = value;
	struct mtype_elem *data;
	struct hbucket *n;
	int i, ret = 0;
	int j = AHASH_MAX(h) + 1;
	bool flag_exist = flags & IPSET_FLAG_EXIST;
	u32 key, multi = 0;

	if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
		/* FIXME: when set is full, we slow down here */
		mtype_expire(h, NETS_LENGTH(set->family), h->dsize);

	if (h->elements >= h->maxelem) {
		if (net_ratelimit())
			pr_warning("Set %s is full, maxelem %u reached\n",
				   set->name, h->maxelem);
		return -IPSET_ERR_HASH_FULL;
	}

	rcu_read_lock_bh();
	t = rcu_dereference_bh(h->table);
	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_data(n, i, h->dsize);
		if (mtype_data_equal(data, d, &multi)) {
			if (flag_exist ||
			    (SET_WITH_TIMEOUT(set) &&
			     ip_set_timeout_expired(ext_timeout(data, h)))) {
				/* Just the extensions could be overwritten */
				j = i;
				goto reuse_slot;
			} else {
				ret = -IPSET_ERR_EXIST;
				goto out;
			}
		}
		/* Reuse first timed out entry */
		if (SET_WITH_TIMEOUT(set) &&
		    ip_set_timeout_expired(ext_timeout(data, h)) &&
		    j != AHASH_MAX(h) + 1)
			j = i;
	}
reuse_slot:
	if (j != AHASH_MAX(h) + 1) {
		/* Fill out reused slot */
		data = ahash_data(n, j, h->dsize);
#ifdef IP_SET_HASH_WITH_NETS
		mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family));
		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
#endif
	} else {
		/* Use/create a new slot */
		TUNE_AHASH_MAX(h, multi);
		ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize);
		if (ret != 0) {
			if (ret == -EAGAIN)
				mtype_data_next(&h->next, d);
			goto out;
		}
		data = ahash_data(n, n->pos++, h->dsize);
#ifdef IP_SET_HASH_WITH_NETS
		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
#endif
		h->elements++;
	}
	memcpy(data, d, sizeof(struct mtype_elem));
#ifdef IP_SET_HASH_WITH_NETS
	mtype_data_set_flags(data, flags);
#endif
	if (SET_WITH_TIMEOUT(set))
		ip_set_timeout_set(ext_timeout(data, h), ext->timeout);
	if (SET_WITH_COUNTER(set))
		ip_set_init_counter(ext_counter(data, h), ext);

out:
	rcu_read_unlock_bh();
	return ret;
}
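/* Insertion policy recap: an equal element only succeeds with
 * IPSET_FLAG_EXIST or once the stored copy has timed out (then just the
 * extensions are refreshed); -EAGAIN from hbucket_elem_add() propagates
 * up so the core can resize the hash and retry. Note that the
 * "reuse first timed out entry" branch above cannot fire as written:
 * j starts at AHASH_MAX(h) + 1 and is only changed on the goto path.
 */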

/* Delete an element from the hash: swap it with the last element
 * and free up space if possible.
 */
static int
mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	  struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t = h->table;
	const struct mtype_elem *d = value;
	struct mtype_elem *data;
	struct hbucket *n;
	int i;
	u32 key, multi = 0;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_data(n, i, h->dsize);
		if (!mtype_data_equal(data, d, &multi))
			continue;
		if (SET_WITH_TIMEOUT(set) &&
		    ip_set_timeout_expired(ext_timeout(data, h)))
			return -IPSET_ERR_EXIST;
		if (i != n->pos - 1)
			/* Not last one */
			memcpy(data, ahash_data(n, n->pos - 1, h->dsize),
			       h->dsize);

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
#endif
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * h->dsize,
					    GFP_ATOMIC);
			if (!tmp)
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value, n->size * h->dsize);
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}
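/* Deletion keeps buckets dense: the victim is overwritten by the last
 * element of the bucket instead of leaving a hole, and the value array
 * is shrunk by AHASH_INIT_SIZE once enough slack accumulates (a failed
 * shrink allocation is harmless - the delete already succeeded).
 */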

static inline int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
	if (SET_WITH_COUNTER(set))
		ip_set_update_counter(ext_counter(data,
						  (struct htype *)(set->data)),
				      ext, mext, flags);
	return mtype_do_data_match(data);
}

#ifdef IP_SET_HASH_WITH_NETS
/* Special test function which takes into account the different network
 * sizes added to the set */
static int
mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
		 const struct ip_set_ext *ext,
		 struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t = h->table;
	struct hbucket *n;
	struct mtype_elem *data;
	int i, j = 0;
	u32 key, multi = 0;
	u8 nets_length = NETS_LENGTH(set->family);

	pr_debug("test by nets\n");
	for (; j < nets_length && h->nets[j].nets && !multi; j++) {
		mtype_data_netmask(d, h->nets[j].cidr);
		key = HKEY(d, h->initval, t->htable_bits);
		n = hbucket(t, key);
		for (i = 0; i < n->pos; i++) {
			data = ahash_data(n, i, h->dsize);
			if (!mtype_data_equal(data, d, &multi))
				continue;
			if (SET_WITH_TIMEOUT(set)) {
				if (!ip_set_timeout_expired(
							ext_timeout(data, h)))
					return mtype_data_match(data, ext,
								mext, set,
								flags);
#ifdef IP_SET_HASH_WITH_MULTI
				multi = 0;
#endif
			} else
				return mtype_data_match(data, ext,
							mext, set, flags);
		}
	}
	return 0;
}
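/* Lookup cost sketch: the loop above costs one hash probe per distinct
 * prefix length currently stored, e.g. a set holding only /24 and /16
 * entries needs at most two HKEY() computations per test, independent
 * of the number of elements.
 */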
#endif

/* Test whether the element is added to the set */
static int
mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
	   struct ip_set_ext *mext, u32 flags)
{
	struct htype *h = set->data;
	struct htable *t = h->table;
	struct mtype_elem *d = value;
	struct hbucket *n;
	struct mtype_elem *data;
	int i;
	u32 key, multi = 0;

#ifdef IP_SET_HASH_WITH_NETS
	/* If we test an IP address and not a network address,
	 * try all possible network sizes */
	if (CIDR(d->cidr) == SET_HOST_MASK(set->family))
		return mtype_test_cidrs(set, d, ext, mext, flags);
#endif

	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_data(n, i, h->dsize);
		if (mtype_data_equal(data, d, &multi) &&
		    !(SET_WITH_TIMEOUT(set) &&
		      ip_set_timeout_expired(ext_timeout(data, h))))
			return mtype_data_match(data, ext, mext, set, flags);
	}
	return 0;
}

/* Reply a HEADER request: fill out the header part of the set */
static int
mtype_head(struct ip_set *set, struct sk_buff *skb)
{
	const struct htype *h = set->data;
	struct nlattr *nested;
	size_t memsize;

	read_lock_bh(&set->lock);
	memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family));
	read_unlock_bh(&set->lock);

	nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
	if (!nested)
		goto nla_put_failure;
	if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
			  htonl(jhash_size(h->table->htable_bits))) ||
	    nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
		goto nla_put_failure;
#ifdef IP_SET_HASH_WITH_NETMASK
	if (h->netmask != HOST_MASK &&
	    nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
		goto nla_put_failure;
#endif
	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
	    ((set->extensions & IPSET_EXT_TIMEOUT) &&
	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) ||
	    ((set->extensions & IPSET_EXT_COUNTER) &&
	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
			   htonl(IPSET_FLAG_WITH_COUNTERS))))
		goto nla_put_failure;
	ipset_nest_end(skb, nested);

	return 0;
nla_put_failure:
	return -EMSGSIZE;
}

/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
mtype_list(const struct ip_set *set,
	   struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct htype *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct mtype_elem *e;
	u32 first = cb->args[2];
	/* We assume that one hash bucket fills into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	pr_debug("list hash set %s\n", set->name);
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
		for (i = 0; i < n->pos; i++) {
			e = ahash_data(n, i, h->dsize);
			if (SET_WITH_TIMEOUT(set) &&
			    ip_set_timeout_expired(ext_timeout(e, h)))
				continue;
			pr_debug("list hash %lu hbucket %p i %u, data %p\n",
				 cb->args[2], n, i, e);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (mtype_data_list(skb, e))
				goto nla_put_failure;
			if (SET_WITH_TIMEOUT(set) &&
			    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
					  htonl(ip_set_timeout_get(
						ext_timeout(e, h)))))
				goto nla_put_failure;
			if (SET_WITH_COUNTER(set) &&
			    ip_set_put_counter(skb, ext_counter(e, h)))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}

static int
TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
		    const struct xt_action_param *par,
		    enum ipset_adt adt, struct ip_set_adt_opt *opt);

static int
TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);

static const struct ip_set_type_variant mtype_variant = {
	.kadt	= mtype_kadt,
	.uadt	= mtype_uadt,
	.adt	= {
		[IPSET_ADD] = mtype_add,
		[IPSET_DEL] = mtype_del,
		[IPSET_TEST] = mtype_test,
	},
	.destroy = mtype_destroy,
	.flush	= mtype_flush,
	.head	= mtype_head,
	.list	= mtype_list,
	.resize	= mtype_resize,
	.same_set = mtype_same_set,
};
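/* This vtable is what a concrete type assigns to set->variant after
 * including this header: the ip_set core dispatches every add/del/test,
 * dump and resize request through it without knowing the element layout.
 */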

#ifdef IP_SET_EMIT_CREATE
static int
TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags)
{
	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
	u32 cadt_flags = 0;
	u8 hbits;
#ifdef IP_SET_HASH_WITH_NETMASK
	u8 netmask;
#endif
	size_t hsize;
	struct HTYPE *h;

	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
		return -IPSET_ERR_INVALID_FAMILY;
#ifdef IP_SET_HASH_WITH_NETMASK
	netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
	pr_debug("Create set %s with family %s\n",
		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
#endif

	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
		return -IPSET_ERR_PROTOCOL;

	if (tb[IPSET_ATTR_HASHSIZE]) {
		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
		if (hashsize < IPSET_MIMINAL_HASHSIZE)
			hashsize = IPSET_MIMINAL_HASHSIZE;
	}

	if (tb[IPSET_ATTR_MAXELEM])
		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);

#ifdef IP_SET_HASH_WITH_NETMASK
	if (tb[IPSET_ATTR_NETMASK]) {
		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);

		if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
		    (set->family == NFPROTO_IPV6 && netmask > 128) ||
		    netmask == 0)
			return -IPSET_ERR_INVALID_NETMASK;
	}
#endif

	hsize = sizeof(*h);
#ifdef IP_SET_HASH_WITH_NETS
	hsize += sizeof(struct net_prefixes) *
		 (set->family == NFPROTO_IPV4 ? 32 : 128);
#endif
	h = kzalloc(hsize, GFP_KERNEL);
	if (!h)
		return -ENOMEM;

	h->maxelem = maxelem;
#ifdef IP_SET_HASH_WITH_NETMASK
	h->netmask = netmask;
#endif
	get_random_bytes(&h->initval, sizeof(h->initval));
	h->timeout = IPSET_NO_TIMEOUT;

	hbits = htable_bits(hashsize);
	hsize = htable_size(hbits);
	if (hsize == 0) {
		kfree(h);
		return -ENOMEM;
	}
	h->table = ip_set_alloc(hsize);
	if (!h->table) {
		kfree(h);
		return -ENOMEM;
	}
	h->table->htable_bits = hbits;

	set->data = h;
	if (set->family == NFPROTO_IPV4)
		set->variant = &TOKEN(HTYPE, 4_variant);
	else
		set->variant = &TOKEN(HTYPE, 6_variant);

	if (tb[IPSET_ATTR_CADT_FLAGS])
		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
		set->extensions |= IPSET_EXT_COUNTER;
		if (tb[IPSET_ATTR_TIMEOUT]) {
			h->timeout =
				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
			set->extensions |= IPSET_EXT_TIMEOUT;
			if (set->family == NFPROTO_IPV4) {
				h->dsize =
					sizeof(struct TOKEN(HTYPE, 4ct_elem));
				h->offset[IPSET_OFFSET_TIMEOUT] =
					offsetof(struct TOKEN(HTYPE, 4ct_elem),
						 timeout);
				h->offset[IPSET_OFFSET_COUNTER] =
					offsetof(struct TOKEN(HTYPE, 4ct_elem),
						 counter);
				TOKEN(HTYPE, 4_gc_init)(set,
					TOKEN(HTYPE, 4_gc));
			} else {
				h->dsize =
					sizeof(struct TOKEN(HTYPE, 6ct_elem));
				h->offset[IPSET_OFFSET_TIMEOUT] =
					offsetof(struct TOKEN(HTYPE, 6ct_elem),
						 timeout);
				h->offset[IPSET_OFFSET_COUNTER] =
					offsetof(struct TOKEN(HTYPE, 6ct_elem),
						 counter);
				TOKEN(HTYPE, 6_gc_init)(set,
					TOKEN(HTYPE, 6_gc));
			}
		} else {
			if (set->family == NFPROTO_IPV4) {
				h->dsize =
					sizeof(struct TOKEN(HTYPE, 4c_elem));
				h->offset[IPSET_OFFSET_COUNTER] =
					offsetof(struct TOKEN(HTYPE, 4c_elem),
						 counter);
			} else {
				h->dsize =
					sizeof(struct TOKEN(HTYPE, 6c_elem));
				h->offset[IPSET_OFFSET_COUNTER] =
					offsetof(struct TOKEN(HTYPE, 6c_elem),
						 counter);
			}
		}
	} else if (tb[IPSET_ATTR_TIMEOUT]) {
		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
		set->extensions |= IPSET_EXT_TIMEOUT;
		if (set->family == NFPROTO_IPV4) {
			h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem));
			h->offset[IPSET_OFFSET_TIMEOUT] =
				offsetof(struct TOKEN(HTYPE, 4t_elem),
					 timeout);
			TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc));
		} else {
			h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem));
			h->offset[IPSET_OFFSET_TIMEOUT] =
				offsetof(struct TOKEN(HTYPE, 6t_elem),
					 timeout);
			TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc));
		}
	} else {
		if (set->family == NFPROTO_IPV4)
			h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem));
		else
			h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem));
	}

	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
		 set->name, jhash_size(h->table->htable_bits),
		 h->table->htable_bits, h->maxelem, set->data, h->table);

	return 0;
}
#endif /* IP_SET_EMIT_CREATE */
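The header above is a template: each set type materialises it by defining
MTYPE/HTYPE/PF/HOST_MASK and including the file once per address family,
as the hash:ip conversion below does. A minimal sketch (hash_foo is a
placeholder name, not a real module):

	#define HTYPE		hash_foo
	#define MTYPE		hash_foo4
	#define PF		4
	#define HOST_MASK	32
	#include "ip_set_hash_gen.h"	/* emits hash_foo4_add() etc. */

	#undef MTYPE
	#define MTYPE		hash_foo6
	#define PF		6
	#define HOST_MASK	128
	#define IP_SET_EMIT_CREATE	/* also emit hash_foo_create() */
	#include "ip_set_hash_gen.h"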
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index b7d4cb475ae6..c74e6e14cd93 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -21,11 +21,10 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/ipset/pfxlen.h>
 #include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define REVISION_MIN	0
-#define REVISION_MAX	0
+#define REVISION_MAX	1 /* Counters support */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -33,58 +32,47 @@ IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:ip");
 
 /* Type specific function prefix */
-#define TYPE		hash_ip
-
-static bool
-hash_ip_same_set(const struct ip_set *a, const struct ip_set *b);
-
-#define hash_ip4_same_set	hash_ip_same_set
-#define hash_ip6_same_set	hash_ip_same_set
+#define HTYPE		hash_ip
+#define IP_SET_HASH_WITH_NETMASK
 
-/* The type variant functions: IPv4 */
+/* IPv4 variants */
 
-/* Member elements without timeout */
+/* Member elements */
 struct hash_ip4_elem {
+	/* Zero valued IP addresses cannot be stored */
 	__be32 ip;
 };
 
-/* Member elements with timeout support */
-struct hash_ip4_telem {
+struct hash_ip4t_elem {
 	__be32 ip;
 	unsigned long timeout;
 };
 
-static inline bool
-hash_ip4_data_equal(const struct hash_ip4_elem *ip1,
-		    const struct hash_ip4_elem *ip2,
-		    u32 *multi)
-{
-	return ip1->ip == ip2->ip;
-}
+struct hash_ip4c_elem {
+	__be32 ip;
+	struct ip_set_counter counter;
+};
 
-static inline bool
-hash_ip4_data_isnull(const struct hash_ip4_elem *elem)
-{
-	return elem->ip == 0;
-}
+struct hash_ip4ct_elem {
+	__be32 ip;
+	struct ip_set_counter counter;
+	unsigned long timeout;
+};
 
-static inline void
-hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src)
-{
-	dst->ip = src->ip;
-}
+/* Common functions */
 
-/* Zero valued IP addresses cannot be stored */
-static inline void
-hash_ip4_data_zero_out(struct hash_ip4_elem *elem)
+static inline bool
+hash_ip4_data_equal(const struct hash_ip4_elem *e1,
+		    const struct hash_ip4_elem *e2,
+		    u32 *multi)
 {
-	elem->ip = 0;
+	return e1->ip == e2->ip;
}
 
 static inline bool
-hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data)
+hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e)
 {
-	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip))
+	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip))
 		goto nla_put_failure;
 	return 0;
 
@@ -92,41 +80,26 @@ nla_put_failure:
 	return 1;
 }
 
-static bool
-hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data)
+static inline void
+hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
 {
-	const struct hash_ip4_telem *tdata =
-		(const struct hash_ip4_telem *)data;
-
-	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
-	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-			  htonl(ip_set_timeout_get(tdata->timeout))))
-		goto nla_put_failure;
-
-	return 0;
-
-nla_put_failure:
-	return 1;
+	next->ip = e->ip;
 }
 
-#define IP_SET_HASH_WITH_NETMASK
+#define MTYPE		hash_ip4
 #define PF		4
 #define HOST_MASK	32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ip4_data_next(struct ip_set_hash *h, const struct hash_ip4_elem *d)
-{
-	h->next.ip = d->ip;
-}
+#include "ip_set_hash_gen.h"
 
 static int
 hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	      const struct xt_action_param *par,
-	      enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+	      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct hash_ip4_elem e = {};
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 	__be32 ip;
 
 	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip);
@@ -134,43 +107,42 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	if (ip == 0)
 		return -EINVAL;
 
-	return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags);
+	e.ip = ip;
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
 static int
 hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	u32 ip, ip_to, hosts, timeout = h->timeout;
-	__be32 nip;
+	struct hash_ip4_elem e = {};
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
+	u32 ip, ip_to, hosts;
 	int ret = 0;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
 	ip &= ip_set_hostmask(h->netmask);
 
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!with_timeout(h->timeout))
-			return -IPSET_ERR_TIMEOUT;
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-	}
-
 	if (adt == IPSET_TEST) {
-		nip = htonl(ip);
-		if (nip == 0)
+		e.ip = htonl(ip);
+		if (e.ip == 0)
 			return -IPSET_ERR_HASH_ELEM;
-		return adtfn(set, &nip, timeout, flags);
+		return adtfn(set, &e, &ext, &ext, flags);
 	}
 
 	ip_to = ip;
@@ -193,10 +165,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (retried)
 		ip = ntohl(h->next.ip);
 	for (; !before(ip_to, ip); ip += hosts) {
-		nip = htonl(ip);
-		if (nip == 0)
+		e.ip = htonl(ip);
+		if (e.ip == 0)
 			return -IPSET_ERR_HASH_ELEM;
-		ret = adtfn(set, &nip, timeout, flags);
+		ret = adtfn(set, &e, &ext, &ext, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
@@ -206,29 +178,31 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-static bool
-hash_ip_same_set(const struct ip_set *a, const struct ip_set *b)
-{
-	const struct ip_set_hash *x = a->data;
-	const struct ip_set_hash *y = b->data;
-
-	/* Resizing changes htable_bits, so we ignore it */
-	return x->maxelem == y->maxelem &&
-	       x->timeout == y->timeout &&
-	       x->netmask == y->netmask;
-}
-
-/* The type variant functions: IPv6 */
-
-struct hash_ip6_elem {
-	union nf_inet_addr ip;
-};
-
-struct hash_ip6_telem {
-	union nf_inet_addr ip;
-	unsigned long timeout;
-};
+/* IPv6 variants */
+
+/* Member elements */
+struct hash_ip6_elem {
+	union nf_inet_addr ip;
+};
+
+struct hash_ip6t_elem {
+	union nf_inet_addr ip;
+	unsigned long timeout;
+};
+
+struct hash_ip6c_elem {
+	union nf_inet_addr ip;
+	struct ip_set_counter counter;
+};
+
+struct hash_ip6ct_elem {
+	union nf_inet_addr ip;
+	struct ip_set_counter counter;
+	unsigned long timeout;
+};
+
+/* Common functions */
 
 static inline bool
 hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
 		    const struct hash_ip6_elem *ip2,
@@ -237,37 +211,16 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
 	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);
 }
 
-static inline bool
-hash_ip6_data_isnull(const struct hash_ip6_elem *elem)
-{
-	return ipv6_addr_any(&elem->ip.in6);
-}
-
 static inline void
-hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src)
+hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix)
 {
-	dst->ip.in6 = src->ip.in6;
-}
-
-static inline void
-hash_ip6_data_zero_out(struct hash_ip6_elem *elem)
-{
-	ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0);
-}
-
-static inline void
-ip6_netmask(union nf_inet_addr *ip, u8 prefix)
-{
-	ip->ip6[0] &= ip_set_netmask6(prefix)[0];
-	ip->ip6[1] &= ip_set_netmask6(prefix)[1];
-	ip->ip6[2] &= ip_set_netmask6(prefix)[2];
-	ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+	ip6_netmask(ip, prefix);
 }
 
 static bool
-hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data)
+hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e)
 {
-	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6))
+	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6))
 		goto nla_put_failure;
 	return 0;
 
@@ -275,69 +228,55 @@ nla_put_failure:
 	return 1;
 }
 
-static bool
-hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data)
+static inline void
+hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e)
 {
-	const struct hash_ip6_telem *e =
-		(const struct hash_ip6_telem *)data;
-
-	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
-	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-			  htonl(ip_set_timeout_get(e->timeout))))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return 1;
 }
 
+#undef MTYPE
 #undef PF
 #undef HOST_MASK
+#undef HKEY_DATALEN
 
+#define MTYPE		hash_ip6
 #define PF		6
 #define HOST_MASK	128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
 
-static inline void
-hash_ip6_data_next(struct ip_set_hash *h, const struct hash_ip6_elem *d)
-{
-}
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
 
 static int
 hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	      const struct xt_action_param *par,
-	      enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+	      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	union nf_inet_addr ip;
+	struct hash_ip6_elem e = {};
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
-	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip.in6);
-	ip6_netmask(&ip, h->netmask);
-	if (ipv6_addr_any(&ip.in6))
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+	hash_ip6_netmask(&e.ip, h->netmask);
+	if (ipv6_addr_any(&e.ip.in6))
 		return -EINVAL;
 
-	return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags);
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
-static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = {
-	[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
-	[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
-	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
-};
-
 static int
 hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	union nf_inet_addr ip;
-	u32 timeout = h->timeout;
+	struct hash_ip6_elem e = {};
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
 		     tb[IPSET_ATTR_IP_TO] ||
 		     tb[IPSET_ATTR_CIDR]))
 		return -IPSET_ERR_PROTOCOL;
@@ -345,110 +284,20 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &ip);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
-	ip6_netmask(&ip, h->netmask);
-	if (ipv6_addr_any(&ip.in6))
+	hash_ip6_netmask(&e.ip, h->netmask);
+	if (ipv6_addr_any(&e.ip.in6))
 		return -IPSET_ERR_HASH_ELEM;
 
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!with_timeout(h->timeout))
-			return -IPSET_ERR_TIMEOUT;
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-	}
-
-	ret = adtfn(set, &ip, timeout, flags);
+	ret = adtfn(set, &e, &ext, &ext, flags);
 
 	return ip_set_eexist(ret, flags) ? 0 : ret;
 }
 
-/* Create hash:ip type of sets */
-
-static int
-hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
-	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
-	u8 netmask, hbits;
-	size_t hsize;
-	struct ip_set_hash *h;
-
-	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
-		return -IPSET_ERR_INVALID_FAMILY;
-	netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
-	pr_debug("Create set %s with family %s\n",
-		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
-
-	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
-		return -IPSET_ERR_PROTOCOL;
-
-	if (tb[IPSET_ATTR_HASHSIZE]) {
-		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
-		if (hashsize < IPSET_MIMINAL_HASHSIZE)
-			hashsize = IPSET_MIMINAL_HASHSIZE;
-	}
-
-	if (tb[IPSET_ATTR_MAXELEM])
-		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
-	if (tb[IPSET_ATTR_NETMASK]) {
-		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
-
-		if ((set->family == NFPROTO_IPV4 && netmask > 32) ||
-		    (set->family == NFPROTO_IPV6 && netmask > 128) ||
-		    netmask == 0)
-			return -IPSET_ERR_INVALID_NETMASK;
-	}
-
-	h = kzalloc(sizeof(*h), GFP_KERNEL);
-	if (!h)
-		return -ENOMEM;
-
-	h->maxelem = maxelem;
-	h->netmask = netmask;
-	get_random_bytes(&h->initval, sizeof(h->initval));
-	h->timeout = IPSET_NO_TIMEOUT;
-
-	hbits = htable_bits(hashsize);
-	hsize = htable_size(hbits);
-	if (hsize == 0) {
-		kfree(h);
-		return -ENOMEM;
-	}
-	h->table = ip_set_alloc(hsize);
-	if (!h->table) {
-		kfree(h);
-		return -ENOMEM;
-	}
-	h->table->htable_bits = hbits;
-
-	set->data = h;
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
-		set->variant = set->family == NFPROTO_IPV4
-			? &hash_ip4_tvariant : &hash_ip6_tvariant;
-
-		if (set->family == NFPROTO_IPV4)
-			hash_ip4_gc_init(set);
-		else
-			hash_ip6_gc_init(set);
-	} else {
-		set->variant = set->family == NFPROTO_IPV4
-			? &hash_ip4_variant : &hash_ip6_variant;
-	}
-
-	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
-		 set->name, jhash_size(h->table->htable_bits),
-		 h->table->htable_bits, h->maxelem, set->data, h->table);
-
-	return 0;
-}
-
 static struct ip_set_type hash_ip_type __read_mostly = {
 	.name		= "hash:ip",
 	.protocol	= IPSET_PROTOCOL,
@@ -465,6 +314,7 @@ static struct ip_set_type hash_ip_type __read_mostly = {
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_NETMASK]	= { .type = NLA_U8  },
+		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 	},
 	.adt_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
@@ -472,6 +322,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {
 		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
+		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
 	},
 	.me		= THIS_MODULE,
 };
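With the counter extension wired into hash:ip above (revision 1, plus the
BYTES/PACKETS attributes in the adt policy), a matching userspace ipset
can request counters at create time and seed them on add - illustratively,
assuming an ipset tool recent enough to know these options:

	ipset create foo hash:ip counters timeout 600
	ipset add foo 192.0.2.1 packets 0 bytes 0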
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index d8f77bacae86..7a2d2bd98d04 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 1/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 * 2 *
3 * This program is free software; you can redistribute it and/or modify 3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as 4 * it under the terms of the GNU General Public License version 2 as
@@ -21,12 +21,12 @@
21#include <linux/netfilter.h> 21#include <linux/netfilter.h>
22#include <linux/netfilter/ipset/pfxlen.h> 22#include <linux/netfilter/ipset/pfxlen.h>
23#include <linux/netfilter/ipset/ip_set.h> 23#include <linux/netfilter/ipset/ip_set.h>
24#include <linux/netfilter/ipset/ip_set_timeout.h>
25#include <linux/netfilter/ipset/ip_set_getport.h> 24#include <linux/netfilter/ipset/ip_set_getport.h>
26#include <linux/netfilter/ipset/ip_set_hash.h> 25#include <linux/netfilter/ipset/ip_set_hash.h>
27 26
28#define REVISION_MIN 0 27#define REVISION_MIN 0
29#define REVISION_MAX 1 /* SCTP and UDPLITE support added */ 28/* 1 SCTP and UDPLITE support added */
29#define REVISION_MAX 2 /* Counters support added */
30 30
31MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
32MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 32MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -34,33 +34,45 @@ IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX);
34MODULE_ALIAS("ip_set_hash:ip,port"); 34MODULE_ALIAS("ip_set_hash:ip,port");
35 35
36/* Type specific function prefix */ 36/* Type specific function prefix */
37#define TYPE hash_ipport 37#define HTYPE hash_ipport
38 38
39static bool 39/* IPv4 variants */
40hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b);
41 40
42#define hash_ipport4_same_set hash_ipport_same_set 41/* Member elements */
43#define hash_ipport6_same_set hash_ipport_same_set 42struct hash_ipport4_elem {
43 __be32 ip;
44 __be16 port;
45 u8 proto;
46 u8 padding;
47};
44 48
45/* The type variant functions: IPv4 */ 49struct hash_ipport4t_elem {
50 __be32 ip;
51 __be16 port;
52 u8 proto;
53 u8 padding;
54 unsigned long timeout;
55};
46 56
47/* Member elements without timeout */ 57struct hash_ipport4c_elem {
48struct hash_ipport4_elem {
49 __be32 ip; 58 __be32 ip;
50 __be16 port; 59 __be16 port;
51 u8 proto; 60 u8 proto;
52 u8 padding; 61 u8 padding;
62 struct ip_set_counter counter;
53}; 63};
54 64
55/* Member elements with timeout support */ 65struct hash_ipport4ct_elem {
56struct hash_ipport4_telem {
57 __be32 ip; 66 __be32 ip;
58 __be16 port; 67 __be16 port;
59 u8 proto; 68 u8 proto;
60 u8 padding; 69 u8 padding;
70 struct ip_set_counter counter;
61 unsigned long timeout; 71 unsigned long timeout;
62}; 72};
63 73
74/* Common functions */
75
64static inline bool 76static inline bool
65hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1, 77hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
66 const struct hash_ipport4_elem *ip2, 78 const struct hash_ipport4_elem *ip2,
@@ -71,27 +83,6 @@ hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
71 ip1->proto == ip2->proto; 83 ip1->proto == ip2->proto;
72} 84}
73 85
74static inline bool
75hash_ipport4_data_isnull(const struct hash_ipport4_elem *elem)
76{
77 return elem->proto == 0;
78}
79
80static inline void
81hash_ipport4_data_copy(struct hash_ipport4_elem *dst,
82 const struct hash_ipport4_elem *src)
83{
84 dst->ip = src->ip;
85 dst->port = src->port;
86 dst->proto = src->proto;
87}
88
89static inline void
90hash_ipport4_data_zero_out(struct hash_ipport4_elem *elem)
91{
92 elem->proto = 0;
93}
94
95static bool 86static bool
96hash_ipport4_data_list(struct sk_buff *skb, 87hash_ipport4_data_list(struct sk_buff *skb,
97 const struct hash_ipport4_elem *data) 88 const struct hash_ipport4_elem *data)
@@ -106,111 +97,91 @@ nla_put_failure:
106 return 1; 97 return 1;
107} 98}
108 99
109static bool
110hash_ipport4_data_tlist(struct sk_buff *skb,
111 const struct hash_ipport4_elem *data)
112{
113 const struct hash_ipport4_telem *tdata =
114 (const struct hash_ipport4_telem *)data;
115
116 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
117 nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) ||
118 nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
119 nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
120 htonl(ip_set_timeout_get(tdata->timeout))))
121 goto nla_put_failure;
122 return 0;
123
124nla_put_failure:
125 return 1;
126}
127
128#define PF 4
129#define HOST_MASK 32
130#include <linux/netfilter/ipset/ip_set_ahash.h>
131
132static inline void 100static inline void
133hash_ipport4_data_next(struct ip_set_hash *h, 101hash_ipport4_data_next(struct hash_ipport4_elem *next,
134 const struct hash_ipport4_elem *d) 102 const struct hash_ipport4_elem *d)
135{ 103{
136 h->next.ip = d->ip; 104 next->ip = d->ip;
137 h->next.port = d->port; 105 next->port = d->port;
138} 106}
139 107
108#define MTYPE hash_ipport4
109#define PF 4
110#define HOST_MASK 32
111#define HKEY_DATALEN sizeof(struct hash_ipport4_elem)
112#include "ip_set_hash_gen.h"
113
140static int 114static int
141hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, 115hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,
142 const struct xt_action_param *par, 116 const struct xt_action_param *par,
143 enum ipset_adt adt, const struct ip_set_adt_opt *opt) 117 enum ipset_adt adt, struct ip_set_adt_opt *opt)
144{ 118{
145 const struct ip_set_hash *h = set->data; 119 const struct hash_ipport *h = set->data;
146 ipset_adtfn adtfn = set->variant->adt[adt]; 120 ipset_adtfn adtfn = set->variant->adt[adt];
147 struct hash_ipport4_elem data = { }; 121 struct hash_ipport4_elem e = { };
122 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
148 123
149 if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, 124 if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
150 &data.port, &data.proto)) 125 &e.port, &e.proto))
151 return -EINVAL; 126 return -EINVAL;
152 127
153 ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); 128 ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
154 129 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
155 return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
156} 130}
157 131
158static int 132static int
159hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], 133hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
160 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 134 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
161{ 135{
162 const struct ip_set_hash *h = set->data; 136 const struct hash_ipport *h = set->data;
163 ipset_adtfn adtfn = set->variant->adt[adt]; 137 ipset_adtfn adtfn = set->variant->adt[adt];
164 struct hash_ipport4_elem data = { }; 138 struct hash_ipport4_elem e = { };
139 struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
165 u32 ip, ip_to, p = 0, port, port_to; 140 u32 ip, ip_to, p = 0, port, port_to;
166 u32 timeout = h->timeout;
167 bool with_ports = false; 141 bool with_ports = false;
168 int ret; 142 int ret;
169 143
170 if (unlikely(!tb[IPSET_ATTR_IP] || 144 if (unlikely(!tb[IPSET_ATTR_IP] ||
171 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || 145 !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
172 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || 146 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
173 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) 147 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
148 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
149 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
174 return -IPSET_ERR_PROTOCOL; 150 return -IPSET_ERR_PROTOCOL;
175 151
176 if (tb[IPSET_ATTR_LINENO]) 152 if (tb[IPSET_ATTR_LINENO])
177 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 153 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
178 154
179 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); 155 ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
156 ip_set_get_extensions(set, tb, &ext);
180 if (ret) 157 if (ret)
181 return ret; 158 return ret;
182 159
183 if (tb[IPSET_ATTR_PORT]) 160 if (tb[IPSET_ATTR_PORT])
184 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); 161 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
185 else 162 else
186 return -IPSET_ERR_PROTOCOL; 163 return -IPSET_ERR_PROTOCOL;
187 164
188 if (tb[IPSET_ATTR_PROTO]) { 165 if (tb[IPSET_ATTR_PROTO]) {
189 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); 166 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
190 with_ports = ip_set_proto_with_ports(data.proto); 167 with_ports = ip_set_proto_with_ports(e.proto);
191 168
192 if (data.proto == 0) 169 if (e.proto == 0)
193 return -IPSET_ERR_INVALID_PROTO; 170 return -IPSET_ERR_INVALID_PROTO;
194 } else 171 } else
195 return -IPSET_ERR_MISSING_PROTO; 172 return -IPSET_ERR_MISSING_PROTO;
196 173
197 if (!(with_ports || data.proto == IPPROTO_ICMP)) 174 if (!(with_ports || e.proto == IPPROTO_ICMP))
198 data.port = 0; 175 e.port = 0;
199
200 if (tb[IPSET_ATTR_TIMEOUT]) {
201 if (!with_timeout(h->timeout))
202 return -IPSET_ERR_TIMEOUT;
203 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
204 }
205 176
206 if (adt == IPSET_TEST || 177 if (adt == IPSET_TEST ||
207 !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || 178 !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
208 tb[IPSET_ATTR_PORT_TO])) { 179 tb[IPSET_ATTR_PORT_TO])) {
209 ret = adtfn(set, &data, timeout, flags); 180 ret = adtfn(set, &e, &ext, &ext, flags);
210 return ip_set_eexist(ret, flags) ? 0 : ret; 181 return ip_set_eexist(ret, flags) ? 0 : ret;
211 } 182 }
212 183
213 ip_to = ip = ntohl(data.ip); 184 ip_to = ip = ntohl(e.ip);
214 if (tb[IPSET_ATTR_IP_TO]) { 185 if (tb[IPSET_ATTR_IP_TO]) {
215 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); 186 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
216 if (ret) 187 if (ret)
@@ -225,7 +196,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
225 ip_set_mask_from_to(ip, ip_to, cidr); 196 ip_set_mask_from_to(ip, ip_to, cidr);
226 } 197 }
227 198
228 port_to = port = ntohs(data.port); 199 port_to = port = ntohs(e.port);
229 if (with_ports && tb[IPSET_ATTR_PORT_TO]) { 200 if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
230 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); 201 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
231 if (port > port_to) 202 if (port > port_to)
@@ -238,9 +209,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
238 p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) 209 p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
239 : port; 210 : port;
240 for (; p <= port_to; p++) { 211 for (; p <= port_to; p++) {
241 data.ip = htonl(ip); 212 e.ip = htonl(ip);
242 data.port = htons(p); 213 e.port = htons(p);
243 ret = adtfn(set, &data, timeout, flags); 214 ret = adtfn(set, &e, &ext, &ext, flags);
244 215
245 if (ret && !ip_set_eexist(ret, flags)) 216 if (ret && !ip_set_eexist(ret, flags))
246 return ret; 217 return ret;
@@ -251,34 +222,42 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
251 return ret; 222 return ret;
252} 223}
253 224
254static bool 225/* IPv6 variants */
255hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b)
256{
257 const struct ip_set_hash *x = a->data;
258 const struct ip_set_hash *y = b->data;
259 226
260 /* Resizing changes htable_bits, so we ignore it */ 227struct hash_ipport6_elem {
261 return x->maxelem == y->maxelem && 228 union nf_inet_addr ip;
262 x->timeout == y->timeout; 229 __be16 port;
263} 230 u8 proto;
231 u8 padding;
232};
264 233
265/* The type variant functions: IPv6 */ 234struct hash_ipport6t_elem {
235 union nf_inet_addr ip;
236 __be16 port;
237 u8 proto;
238 u8 padding;
239 unsigned long timeout;
240};
266 241
267struct hash_ipport6_elem { 242struct hash_ipport6c_elem {
268 union nf_inet_addr ip; 243 union nf_inet_addr ip;
269 __be16 port; 244 __be16 port;
270 u8 proto; 245 u8 proto;
271 u8 padding; 246 u8 padding;
247 struct ip_set_counter counter;
272}; 248};
273 249
274struct hash_ipport6_telem { 250struct hash_ipport6ct_elem {
275 union nf_inet_addr ip; 251 union nf_inet_addr ip;
276 __be16 port; 252 __be16 port;
277 u8 proto; 253 u8 proto;
278 u8 padding; 254 u8 padding;
255 struct ip_set_counter counter;
279 unsigned long timeout; 256 unsigned long timeout;
280}; 257};
281 258
259/* Common functions */
260
282static inline bool 261static inline bool
283hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1, 262hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
284 const struct hash_ipport6_elem *ip2, 263 const struct hash_ipport6_elem *ip2,
@@ -289,25 +268,6 @@ hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
289 ip1->proto == ip2->proto; 268 ip1->proto == ip2->proto;
290} 269}
291 270
292static inline bool
293hash_ipport6_data_isnull(const struct hash_ipport6_elem *elem)
294{
295 return elem->proto == 0;
296}
297
298static inline void
299hash_ipport6_data_copy(struct hash_ipport6_elem *dst,
300 const struct hash_ipport6_elem *src)
301{
302 memcpy(dst, src, sizeof(*dst));
303}
304
305static inline void
306hash_ipport6_data_zero_out(struct hash_ipport6_elem *elem)
307{
308 elem->proto = 0;
309}
310
311static bool 271static bool
312hash_ipport6_data_list(struct sk_buff *skb, 272hash_ipport6_data_list(struct sk_buff *skb,
313 const struct hash_ipport6_elem *data) 273 const struct hash_ipport6_elem *data)
@@ -322,66 +282,52 @@ nla_put_failure:
 	return 1;
 }
 
-static bool
-hash_ipport6_data_tlist(struct sk_buff *skb,
-			const struct hash_ipport6_elem *data)
+static inline void
+hash_ipport6_data_next(struct hash_ipport4_elem *next,
+		       const struct hash_ipport6_elem *d)
 {
-	const struct hash_ipport6_telem *e =
-		(const struct hash_ipport6_telem *)data;
-
-	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
-	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
-	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
-	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-			  htonl(ip_set_timeout_get(e->timeout))))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return 1;
+	next->port = d->port;
 }
 
+#undef MTYPE
 #undef PF
 #undef HOST_MASK
+#undef HKEY_DATALEN
 
+#define MTYPE hash_ipport6
 #define PF 6
 #define HOST_MASK 128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ipport6_data_next(struct ip_set_hash *h,
-		       const struct hash_ipport6_elem *d)
-{
-	h->next.port = d->port;
-}
+#define HKEY_DATALEN sizeof(struct hash_ipport6_elem)
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
 
 static int
 hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
-		  enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_ipport6_elem data = { };
+	struct hash_ipport6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
 	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
-				 &data.port, &data.proto))
+				 &e.port, &e.proto))
 		return -EINVAL;
 
-	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
-
-	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
 static int
 hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ipport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_ipport6_elem data = { };
+	struct hash_ipport6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
 	u32 port, port_to;
-	u32 timeout = h->timeout;
 	bool with_ports = false;
 	int ret;
 
@@ -389,6 +335,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 	     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
 	     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+	     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+	     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
 	     tb[IPSET_ATTR_IP_TO] ||
 	     tb[IPSET_ATTR_CIDR]))
 		return -IPSET_ERR_PROTOCOL;
@@ -396,39 +344,34 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
 	if (tb[IPSET_ATTR_PORT])
-		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 	else
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_PROTO]) {
-		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
-		with_ports = ip_set_proto_with_ports(data.proto);
+		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(e.proto);
 
-		if (data.proto == 0)
+		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
-		data.port = 0;
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!with_timeout(h->timeout))
-			return -IPSET_ERR_TIMEOUT;
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-	}
+	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+		e.port = 0;
 
 	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
-		ret = adtfn(set, &data, timeout, flags);
+		ret = adtfn(set, &e, &ext, &ext, flags);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
-	port = ntohs(data.port);
+	port = ntohs(e.port);
 	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 	if (port > port_to)
 		swap(port, port_to);
@@ -436,8 +379,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (retried)
 		port = ntohs(h->next.port);
 	for (; port <= port_to; port++) {
-		data.port = htons(port);
-		ret = adtfn(set, &data, timeout, flags);
+		e.port = htons(port);
+		ret = adtfn(set, &e, &ext, &ext, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
@@ -447,78 +390,6 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* Create hash:ip type of sets */
-
-static int
-hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
-	struct ip_set_hash *h;
-	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
-	u8 hbits;
-	size_t hsize;
-
-	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
-		return -IPSET_ERR_INVALID_FAMILY;
-
-	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
-		return -IPSET_ERR_PROTOCOL;
-
-	if (tb[IPSET_ATTR_HASHSIZE]) {
-		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
-		if (hashsize < IPSET_MIMINAL_HASHSIZE)
-			hashsize = IPSET_MIMINAL_HASHSIZE;
-	}
-
-	if (tb[IPSET_ATTR_MAXELEM])
-		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
-	h = kzalloc(sizeof(*h), GFP_KERNEL);
-	if (!h)
-		return -ENOMEM;
-
-	h->maxelem = maxelem;
-	get_random_bytes(&h->initval, sizeof(h->initval));
-	h->timeout = IPSET_NO_TIMEOUT;
-
-	hbits = htable_bits(hashsize);
-	hsize = htable_size(hbits);
-	if (hsize == 0) {
-		kfree(h);
-		return -ENOMEM;
-	}
-	h->table = ip_set_alloc(hsize);
-	if (!h->table) {
-		kfree(h);
-		return -ENOMEM;
-	}
-	h->table->htable_bits = hbits;
-
-	set->data = h;
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
-		set->variant = set->family == NFPROTO_IPV4
-			? &hash_ipport4_tvariant : &hash_ipport6_tvariant;
-
-		if (set->family == NFPROTO_IPV4)
-			hash_ipport4_gc_init(set);
-		else
-			hash_ipport6_gc_init(set);
-	} else {
-		set->variant = set->family == NFPROTO_IPV4
-			? &hash_ipport4_variant : &hash_ipport6_variant;
-	}
-
-	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
-		 set->name, jhash_size(h->table->htable_bits),
-		 h->table->htable_bits, h->maxelem, set->data, h->table);
-
-	return 0;
-}
-
 static struct ip_set_type hash_ipport_type __read_mostly = {
 	.name = "hash:ip,port",
 	.protocol = IPSET_PROTOCOL,
@@ -535,6 +406,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
 		[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
 		[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+		[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
 	},
 	.adt_policy = {
 		[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -545,6 +417,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {
 		[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
 		[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+		[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+		[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
 	},
 	.me = THIS_MODULE,
 };
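
The conversion in this file replaces the per-type create/gc/same_set boilerplate with the generic engine in ip_set_hash_gen.h: each .c file defines MTYPE, PF and HOST_MASK (plus IP_SET_EMIT_CREATE for the last variant) and includes the header once per address family, and the header stamps out the variant functions by token pasting. A minimal standalone sketch of that preprocessor-template idiom follows; every name in it is invented for illustration and is not the actual ip_set_hash_gen.h content.

	/* Sketch of a "template header" instantiated via macros. In the kernel
	 * the repeated block lives in one shared header included twice; it is
	 * inlined twice here to keep the example self-contained. */
	#include <stdio.h>

	#define CONCAT2(a, b)	a##b
	#define CONCAT(a, b)	CONCAT2(a, b)	/* expand, then paste */
	#define STR2(x)		#x
	#define STR(x)		STR2(x)		/* expand, then stringify */

	#define MTYPE hash_demo4
	#define HOST_MASK 32
	static void CONCAT(MTYPE, _describe)(void)	/* hash_demo4_describe */
	{
		printf("type %s, host mask %d\n", STR(MTYPE), HOST_MASK);
	}
	#undef MTYPE
	#undef HOST_MASK

	#define MTYPE hash_demo6
	#define HOST_MASK 128
	static void CONCAT(MTYPE, _describe)(void)	/* hash_demo6_describe */
	{
		printf("type %s, host mask %d\n", STR(MTYPE), HOST_MASK);
	}
	#undef MTYPE
	#undef HOST_MASK

	int main(void)
	{
		hash_demo4_describe();
		hash_demo6_describe();
		return 0;
	}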
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 1da1e955f38b..34e8a1acce42 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -21,12 +21,12 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/ipset/pfxlen.h>
 #include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define REVISION_MIN 0
-#define REVISION_MAX 1 /* SCTP and UDPLITE support added */
+/* 1 SCTP and UDPLITE support added */
+#define REVISION_MAX 2 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
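
Revision 2 is the counters extension visible throughout this diff: an element variant can carry a per-entry struct ip_set_counter, filled from the IPSET_ATTR_BYTES/IPSET_ATTR_PACKETS netlink attributes and bumped on each kernel-side match. A rough sketch of that bookkeeping follows; the names are invented for illustration, and the in-tree helpers differ in detail (for instance they use atomic 64-bit types).

	/* Rough sketch of a per-element packet/byte counter. */
	#include <stdint.h>
	#include <stdio.h>

	struct demo_counter {
		uint64_t bytes;
		uint64_t packets;
	};

	/* Called once per matching packet of length pkt_len. */
	static void demo_counter_update(struct demo_counter *c, uint64_t pkt_len)
	{
		c->bytes += pkt_len;
		c->packets++;
	}

	int main(void)
	{
		struct demo_counter c = { 0, 0 };

		demo_counter_update(&c, 1500);
		demo_counter_update(&c, 40);
		printf("packets %llu bytes %llu\n",
		       (unsigned long long)c.packets,
		       (unsigned long long)c.bytes);
		return 0;
	}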
@@ -34,32 +34,44 @@ IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,ip");
 
 /* Type specific function prefix */
-#define TYPE hash_ipportip
+#define HTYPE hash_ipportip
 
-static bool
-hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b);
+/* IPv4 variants */
 
-#define hash_ipportip4_same_set hash_ipportip_same_set
-#define hash_ipportip6_same_set hash_ipportip_same_set
+/* Member elements */
+struct hash_ipportip4_elem {
+	__be32 ip;
+	__be32 ip2;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+};
 
-/* The type variant functions: IPv4 */
+struct hash_ipportip4t_elem {
+	__be32 ip;
+	__be32 ip2;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+	unsigned long timeout;
+};
 
-/* Member elements without timeout */
-struct hash_ipportip4_elem {
+struct hash_ipportip4c_elem {
 	__be32 ip;
 	__be32 ip2;
 	__be16 port;
 	u8 proto;
 	u8 padding;
+	struct ip_set_counter counter;
 };
 
-/* Member elements with timeout support */
-struct hash_ipportip4_telem {
+struct hash_ipportip4ct_elem {
 	__be32 ip;
 	__be32 ip2;
 	__be16 port;
 	u8 proto;
 	u8 padding;
+	struct ip_set_counter counter;
 	unsigned long timeout;
 };
 
@@ -74,25 +86,6 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
 	       ip1->proto == ip2->proto;
 }
 
-static inline bool
-hash_ipportip4_data_isnull(const struct hash_ipportip4_elem *elem)
-{
-	return elem->proto == 0;
-}
-
-static inline void
-hash_ipportip4_data_copy(struct hash_ipportip4_elem *dst,
-			 const struct hash_ipportip4_elem *src)
-{
-	memcpy(dst, src, sizeof(*dst));
-}
-
-static inline void
-hash_ipportip4_data_zero_out(struct hash_ipportip4_elem *elem)
-{
-	elem->proto = 0;
-}
-
 static bool
 hash_ipportip4_data_list(struct sk_buff *skb,
 			 const struct hash_ipportip4_elem *data)
@@ -108,117 +101,96 @@ nla_put_failure:
 	return 1;
 }
 
-static bool
-hash_ipportip4_data_tlist(struct sk_buff *skb,
-			  const struct hash_ipportip4_elem *data)
+static inline void
+hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
+			 const struct hash_ipportip4_elem *d)
 {
-	const struct hash_ipportip4_telem *tdata =
-		(const struct hash_ipportip4_telem *)data;
-
-	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
-	    nla_put_ipaddr4(skb, IPSET_ATTR_IP2, tdata->ip2) ||
-	    nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) ||
-	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
-	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-			  htonl(ip_set_timeout_get(tdata->timeout))))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return 1;
+	next->ip = d->ip;
+	next->port = d->port;
 }
 
+/* Common functions */
+#define MTYPE hash_ipportip4
 #define PF 4
 #define HOST_MASK 32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ipportip4_data_next(struct ip_set_hash *h,
-			 const struct hash_ipportip4_elem *d)
-{
-	h->next.ip = d->ip;
-	h->next.port = d->port;
-}
+#include "ip_set_hash_gen.h"
 
 static int
 hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		    const struct xt_action_param *par,
-		    enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+		    enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_ipportip4_elem data = { };
+	struct hash_ipportip4_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
 	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
-				 &data.port, &data.proto))
+				 &e.port, &e.proto))
 		return -EINVAL;
 
-	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
-	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2);
-
-	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2);
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
 static int
 hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_ipportip4_elem data = { };
+	struct hash_ipportip4_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
 	u32 ip, ip_to, p = 0, port, port_to;
-	u32 timeout = h->timeout;
 	bool with_ports = false;
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip);
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
-	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2);
+	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &e.ip2);
 	if (ret)
 		return ret;
 
 	if (tb[IPSET_ATTR_PORT])
-		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 	else
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_PROTO]) {
-		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
-		with_ports = ip_set_proto_with_ports(data.proto);
+		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(e.proto);
 
-		if (data.proto == 0)
+		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	if (!(with_ports || data.proto == IPPROTO_ICMP))
-		data.port = 0;
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!with_timeout(h->timeout))
-			return -IPSET_ERR_TIMEOUT;
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-	}
+	if (!(with_ports || e.proto == IPPROTO_ICMP))
+		e.port = 0;
 
 	if (adt == IPSET_TEST ||
 	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||
 	      tb[IPSET_ATTR_PORT_TO])) {
-		ret = adtfn(set, &data, timeout, flags);
+		ret = adtfn(set, &e, &ext, &ext, flags);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
-	ip_to = ip = ntohl(data.ip);
+	ip_to = ip = ntohl(e.ip);
 	if (tb[IPSET_ATTR_IP_TO]) {
 		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
 		if (ret)
@@ -233,7 +205,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 		ip_set_mask_from_to(ip, ip_to, cidr);
 	}
 
-	port_to = port = ntohs(data.port);
+	port_to = port = ntohs(e.port);
 	if (with_ports && tb[IPSET_ATTR_PORT_TO]) {
 		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 		if (port > port_to)
@@ -246,9 +218,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
 						       : port;
 		for (; p <= port_to; p++) {
-			data.ip = htonl(ip);
-			data.port = htons(p);
-			ret = adtfn(set, &data, timeout, flags);
+			e.ip = htonl(ip);
+			e.port = htons(p);
+			ret = adtfn(set, &e, &ext, &ext, flags);
 
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
@@ -259,36 +231,46 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-static bool
-hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b)
-{
-	const struct ip_set_hash *x = a->data;
-	const struct ip_set_hash *y = b->data;
+/* IPv6 variants */
 
-	/* Resizing changes htable_bits, so we ignore it */
-	return x->maxelem == y->maxelem &&
-	       x->timeout == y->timeout;
-}
+struct hash_ipportip6_elem {
+	union nf_inet_addr ip;
+	union nf_inet_addr ip2;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+};
 
-/* The type variant functions: IPv6 */
+struct hash_ipportip6t_elem {
+	union nf_inet_addr ip;
+	union nf_inet_addr ip2;
+	__be16 port;
+	u8 proto;
+	u8 padding;
+	unsigned long timeout;
+};
 
-struct hash_ipportip6_elem {
+struct hash_ipportip6c_elem {
 	union nf_inet_addr ip;
 	union nf_inet_addr ip2;
 	__be16 port;
 	u8 proto;
 	u8 padding;
+	struct ip_set_counter counter;
 };
 
-struct hash_ipportip6_telem {
+struct hash_ipportip6ct_elem {
 	union nf_inet_addr ip;
 	union nf_inet_addr ip2;
 	__be16 port;
 	u8 proto;
 	u8 padding;
+	struct ip_set_counter counter;
 	unsigned long timeout;
 };
 
+/* Common functions */
+
 static inline bool
 hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
 			  const struct hash_ipportip6_elem *ip2,
@@ -300,25 +282,6 @@ hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
 	       ip1->proto == ip2->proto;
 }
 
-static inline bool
-hash_ipportip6_data_isnull(const struct hash_ipportip6_elem *elem)
-{
-	return elem->proto == 0;
-}
-
-static inline void
-hash_ipportip6_data_copy(struct hash_ipportip6_elem *dst,
-			 const struct hash_ipportip6_elem *src)
-{
-	memcpy(dst, src, sizeof(*dst));
-}
-
-static inline void
-hash_ipportip6_data_zero_out(struct hash_ipportip6_elem *elem)
-{
-	elem->proto = 0;
-}
-
 static bool
 hash_ipportip6_data_list(struct sk_buff *skb,
 			 const struct hash_ipportip6_elem *data)
@@ -334,68 +297,51 @@ nla_put_failure:
 	return 1;
 }
 
-static bool
-hash_ipportip6_data_tlist(struct sk_buff *skb,
-			  const struct hash_ipportip6_elem *data)
+static inline void
+hash_ipportip6_data_next(struct hash_ipportip4_elem *next,
+			 const struct hash_ipportip6_elem *d)
 {
-	const struct hash_ipportip6_telem *e =
-		(const struct hash_ipportip6_telem *)data;
-
-	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
-	    nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) ||
-	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
-	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
-	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-			  htonl(ip_set_timeout_get(e->timeout))))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return 1;
+	next->port = d->port;
 }
 
+#undef MTYPE
 #undef PF
 #undef HOST_MASK
 
+#define MTYPE hash_ipportip6
 #define PF 6
 #define HOST_MASK 128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ipportip6_data_next(struct ip_set_hash *h,
-			 const struct hash_ipportip6_elem *d)
-{
-	h->next.port = d->port;
-}
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
 
 static int
 hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		    const struct xt_action_param *par,
-		    enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+		    enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_ipportip6_elem data = { };
+	struct hash_ipportip6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
 	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
-				 &data.port, &data.proto))
+				 &e.port, &e.proto))
 		return -EINVAL;
 
-	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
-	ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6);
-
-	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+	ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6);
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
 static int
 hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ipportip *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_ipportip6_elem data = { };
+	struct hash_ipportip6_elem e = { };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
 	u32 port, port_to;
-	u32 timeout = h->timeout;
 	bool with_ports = false;
 	int ret;
 
@@ -403,6 +349,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 	     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
 	     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+	     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+	     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
 	     tb[IPSET_ATTR_IP_TO] ||
 	     tb[IPSET_ATTR_CIDR]))
 		return -IPSET_ERR_PROTOCOL;
@@ -410,43 +358,38 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2);
 	if (ret)
 		return ret;
 
 	if (tb[IPSET_ATTR_PORT])
-		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 	else
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_PROTO]) {
-		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
-		with_ports = ip_set_proto_with_ports(data.proto);
+		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(e.proto);
 
-		if (data.proto == 0)
+		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
-		data.port = 0;
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!with_timeout(h->timeout))
-			return -IPSET_ERR_TIMEOUT;
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-	}
+	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+		e.port = 0;
 
 	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
-		ret = adtfn(set, &data, timeout, flags);
+		ret = adtfn(set, &e, &ext, &ext, flags);
 		return ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
-	port = ntohs(data.port);
+	port = ntohs(e.port);
 	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 	if (port > port_to)
 		swap(port, port_to);
@@ -454,8 +397,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (retried)
 		port = ntohs(h->next.port);
 	for (; port <= port_to; port++) {
-		data.port = htons(port);
-		ret = adtfn(set, &data, timeout, flags);
+		e.port = htons(port);
+		ret = adtfn(set, &e, &ext, &ext, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
@@ -465,78 +408,6 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* Create hash:ip type of sets */
-
-static int
-hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
-	struct ip_set_hash *h;
-	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
-	u8 hbits;
-	size_t hsize;
-
-	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
-		return -IPSET_ERR_INVALID_FAMILY;
-
-	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
-		return -IPSET_ERR_PROTOCOL;
-
-	if (tb[IPSET_ATTR_HASHSIZE]) {
-		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
-		if (hashsize < IPSET_MIMINAL_HASHSIZE)
-			hashsize = IPSET_MIMINAL_HASHSIZE;
-	}
-
-	if (tb[IPSET_ATTR_MAXELEM])
-		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
-	h = kzalloc(sizeof(*h), GFP_KERNEL);
-	if (!h)
-		return -ENOMEM;
-
-	h->maxelem = maxelem;
-	get_random_bytes(&h->initval, sizeof(h->initval));
-	h->timeout = IPSET_NO_TIMEOUT;
-
-	hbits = htable_bits(hashsize);
-	hsize = htable_size(hbits);
-	if (hsize == 0) {
-		kfree(h);
-		return -ENOMEM;
-	}
-	h->table = ip_set_alloc(hsize);
-	if (!h->table) {
-		kfree(h);
-		return -ENOMEM;
-	}
-	h->table->htable_bits = hbits;
-
-	set->data = h;
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
-		set->variant = set->family == NFPROTO_IPV4
-			? &hash_ipportip4_tvariant : &hash_ipportip6_tvariant;
-
-		if (set->family == NFPROTO_IPV4)
-			hash_ipportip4_gc_init(set);
-		else
-			hash_ipportip6_gc_init(set);
-	} else {
-		set->variant = set->family == NFPROTO_IPV4
-			? &hash_ipportip4_variant : &hash_ipportip6_variant;
-	}
-
-	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
-		 set->name, jhash_size(h->table->htable_bits),
-		 h->table->htable_bits, h->maxelem, set->data, h->table);
-
-	return 0;
-}
-
 static struct ip_set_type hash_ipportip_type __read_mostly = {
 	.name = "hash:ip,port,ip",
 	.protocol = IPSET_PROTOCOL,
@@ -552,6 +423,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
 		[IPSET_ATTR_PROBES] = { .type = NLA_U8 },
 		[IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+		[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
 	},
 	.adt_policy = {
 		[IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -563,6 +435,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {
 		[IPSET_ATTR_PROTO] = { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
 		[IPSET_ATTR_LINENO] = { .type = NLA_U32 },
+		[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
+		[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
 	},
 	.me = THIS_MODULE,
 };
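
Both uadt() range loops in the files above restart from h->next when retried is set: the data_next() helper records the last element attempted, so after the hash table has to be resized mid-command, userspace can re-send the add and the loop resumes instead of re-walking the whole range. A condensed standalone sketch of that restart pattern follows; the helper names and the failure trigger are invented, and the real error-propagation path through the ip_set core differs in detail.

	/* Condensed sketch of the retried/h->next resume logic. */
	#include <stdio.h>

	struct demo_next { unsigned int port; };

	/* Stand-in for adtfn(); pretend the table was resized at port 1005. */
	static int demo_add_one(unsigned int port, int first_pass)
	{
		return (first_pass && port == 1005) ? -1 : 0;
	}

	static int demo_add_range(struct demo_next *next, int retried,
				  unsigned int port, unsigned int port_to,
				  int first_pass)
	{
		if (retried)			/* resume where the failed attempt stopped */
			port = next->port;
		for (; port <= port_to; port++) {
			next->port = port;	/* what data_next() records */
			if (demo_add_one(port, first_pass))
				return -1;	/* caller re-invokes with retried set */
		}
		return 0;
	}

	int main(void)
	{
		struct demo_next next = { 0 };

		if (demo_add_range(&next, 0, 1000, 1010, 1))
			demo_add_range(&next, 1, 1000, 1010, 0);
		printf("finished at port %u\n", next.port);	/* prints 1010 */
		return 0;
	}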
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index f2627226a087..c6a525373be4 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -21,14 +21,14 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/ipset/pfxlen.h>
 #include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define REVISION_MIN 0
 /* 1 SCTP and UDPLITE support added */
 /* 2 Range as input support for IPv4 added */
-#define REVISION_MAX 3 /* nomatch flag support added */
+/* 3 nomatch flag support added */
+#define REVISION_MAX 4 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
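
A comment kept in the next hunk explains that this type squeezes the "nomatch" flag into the cidr byte: cidr 0 is not supported, so the element stores cidr - 1 in a 7-bit field and the eighth bit carries nomatch, converting back and forth at the boundaries. A standalone sketch of that packing follows; the names are invented for illustration.

	/* Standalone sketch of packing "cidr - 1" plus a nomatch bit into
	 * one byte, as the comment in the following hunk describes. */
	#include <assert.h>

	struct demo_elem {
		unsigned char cidr:7;	/* stores real cidr - 1, so 1..128 fits */
		unsigned char nomatch:1;
	};

	static void demo_set_cidr(struct demo_elem *e, unsigned char cidr)
	{
		e->cidr = cidr - 1;	/* the "dancing back and forth" */
	}

	static unsigned char demo_get_cidr(const struct demo_elem *e)
	{
		return e->cidr + 1;
	}

	int main(void)
	{
		struct demo_elem e = { 0, 0 };

		demo_set_cidr(&e, 128);	/* 127 still fits in 7 bits */
		e.nomatch = 1;
		assert(demo_get_cidr(&e) == 128 && e.nomatch == 1);
		return 0;
	}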
@@ -36,23 +36,19 @@ IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:ip,port,net");
 
 /* Type specific function prefix */
-#define TYPE hash_ipportnet
-
-static bool
-hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b);
-
-#define hash_ipportnet4_same_set hash_ipportnet_same_set
-#define hash_ipportnet6_same_set hash_ipportnet_same_set
-
-/* The type variant functions: IPv4 */
+#define HTYPE hash_ipportnet
 
 /* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0
  * However this way we have to store internally cidr - 1,
  * dancing back and forth.
  */
 #define IP_SET_HASH_WITH_NETS_PACKED
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
+
+/* IPv4 variants */
 
-/* Member elements without timeout */
+/* Member elements */
 struct hash_ipportnet4_elem {
 	__be32 ip;
 	__be32 ip2;
@@ -62,8 +58,7 @@ struct hash_ipportnet4_elem {
 	u8 proto;
 };
 
-/* Member elements with timeout support */
-struct hash_ipportnet4_telem {
+struct hash_ipportnet4t_elem {
 	__be32 ip;
 	__be32 ip2;
 	__be16 port;
@@ -73,6 +68,29 @@ struct hash_ipportnet4_telem {
 	unsigned long timeout;
 };
 
+struct hash_ipportnet4c_elem {
+	__be32 ip;
+	__be32 ip2;
+	__be16 port;
+	u8 cidr:7;
+	u8 nomatch:1;
+	u8 proto;
+	struct ip_set_counter counter;
+};
+
+struct hash_ipportnet4ct_elem {
+	__be32 ip;
+	__be32 ip2;
+	__be16 port;
+	u8 cidr:7;
+	u8 nomatch:1;
+	u8 proto;
+	struct ip_set_counter counter;
+	unsigned long timeout;
+};
+
+/* Common functions */
+
 static inline bool
 hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
 			   const struct hash_ipportnet4_elem *ip2,
@@ -85,29 +103,22 @@ hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
 	       ip1->proto == ip2->proto;
 }
 
-static inline bool
-hash_ipportnet4_data_isnull(const struct hash_ipportnet4_elem *elem)
+static inline int
+hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem)
 {
-	return elem->proto == 0;
+	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
-hash_ipportnet4_data_copy(struct hash_ipportnet4_elem *dst,
-			  const struct hash_ipportnet4_elem *src)
+hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags)
 {
-	memcpy(dst, src, sizeof(*dst));
+	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
 }
 
 static inline void
-hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags)
+hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags)
 {
-	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
-}
-
-static inline int
-hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem)
-{
-	return elem->nomatch ? -ENOTEMPTY : 1;
+	swap(*flags, elem->nomatch);
 }
 
 static inline void
@@ -117,12 +128,6 @@ hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
 	elem->cidr = cidr - 1;
 }
 
-static inline void
-hash_ipportnet4_data_zero_out(struct hash_ipportnet4_elem *elem)
-{
-	elem->proto = 0;
-}
-
 static bool
 hash_ipportnet4_data_list(struct sk_buff *skb,
 			  const struct hash_ipportnet4_elem *data)
@@ -143,81 +148,56 @@ nla_put_failure:
 	return 1;
 }
 
-static bool
-hash_ipportnet4_data_tlist(struct sk_buff *skb,
-			   const struct hash_ipportnet4_elem *data)
+static inline void
+hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
+			  const struct hash_ipportnet4_elem *d)
 {
-	const struct hash_ipportnet4_telem *tdata =
-		(const struct hash_ipportnet4_telem *)data;
-	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
-
-	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
-	    nla_put_ipaddr4(skb, IPSET_ATTR_IP2, tdata->ip2) ||
-	    nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) ||
-	    nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) ||
-	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
-	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-			  htonl(ip_set_timeout_get(tdata->timeout))) ||
-	    (flags &&
-	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return 1;
+	next->ip = d->ip;
+	next->port = d->port;
+	next->ip2 = d->ip2;
 }
 
-#define IP_SET_HASH_WITH_PROTO
-#define IP_SET_HASH_WITH_NETS
-
+#define MTYPE hash_ipportnet4
 #define PF 4
 #define HOST_MASK 32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_ipportnet4_data_next(struct ip_set_hash *h,
-			  const struct hash_ipportnet4_elem *d)
-{
-	h->next.ip = d->ip;
-	h->next.port = d->port;
-	h->next.ip2 = d->ip2;
-}
+#include "ip_set_hash_gen.h"
 
 static int
 hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		     const struct xt_action_param *par,
-		     enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+		     enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_ipportnet4_elem data = {
+	struct hash_ipportnet4_elem e = {
 		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
 	};
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
 	if (adt == IPSET_TEST)
-		data.cidr = HOST_MASK - 1;
+		e.cidr = HOST_MASK - 1;
 
 	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
-				 &data.port, &data.proto))
+				 &e.port, &e.proto))
 		return -EINVAL;
 
-	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
-	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2);
-	data.ip2 &= ip_set_netmask(data.cidr + 1);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2);
+	e.ip2 &= ip_set_netmask(e.cidr + 1);
 
-	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
 static int
 hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_ipportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_ipportnet4_elem data = { .cidr = HOST_MASK - 1 };
+	struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
 	u32 ip, ip_to, p = 0, port, port_to;
 	u32 ip2_from, ip2_to, ip2_last, ip2;
-	u32 timeout = h->timeout;
 	bool with_ports = false;
 	u8 cidr;
 	int ret;
@@ -226,13 +206,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 	     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
 	     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-	     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+	     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+	     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+	     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
@@ -244,46 +227,41 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
 		if (!cidr || cidr > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
-		data.cidr = cidr - 1;
+		e.cidr = cidr - 1;
 	}
 
 	if (tb[IPSET_ATTR_PORT])
-		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 	else
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_PROTO]) {
-		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
-		with_ports = ip_set_proto_with_ports(data.proto);
+		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(e.proto);
 
-		if (data.proto == 0)
+		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	if (!(with_ports || data.proto == IPPROTO_ICMP))
-		data.port = 0;
+	if (!(with_ports || e.proto == IPPROTO_ICMP))
+		e.port = 0;
 
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!with_timeout(h->timeout))
-			return -IPSET_ERR_TIMEOUT;
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-	}
-
-	if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) {
+	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
-			flags |= (cadt_flags << 16);
+			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
 
 	with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
 	if (adt == IPSET_TEST ||
 	    !(tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_IP_TO] || with_ports ||
 	      tb[IPSET_ATTR_IP2_TO])) {
-		data.ip = htonl(ip);
-		data.ip2 = htonl(ip2_from & ip_set_hostmask(data.cidr + 1));
-		ret = adtfn(set, &data, timeout, flags);
-		return ip_set_eexist(ret, flags) ? 0 : ret;
+		e.ip = htonl(ip);
+		e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1));
+		ret = adtfn(set, &e, &ext, &ext, flags);
+		return ip_set_enomatch(ret, flags, adt) ? 1 :
+		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
 	ip_to = ip;
@@ -301,7 +279,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		ip_set_mask_from_to(ip, ip_to, cidr);
 	}
 
-	port_to = port = ntohs(data.port);
+	port_to = port = ntohs(e.port);
 	if (tb[IPSET_ATTR_PORT_TO]) {
 		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 		if (port > port_to)
@@ -317,28 +295,27 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		swap(ip2_from, ip2_to);
 		if (ip2_from + UINT_MAX == ip2_to)
 			return -IPSET_ERR_HASH_RANGE;
-	} else {
-		ip_set_mask_from_to(ip2_from, ip2_to, data.cidr + 1);
-	}
+	} else
+		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
 
 	if (retried)
 		ip = ntohl(h->next.ip);
 	for (; !before(ip_to, ip); ip++) {
-		data.ip = htonl(ip);
+		e.ip = htonl(ip);
 		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
 						       : port;
 		for (; p <= port_to; p++) {
-			data.port = htons(p);
+			e.port = htons(p);
 			ip2 = retried
 			      && ip == ntohl(h->next.ip)
 			      && p == ntohs(h->next.port)
 				? ntohl(h->next.ip2) : ip2_from;
 			while (!after(ip2, ip2_to)) {
-				data.ip2 = htonl(ip2);
+				e.ip2 = htonl(ip2);
 				ip2_last = ip_set_range_to_cidr(ip2, ip2_to,
 								&cidr);
-				data.cidr = cidr - 1;
-				ret = adtfn(set, &data, timeout, flags);
+				e.cidr = cidr - 1;
+				ret = adtfn(set, &e, &ext, &ext, flags);
 
 				if (ret && !ip_set_eexist(ret, flags))
 					return ret;
@@ -351,38 +328,50 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-static bool
-hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b)
-{
-	const struct ip_set_hash *x = a->data;
-	const struct ip_set_hash *y = b->data;
+/* IPv6 variants */
 
-	/* Resizing changes htable_bits, so we ignore it */
-	return x->maxelem == y->maxelem &&
-	       x->timeout == y->timeout;
-}
+struct hash_ipportnet6_elem {
+	union nf_inet_addr ip;
+	union nf_inet_addr ip2;
+	__be16 port;
+	u8 cidr:7;
+	u8 nomatch:1;
+	u8 proto;
+};
 
-/* The type variant functions: IPv6 */
+struct hash_ipportnet6t_elem {
+	union nf_inet_addr ip;
+	union nf_inet_addr ip2;
+	__be16 port;
+	u8 cidr:7;
+	u8 nomatch:1;
+	u8 proto;
+	unsigned long timeout;
+};
 
-struct hash_ipportnet6_elem {
+struct hash_ipportnet6c_elem {
 	union nf_inet_addr ip;
 	union nf_inet_addr ip2;
 	__be16 port;
 	u8 cidr:7;
 	u8 nomatch:1;
 	u8 proto;
+	struct ip_set_counter counter;
 };
 
-struct hash_ipportnet6_telem {
+struct hash_ipportnet6ct_elem {
 	union nf_inet_addr ip;
 	union nf_inet_addr ip2;
 	__be16 port;
 	u8 cidr:7;
 	u8 nomatch:1;
 	u8 proto;
+	struct ip_set_counter counter;
 	unsigned long timeout;
 };
 
+/* Common functions */
+
 static inline bool
 hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
 			   const struct hash_ipportnet6_elem *ip2,
@@ -395,44 +384,22 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
 	       ip1->proto == ip2->proto;
 }
 
-static inline bool
-hash_ipportnet6_data_isnull(const struct hash_ipportnet6_elem *elem)
-{
-	return elem->proto == 0;
-}
-
-static inline void
-hash_ipportnet6_data_copy(struct hash_ipportnet6_elem *dst,
-			  const struct hash_ipportnet6_elem *src)
-{
-	memcpy(dst, src, sizeof(*dst));
-}
-
-static inline void
-hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags)
-{
-	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
-}
-
 static inline int
-hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem)
+hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem)
 {
 	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
-hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem)
+hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags)
 {
-	elem->proto = 0;
+	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
 }
 
 static inline void
-ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags)
 {
-	ip->ip6[0] &= ip_set_netmask6(prefix)[0];
-	ip->ip6[1] &= ip_set_netmask6(prefix)[1];
-	ip->ip6[2] &= ip_set_netmask6(prefix)[2];
-	ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+	swap(*flags, elem->nomatch);
 }
 
 static inline void
@@ -462,78 +429,58 @@ nla_put_failure:
462 return 1; 429 return 1;
463} 430}
464 431
465static bool 432static inline void
466hash_ipportnet6_data_tlist(struct sk_buff *skb, 433hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next,
467 const struct hash_ipportnet6_elem *data) 434 const struct hash_ipportnet6_elem *d)
468{ 435{
469 const struct hash_ipportnet6_telem *e = 436 next->port = d->port;
470 (const struct hash_ipportnet6_telem *)data;
471 u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
472
473 if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
474 nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) ||
475 nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
476 nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) ||
477 nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
478 nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
479 htonl(ip_set_timeout_get(e->timeout))) ||
480 (flags &&
481 nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
482 goto nla_put_failure;
483 return 0;
484
485nla_put_failure:
486 return 1;
487} 437}
488 438
439#undef MTYPE
489#undef PF 440#undef PF
490#undef HOST_MASK 441#undef HOST_MASK
491 442
443#define MTYPE hash_ipportnet6
492#define PF 6 444#define PF 6
493#define HOST_MASK 128 445#define HOST_MASK 128
494#include <linux/netfilter/ipset/ip_set_ahash.h> 446#define IP_SET_EMIT_CREATE
495 447#include "ip_set_hash_gen.h"
496static inline void
497hash_ipportnet6_data_next(struct ip_set_hash *h,
498 const struct hash_ipportnet6_elem *d)
499{
500 h->next.port = d->port;
501}
502 448
503static int 449static int
504hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, 450hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
505 const struct xt_action_param *par, 451 const struct xt_action_param *par,
506 enum ipset_adt adt, const struct ip_set_adt_opt *opt) 452 enum ipset_adt adt, struct ip_set_adt_opt *opt)
507{ 453{
508 const struct ip_set_hash *h = set->data; 454 const struct hash_ipportnet *h = set->data;
509 ipset_adtfn adtfn = set->variant->adt[adt]; 455 ipset_adtfn adtfn = set->variant->adt[adt];
510 struct hash_ipportnet6_elem data = { 456 struct hash_ipportnet6_elem e = {
511 .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1 457 .cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
512 }; 458 };
459 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
513 460
514 if (adt == IPSET_TEST) 461 if (adt == IPSET_TEST)
515 data.cidr = HOST_MASK - 1; 462 e.cidr = HOST_MASK - 1;
516 463
517 if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, 464 if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
518 &data.port, &data.proto)) 465 &e.port, &e.proto))
519 return -EINVAL; 466 return -EINVAL;
520 467
521 ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); 468 ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
522 ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); 469 ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6);
523 ip6_netmask(&data.ip2, data.cidr + 1); 470 ip6_netmask(&e.ip2, e.cidr + 1);
524 471
525 return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); 472 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
526} 473}
527 474
528static int 475static int
529hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], 476hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
530 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 477 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
531{ 478{
532 const struct ip_set_hash *h = set->data; 479 const struct hash_ipportnet *h = set->data;
533 ipset_adtfn adtfn = set->variant->adt[adt]; 480 ipset_adtfn adtfn = set->variant->adt[adt];
534 struct hash_ipportnet6_elem data = { .cidr = HOST_MASK - 1 }; 481 struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
482 struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
535 u32 port, port_to; 483 u32 port, port_to;
536 u32 timeout = h->timeout;
537 bool with_ports = false; 484 bool with_ports = false;
538 u8 cidr; 485 u8 cidr;
539 int ret; 486 int ret;
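
The hunks that follow convert hash_ipportnet6_uadt() (and every other uadt/kadt in this series) from passing a bare timeout to passing an extension block: IP_SET_INIT_UEXT()/ip_set_get_extensions() fill a struct ip_set_ext from the netlink attributes, IP_SET_INIT_KEXT() does the same on the packet path, and adtfn() now takes the element plus two extension pointers. A rough sketch of the container these helpers populate — the exact field layout lives in include/linux/netfilter/ipset/ip_set.h and is an assumption here:

	/* Sketch only: approximate shape of the extension block added by
	 * the counters series (kernel types from <linux/types.h>).
	 */
	struct ip_set_ext {
		u64 packets;	/* initial packet count, IPSET_ATTR_PACKETS */
		u64 bytes;	/* initial byte count,   IPSET_ATTR_BYTES   */
		u32 timeout;	/* per-element timeout,  IPSET_ATTR_TIMEOUT */
	};
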
@@ -543,6 +490,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
543 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || 490 !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
544 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 491 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
545 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || 492 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
493 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
494 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||
546 tb[IPSET_ATTR_IP_TO] || 495 tb[IPSET_ATTR_IP_TO] ||
547 tb[IPSET_ATTR_CIDR])) 496 tb[IPSET_ATTR_CIDR]))
548 return -IPSET_ERR_PROTOCOL; 497 return -IPSET_ERR_PROTOCOL;
@@ -552,11 +501,12 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
552 if (tb[IPSET_ATTR_LINENO]) 501 if (tb[IPSET_ATTR_LINENO])
553 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 502 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
554 503
555 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); 504 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
505 ip_set_get_extensions(set, tb, &ext);
556 if (ret) 506 if (ret)
557 return ret; 507 return ret;
558 508
559 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2); 509 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2);
560 if (ret) 510 if (ret)
561 return ret; 511 return ret;
562 512
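
One idiom worth flagging in the hunk above (it recurs in every file of this series): ret = ip_set_get_ipaddr6(...) || ip_set_get_extensions(...) stores the C truth value of the chain, so on failure ret is 1 rather than the callee's negative error code. An explicit form that preserves the specific errno would be:

	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip);
	if (ret)
		return ret;
	ret = ip_set_get_extensions(set, tb, &ext);
	if (ret)
		return ret;
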
@@ -564,46 +514,41 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
564 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); 514 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
565 if (!cidr || cidr > HOST_MASK) 515 if (!cidr || cidr > HOST_MASK)
566 return -IPSET_ERR_INVALID_CIDR; 516 return -IPSET_ERR_INVALID_CIDR;
567 data.cidr = cidr - 1; 517 e.cidr = cidr - 1;
568 } 518 }
569 519
570 ip6_netmask(&data.ip2, data.cidr + 1); 520 ip6_netmask(&e.ip2, e.cidr + 1);
571 521
572 if (tb[IPSET_ATTR_PORT]) 522 if (tb[IPSET_ATTR_PORT])
573 data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); 523 e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
574 else 524 else
575 return -IPSET_ERR_PROTOCOL; 525 return -IPSET_ERR_PROTOCOL;
576 526
577 if (tb[IPSET_ATTR_PROTO]) { 527 if (tb[IPSET_ATTR_PROTO]) {
578 data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); 528 e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
579 with_ports = ip_set_proto_with_ports(data.proto); 529 with_ports = ip_set_proto_with_ports(e.proto);
580 530
581 if (data.proto == 0) 531 if (e.proto == 0)
582 return -IPSET_ERR_INVALID_PROTO; 532 return -IPSET_ERR_INVALID_PROTO;
583 } else 533 } else
584 return -IPSET_ERR_MISSING_PROTO; 534 return -IPSET_ERR_MISSING_PROTO;
585 535
586 if (!(with_ports || data.proto == IPPROTO_ICMPV6)) 536 if (!(with_ports || e.proto == IPPROTO_ICMPV6))
587 data.port = 0; 537 e.port = 0;
588
589 if (tb[IPSET_ATTR_TIMEOUT]) {
590 if (!with_timeout(h->timeout))
591 return -IPSET_ERR_TIMEOUT;
592 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
593 }
594 538
595 if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { 539 if (tb[IPSET_ATTR_CADT_FLAGS]) {
596 u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); 540 u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
597 if (cadt_flags & IPSET_FLAG_NOMATCH) 541 if (cadt_flags & IPSET_FLAG_NOMATCH)
598 flags |= (cadt_flags << 16); 542 flags |= (IPSET_FLAG_NOMATCH << 16);
599 } 543 }
600 544
601 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { 545 if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
602 ret = adtfn(set, &data, timeout, flags); 546 ret = adtfn(set, &e, &ext, &ext, flags);
603 return ip_set_eexist(ret, flags) ? 0 : ret; 547 return ip_set_enomatch(ret, flags, adt) ? 1 :
548 ip_set_eexist(ret, flags) ? 0 : ret;
604 } 549 }
605 550
606 port = ntohs(data.port); 551 port = ntohs(e.port);
607 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); 552 port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
608 if (port > port_to) 553 if (port > port_to)
609 swap(port, port_to); 554 swap(port, port_to);
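
The CADT flag handling above is the normalized pattern used across all three types in this diff: the nomatch bit from userspace is re-encoded into the upper 16 bits of the command flags (the lower 16 remain command flags proper), and the per-type data_set_flags helper recovers it when the element is written:

	/* uadt side: stash the CADT bit above the command flags */
	if (cadt_flags & IPSET_FLAG_NOMATCH)
		flags |= (IPSET_FLAG_NOMATCH << 16);

	/* element side (hash_*_data_set_flags) */
	elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
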
@@ -611,8 +556,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
611 if (retried) 556 if (retried)
612 port = ntohs(h->next.port); 557 port = ntohs(h->next.port);
613 for (; port <= port_to; port++) { 558 for (; port <= port_to; port++) {
614 data.port = htons(port); 559 e.port = htons(port);
615 ret = adtfn(set, &data, timeout, flags); 560 ret = adtfn(set, &e, &ext, &ext, flags);
616 561
617 if (ret && !ip_set_eexist(ret, flags)) 562 if (ret && !ip_set_eexist(ret, flags))
618 return ret; 563 return ret;
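
The port loop above also shows the resume protocol for resized sets: the generated code records the last handled element through the type's *_data_next callback into h->next, and a retried add restarts from there instead of from the beginning of the range. Pulled together from the hunk:

	if (retried)
		port = ntohs(h->next.port);	/* resume after a resize */
	for (; port <= port_to; port++) {
		e.port = htons(port);
		ret = adtfn(set, &e, &ext, &ext, flags);
		if (ret && !ip_set_eexist(ret, flags))
			return ret;	/* hard error */
		ret = 0;		/* an already-existing entry is benign */
	}
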
@@ -622,81 +567,6 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
622 return ret; 567 return ret;
623} 568}
624 569
625/* Create hash:ip type of sets */
626
627static int
628hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
629{
630 struct ip_set_hash *h;
631 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
632 u8 hbits;
633 size_t hsize;
634
635 if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
636 return -IPSET_ERR_INVALID_FAMILY;
637
638 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
639 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
640 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
641 return -IPSET_ERR_PROTOCOL;
642
643 if (tb[IPSET_ATTR_HASHSIZE]) {
644 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
645 if (hashsize < IPSET_MIMINAL_HASHSIZE)
646 hashsize = IPSET_MIMINAL_HASHSIZE;
647 }
648
649 if (tb[IPSET_ATTR_MAXELEM])
650 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
651
652 h = kzalloc(sizeof(*h)
653 + sizeof(struct ip_set_hash_nets)
654 * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL);
655 if (!h)
656 return -ENOMEM;
657
658 h->maxelem = maxelem;
659 get_random_bytes(&h->initval, sizeof(h->initval));
660 h->timeout = IPSET_NO_TIMEOUT;
661
662 hbits = htable_bits(hashsize);
663 hsize = htable_size(hbits);
664 if (hsize == 0) {
665 kfree(h);
666 return -ENOMEM;
667 }
668 h->table = ip_set_alloc(hsize);
669 if (!h->table) {
670 kfree(h);
671 return -ENOMEM;
672 }
673 h->table->htable_bits = hbits;
674
675 set->data = h;
676
677 if (tb[IPSET_ATTR_TIMEOUT]) {
678 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
679
680 set->variant = set->family == NFPROTO_IPV4
681 ? &hash_ipportnet4_tvariant
682 : &hash_ipportnet6_tvariant;
683
684 if (set->family == NFPROTO_IPV4)
685 hash_ipportnet4_gc_init(set);
686 else
687 hash_ipportnet6_gc_init(set);
688 } else {
689 set->variant = set->family == NFPROTO_IPV4
690 ? &hash_ipportnet4_variant : &hash_ipportnet6_variant;
691 }
692
693 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
694 set->name, jhash_size(h->table->htable_bits),
695 h->table->htable_bits, h->maxelem, set->data, h->table);
696
697 return 0;
698}
699
700static struct ip_set_type hash_ipportnet_type __read_mostly = { 570static struct ip_set_type hash_ipportnet_type __read_mostly = {
701 .name = "hash:ip,port,net", 571 .name = "hash:ip,port,net",
702 .protocol = IPSET_PROTOCOL, 572 .protocol = IPSET_PROTOCOL,
@@ -713,6 +583,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
713 [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, 583 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
714 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, 584 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
715 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 585 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
586 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
716 }, 587 },
717 .adt_policy = { 588 .adt_policy = {
718 [IPSET_ATTR_IP] = { .type = NLA_NESTED }, 589 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -727,6 +598,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {
727 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, 598 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
728 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 599 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
729 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 600 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
601 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
602 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
730 }, 603 },
731 .me = THIS_MODULE, 604 .me = THIS_MODULE,
732}; 605};
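
The adt policy now accepts IPSET_ATTR_BYTES and IPSET_ATTR_PACKETS as 64-bit big-endian netlink attributes, letting an add request seed the new per-element counters. ip_set_get_extensions() presumably consumes them along these lines — the helper's internals are not shown in this diff; nla_get_be64()/be64_to_cpu() are the standard netlink accessors:

	if (tb[IPSET_ATTR_BYTES])
		ext.bytes = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_BYTES]));
	if (tb[IPSET_ATTR_PACKETS])
		ext.packets = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_PACKETS]));
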
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 4b677cf6bf7d..da740ceb56ae 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 1/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 * 2 *
3 * This program is free software; you can redistribute it and/or modify 3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as 4 * it under the terms of the GNU General Public License version 2 as
@@ -20,12 +20,12 @@
20#include <linux/netfilter.h> 20#include <linux/netfilter.h>
21#include <linux/netfilter/ipset/pfxlen.h> 21#include <linux/netfilter/ipset/pfxlen.h>
22#include <linux/netfilter/ipset/ip_set.h> 22#include <linux/netfilter/ipset/ip_set.h>
23#include <linux/netfilter/ipset/ip_set_timeout.h>
24#include <linux/netfilter/ipset/ip_set_hash.h> 23#include <linux/netfilter/ipset/ip_set_hash.h>
25 24
26#define REVISION_MIN 0 25#define REVISION_MIN 0
27/* 1 Range as input support for IPv4 added */ 26/* 1 Range as input support for IPv4 added */
28#define REVISION_MAX 2 /* nomatch flag support added */ 27/* 2 nomatch flag support added */
28#define REVISION_MAX 3 /* Counters support added */
29 29
30MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 31MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
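
As in the other files, the revision window is widened rather than replaced: REVISION_MIN stays 0, so sets created by older userspace keep working, while REVISION_MAX 3 advertises counter support. Both bounds end up in the registered type, roughly as below (field names assumed from how IP_SET_MODULE_DESC is used across ipset):

	static struct ip_set_type hash_net_type __read_mostly = {
		.name		= "hash:net",
		/* ... */
		.revision_min	= REVISION_MIN,	/* oldest dialect accepted */
		.revision_max	= REVISION_MAX,	/* newest feature level */
	};
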
@@ -33,33 +33,46 @@ IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX);
33MODULE_ALIAS("ip_set_hash:net"); 33MODULE_ALIAS("ip_set_hash:net");
34 34
35/* Type specific function prefix */ 35/* Type specific function prefix */
36#define TYPE hash_net 36#define HTYPE hash_net
37#define IP_SET_HASH_WITH_NETS
37 38
38static bool 39/* IPv4 variants */
39hash_net_same_set(const struct ip_set *a, const struct ip_set *b);
40 40
41#define hash_net4_same_set hash_net_same_set 41/* Member elements */
42#define hash_net6_same_set hash_net_same_set 42struct hash_net4_elem {
43 __be32 ip;
44 u16 padding0;
45 u8 nomatch;
46 u8 cidr;
47};
43 48
44/* The type variant functions: IPv4 */ 49struct hash_net4t_elem {
50 __be32 ip;
51 u16 padding0;
52 u8 nomatch;
53 u8 cidr;
54 unsigned long timeout;
55};
45 56
46/* Member elements without timeout */ 57struct hash_net4c_elem {
47struct hash_net4_elem {
48 __be32 ip; 58 __be32 ip;
49 u16 padding0; 59 u16 padding0;
50 u8 nomatch; 60 u8 nomatch;
51 u8 cidr; 61 u8 cidr;
62 struct ip_set_counter counter;
52}; 63};
53 64
54/* Member elements with timeout support */ 65struct hash_net4ct_elem {
55struct hash_net4_telem {
56 __be32 ip; 66 __be32 ip;
57 u16 padding0; 67 u16 padding0;
58 u8 nomatch; 68 u8 nomatch;
59 u8 cidr; 69 u8 cidr;
70 struct ip_set_counter counter;
60 unsigned long timeout; 71 unsigned long timeout;
61}; 72};
62 73
74/* Common functions */
75
63static inline bool 76static inline bool
64hash_net4_data_equal(const struct hash_net4_elem *ip1, 77hash_net4_data_equal(const struct hash_net4_elem *ip1,
65 const struct hash_net4_elem *ip2, 78 const struct hash_net4_elem *ip2,
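
The old elem/telem pair per address family becomes the four variants above, one per extension combination: plain, t (timeout), c (counters), ct (counters plus timeout); the generated create function picks the variant matching the set's create-time options. The counter block the c/ct variants append is, in sketch form (atomic types assumed, since counters are bumped from the packet path):

	struct ip_set_counter {
		atomic64_t bytes;
		atomic64_t packets;
	};
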
@@ -69,31 +82,22 @@ hash_net4_data_equal(const struct hash_net4_elem *ip1,
69 ip1->cidr == ip2->cidr; 82 ip1->cidr == ip2->cidr;
70} 83}
71 84
72static inline bool 85static inline int
73hash_net4_data_isnull(const struct hash_net4_elem *elem) 86hash_net4_do_data_match(const struct hash_net4_elem *elem)
74{ 87{
75 return elem->cidr == 0; 88 return elem->nomatch ? -ENOTEMPTY : 1;
76} 89}
77 90
78static inline void 91static inline void
79hash_net4_data_copy(struct hash_net4_elem *dst, 92hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags)
80 const struct hash_net4_elem *src)
81{ 93{
82 dst->ip = src->ip; 94 elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
83 dst->cidr = src->cidr;
84 dst->nomatch = src->nomatch;
85} 95}
86 96
87static inline void 97static inline void
88hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags) 98hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags)
89{
90 dst->nomatch = flags & IPSET_FLAG_NOMATCH;
91}
92
93static inline int
94hash_net4_data_match(const struct hash_net4_elem *elem)
95{ 99{
96 return elem->nomatch ? -ENOTEMPTY : 1; 100 swap(*flags, elem->nomatch);
97} 101}
98 102
99static inline void 103static inline void
@@ -103,13 +107,6 @@ hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
103 elem->cidr = cidr; 107 elem->cidr = cidr;
104} 108}
105 109
106/* Zero CIDR values cannot be stored */
107static inline void
108hash_net4_data_zero_out(struct hash_net4_elem *elem)
109{
110 elem->cidr = 0;
111}
112
113static bool 110static bool
114hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data) 111hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)
115{ 112{
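
All the per-type boilerplate deleted in this file (data_isnull/copy/zero_out, the tlist variant, same_set, the create function) now comes out of ip_set_hash_gen.h: defining MTYPE, PF and HOST_MASK and including the header stamps out the whole function family for that prefix. A stripped-down illustration of the token-pasting pattern — macro names here are illustrative, not the header's actual ones:

	#define IPSET_CONCAT(a, b)	a##b
	#define IPSET_TOKEN(a, b)	IPSET_CONCAT(a, b)	/* expand, then paste */

	#define MTYPE hash_net4
	/* IPSET_TOKEN(MTYPE, _add) -> hash_net4_add; one include per MTYPE
	 * generates _add, _del, _test, _gc, _list, ... for that variant.
	 */
	static int IPSET_TOKEN(MTYPE, _add)(void) { return 0; }
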
@@ -126,106 +123,84 @@ nla_put_failure:
126 return 1; 123 return 1;
127} 124}
128 125
129static bool 126static inline void
130hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data) 127hash_net4_data_next(struct hash_net4_elem *next,
128 const struct hash_net4_elem *d)
131{ 129{
132 const struct hash_net4_telem *tdata = 130 next->ip = d->ip;
133 (const struct hash_net4_telem *)data;
134 u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
135
136 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
137 nla_put_u8(skb, IPSET_ATTR_CIDR, tdata->cidr) ||
138 nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
139 htonl(ip_set_timeout_get(tdata->timeout))) ||
140 (flags &&
141 nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
142 goto nla_put_failure;
143 return 0;
144
145nla_put_failure:
146 return 1;
147} 131}
148 132
149#define IP_SET_HASH_WITH_NETS 133#define MTYPE hash_net4
150
151#define PF 4 134#define PF 4
152#define HOST_MASK 32 135#define HOST_MASK 32
153#include <linux/netfilter/ipset/ip_set_ahash.h> 136#include "ip_set_hash_gen.h"
154
155static inline void
156hash_net4_data_next(struct ip_set_hash *h,
157 const struct hash_net4_elem *d)
158{
159 h->next.ip = d->ip;
160}
161 137
162static int 138static int
163hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, 139hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
164 const struct xt_action_param *par, 140 const struct xt_action_param *par,
165 enum ipset_adt adt, const struct ip_set_adt_opt *opt) 141 enum ipset_adt adt, struct ip_set_adt_opt *opt)
166{ 142{
167 const struct ip_set_hash *h = set->data; 143 const struct hash_net *h = set->data;
168 ipset_adtfn adtfn = set->variant->adt[adt]; 144 ipset_adtfn adtfn = set->variant->adt[adt];
169 struct hash_net4_elem data = { 145 struct hash_net4_elem e = {
170 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK 146 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
171 }; 147 };
148 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
172 149
173 if (data.cidr == 0) 150 if (e.cidr == 0)
174 return -EINVAL; 151 return -EINVAL;
175 if (adt == IPSET_TEST) 152 if (adt == IPSET_TEST)
176 data.cidr = HOST_MASK; 153 e.cidr = HOST_MASK;
177 154
178 ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); 155 ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
179 data.ip &= ip_set_netmask(data.cidr); 156 e.ip &= ip_set_netmask(e.cidr);
180 157
181 return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); 158 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
182} 159}
183 160
184static int 161static int
185hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], 162hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
186 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 163 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
187{ 164{
188 const struct ip_set_hash *h = set->data; 165 const struct hash_net *h = set->data;
189 ipset_adtfn adtfn = set->variant->adt[adt]; 166 ipset_adtfn adtfn = set->variant->adt[adt];
190 struct hash_net4_elem data = { .cidr = HOST_MASK }; 167 struct hash_net4_elem e = { .cidr = HOST_MASK };
191 u32 timeout = h->timeout; 168 struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
192 u32 ip = 0, ip_to, last; 169 u32 ip = 0, ip_to, last;
193 int ret; 170 int ret;
194 171
195 if (unlikely(!tb[IPSET_ATTR_IP] || 172 if (unlikely(!tb[IPSET_ATTR_IP] ||
196 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 173 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
197 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) 174 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
175 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
176 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
198 return -IPSET_ERR_PROTOCOL; 177 return -IPSET_ERR_PROTOCOL;
199 178
200 if (tb[IPSET_ATTR_LINENO]) 179 if (tb[IPSET_ATTR_LINENO])
201 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 180 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
202 181
203 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); 182 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
183 ip_set_get_extensions(set, tb, &ext);
204 if (ret) 184 if (ret)
205 return ret; 185 return ret;
206 186
207 if (tb[IPSET_ATTR_CIDR]) { 187 if (tb[IPSET_ATTR_CIDR]) {
208 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 188 e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
209 if (!data.cidr || data.cidr > HOST_MASK) 189 if (!e.cidr || e.cidr > HOST_MASK)
210 return -IPSET_ERR_INVALID_CIDR; 190 return -IPSET_ERR_INVALID_CIDR;
211 } 191 }
212 192
213 if (tb[IPSET_ATTR_TIMEOUT]) { 193 if (tb[IPSET_ATTR_CADT_FLAGS]) {
214 if (!with_timeout(h->timeout))
215 return -IPSET_ERR_TIMEOUT;
216 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
217 }
218
219 if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) {
220 u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); 194 u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
221 if (cadt_flags & IPSET_FLAG_NOMATCH) 195 if (cadt_flags & IPSET_FLAG_NOMATCH)
222 flags |= (cadt_flags << 16); 196 flags |= (IPSET_FLAG_NOMATCH << 16);
223 } 197 }
224 198
225 if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { 199 if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
226 data.ip = htonl(ip & ip_set_hostmask(data.cidr)); 200 e.ip = htonl(ip & ip_set_hostmask(e.cidr));
227 ret = adtfn(set, &data, timeout, flags); 201 ret = adtfn(set, &e, &ext, &ext, flags);
228 return ip_set_eexist(ret, flags) ? 0 : ret; 202 return ip_set_enomatch(ret, flags, adt) ? 1 :
203 ip_set_eexist(ret, flags) ? 0 : ret;
229 } 204 }
230 205
231 ip_to = ip; 206 ip_to = ip;
@@ -241,9 +216,9 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
241 if (retried) 216 if (retried)
242 ip = ntohl(h->next.ip); 217 ip = ntohl(h->next.ip);
243 while (!after(ip, ip_to)) { 218 while (!after(ip, ip_to)) {
244 data.ip = htonl(ip); 219 e.ip = htonl(ip);
245 last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); 220 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
246 ret = adtfn(set, &data, timeout, flags); 221 ret = adtfn(set, &e, &ext, &ext, flags);
247 if (ret && !ip_set_eexist(ret, flags)) 222 if (ret && !ip_set_eexist(ret, flags))
248 return ret; 223 return ret;
249 else 224 else
@@ -253,34 +228,42 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
253 return ret; 228 return ret;
254} 229}
255 230
256static bool 231/* IPv6 variants */
257hash_net_same_set(const struct ip_set *a, const struct ip_set *b)
258{
259 const struct ip_set_hash *x = a->data;
260 const struct ip_set_hash *y = b->data;
261 232
262 /* Resizing changes htable_bits, so we ignore it */ 233struct hash_net6_elem {
263 return x->maxelem == y->maxelem && 234 union nf_inet_addr ip;
264 x->timeout == y->timeout; 235 u16 padding0;
265} 236 u8 nomatch;
237 u8 cidr;
238};
266 239
267/* The type variant functions: IPv6 */ 240struct hash_net6t_elem {
241 union nf_inet_addr ip;
242 u16 padding0;
243 u8 nomatch;
244 u8 cidr;
245 unsigned long timeout;
246};
268 247
269struct hash_net6_elem { 248struct hash_net6c_elem {
270 union nf_inet_addr ip; 249 union nf_inet_addr ip;
271 u16 padding0; 250 u16 padding0;
272 u8 nomatch; 251 u8 nomatch;
273 u8 cidr; 252 u8 cidr;
253 struct ip_set_counter counter;
274}; 254};
275 255
276struct hash_net6_telem { 256struct hash_net6ct_elem {
277 union nf_inet_addr ip; 257 union nf_inet_addr ip;
278 u16 padding0; 258 u16 padding0;
279 u8 nomatch; 259 u8 nomatch;
280 u8 cidr; 260 u8 cidr;
261 struct ip_set_counter counter;
281 unsigned long timeout; 262 unsigned long timeout;
282}; 263};
283 264
265/* Common functions */
266
284static inline bool 267static inline bool
285hash_net6_data_equal(const struct hash_net6_elem *ip1, 268hash_net6_data_equal(const struct hash_net6_elem *ip1,
286 const struct hash_net6_elem *ip2, 269 const struct hash_net6_elem *ip2,
@@ -290,46 +273,22 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1,
290 ip1->cidr == ip2->cidr; 273 ip1->cidr == ip2->cidr;
291} 274}
292 275
293static inline bool
294hash_net6_data_isnull(const struct hash_net6_elem *elem)
295{
296 return elem->cidr == 0;
297}
298
299static inline void
300hash_net6_data_copy(struct hash_net6_elem *dst,
301 const struct hash_net6_elem *src)
302{
303 dst->ip.in6 = src->ip.in6;
304 dst->cidr = src->cidr;
305 dst->nomatch = src->nomatch;
306}
307
308static inline void
309hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags)
310{
311 dst->nomatch = flags & IPSET_FLAG_NOMATCH;
312}
313
314static inline int 276static inline int
315hash_net6_data_match(const struct hash_net6_elem *elem) 277hash_net6_do_data_match(const struct hash_net6_elem *elem)
316{ 278{
317 return elem->nomatch ? -ENOTEMPTY : 1; 279 return elem->nomatch ? -ENOTEMPTY : 1;
318} 280}
319 281
320static inline void 282static inline void
321hash_net6_data_zero_out(struct hash_net6_elem *elem) 283hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags)
322{ 284{
323 elem->cidr = 0; 285 elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
324} 286}
325 287
326static inline void 288static inline void
327ip6_netmask(union nf_inet_addr *ip, u8 prefix) 289hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags)
328{ 290{
329 ip->ip6[0] &= ip_set_netmask6(prefix)[0]; 291 swap(*flags, elem->nomatch);
330 ip->ip6[1] &= ip_set_netmask6(prefix)[1];
331 ip->ip6[2] &= ip_set_netmask6(prefix)[2];
332 ip->ip6[3] &= ip_set_netmask6(prefix)[3];
333} 292}
334 293
335static inline void 294static inline void
@@ -355,74 +314,60 @@ nla_put_failure:
355 return 1; 314 return 1;
356} 315}
357 316
358static bool 317static inline void
359hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data) 318hash_net6_data_next(struct hash_net4_elem *next,
319 const struct hash_net6_elem *d)
360{ 320{
361 const struct hash_net6_telem *e =
362 (const struct hash_net6_telem *)data;
363 u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
364
365 if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
366 nla_put_u8(skb, IPSET_ATTR_CIDR, e->cidr) ||
367 nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
368 htonl(ip_set_timeout_get(e->timeout))) ||
369 (flags &&
370 nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
371 goto nla_put_failure;
372 return 0;
373
374nla_put_failure:
375 return 1;
376} 321}
377 322
323#undef MTYPE
378#undef PF 324#undef PF
379#undef HOST_MASK 325#undef HOST_MASK
380 326
327#define MTYPE hash_net6
381#define PF 6 328#define PF 6
382#define HOST_MASK 128 329#define HOST_MASK 128
383#include <linux/netfilter/ipset/ip_set_ahash.h> 330#define IP_SET_EMIT_CREATE
384 331#include "ip_set_hash_gen.h"
385static inline void
386hash_net6_data_next(struct ip_set_hash *h,
387 const struct hash_net6_elem *d)
388{
389}
390 332
391static int 333static int
392hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, 334hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
393 const struct xt_action_param *par, 335 const struct xt_action_param *par,
394 enum ipset_adt adt, const struct ip_set_adt_opt *opt) 336 enum ipset_adt adt, struct ip_set_adt_opt *opt)
395{ 337{
396 const struct ip_set_hash *h = set->data; 338 const struct hash_net *h = set->data;
397 ipset_adtfn adtfn = set->variant->adt[adt]; 339 ipset_adtfn adtfn = set->variant->adt[adt];
398 struct hash_net6_elem data = { 340 struct hash_net6_elem e = {
399 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK 341 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK
400 }; 342 };
343 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
401 344
402 if (data.cidr == 0) 345 if (e.cidr == 0)
403 return -EINVAL; 346 return -EINVAL;
404 if (adt == IPSET_TEST) 347 if (adt == IPSET_TEST)
405 data.cidr = HOST_MASK; 348 e.cidr = HOST_MASK;
406 349
407 ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); 350 ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
408 ip6_netmask(&data.ip, data.cidr); 351 ip6_netmask(&e.ip, e.cidr);
409 352
410 return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); 353 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
411} 354}
412 355
413static int 356static int
414hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], 357hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
415 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 358 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
416{ 359{
417 const struct ip_set_hash *h = set->data; 360 const struct hash_net *h = set->data;
418 ipset_adtfn adtfn = set->variant->adt[adt]; 361 ipset_adtfn adtfn = set->variant->adt[adt];
419 struct hash_net6_elem data = { .cidr = HOST_MASK }; 362 struct hash_net6_elem e = { .cidr = HOST_MASK };
420 u32 timeout = h->timeout; 363 struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
421 int ret; 364 int ret;
422 365
423 if (unlikely(!tb[IPSET_ATTR_IP] || 366 if (unlikely(!tb[IPSET_ATTR_IP] ||
424 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 367 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
425 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) 368 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
369 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
370 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
426 return -IPSET_ERR_PROTOCOL; 371 return -IPSET_ERR_PROTOCOL;
427 if (unlikely(tb[IPSET_ATTR_IP_TO])) 372 if (unlikely(tb[IPSET_ATTR_IP_TO]))
428 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; 373 return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -430,107 +375,29 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
430 if (tb[IPSET_ATTR_LINENO]) 375 if (tb[IPSET_ATTR_LINENO])
431 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 376 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
432 377
433 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); 378 ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
379 ip_set_get_extensions(set, tb, &ext);
434 if (ret) 380 if (ret)
435 return ret; 381 return ret;
436 382
437 if (tb[IPSET_ATTR_CIDR]) 383 if (tb[IPSET_ATTR_CIDR])
438 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 384 e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
439 385
440 if (!data.cidr || data.cidr > HOST_MASK) 386 if (!e.cidr || e.cidr > HOST_MASK)
441 return -IPSET_ERR_INVALID_CIDR; 387 return -IPSET_ERR_INVALID_CIDR;
442 388
443 ip6_netmask(&data.ip, data.cidr); 389 ip6_netmask(&e.ip, e.cidr);
444
445 if (tb[IPSET_ATTR_TIMEOUT]) {
446 if (!with_timeout(h->timeout))
447 return -IPSET_ERR_TIMEOUT;
448 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
449 }
450 390
451 if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { 391 if (tb[IPSET_ATTR_CADT_FLAGS]) {
452 u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); 392 u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
453 if (cadt_flags & IPSET_FLAG_NOMATCH) 393 if (cadt_flags & IPSET_FLAG_NOMATCH)
454 flags |= (cadt_flags << 16); 394 flags |= (IPSET_FLAG_NOMATCH << 16);
455 }
456
457 ret = adtfn(set, &data, timeout, flags);
458
459 return ip_set_eexist(ret, flags) ? 0 : ret;
460}
461
462/* Create hash:ip type of sets */
463
464static int
465hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
466{
467 u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
468 struct ip_set_hash *h;
469 u8 hbits;
470 size_t hsize;
471
472 if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
473 return -IPSET_ERR_INVALID_FAMILY;
474
475 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
476 !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
477 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
478 return -IPSET_ERR_PROTOCOL;
479
480 if (tb[IPSET_ATTR_HASHSIZE]) {
481 hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
482 if (hashsize < IPSET_MIMINAL_HASHSIZE)
483 hashsize = IPSET_MIMINAL_HASHSIZE;
484 }
485
486 if (tb[IPSET_ATTR_MAXELEM])
487 maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
488
489 h = kzalloc(sizeof(*h)
490 + sizeof(struct ip_set_hash_nets)
491 * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL);
492 if (!h)
493 return -ENOMEM;
494
495 h->maxelem = maxelem;
496 get_random_bytes(&h->initval, sizeof(h->initval));
497 h->timeout = IPSET_NO_TIMEOUT;
498
499 hbits = htable_bits(hashsize);
500 hsize = htable_size(hbits);
501 if (hsize == 0) {
502 kfree(h);
503 return -ENOMEM;
504 } 395 }
505 h->table = ip_set_alloc(hsize);
506 if (!h->table) {
507 kfree(h);
508 return -ENOMEM;
509 }
510 h->table->htable_bits = hbits;
511
512 set->data = h;
513 396
514 if (tb[IPSET_ATTR_TIMEOUT]) { 397 ret = adtfn(set, &e, &ext, &ext, flags);
515 h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
516 398
517 set->variant = set->family == NFPROTO_IPV4 399 return ip_set_enomatch(ret, flags, adt) ? 1 :
518 ? &hash_net4_tvariant : &hash_net6_tvariant; 400 ip_set_eexist(ret, flags) ? 0 : ret;
519
520 if (set->family == NFPROTO_IPV4)
521 hash_net4_gc_init(set);
522 else
523 hash_net6_gc_init(set);
524 } else {
525 set->variant = set->family == NFPROTO_IPV4
526 ? &hash_net4_variant : &hash_net6_variant;
527 }
528
529 pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
530 set->name, jhash_size(h->table->htable_bits),
531 h->table->htable_bits, h->maxelem, set->data, h->table);
532
533 return 0;
534} 401}
535 402
536static struct ip_set_type hash_net_type __read_mostly = { 403static struct ip_set_type hash_net_type __read_mostly = {
@@ -548,6 +415,7 @@ static struct ip_set_type hash_net_type __read_mostly = {
548 [IPSET_ATTR_PROBES] = { .type = NLA_U8 }, 415 [IPSET_ATTR_PROBES] = { .type = NLA_U8 },
549 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, 416 [IPSET_ATTR_RESIZE] = { .type = NLA_U8 },
550 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 417 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
418 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
551 }, 419 },
552 .adt_policy = { 420 .adt_policy = {
553 [IPSET_ATTR_IP] = { .type = NLA_NESTED }, 421 [IPSET_ATTR_IP] = { .type = NLA_NESTED },
@@ -555,6 +423,8 @@ static struct ip_set_type hash_net_type __read_mostly = {
555 [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, 423 [IPSET_ATTR_CIDR] = { .type = NLA_U8 },
556 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 424 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
557 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, 425 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
426 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
427 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
558 }, 428 },
559 .me = THIS_MODULE, 429 .me = THIS_MODULE,
560}; 430};
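
The single-element return path in the uadt above is the new three-way convention used throughout the series: a TEST that ran into a nomatch entry (the -ENOTEMPTY result from do_data_match, presumably what ip_set_enomatch() checks for) reports 1, a benign already-exists result collapses to 0, and anything else propagates. Unrolled for readability:

	ret = adtfn(set, &e, &ext, &ext, flags);

	if (ip_set_enomatch(ret, flags, adt))	/* TEST hit a nomatch entry */
		return 1;
	if (ip_set_eexist(ret, flags))		/* -IPSET_ERR_EXIST, benign */
		return 0;
	return ret;
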
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 6ba985f1c96f..84ae6f6ce624 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -1,4 +1,4 @@
1/* Copyright (C) 2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 1/* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2 * 2 *
3 * This program is free software; you can redistribute it and/or modify 3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as 4 * it under the terms of the GNU General Public License version 2 as
@@ -21,12 +21,12 @@
21#include <linux/netfilter.h> 21#include <linux/netfilter.h>
22#include <linux/netfilter/ipset/pfxlen.h> 22#include <linux/netfilter/ipset/pfxlen.h>
23#include <linux/netfilter/ipset/ip_set.h> 23#include <linux/netfilter/ipset/ip_set.h>
24#include <linux/netfilter/ipset/ip_set_timeout.h>
25#include <linux/netfilter/ipset/ip_set_hash.h> 24#include <linux/netfilter/ipset/ip_set_hash.h>
26 25
27#define REVISION_MIN 0 26#define REVISION_MIN 0
28/* 1 nomatch flag support added */ 27/* 1 nomatch flag support added */
29#define REVISION_MAX 2 /* /0 support added */ 28/* 2 /0 support added */
29#define REVISION_MAX 3 /* Counters support added */
30 30
31MODULE_LICENSE("GPL"); 31MODULE_LICENSE("GPL");
32MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); 32MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -127,17 +127,14 @@ iface_add(struct rb_root *root, const char **iface)
127} 127}
128 128
129/* Type specific function prefix */ 129/* Type specific function prefix */
130#define TYPE hash_netiface 130#define HTYPE hash_netiface
131 131#define IP_SET_HASH_WITH_NETS
132static bool 132#define IP_SET_HASH_WITH_RBTREE
133hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b); 133#define IP_SET_HASH_WITH_MULTI
134
135#define hash_netiface4_same_set hash_netiface_same_set
136#define hash_netiface6_same_set hash_netiface_same_set
137 134
138#define STREQ(a, b) (strcmp(a, b) == 0) 135#define STREQ(a, b) (strcmp(a, b) == 0)
139 136
140/* The type variant functions: IPv4 */ 137/* IPv4 variants */
141 138
142struct hash_netiface4_elem_hashed { 139struct hash_netiface4_elem_hashed {
143 __be32 ip; 140 __be32 ip;
@@ -147,8 +144,6 @@ struct hash_netiface4_elem_hashed {
147 u8 elem; 144 u8 elem;
148}; 145};
149 146
150#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
151
152/* Member elements without timeout */ 147/* Member elements without timeout */
153struct hash_netiface4_elem { 148struct hash_netiface4_elem {
154 __be32 ip; 149 __be32 ip;
@@ -159,17 +154,39 @@ struct hash_netiface4_elem {
159 const char *iface; 154 const char *iface;
160}; 155};
161 156
162/* Member elements with timeout support */ 157struct hash_netiface4t_elem {
163struct hash_netiface4_telem { 158 __be32 ip;
159 u8 physdev;
160 u8 cidr;
161 u8 nomatch;
162 u8 elem;
163 const char *iface;
164 unsigned long timeout;
165};
166
167struct hash_netiface4c_elem {
168 __be32 ip;
169 u8 physdev;
170 u8 cidr;
171 u8 nomatch;
172 u8 elem;
173 const char *iface;
174 struct ip_set_counter counter;
175};
176
177struct hash_netiface4ct_elem {
164 __be32 ip; 178 __be32 ip;
165 u8 physdev; 179 u8 physdev;
166 u8 cidr; 180 u8 cidr;
167 u8 nomatch; 181 u8 nomatch;
168 u8 elem; 182 u8 elem;
169 const char *iface; 183 const char *iface;
184 struct ip_set_counter counter;
170 unsigned long timeout; 185 unsigned long timeout;
171}; 186};
172 187
188/* Common functions */
189
173static inline bool 190static inline bool
174hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1, 191hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
175 const struct hash_netiface4_elem *ip2, 192 const struct hash_netiface4_elem *ip2,
@@ -182,29 +199,22 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
182 ip1->iface == ip2->iface; 199 ip1->iface == ip2->iface;
183} 200}
184 201
185static inline bool 202static inline int
186hash_netiface4_data_isnull(const struct hash_netiface4_elem *elem) 203hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem)
187{ 204{
188 return elem->elem == 0; 205 return elem->nomatch ? -ENOTEMPTY : 1;
189} 206}
190 207
191static inline void 208static inline void
192hash_netiface4_data_copy(struct hash_netiface4_elem *dst, 209hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags)
193 const struct hash_netiface4_elem *src)
194{ 210{
195 memcpy(dst, src, sizeof(*dst)); 211 elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
196} 212}
197 213
198static inline void 214static inline void
199hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags) 215hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags)
200{
201 dst->nomatch = flags & IPSET_FLAG_NOMATCH;
202}
203
204static inline int
205hash_netiface4_data_match(const struct hash_netiface4_elem *elem)
206{ 216{
207 return elem->nomatch ? -ENOTEMPTY : 1; 217 swap(*flags, elem->nomatch);
208} 218}
209 219
210static inline void 220static inline void
@@ -214,12 +224,6 @@ hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr)
214 elem->cidr = cidr; 224 elem->cidr = cidr;
215} 225}
216 226
217static inline void
218hash_netiface4_data_zero_out(struct hash_netiface4_elem *elem)
219{
220 elem->elem = 0;
221}
222
223static bool 227static bool
224hash_netiface4_data_list(struct sk_buff *skb, 228hash_netiface4_data_list(struct sk_buff *skb,
225 const struct hash_netiface4_elem *data) 229 const struct hash_netiface4_elem *data)
@@ -240,66 +244,40 @@ nla_put_failure:
240 return 1; 244 return 1;
241} 245}
242 246
243static bool 247static inline void
244hash_netiface4_data_tlist(struct sk_buff *skb, 248hash_netiface4_data_next(struct hash_netiface4_elem *next,
245 const struct hash_netiface4_elem *data) 249 const struct hash_netiface4_elem *d)
246{ 250{
247 const struct hash_netiface4_telem *tdata = 251 next->ip = d->ip;
248 (const struct hash_netiface4_telem *)data;
249 u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0;
250
251 if (data->nomatch)
252 flags |= IPSET_FLAG_NOMATCH;
253 if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
254 nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) ||
255 nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) ||
256 (flags &&
257 nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))) ||
258 nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
259 htonl(ip_set_timeout_get(tdata->timeout))))
260 goto nla_put_failure;
261
262 return 0;
263
264nla_put_failure:
265 return 1;
266} 252}
267 253
268#define IP_SET_HASH_WITH_NETS 254#define MTYPE hash_netiface4
269#define IP_SET_HASH_WITH_RBTREE
270#define IP_SET_HASH_WITH_MULTI
271
272#define PF 4 255#define PF 4
273#define HOST_MASK 32 256#define HOST_MASK 32
274#include <linux/netfilter/ipset/ip_set_ahash.h> 257#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
275 258#include "ip_set_hash_gen.h"
276static inline void
277hash_netiface4_data_next(struct ip_set_hash *h,
278 const struct hash_netiface4_elem *d)
279{
280 h->next.ip = d->ip;
281}
282 259
283static int 260static int
284hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, 261hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
285 const struct xt_action_param *par, 262 const struct xt_action_param *par,
286 enum ipset_adt adt, const struct ip_set_adt_opt *opt) 263 enum ipset_adt adt, struct ip_set_adt_opt *opt)
287{ 264{
288 struct ip_set_hash *h = set->data; 265 struct hash_netiface *h = set->data;
289 ipset_adtfn adtfn = set->variant->adt[adt]; 266 ipset_adtfn adtfn = set->variant->adt[adt];
290 struct hash_netiface4_elem data = { 267 struct hash_netiface4_elem e = {
291 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, 268 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
292 .elem = 1, 269 .elem = 1,
293 }; 270 };
271 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
294 int ret; 272 int ret;
295 273
296 if (data.cidr == 0) 274 if (e.cidr == 0)
297 return -EINVAL; 275 return -EINVAL;
298 if (adt == IPSET_TEST) 276 if (adt == IPSET_TEST)
299 data.cidr = HOST_MASK; 277 e.cidr = HOST_MASK;
300 278
301 ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); 279 ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
302 data.ip &= ip_set_netmask(data.cidr); 280 e.ip &= ip_set_netmask(e.cidr);
303 281
304#define IFACE(dir) (par->dir ? par->dir->name : NULL) 282#define IFACE(dir) (par->dir ? par->dir->name : NULL)
305#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL) 283#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL)
@@ -311,72 +289,69 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
311 289
312 if (!nf_bridge) 290 if (!nf_bridge)
313 return -EINVAL; 291 return -EINVAL;
314 data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); 292 e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
315 data.physdev = 1; 293 e.physdev = 1;
316#else 294#else
317 data.iface = NULL; 295 e.iface = NULL;
318#endif 296#endif
319 } else 297 } else
320 data.iface = SRCDIR ? IFACE(in) : IFACE(out); 298 e.iface = SRCDIR ? IFACE(in) : IFACE(out);
321 299
322 if (!data.iface) 300 if (!e.iface)
323 return -EINVAL; 301 return -EINVAL;
324 ret = iface_test(&h->rbtree, &data.iface); 302 ret = iface_test(&h->rbtree, &e.iface);
325 if (adt == IPSET_ADD) { 303 if (adt == IPSET_ADD) {
326 if (!ret) { 304 if (!ret) {
327 ret = iface_add(&h->rbtree, &data.iface); 305 ret = iface_add(&h->rbtree, &e.iface);
328 if (ret) 306 if (ret)
329 return ret; 307 return ret;
330 } 308 }
331 } else if (!ret) 309 } else if (!ret)
332 return ret; 310 return ret;
333 311
334 return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); 312 return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
335} 313}
336 314
337static int 315static int
338hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], 316hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
339 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) 317 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
340{ 318{
341 struct ip_set_hash *h = set->data; 319 struct hash_netiface *h = set->data;
342 ipset_adtfn adtfn = set->variant->adt[adt]; 320 ipset_adtfn adtfn = set->variant->adt[adt];
343 struct hash_netiface4_elem data = { .cidr = HOST_MASK, .elem = 1 }; 321 struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
322 struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
344 u32 ip = 0, ip_to, last; 323 u32 ip = 0, ip_to, last;
345 u32 timeout = h->timeout;
346 char iface[IFNAMSIZ]; 324 char iface[IFNAMSIZ];
347 int ret; 325 int ret;
348 326
349 if (unlikely(!tb[IPSET_ATTR_IP] || 327 if (unlikely(!tb[IPSET_ATTR_IP] ||
350 !tb[IPSET_ATTR_IFACE] || 328 !tb[IPSET_ATTR_IFACE] ||
351 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || 329 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
352 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) 330 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
331 !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
332 !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
353 return -IPSET_ERR_PROTOCOL; 333 return -IPSET_ERR_PROTOCOL;
354 334
355 if (tb[IPSET_ATTR_LINENO]) 335 if (tb[IPSET_ATTR_LINENO])
356 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); 336 *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
357 337
358 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); 338 ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
339 ip_set_get_extensions(set, tb, &ext);
359 if (ret) 340 if (ret)
360 return ret; 341 return ret;
361 342
362 if (tb[IPSET_ATTR_CIDR]) { 343 if (tb[IPSET_ATTR_CIDR]) {
363 data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); 344 e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
364 if (data.cidr > HOST_MASK) 345 if (e.cidr > HOST_MASK)
365 return -IPSET_ERR_INVALID_CIDR; 346 return -IPSET_ERR_INVALID_CIDR;
366 } 347 }
367 348
368 if (tb[IPSET_ATTR_TIMEOUT]) {
369 if (!with_timeout(h->timeout))
370 return -IPSET_ERR_TIMEOUT;
371 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
372 }
373
374 strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); 349 strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE]));
375 data.iface = iface; 350 e.iface = iface;
376 ret = iface_test(&h->rbtree, &data.iface); 351 ret = iface_test(&h->rbtree, &e.iface);
377 if (adt == IPSET_ADD) { 352 if (adt == IPSET_ADD) {
378 if (!ret) { 353 if (!ret) {
379 ret = iface_add(&h->rbtree, &data.iface); 354 ret = iface_add(&h->rbtree, &e.iface);
380 if (ret) 355 if (ret)
381 return ret; 356 return ret;
382 } 357 }
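
hash:net,iface interns interface names in a per-set rbtree (IP_SET_HASH_WITH_RBTREE): iface_add() keeps one canonical copy of each string and iface_test() swaps the element's pointer to it, which is why hash_netiface*_data_equal() can compare ip1->iface == ip2->iface as plain pointers. The call pattern above, annotated (interpretation of the return values is inferred from how they are used here):

	ret = iface_test(&h->rbtree, &e.iface);	/* found: e.iface now canonical */
	if (adt == IPSET_ADD) {
		if (!ret) {
			ret = iface_add(&h->rbtree, &e.iface);	/* intern it */
			if (ret)
				return ret;
		}
	} else if (!ret)
		return ret;	/* unknown iface: cannot match, bail out */
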
@@ -386,14 +361,15 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
386 if (tb[IPSET_ATTR_CADT_FLAGS]) { 361 if (tb[IPSET_ATTR_CADT_FLAGS]) {
387 u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); 362 u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
388 if (cadt_flags & IPSET_FLAG_PHYSDEV) 363 if (cadt_flags & IPSET_FLAG_PHYSDEV)
389 data.physdev = 1; 364 e.physdev = 1;
390 if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH)) 365 if (cadt_flags & IPSET_FLAG_NOMATCH)
391 flags |= (cadt_flags << 16); 366 flags |= (IPSET_FLAG_NOMATCH << 16);
392 } 367 }
393 if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { 368 if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
394 data.ip = htonl(ip & ip_set_hostmask(data.cidr)); 369 e.ip = htonl(ip & ip_set_hostmask(e.cidr));
395 ret = adtfn(set, &data, timeout, flags); 370 ret = adtfn(set, &e, &ext, &ext, flags);
396 return ip_set_eexist(ret, flags) ? 0 : ret; 371 return ip_set_enomatch(ret, flags, adt) ? 1 :
372 ip_set_eexist(ret, flags) ? 0 : ret;
397 } 373 }
398 374
399 if (tb[IPSET_ATTR_IP_TO]) { 375 if (tb[IPSET_ATTR_IP_TO]) {
@@ -404,16 +380,15 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
404 swap(ip, ip_to); 380 swap(ip, ip_to);
405 if (ip + UINT_MAX == ip_to) 381 if (ip + UINT_MAX == ip_to)
406 return -IPSET_ERR_HASH_RANGE; 382 return -IPSET_ERR_HASH_RANGE;
407 } else { 383 } else
408 ip_set_mask_from_to(ip, ip_to, data.cidr); 384 ip_set_mask_from_to(ip, ip_to, e.cidr);
409 }
410 385
411 if (retried) 386 if (retried)
412 ip = ntohl(h->next.ip); 387 ip = ntohl(h->next.ip);
413 while (!after(ip, ip_to)) { 388 while (!after(ip, ip_to)) {
414 data.ip = htonl(ip); 389 e.ip = htonl(ip);
415 last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); 390 last = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
416 ret = adtfn(set, &data, timeout, flags); 391 ret = adtfn(set, &e, &ext, &ext, flags);
417 392
418 if (ret && !ip_set_eexist(ret, flags)) 393 if (ret && !ip_set_eexist(ret, flags))
419 return ret; 394 return ret;
@@ -424,18 +399,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
424 return ret; 399 return ret;
425} 400}
426 401
427static bool 402/* IPv6 variants */
428hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b)
429{
430 const struct ip_set_hash *x = a->data;
431 const struct ip_set_hash *y = b->data;
432
433 /* Resizing changes htable_bits, so we ignore it */
434 return x->maxelem == y->maxelem &&
435 x->timeout == y->timeout;
436}
437
438/* The type variant functions: IPv6 */
439 403
440struct hash_netiface6_elem_hashed { 404struct hash_netiface6_elem_hashed {
441 union nf_inet_addr ip; 405 union nf_inet_addr ip;
@@ -445,8 +409,6 @@ struct hash_netiface6_elem_hashed {
445 u8 elem; 409 u8 elem;
446}; 410};
447 411
448#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed)
449
450struct hash_netiface6_elem { 412struct hash_netiface6_elem {
451 union nf_inet_addr ip; 413 union nf_inet_addr ip;
452 u8 physdev; 414 u8 physdev;
@@ -456,16 +418,39 @@ struct hash_netiface6_elem {
456 const char *iface; 418 const char *iface;
457}; 419};
458 420
459struct hash_netiface6_telem { 421struct hash_netiface6t_elem {
422 union nf_inet_addr ip;
423 u8 physdev;
424 u8 cidr;
425 u8 nomatch;
426 u8 elem;
427 const char *iface;
428 unsigned long timeout;
429};
430
431struct hash_netiface6c_elem {
432 union nf_inet_addr ip;
433 u8 physdev;
434 u8 cidr;
435 u8 nomatch;
436 u8 elem;
437 const char *iface;
438 struct ip_set_counter counter;
439};
440
441struct hash_netiface6ct_elem {
460 union nf_inet_addr ip; 442 union nf_inet_addr ip;
461 u8 physdev; 443 u8 physdev;
462 u8 cidr; 444 u8 cidr;
463 u8 nomatch; 445 u8 nomatch;
464 u8 elem; 446 u8 elem;
465 const char *iface; 447 const char *iface;
448 struct ip_set_counter counter;
466 unsigned long timeout; 449 unsigned long timeout;
467}; 450};
468 451
452/* Common functions */
453
469static inline bool 454static inline bool
470hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1, 455hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
471 const struct hash_netiface6_elem *ip2, 456 const struct hash_netiface6_elem *ip2,
@@ -478,44 +463,22 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
478 ip1->iface == ip2->iface; 463 ip1->iface == ip2->iface;
479} 464}
480 465
481static inline bool
482hash_netiface6_data_isnull(const struct hash_netiface6_elem *elem)
483{
484 return elem->elem == 0;
485}
486
487static inline void
488hash_netiface6_data_copy(struct hash_netiface6_elem *dst,
489 const struct hash_netiface6_elem *src)
490{
491 memcpy(dst, src, sizeof(*dst));
492}
493
494static inline void
495hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags)
496{
497 dst->nomatch = flags & IPSET_FLAG_NOMATCH;
498}
499
500static inline int 466static inline int
501hash_netiface6_data_match(const struct hash_netiface6_elem *elem) 467hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem)
502{ 468{
503 return elem->nomatch ? -ENOTEMPTY : 1; 469 return elem->nomatch ? -ENOTEMPTY : 1;
504} 470}
505 471
506static inline void 472static inline void
507hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) 473hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags)
508{ 474{
509 elem->elem = 0; 475 elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
510} 476}
511 477
512static inline void 478static inline void
513ip6_netmask(union nf_inet_addr *ip, u8 prefix) 479hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags)
514{ 480{
515 ip->ip6[0] &= ip_set_netmask6(prefix)[0]; 481 swap(*flags, elem->nomatch);
516 ip->ip6[1] &= ip_set_netmask6(prefix)[1];
517 ip->ip6[2] &= ip_set_netmask6(prefix)[2];
518 ip->ip6[3] &= ip_set_netmask6(prefix)[3];
519} 482}
520 483
521static inline void 484static inline void
@@ -545,63 +508,45 @@ nla_put_failure:
545 return 1; 508 return 1;
546} 509}
547 510
548static bool 511static inline void
549hash_netiface6_data_tlist(struct sk_buff *skb, 512hash_netiface6_data_next(struct hash_netiface4_elem *next,
550 const struct hash_netiface6_elem *data) 513 const struct hash_netiface6_elem *d)
551{ 514{
552 const struct hash_netiface6_telem *e =
553 (const struct hash_netiface6_telem *)data;
554 u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0;
555
556 if (data->nomatch)
557 flags |= IPSET_FLAG_NOMATCH;
558 if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
559 nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) ||
560 nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) ||
561 (flags &&
562 nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))) ||
563 nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
564 htonl(ip_set_timeout_get(e->timeout))))
565 goto nla_put_failure;
566 return 0;
567
568nla_put_failure:
569 return 1;
570} 515}
571 516
517#undef MTYPE
572#undef PF 518#undef PF
573#undef HOST_MASK 519#undef HOST_MASK
520#undef HKEY_DATALEN
574 521
522#define MTYPE hash_netiface6
575#define PF 6 523#define PF 6
576#define HOST_MASK 128 524#define HOST_MASK 128
577#include <linux/netfilter/ipset/ip_set_ahash.h> 525#define HKEY_DATALEN sizeof(struct hash_netiface6_elem_hashed)
578 526#define IP_SET_EMIT_CREATE
579static inline void 527#include "ip_set_hash_gen.h"
580hash_netiface6_data_next(struct ip_set_hash *h,
581 const struct hash_netiface6_elem *d)
582{
583}
584 528
585static int 529static int
586hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, 530hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
587 const struct xt_action_param *par, 531 const struct xt_action_param *par,
588 enum ipset_adt adt, const struct ip_set_adt_opt *opt) 532 enum ipset_adt adt, struct ip_set_adt_opt *opt)
589{ 533{
590 struct ip_set_hash *h = set->data; 534 struct hash_netiface *h = set->data;
591 ipset_adtfn adtfn = set->variant->adt[adt]; 535 ipset_adtfn adtfn = set->variant->adt[adt];
592 struct hash_netiface6_elem data = { 536 struct hash_netiface6_elem e = {
593 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK, 537 .cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,
594 .elem = 1, 538 .elem = 1,
595 }; 539 };
540 struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
596 int ret; 541 int ret;
597 542
598 if (data.cidr == 0) 543 if (e.cidr == 0)
599 return -EINVAL; 544 return -EINVAL;
600 if (adt == IPSET_TEST) 545 if (adt == IPSET_TEST)
601 data.cidr = HOST_MASK; 546 e.cidr = HOST_MASK;
602 547
603 ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); 548 ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
604 ip6_netmask(&data.ip, data.cidr); 549 ip6_netmask(&e.ip, e.cidr);
605 550
606 if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { 551 if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
607#ifdef CONFIG_BRIDGE_NETFILTER 552#ifdef CONFIG_BRIDGE_NETFILTER
@@ -609,44 +554,46 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
609 554
610 if (!nf_bridge) 555 if (!nf_bridge)
611 return -EINVAL; 556 return -EINVAL;
612 data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); 557 e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
613 data.physdev = 1; 558 e.physdev = 1;
614#else 559#else
-		data.iface = NULL;
+		e.iface = NULL;
 #endif
 	} else
-		data.iface = SRCDIR ? IFACE(in) : IFACE(out);
+		e.iface = SRCDIR ? IFACE(in) : IFACE(out);
 
-	if (!data.iface)
+	if (!e.iface)
 		return -EINVAL;
-	ret = iface_test(&h->rbtree, &data.iface);
+	ret = iface_test(&h->rbtree, &e.iface);
 	if (adt == IPSET_ADD) {
 		if (!ret) {
-			ret = iface_add(&h->rbtree, &data.iface);
+			ret = iface_add(&h->rbtree, &e.iface);
 			if (ret)
 				return ret;
 		}
 	} else if (!ret)
 		return ret;
 
-	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
 static int
 hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
 		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	struct ip_set_hash *h = set->data;
+	struct hash_netiface *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netiface6_elem data = { .cidr = HOST_MASK, .elem = 1 };
-	u32 timeout = h->timeout;
+	struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
 	char iface[IFNAMSIZ];
 	int ret;
 
 	if (unlikely(!tb[IPSET_ATTR_IP] ||
 		     !tb[IPSET_ATTR_IFACE] ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
 		return -IPSET_ERR_PROTOCOL;
 	if (unlikely(tb[IPSET_ATTR_IP_TO]))
 		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -654,28 +601,23 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
 	if (tb[IPSET_ATTR_CIDR])
-		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
-	if (data.cidr > HOST_MASK)
+		e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
+	if (e.cidr > HOST_MASK)
 		return -IPSET_ERR_INVALID_CIDR;
-	ip6_netmask(&data.ip, data.cidr);
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!with_timeout(h->timeout))
-			return -IPSET_ERR_TIMEOUT;
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-	}
+	ip6_netmask(&e.ip, e.cidr);
 
 	strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE]));
-	data.iface = iface;
-	ret = iface_test(&h->rbtree, &data.iface);
+	e.iface = iface;
+	ret = iface_test(&h->rbtree, &e.iface);
 	if (adt == IPSET_ADD) {
 		if (!ret) {
-			ret = iface_add(&h->rbtree, &data.iface);
+			ret = iface_add(&h->rbtree, &e.iface);
 			if (ret)
 				return ret;
 		}
@@ -685,90 +627,15 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
 		if (cadt_flags & IPSET_FLAG_PHYSDEV)
-			data.physdev = 1;
-		if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH))
-			flags |= (cadt_flags << 16);
+			e.physdev = 1;
+		if (cadt_flags & IPSET_FLAG_NOMATCH)
+			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
 
-	ret = adtfn(set, &data, timeout, flags);
-
-	return ip_set_eexist(ret, flags) ? 0 : ret;
-}
-
-/* Create hash:ip type of sets */
-
-static int
-hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
-	struct ip_set_hash *h;
-	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
-	u8 hbits;
-	size_t hsize;
-
-	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
-		return -IPSET_ERR_INVALID_FAMILY;
-
-	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
-		return -IPSET_ERR_PROTOCOL;
-
-	if (tb[IPSET_ATTR_HASHSIZE]) {
-		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
-		if (hashsize < IPSET_MIMINAL_HASHSIZE)
-			hashsize = IPSET_MIMINAL_HASHSIZE;
-	}
-
-	if (tb[IPSET_ATTR_MAXELEM])
-		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
-	h = kzalloc(sizeof(*h)
-		    + sizeof(struct ip_set_hash_nets)
-		      * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL);
-	if (!h)
-		return -ENOMEM;
-
-	h->maxelem = maxelem;
-	get_random_bytes(&h->initval, sizeof(h->initval));
-	h->timeout = IPSET_NO_TIMEOUT;
-	h->ahash_max = AHASH_MAX_SIZE;
-
-	hbits = htable_bits(hashsize);
-	hsize = htable_size(hbits);
-	if (hsize == 0) {
-		kfree(h);
-		return -ENOMEM;
-	}
-	h->table = ip_set_alloc(hsize);
-	if (!h->table) {
-		kfree(h);
-		return -ENOMEM;
-	}
-	h->table->htable_bits = hbits;
-	h->rbtree = RB_ROOT;
-
-	set->data = h;
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
-		set->variant = set->family == NFPROTO_IPV4
-			? &hash_netiface4_tvariant : &hash_netiface6_tvariant;
-
-		if (set->family == NFPROTO_IPV4)
-			hash_netiface4_gc_init(set);
-		else
-			hash_netiface6_gc_init(set);
-	} else {
-		set->variant = set->family == NFPROTO_IPV4
-			? &hash_netiface4_variant : &hash_netiface6_variant;
-	}
-
-	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
-		 set->name, jhash_size(h->table->htable_bits),
-		 h->table->htable_bits, h->maxelem, set->data, h->table);
-
-	return 0;
-}
+	ret = adtfn(set, &e, &ext, &ext, flags);
+
+	return ip_set_enomatch(ret, flags, adt) ? 1 :
+	       ip_set_eexist(ret, flags) ? 0 : ret;
+}
 
 static struct ip_set_type hash_netiface_type __read_mostly = {
@@ -788,6 +655,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 	},
 	.adt_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
@@ -798,6 +666,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {
 		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
+		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
+		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
 	},
 	.me		= THIS_MODULE,
 };
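
Note on the hunks above: with the switch to the extension API, the uadt routine no longer takes an explicit timeout argument; per-entry options such as "nomatch" travel to the generic add/del/test callback in the upper 16 bits of the flags word (flags |= (IPSET_FLAG_NOMATCH << 16)), and ip_set_enomatch() turns a matched nomatch entry into a positive test result. The following is a minimal stand-alone C sketch of that bit-packing convention, illustrative only and not part of the patch: the flag value mirrors the ipset uapi header, and all demo_* names are invented.

/* Stand-alone sketch (illustrative only, not part of the patch). */
#include <stdint.h>
#include <stdio.h>

#define IPSET_FLAG_NOMATCH (1 << 2)	/* CADT flag, low 16-bit space */

struct demo_elem {
	uint8_t cidr:7;		/* nomatch squeezed into the same byte as
				 * the cidr, cf. IP_SET_HASH_WITH_NETS_PACKED
				 * in the next file */
	uint8_t nomatch:1;
};

static void demo_data_set_flags(struct demo_elem *elem, uint32_t flags)
{
	/* Same extraction as the *_data_set_flags() helpers below. */
	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}

int main(void)
{
	uint32_t cadt_flags = IPSET_FLAG_NOMATCH;	/* parsed from netlink */
	uint32_t flags = 0;				/* command flags: low 16 bits */
	struct demo_elem e = { .cidr = 23 };		/* stores cidr - 1, i.e. a /24 */

	if (cadt_flags & IPSET_FLAG_NOMATCH)
		flags |= (IPSET_FLAG_NOMATCH << 16);
	demo_data_set_flags(&e, flags);
	printf("nomatch=%u net=/%u\n", e.nomatch, e.cidr + 1);
	return 0;
}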
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index af20c0c5ced2..9a0869853be5 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -20,14 +20,14 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/ipset/pfxlen.h>
 #include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_getport.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
 
 #define REVISION_MIN	0
 /*	1 SCTP and UDPLITE support added */
 /*	2 Range as input support for IPv4 added */
-#define REVISION_MAX	3 /* nomatch flag support added */
+/*	3 nomatch flag support added */
+#define REVISION_MAX	4 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
@@ -35,15 +35,9 @@ IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_hash:net,port");
 
 /* Type specific function prefix */
-#define TYPE		hash_netport
-
-static bool
-hash_netport_same_set(const struct ip_set *a, const struct ip_set *b);
-
-#define hash_netport4_same_set	hash_netport_same_set
-#define hash_netport6_same_set	hash_netport_same_set
-
-/* The type variant functions: IPv4 */
+#define HTYPE		hash_netport
+#define IP_SET_HASH_WITH_PROTO
+#define IP_SET_HASH_WITH_NETS
 
 /* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0
  * However this way we have to store internally cidr - 1,
@@ -51,7 +45,9 @@ hash_netport_same_set(const struct ip_set *a, const struct ip_set *b);
  */
 #define IP_SET_HASH_WITH_NETS_PACKED
 
-/* Member elements without timeout */
+/* IPv4 variants */
+
+/* Member elements */
 struct hash_netport4_elem {
 	__be32 ip;
 	__be16 port;
@@ -60,8 +56,7 @@ struct hash_netport4_elem {
 	u8 nomatch:1;
 };
 
-/* Member elements with timeout support */
-struct hash_netport4_telem {
+struct hash_netport4t_elem {
 	__be32 ip;
 	__be16 port;
 	u8 proto;
@@ -70,6 +65,27 @@ struct hash_netport4_telem {
 	unsigned long timeout;
 };
 
+struct hash_netport4c_elem {
+	__be32 ip;
+	__be16 port;
+	u8 proto;
+	u8 cidr:7;
+	u8 nomatch:1;
+	struct ip_set_counter counter;
+};
+
+struct hash_netport4ct_elem {
+	__be32 ip;
+	__be16 port;
+	u8 proto;
+	u8 cidr:7;
+	u8 nomatch:1;
+	struct ip_set_counter counter;
+	unsigned long timeout;
+};
+
+/* Common functions */
+
 static inline bool
 hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
 			 const struct hash_netport4_elem *ip2,
@@ -81,33 +97,22 @@ hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
 	       ip1->cidr == ip2->cidr;
 }
 
-static inline bool
-hash_netport4_data_isnull(const struct hash_netport4_elem *elem)
+static inline int
+hash_netport4_do_data_match(const struct hash_netport4_elem *elem)
 {
-	return elem->proto == 0;
+	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
-hash_netport4_data_copy(struct hash_netport4_elem *dst,
-			const struct hash_netport4_elem *src)
+hash_netport4_data_set_flags(struct hash_netport4_elem *elem, u32 flags)
 {
-	dst->ip = src->ip;
-	dst->port = src->port;
-	dst->proto = src->proto;
-	dst->cidr = src->cidr;
-	dst->nomatch = src->nomatch;
+	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
 }
 
 static inline void
-hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags)
+hash_netport4_data_reset_flags(struct hash_netport4_elem *elem, u8 *flags)
 {
-	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
-}
-
-static inline int
-hash_netport4_data_match(const struct hash_netport4_elem *elem)
-{
-	return elem->nomatch ? -ENOTEMPTY : 1;
+	swap(*flags, elem->nomatch);
 }
 
 static inline void
@@ -117,12 +122,6 @@ hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
 	elem->cidr = cidr - 1;
 }
 
-static inline void
-hash_netport4_data_zero_out(struct hash_netport4_elem *elem)
-{
-	elem->proto = 0;
-}
-
 static bool
 hash_netport4_data_list(struct sk_buff *skb,
 			const struct hash_netport4_elem *data)
@@ -142,77 +141,53 @@ nla_put_failure:
 	return 1;
 }
 
-static bool
-hash_netport4_data_tlist(struct sk_buff *skb,
-			 const struct hash_netport4_elem *data)
+static inline void
+hash_netport4_data_next(struct hash_netport4_elem *next,
+			const struct hash_netport4_elem *d)
 {
-	const struct hash_netport4_telem *tdata =
-		(const struct hash_netport4_telem *)data;
-	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
-
-	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) ||
-	    nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) ||
-	    nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) ||
-	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
-	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-			  htonl(ip_set_timeout_get(tdata->timeout))) ||
-	    (flags &&
-	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return 1;
+	next->ip = d->ip;
+	next->port = d->port;
 }
 
-#define IP_SET_HASH_WITH_PROTO
-#define IP_SET_HASH_WITH_NETS
-
+#define MTYPE		hash_netport4
 #define PF		4
 #define HOST_MASK	32
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_netport4_data_next(struct ip_set_hash *h,
-			const struct hash_netport4_elem *d)
-{
-	h->next.ip = d->ip;
-	h->next.port = d->port;
-}
+#include "ip_set_hash_gen.h"
 
 static int
 hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		   const struct xt_action_param *par,
-		   enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+		   enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netport4_elem data = {
+	struct hash_netport4_elem e = {
 		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1
 	};
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
 	if (adt == IPSET_TEST)
-		data.cidr = HOST_MASK - 1;
+		e.cidr = HOST_MASK - 1;
 
 	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
-				 &data.port, &data.proto))
+				 &e.port, &e.proto))
 		return -EINVAL;
 
-	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip);
-	data.ip &= ip_set_netmask(data.cidr + 1);
+	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+	e.ip &= ip_set_netmask(e.cidr + 1);
 
-	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
 static int
 hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netport4_elem data = { .cidr = HOST_MASK - 1 };
+	struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
 	u32 port, port_to, p = 0, ip = 0, ip_to, last;
-	u32 timeout = h->timeout;
 	bool with_ports = false;
 	u8 cidr;
 	int ret;
@@ -221,13 +196,16 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip);
+	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
@@ -235,47 +213,42 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
 		if (!cidr || cidr > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
-		data.cidr = cidr - 1;
+		e.cidr = cidr - 1;
 	}
 
 	if (tb[IPSET_ATTR_PORT])
-		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 	else
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_PROTO]) {
-		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
-		with_ports = ip_set_proto_with_ports(data.proto);
+		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(e.proto);
 
-		if (data.proto == 0)
+		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	if (!(with_ports || data.proto == IPPROTO_ICMP))
-		data.port = 0;
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!with_timeout(h->timeout))
-			return -IPSET_ERR_TIMEOUT;
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-	}
+	if (!(with_ports || e.proto == IPPROTO_ICMP))
		e.port = 0;
 
 	with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];
 
-	if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) {
+	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
-			flags |= (cadt_flags << 16);
+			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
 
 	if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) {
-		data.ip = htonl(ip & ip_set_hostmask(data.cidr + 1));
-		ret = adtfn(set, &data, timeout, flags);
-		return ip_set_eexist(ret, flags) ? 0 : ret;
+		e.ip = htonl(ip & ip_set_hostmask(e.cidr + 1));
+		ret = adtfn(set, &e, &ext, &ext, flags);
+		return ip_set_enomatch(ret, flags, adt) ? 1 :
+		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
-	port = port_to = ntohs(data.port);
+	port = port_to = ntohs(e.port);
 	if (tb[IPSET_ATTR_PORT_TO]) {
 		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 		if (port_to < port)
@@ -289,21 +262,20 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip, ip_to);
 		if (ip + UINT_MAX == ip_to)
 			return -IPSET_ERR_HASH_RANGE;
-	} else {
-		ip_set_mask_from_to(ip, ip_to, data.cidr + 1);
-	}
+	} else
+		ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
 
 	if (retried)
 		ip = ntohl(h->next.ip);
 	while (!after(ip, ip_to)) {
-		data.ip = htonl(ip);
+		e.ip = htonl(ip);
 		last = ip_set_range_to_cidr(ip, ip_to, &cidr);
-		data.cidr = cidr - 1;
+		e.cidr = cidr - 1;
 		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
 						       : port;
 		for (; p <= port_to; p++) {
-			data.port = htons(p);
-			ret = adtfn(set, &data, timeout, flags);
+			e.port = htons(p);
+			ret = adtfn(set, &e, &ext, &ext, flags);
 
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
@@ -315,36 +287,46 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-static bool
-hash_netport_same_set(const struct ip_set *a, const struct ip_set *b)
-{
-	const struct ip_set_hash *x = a->data;
-	const struct ip_set_hash *y = b->data;
+/* IPv6 variants */
 
-	/* Resizing changes htable_bits, so we ignore it */
-	return x->maxelem == y->maxelem &&
-	       x->timeout == y->timeout;
-}
+struct hash_netport6_elem {
+	union nf_inet_addr ip;
+	__be16 port;
+	u8 proto;
+	u8 cidr:7;
+	u8 nomatch:1;
+};
 
-/* The type variant functions: IPv6 */
+struct hash_netport6t_elem {
+	union nf_inet_addr ip;
+	__be16 port;
+	u8 proto;
+	u8 cidr:7;
+	u8 nomatch:1;
+	unsigned long timeout;
+};
 
-struct hash_netport6_elem {
+struct hash_netport6c_elem {
 	union nf_inet_addr ip;
 	__be16 port;
 	u8 proto;
 	u8 cidr:7;
 	u8 nomatch:1;
+	struct ip_set_counter counter;
 };
 
-struct hash_netport6_telem {
+struct hash_netport6ct_elem {
 	union nf_inet_addr ip;
 	__be16 port;
 	u8 proto;
 	u8 cidr:7;
 	u8 nomatch:1;
+	struct ip_set_counter counter;
 	unsigned long timeout;
 };
 
+/* Common functions */
+
 static inline bool
 hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
 			 const struct hash_netport6_elem *ip2,
@@ -356,44 +338,22 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
 	       ip1->cidr == ip2->cidr;
 }
 
-static inline bool
-hash_netport6_data_isnull(const struct hash_netport6_elem *elem)
-{
-	return elem->proto == 0;
-}
-
-static inline void
-hash_netport6_data_copy(struct hash_netport6_elem *dst,
-			const struct hash_netport6_elem *src)
-{
-	memcpy(dst, src, sizeof(*dst));
-}
-
-static inline void
-hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags)
-{
-	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH);
-}
-
 static inline int
-hash_netport6_data_match(const struct hash_netport6_elem *elem)
+hash_netport6_do_data_match(const struct hash_netport6_elem *elem)
 {
 	return elem->nomatch ? -ENOTEMPTY : 1;
 }
 
 static inline void
-hash_netport6_data_zero_out(struct hash_netport6_elem *elem)
+hash_netport6_data_set_flags(struct hash_netport6_elem *elem, u32 flags)
 {
-	elem->proto = 0;
+	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
 }
 
 static inline void
-ip6_netmask(union nf_inet_addr *ip, u8 prefix)
+hash_netport6_data_reset_flags(struct hash_netport6_elem *elem, u8 *flags)
 {
-	ip->ip6[0] &= ip_set_netmask6(prefix)[0];
-	ip->ip6[1] &= ip_set_netmask6(prefix)[1];
-	ip->ip6[2] &= ip_set_netmask6(prefix)[2];
-	ip->ip6[3] &= ip_set_netmask6(prefix)[3];
+	swap(*flags, elem->nomatch);
 }
 
 static inline void
@@ -422,76 +382,57 @@ nla_put_failure:
 	return 1;
 }
 
-static bool
-hash_netport6_data_tlist(struct sk_buff *skb,
-			 const struct hash_netport6_elem *data)
+static inline void
+hash_netport6_data_next(struct hash_netport4_elem *next,
+			const struct hash_netport6_elem *d)
 {
-	const struct hash_netport6_telem *e =
-		(const struct hash_netport6_telem *)data;
-	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
-
-	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
-	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) ||
-	    nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) ||
-	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) ||
-	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
-			  htonl(ip_set_timeout_get(e->timeout))) ||
-	    (flags &&
-	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return 1;
+	next->port = d->port;
 }
 
+#undef MTYPE
 #undef PF
 #undef HOST_MASK
 
+#define MTYPE		hash_netport6
 #define PF		6
 #define HOST_MASK	128
-#include <linux/netfilter/ipset/ip_set_ahash.h>
-
-static inline void
-hash_netport6_data_next(struct ip_set_hash *h,
-			const struct hash_netport6_elem *d)
-{
-	h->next.port = d->port;
-}
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
 
 static int
 hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		   const struct xt_action_param *par,
-		   enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+		   enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netport6_elem data = {
+	struct hash_netport6_elem e = {
 		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1,
 	};
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);
 
 	if (adt == IPSET_TEST)
-		data.cidr = HOST_MASK - 1;
+		e.cidr = HOST_MASK - 1;
 
 	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC,
-				 &data.port, &data.proto))
+				 &e.port, &e.proto))
 		return -EINVAL;
 
-	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6);
-	ip6_netmask(&data.ip, data.cidr + 1);
+	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+	ip6_netmask(&e.ip, e.cidr + 1);
 
-	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags);
+	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
 }
 
 static int
 hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 		   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct ip_set_hash *h = set->data;
+	const struct hash_netport *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
-	struct hash_netport6_elem data = { .cidr = HOST_MASK - 1 };
+	struct hash_netport6_elem e = { .cidr = HOST_MASK - 1 };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);
 	u32 port, port_to;
-	u32 timeout = h->timeout;
 	bool with_ports = false;
 	u8 cidr;
 	int ret;
@@ -500,7 +441,9 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
 		return -IPSET_ERR_PROTOCOL;
 	if (unlikely(tb[IPSET_ATTR_IP_TO]))
 		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED;
@@ -508,7 +451,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip);
+	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+	      ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
 
@@ -516,45 +460,40 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
 		if (!cidr || cidr > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
-		data.cidr = cidr - 1;
+		e.cidr = cidr - 1;
 	}
-	ip6_netmask(&data.ip, data.cidr + 1);
+	ip6_netmask(&e.ip, e.cidr + 1);
 
 	if (tb[IPSET_ATTR_PORT])
-		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
+		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);
 	else
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_PROTO]) {
-		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
-		with_ports = ip_set_proto_with_ports(data.proto);
+		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]);
+		with_ports = ip_set_proto_with_ports(e.proto);
 
-		if (data.proto == 0)
+		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
 	} else
 		return -IPSET_ERR_MISSING_PROTO;
 
-	if (!(with_ports || data.proto == IPPROTO_ICMPV6))
-		data.port = 0;
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!with_timeout(h->timeout))
-			return -IPSET_ERR_TIMEOUT;
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-	}
+	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
+		e.port = 0;
 
-	if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) {
+	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
-			flags |= (cadt_flags << 16);
+			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
 
 	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) {
-		ret = adtfn(set, &data, timeout, flags);
-		return ip_set_eexist(ret, flags) ? 0 : ret;
+		ret = adtfn(set, &e, &ext, &ext, flags);
+		return ip_set_enomatch(ret, flags, adt) ? 1 :
+		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
-	port = ntohs(data.port);
+	port = ntohs(e.port);
 	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);
 	if (port > port_to)
 		swap(port, port_to);
@@ -562,8 +501,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (retried)
 		port = ntohs(h->next.port);
 	for (; port <= port_to; port++) {
-		data.port = htons(port);
-		ret = adtfn(set, &data, timeout, flags);
+		e.port = htons(port);
+		ret = adtfn(set, &e, &ext, &ext, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
@@ -573,80 +512,6 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 	return ret;
 }
 
-/* Create hash:ip type of sets */
-
-static int
-hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
-{
-	struct ip_set_hash *h;
-	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM;
-	u8 hbits;
-	size_t hsize;
-
-	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
-		return -IPSET_ERR_INVALID_FAMILY;
-
-	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT)))
-		return -IPSET_ERR_PROTOCOL;
-
-	if (tb[IPSET_ATTR_HASHSIZE]) {
-		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
-		if (hashsize < IPSET_MIMINAL_HASHSIZE)
-			hashsize = IPSET_MIMINAL_HASHSIZE;
-	}
-
-	if (tb[IPSET_ATTR_MAXELEM])
-		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
-
-	h = kzalloc(sizeof(*h)
-		    + sizeof(struct ip_set_hash_nets)
-		      * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL);
-	if (!h)
-		return -ENOMEM;
-
-	h->maxelem = maxelem;
-	get_random_bytes(&h->initval, sizeof(h->initval));
-	h->timeout = IPSET_NO_TIMEOUT;
-
-	hbits = htable_bits(hashsize);
-	hsize = htable_size(hbits);
-	if (hsize == 0) {
-		kfree(h);
-		return -ENOMEM;
-	}
-	h->table = ip_set_alloc(hsize);
-	if (!h->table) {
-		kfree(h);
-		return -ENOMEM;
-	}
-	h->table->htable_bits = hbits;
-
-	set->data = h;
-
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
-
-		set->variant = set->family == NFPROTO_IPV4
-			? &hash_netport4_tvariant : &hash_netport6_tvariant;
-
-		if (set->family == NFPROTO_IPV4)
-			hash_netport4_gc_init(set);
-		else
-			hash_netport6_gc_init(set);
-	} else {
-		set->variant = set->family == NFPROTO_IPV4
-			? &hash_netport4_variant : &hash_netport6_variant;
-	}
-
-	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n",
-		 set->name, jhash_size(h->table->htable_bits),
-		 h->table->htable_bits, h->maxelem, set->data, h->table);
-
-	return 0;
-}
-
 static struct ip_set_type hash_netport_type __read_mostly = {
 	.name		= "hash:net,port",
 	.protocol	= IPSET_PROTOCOL,
@@ -663,6 +528,7 @@ static struct ip_set_type hash_netport_type __read_mostly = {
 		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8 },
 		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
+		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
 	},
 	.adt_policy	= {
 		[IPSET_ATTR_IP]		= { .type = NLA_NESTED },
@@ -674,6 +540,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {
 		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },
 		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },
+		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 },
+		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },
 	},
 	.me		= THIS_MODULE,
 };
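
Note on the hunks above: the per-file boilerplate (the tlist/isnull/copy/zero_out helpers, the open-coded create function, the separate timeout element) is gone; the file now defines HTYPE/MTYPE/PF/HOST_MASK and includes ip_set_hash_gen.h once per address family, which stamps out the plain, t (timeout), c (counter) and ct element variants declared above. The following is a minimal stand-alone sketch of that include-as-template technique, illustrative only and not from the patch; it assumes nothing beyond standard C token pasting, and all demo names (PASTE, VARIANT_FN, DEFINE_VARIANT, demo4/demo6) are invented.

/* Stand-alone sketch (illustrative only, not part of the patch). */
#include <stdio.h>

#define PASTE2(a, b) a##b
#define PASTE(a, b)  PASTE2(a, b)
#define VARIANT_FN(name) PASTE(MTYPE, PASTE(_, name))

/* The "generic header" body: expanded once per MTYPE/HOST_MASK pair. */
#define DEFINE_VARIANT \
static int VARIANT_FN(host_mask)(void) { return HOST_MASK; }

#define MTYPE demo4
#define HOST_MASK 32
DEFINE_VARIANT
#undef MTYPE
#undef HOST_MASK

#define MTYPE demo6
#define HOST_MASK 128
DEFINE_VARIANT

int main(void)
{
	/* Two distinct functions were generated from one body. */
	printf("demo4: /%d, demo6: /%d\n",
	       demo4_host_mask(), demo6_host_mask());
	return 0;
}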
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 8371c2bac2e4..979b8c90e422 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2008-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+/* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
@@ -13,30 +13,53 @@
 #include <linux/errno.h>
 
 #include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_list.h>
 
 #define REVISION_MIN	0
-#define REVISION_MAX	0
+#define REVISION_MAX	1 /* Counters support added */
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
 IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX);
 MODULE_ALIAS("ip_set_list:set");
 
-/* Member elements without and with timeout */
+/* Member elements */
 struct set_elem {
 	ip_set_id_t id;
 };
 
-struct set_telem {
-	ip_set_id_t id;
+struct sett_elem {
+	struct {
+		ip_set_id_t id;
+	} __attribute__ ((aligned));
+	unsigned long timeout;
+};
+
+struct setc_elem {
+	struct {
+		ip_set_id_t id;
+	} __attribute__ ((aligned));
+	struct ip_set_counter counter;
+};
+
+struct setct_elem {
+	struct {
+		ip_set_id_t id;
+	} __attribute__ ((aligned));
+	struct ip_set_counter counter;
 	unsigned long timeout;
 };
 
+struct set_adt_elem {
+	ip_set_id_t id;
+	ip_set_id_t refid;
+	int before;
+};
+
 /* Type structure */
 struct list_set {
 	size_t dsize;		/* element size */
+	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */
 	u32 size;		/* size of set list array */
 	u32 timeout;		/* timeout value */
 	struct timer_list gc;	/* garbage collection */
@@ -49,175 +72,311 @@ list_set_elem(const struct list_set *map, u32 id)
 	return (struct set_elem *)((void *)map->members + id * map->dsize);
 }
 
-static inline struct set_telem *
-list_set_telem(const struct list_set *map, u32 id)
-{
-	return (struct set_telem *)((void *)map->members + id * map->dsize);
-}
+#define ext_timeout(e, m)	\
+(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT])
+#define ext_counter(e, m)	\
+(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER])
 
-static inline bool
-list_set_timeout(const struct list_set *map, u32 id)
+static int
+list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
+	       const struct xt_action_param *par,
+	       struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
 {
-	const struct set_telem *elem = list_set_telem(map, id);
+	struct list_set *map = set->data;
+	struct set_elem *e;
+	u32 i, cmdflags = opt->cmdflags;
+	int ret;
 
-	return ip_set_timeout_test(elem->timeout);
+	/* Don't lookup sub-counters at all */
+	opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS;
+	if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
+		opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
+	for (i = 0; i < map->size; i++) {
+		e = list_set_elem(map, i);
+		if (e->id == IPSET_INVALID_ID)
+			return 0;
+		if (SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(e, map)))
+			continue;
+		ret = ip_set_test(e->id, skb, par, opt);
+		if (ret > 0) {
+			if (SET_WITH_COUNTER(set))
+				ip_set_update_counter(ext_counter(e, map),
+						      ext, &opt->ext,
+						      cmdflags);
+			return ret;
+		}
+	}
+	return 0;
 }
 
-static inline bool
-list_set_expired(const struct list_set *map, u32 id)
+static int
+list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
+	      const struct xt_action_param *par,
+	      struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
 {
-	const struct set_telem *elem = list_set_telem(map, id);
+	struct list_set *map = set->data;
+	struct set_elem *e;
+	u32 i;
+	int ret;
 
-	return ip_set_timeout_expired(elem->timeout);
+	for (i = 0; i < map->size; i++) {
+		e = list_set_elem(map, i);
+		if (e->id == IPSET_INVALID_ID)
+			return 0;
+		if (SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(e, map)))
+			continue;
+		ret = ip_set_add(e->id, skb, par, opt);
+		if (ret == 0)
+			return ret;
+	}
+	return 0;
 }
 
-/* Set list without and with timeout */
-
 static int
-list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
-	      const struct xt_action_param *par,
-	      enum ipset_adt adt, const struct ip_set_adt_opt *opt)
+list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
+	      const struct xt_action_param *par,
+	      struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)
 {
 	struct list_set *map = set->data;
-	struct set_elem *elem;
+	struct set_elem *e;
 	u32 i;
 	int ret;
 
 	for (i = 0; i < map->size; i++) {
-		elem = list_set_elem(map, i);
-		if (elem->id == IPSET_INVALID_ID)
+		e = list_set_elem(map, i);
+		if (e->id == IPSET_INVALID_ID)
 			return 0;
-		if (with_timeout(map->timeout) && list_set_expired(map, i))
+		if (SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(e, map)))
 			continue;
-		switch (adt) {
-		case IPSET_TEST:
-			ret = ip_set_test(elem->id, skb, par, opt);
-			if (ret > 0)
-				return ret;
-			break;
-		case IPSET_ADD:
-			ret = ip_set_add(elem->id, skb, par, opt);
-			if (ret == 0)
-				return ret;
-			break;
-		case IPSET_DEL:
-			ret = ip_set_del(elem->id, skb, par, opt);
-			if (ret == 0)
-				return ret;
-			break;
-		default:
-			break;
-		}
+		ret = ip_set_del(e->id, skb, par, opt);
+		if (ret == 0)
+			return ret;
+	}
+	return 0;
+}
+
+static int
+list_set_kadt(struct ip_set *set, const struct sk_buff *skb,
+	      const struct xt_action_param *par,
+	      enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+	struct list_set *map = set->data;
+	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);
+
+	switch (adt) {
+	case IPSET_TEST:
+		return list_set_ktest(set, skb, par, opt, &ext);
+	case IPSET_ADD:
+		return list_set_kadd(set, skb, par, opt, &ext);
+	case IPSET_DEL:
+		return list_set_kdel(set, skb, par, opt, &ext);
+	default:
+		break;
 	}
 	return -EINVAL;
 }
 
 static bool
-id_eq(const struct list_set *map, u32 i, ip_set_id_t id)
+id_eq(const struct ip_set *set, u32 i, ip_set_id_t id)
 {
-	const struct set_elem *elem;
+	const struct list_set *map = set->data;
+	const struct set_elem *e;
+
+	if (i >= map->size)
+		return 0;
 
-	if (i < map->size) {
-		elem = list_set_elem(map, i);
-		return elem->id == id;
-	}
+	e = list_set_elem(map, i);
+	return !!(e->id == id &&
+		  !(SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(e, map))));
+}
+
+static int
+list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d,
+	     const struct ip_set_ext *ext)
+{
+	struct list_set *map = set->data;
+	struct set_elem *e = list_set_elem(map, i);
+
+	if (e->id != IPSET_INVALID_ID) {
+		if (i == map->size - 1)
+			/* Last element replaced: e.g. add new,before,last */
+			ip_set_put_byindex(e->id);
+		else {
+			struct set_elem *x = list_set_elem(map, map->size - 1);
+
+			/* Last element pushed off */
+			if (x->id != IPSET_INVALID_ID)
+				ip_set_put_byindex(x->id);
+			memmove(list_set_elem(map, i + 1), e,
+				map->dsize * (map->size - (i + 1)));
+		}
+	}
 
+	e->id = d->id;
+	if (SET_WITH_TIMEOUT(set))
+		ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+	if (SET_WITH_COUNTER(set))
+		ip_set_init_counter(ext_counter(e, map), ext);
 	return 0;
 }
 
-static bool
-id_eq_timeout(const struct list_set *map, u32 i, ip_set_id_t id)
+static int
+list_set_del(struct ip_set *set, u32 i)
 {
-	const struct set_elem *elem;
+	struct list_set *map = set->data;
+	struct set_elem *e = list_set_elem(map, i);
 
-	if (i < map->size) {
-		elem = list_set_elem(map, i);
-		return !!(elem->id == id &&
-			  !(with_timeout(map->timeout) &&
-			    list_set_expired(map, i)));
-	}
+	ip_set_put_byindex(e->id);
+
+	if (i < map->size - 1)
+		memmove(e, list_set_elem(map, i + 1),
+			map->dsize * (map->size - (i + 1)));
 
+	/* Last element */
+	e = list_set_elem(map, map->size - 1);
+	e->id = IPSET_INVALID_ID;
 	return 0;
 }
 
 static void
-list_elem_add(struct list_set *map, u32 i, ip_set_id_t id)
+set_cleanup_entries(struct ip_set *set)
 {
+	struct list_set *map = set->data;
 	struct set_elem *e;
+	u32 i;
 
-	for (; i < map->size; i++) {
+	for (i = 0; i < map->size; i++) {
 		e = list_set_elem(map, i);
-		swap(e->id, id);
-		if (e->id == IPSET_INVALID_ID)
-			break;
+		if (e->id != IPSET_INVALID_ID &&
+		    ip_set_timeout_expired(ext_timeout(e, map)))
+			list_set_del(set, i);
 	}
 }
 
-static void
-list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id,
-	       unsigned long timeout)
+static int
+list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+	       struct ip_set_ext *mext, u32 flags)
 {
-	struct set_telem *e;
+	struct list_set *map = set->data;
+	struct set_adt_elem *d = value;
+	struct set_elem *e;
+	u32 i;
+	int ret;
 
-	for (; i < map->size; i++) {
-		e = list_set_telem(map, i);
-		swap(e->id, id);
-		swap(e->timeout, timeout);
+	for (i = 0; i < map->size; i++) {
+		e = list_set_elem(map, i);
 		if (e->id == IPSET_INVALID_ID)
-			break;
+			return 0;
+		else if (SET_WITH_TIMEOUT(set) &&
+			 ip_set_timeout_expired(ext_timeout(e, map)))
+			continue;
+		else if (e->id != d->id)
+			continue;
+
+		if (d->before == 0)
+			return 1;
+		else if (d->before > 0)
+			ret = id_eq(set, i + 1, d->refid);
+		else
+			ret = i > 0 && id_eq(set, i - 1, d->refid);
+		return ret;
 	}
+	return 0;
 }
 
+
 static int
-list_set_add(struct list_set *map, u32 i, ip_set_id_t id,
-	     unsigned long timeout)
+list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+	      struct ip_set_ext *mext, u32 flags)
 {
-	const struct set_elem *e = list_set_elem(map, i);
+	struct list_set *map = set->data;
+	struct set_adt_elem *d = value;
+	struct set_elem *e;
+	bool flag_exist = flags & IPSET_FLAG_EXIST;
+	u32 i, ret = 0;
 
-	if (i == map->size - 1 && e->id != IPSET_INVALID_ID)
-		/* Last element replaced: e.g. add new,before,last */
-		ip_set_put_byindex(e->id);
-	if (with_timeout(map->timeout))
-		list_elem_tadd(map, i, id, ip_set_timeout_set(timeout));
-	else
-		list_elem_add(map, i, id);
+	/* Check already added element */
+	for (i = 0; i < map->size; i++) {
+		e = list_set_elem(map, i);
+		if (e->id == IPSET_INVALID_ID)
+			goto insert;
+		else if (SET_WITH_TIMEOUT(set) &&
+			 ip_set_timeout_expired(ext_timeout(e, map)))
+			continue;
+		else if (e->id != d->id)
+			continue;
 
-	return 0;
-}
+		if ((d->before > 1 && !id_eq(set, i + 1, d->refid)) ||
+		    (d->before < 0 &&
+		     (i == 0 || !id_eq(set, i - 1, d->refid))))
+			/* Before/after doesn't match */
+			return -IPSET_ERR_REF_EXIST;
+		if (!flag_exist)
+			/* Can't re-add */
+			return -IPSET_ERR_EXIST;
+		/* Update extensions */
+		if (SET_WITH_TIMEOUT(set))
+			ip_set_timeout_set(ext_timeout(e, map), ext->timeout);
+		if (SET_WITH_COUNTER(set))
+			ip_set_init_counter(ext_counter(e, map), ext);
+		/* Set is already added to the list */
+		ip_set_put_byindex(d->id);
+		return 0;
+	}
+insert:
+	ret = -IPSET_ERR_LIST_FULL;
+	for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
+		e = list_set_elem(map, i);
+		if (e->id == IPSET_INVALID_ID)
+			ret = d->before != 0 ? -IPSET_ERR_REF_EXIST
+				: list_set_add(set, i, d, ext);
+		else if (e->id != d->refid)
+			continue;
+		else if (d->before > 0)
+			ret = list_set_add(set, i, d, ext);
+		else if (i + 1 < map->size)
+			ret = list_set_add(set, i + 1, d, ext);
+	}
 
-static int
-list_set_del(struct list_set *map, u32 i)
-{
-	struct set_elem *a = list_set_elem(map, i), *b;
-
-	ip_set_put_byindex(a->id);
-
-	for (; i < map->size - 1; i++) {
-		b = list_set_elem(map, i + 1);
-		a->id = b->id;
-		if (with_timeout(map->timeout))
-			((struct set_telem *)a)->timeout =
-				((struct set_telem *)b)->timeout;
-		a = b;
-		if (a->id == IPSET_INVALID_ID)
-			break;
-	}
-	/* Last element */
-	a->id = IPSET_INVALID_ID;
-	return 0;
+	return ret;
 }
 
-static void
-cleanup_entries(struct list_set *map)
+static int
+list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+	      struct ip_set_ext *mext, u32 flags)
 {
-	struct set_telem *e;
+	struct list_set *map = set->data;
+	struct set_adt_elem *d = value;
+	struct set_elem *e;
 	u32 i;
 
 	for (i = 0; i < map->size; i++) {
-		e = list_set_telem(map, i);
-		if (e->id != IPSET_INVALID_ID && list_set_expired(map, i))
-			list_set_del(map, i);
+		e = list_set_elem(map, i);
+		if (e->id == IPSET_INVALID_ID)
+			return d->before != 0 ? -IPSET_ERR_REF_EXIST
+					      : -IPSET_ERR_EXIST;
+		else if (SET_WITH_TIMEOUT(set) &&
+			 ip_set_timeout_expired(ext_timeout(e, map)))
+			continue;
+		else if (e->id != d->id)
+			continue;
+
+		if (d->before == 0)
+			return list_set_del(set, i);
+		else if (d->before > 0) {
+			if (!id_eq(set, i + 1, d->refid))
+				return -IPSET_ERR_REF_EXIST;
+			return list_set_del(set, i);
+		} else if (i == 0 || !id_eq(set, i - 1, d->refid))
+			return -IPSET_ERR_REF_EXIST;
+		else
+			return list_set_del(set, i);
 	}
+	return -IPSET_ERR_EXIST;
 }
 
 static int
@@ -225,26 +384,27 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	struct list_set *map = set->data;
-	bool with_timeout = with_timeout(map->timeout);
-	bool flag_exist = flags & IPSET_FLAG_EXIST;
-	int before = 0;
-	u32 timeout = map->timeout;
-	ip_set_id_t id, refid = IPSET_INVALID_ID;
-	const struct set_elem *elem;
+	ipset_adtfn adtfn = set->variant->adt[adt];
+	struct set_adt_elem e = { .refid = IPSET_INVALID_ID };
+	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);
 	struct ip_set *s;
-	u32 i;
 	int ret = 0;
 
 	if (unlikely(!tb[IPSET_ATTR_NAME] ||
 		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
-		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))
 		return -IPSET_ERR_PROTOCOL;
 
 	if (tb[IPSET_ATTR_LINENO])
 		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
 
-	id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
-	if (id == IPSET_INVALID_ID)
+	ret = ip_set_get_extensions(set, tb, &ext);
+	if (ret)
+		return ret;
+	e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s);
+	if (e.id == IPSET_INVALID_ID)
 		return -IPSET_ERR_NAME;
 	/* "Loop detection" */
 	if (s->type->features & IPSET_TYPE_NAME) {
@@ -254,115 +414,34 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
-		before = f & IPSET_FLAG_BEFORE;
+		e.before = f & IPSET_FLAG_BEFORE;
 	}
 
-	if (before && !tb[IPSET_ATTR_NAMEREF]) {
+	if (e.before && !tb[IPSET_ATTR_NAMEREF]) {
 		ret = -IPSET_ERR_BEFORE;
 		goto finish;
 	}
 
 	if (tb[IPSET_ATTR_NAMEREF]) {
-		refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
-					  &s);
-		if (refid == IPSET_INVALID_ID) {
+		e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]),
+					    &s);
+		if (e.refid == IPSET_INVALID_ID) {
 			ret = -IPSET_ERR_NAMEREF;
 			goto finish;
 		}
-		if (!before)
-			before = -1;
-	}
-	if (tb[IPSET_ATTR_TIMEOUT]) {
-		if (!with_timeout) {
-			ret = -IPSET_ERR_TIMEOUT;
-			goto finish;
-		}
-		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
+		if (!e.before)
+			e.before = -1;
 	}
-	if (with_timeout && adt != IPSET_TEST)
-		cleanup_entries(map);
+	if (adt != IPSET_TEST && SET_WITH_TIMEOUT(set))
+		set_cleanup_entries(set);
 
-	switch (adt) {
-	case IPSET_TEST:
-		for (i = 0; i < map->size && !ret; i++) {
-			elem = list_set_elem(map, i);
-			if (elem->id == IPSET_INVALID_ID ||
-			    (before != 0 && i + 1 >= map->size))
-				break;
-			else if (with_timeout && list_set_expired(map, i))
-				continue;
-			else if (before > 0 && elem->id == id)
-				ret = id_eq_timeout(map, i + 1, refid);
-			else if (before < 0 && elem->id == refid)
-				ret = id_eq_timeout(map, i + 1, id);
-			else if (before == 0 && elem->id == id)
-				ret = 1;
-		}
-		break;
-	case IPSET_ADD:
-		for (i = 0; i < map->size; i++) {
-			elem = list_set_elem(map, i);
-			if (elem->id != id)
-				continue;
-			if (!(with_timeout && flag_exist)) {
-				ret = -IPSET_ERR_EXIST;
-				goto finish;
-			} else {
-				struct set_telem *e = list_set_telem(map, i);
-
-				if ((before > 1 &&
-				     !id_eq(map, i + 1, refid)) ||
-				    (before < 0 &&
-				     (i == 0 || !id_eq(map, i - 1, refid)))) {
-					ret = -IPSET_ERR_EXIST;
-					goto finish;
-				}
-				e->timeout = ip_set_timeout_set(timeout);
-				ip_set_put_byindex(id);
-				ret = 0;
-				goto finish;
-			}
-		}
-		ret = -IPSET_ERR_LIST_FULL;
-		for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) {
-			elem = list_set_elem(map, i);
-			if (elem->id == IPSET_INVALID_ID)
-				ret = before != 0 ? -IPSET_ERR_REF_EXIST
-					: list_set_add(map, i, id, timeout);
-			else if (elem->id != refid)
-				continue;
-			else if (before > 0)
-				ret = list_set_add(map, i, id, timeout);
-			else if (i + 1 < map->size)
-				ret = list_set_add(map, i + 1, id, timeout);
-		}
-		break;
-	case IPSET_DEL:
-		ret = -IPSET_ERR_EXIST;
-		for (i = 0; i < map->size && ret == -IPSET_ERR_EXIST; i++) {
-			elem = list_set_elem(map, i);
-			if (elem->id == IPSET_INVALID_ID) {
-				ret = before != 0 ? -IPSET_ERR_REF_EXIST
-						  : -IPSET_ERR_EXIST;
-				break;
-			} else if (elem->id == id &&
-				   (before == 0 ||
-				    (before > 0 && id_eq(map, i + 1, refid))))
-				ret = list_set_del(map, i);
-			else if (elem->id == refid &&
-				 before < 0 && id_eq(map, i + 1, id))
-				ret = list_set_del(map, i + 1);
-		}
-		break;
-	default:
-		break;
-	}
+	ret = adtfn(set, &e, &ext, &ext, flags);
 
 finish:
-	if (refid != IPSET_INVALID_ID)
-		ip_set_put_byindex(refid);
+	if (e.refid != IPSET_INVALID_ID)
+		ip_set_put_byindex(e.refid);
 	if (adt != IPSET_ADD || ret)
-		ip_set_put_byindex(id);
+		ip_set_put_byindex(e.id);
 
 	return ip_set_eexist(ret, flags) ? 0 : ret;
 }
@@ -371,14 +450,14 @@ static void
 list_set_flush(struct ip_set *set)
 {
 	struct list_set *map = set->data;
-	struct set_elem *elem;
+	struct set_elem *e;
 	u32 i;
 
 	for (i = 0; i < map->size; i++) {
-		elem = list_set_elem(map, i);
-		if (elem->id != IPSET_INVALID_ID) {
-			ip_set_put_byindex(elem->id);
-			elem->id = IPSET_INVALID_ID;
+		e = list_set_elem(map, i);
+		if (e->id != IPSET_INVALID_ID) {
+			ip_set_put_byindex(e->id);
+			e->id = IPSET_INVALID_ID;
 		}
 	}
 }
@@ -388,7 +467,7 @@ list_set_destroy(struct ip_set *set)
 {
 	struct list_set *map = set->data;
 
-	if (with_timeout(map->timeout))
+	if (SET_WITH_TIMEOUT(set))
 		del_timer_sync(&map->gc);
 	list_set_flush(set);
 	kfree(map);
@@ -406,8 +485,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
 	if (!nested)
 		goto nla_put_failure;
 	if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
-	    (with_timeout(map->timeout) &&
+	    (SET_WITH_TIMEOUT(set) &&
 	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) ||
+	    (SET_WITH_COUNTER(set) &&
+	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS,
+			   htonl(IPSET_FLAG_WITH_COUNTERS))) ||
 	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
 	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
			  htonl(sizeof(*map) + map->size * map->dsize)))
@@ -436,7 +518,8 @@ list_set_list(const struct ip_set *set,
 		e = list_set_elem(map, i);
 		if (e->id == IPSET_INVALID_ID)
 			goto finish;
-		if (with_timeout(map->timeout) && list_set_expired(map, i))
+		if (SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(e, map)))
 			continue;
 		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested) {
@@ -449,13 +532,14 @@ list_set_list(const struct ip_set *set,
 		if (nla_put_string(skb, IPSET_ATTR_NAME,
 				   ip_set_name_byindex(e->id)))
 			goto nla_put_failure;
-		if (with_timeout(map->timeout)) {
-			const struct set_telem *te =
-				(const struct set_telem *) e;
-			__be32 to = htonl(ip_set_timeout_get(te->timeout));
-			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, to))
-				goto nla_put_failure;
-		}
+		if (SET_WITH_TIMEOUT(set) &&
+		    nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
+				  htonl(ip_set_timeout_get(
+						ext_timeout(e, map)))))
+			goto nla_put_failure;
+		if (SET_WITH_COUNTER(set) &&
+		    ip_set_put_counter(skb, ext_counter(e, map)))
+			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
 finish:
@@ -481,12 +565,18 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
 	const struct list_set *y = b->data;
 
 	return x->size == y->size &&
-	       x->timeout == y->timeout;
+	       x->timeout == y->timeout &&
+	       a->extensions == b->extensions;
 }
 
-static const struct ip_set_type_variant list_set = {
+static const struct ip_set_type_variant set_variant = {
 	.kadt	= list_set_kadt,
 	.uadt	= list_set_uadt,
+	.adt	= {
+		[IPSET_ADD] = list_set_uadd,
+		[IPSET_DEL] = list_set_udel,
+		[IPSET_TEST] = list_set_utest,
+	},
 	.destroy = list_set_destroy,
 	.flush	= list_set_flush,
 	.head	= list_set_head,
@@ -501,7 +591,7 @@ list_set_gc(unsigned long ul_set)
 	struct list_set *map = set->data;
 
 	write_lock_bh(&set->lock);
-	cleanup_entries(map);
+	set_cleanup_entries(set);
 	write_unlock_bh(&set->lock);
 
 	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
@@ -509,20 +599,20 @@
 }
 
 static void
-list_set_gc_init(struct ip_set *set)
+list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
513{ 603{
514 struct list_set *map = set->data; 604 struct list_set *map = set->data;
515 605
516 init_timer(&map->gc); 606 init_timer(&map->gc);
517 map->gc.data = (unsigned long) set; 607 map->gc.data = (unsigned long) set;
518 map->gc.function = list_set_gc; 608 map->gc.function = gc;
519 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; 609 map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;
520 add_timer(&map->gc); 610 add_timer(&map->gc);
521} 611}
522 612
523/* Create list:set type of sets */ 613/* Create list:set type of sets */
524 614
525static bool 615static struct list_set *
526init_list_set(struct ip_set *set, u32 size, size_t dsize, 616init_list_set(struct ip_set *set, u32 size, size_t dsize,
527 unsigned long timeout) 617 unsigned long timeout)
528{ 618{
@@ -532,7 +622,7 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize,
532 622
533 map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL); 623 map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL);
534 if (!map) 624 if (!map)
535 return false; 625 return NULL;
536 626
537 map->size = size; 627 map->size = size;
538 map->dsize = dsize; 628 map->dsize = dsize;
@@ -544,16 +634,19 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize,
544 e->id = IPSET_INVALID_ID; 634 e->id = IPSET_INVALID_ID;
545 } 635 }
546 636
547 return true; 637 return map;
548} 638}
549 639
550static int 640static int
551list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags) 641list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
552{ 642{
553 u32 size = IP_SET_LIST_DEFAULT_SIZE; 643 struct list_set *map;
644 u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0;
645 unsigned long timeout = 0;
554 646
555 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || 647 if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) ||
556 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) 648 !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
649 !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
557 return -IPSET_ERR_PROTOCOL; 650 return -IPSET_ERR_PROTOCOL;
558 651
559 if (tb[IPSET_ATTR_SIZE]) 652 if (tb[IPSET_ATTR_SIZE])
@@ -561,18 +654,46 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)
561 if (size < IP_SET_LIST_MIN_SIZE) 654 if (size < IP_SET_LIST_MIN_SIZE)
562 size = IP_SET_LIST_MIN_SIZE; 655 size = IP_SET_LIST_MIN_SIZE;
563 656
564 if (tb[IPSET_ATTR_TIMEOUT]) { 657 if (tb[IPSET_ATTR_CADT_FLAGS])
565 if (!init_list_set(set, size, sizeof(struct set_telem), 658 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
566 ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]))) 659 if (tb[IPSET_ATTR_TIMEOUT])
660 timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
661 set->variant = &set_variant;
662 if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) {
663 set->extensions |= IPSET_EXT_COUNTER;
664 if (tb[IPSET_ATTR_TIMEOUT]) {
665 map = init_list_set(set, size,
666 sizeof(struct setct_elem), timeout);
667 if (!map)
668 return -ENOMEM;
669 set->extensions |= IPSET_EXT_TIMEOUT;
670 map->offset[IPSET_OFFSET_TIMEOUT] =
671 offsetof(struct setct_elem, timeout);
672 map->offset[IPSET_OFFSET_COUNTER] =
673 offsetof(struct setct_elem, counter);
674 list_set_gc_init(set, list_set_gc);
675 } else {
676 map = init_list_set(set, size,
677 sizeof(struct setc_elem), 0);
678 if (!map)
679 return -ENOMEM;
680 map->offset[IPSET_OFFSET_COUNTER] =
681 offsetof(struct setc_elem, counter);
682 }
683 } else if (tb[IPSET_ATTR_TIMEOUT]) {
684 map = init_list_set(set, size,
685 sizeof(struct sett_elem), timeout);
686 if (!map)
567 return -ENOMEM; 687 return -ENOMEM;
568 688 set->extensions |= IPSET_EXT_TIMEOUT;
569 list_set_gc_init(set); 689 map->offset[IPSET_OFFSET_TIMEOUT] =
690 offsetof(struct sett_elem, timeout);
691 list_set_gc_init(set, list_set_gc);
570 } else { 692 } else {
571 if (!init_list_set(set, size, sizeof(struct set_elem), 693 map = init_list_set(set, size, sizeof(struct set_elem), 0);
572 IPSET_NO_TIMEOUT)) 694 if (!map)
573 return -ENOMEM; 695 return -ENOMEM;
574 } 696 }
575 set->variant = &list_set;
576 return 0; 697 return 0;
577} 698}
578 699
@@ -588,6 +709,7 @@ static struct ip_set_type list_set_type __read_mostly = {
588 .create_policy = { 709 .create_policy = {
589 [IPSET_ATTR_SIZE] = { .type = NLA_U32 }, 710 [IPSET_ATTR_SIZE] = { .type = NLA_U32 },
590 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 711 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
712 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
591 }, 713 },
592 .adt_policy = { 714 .adt_policy = {
593 [IPSET_ATTR_NAME] = { .type = NLA_STRING, 715 [IPSET_ATTR_NAME] = { .type = NLA_STRING,
@@ -597,6 +719,8 @@ static struct ip_set_type list_set_type __read_mostly = {
597 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, 719 [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
598 [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, 720 [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
599 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, 721 [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
722 [IPSET_ATTR_BYTES] = { .type = NLA_U64 },
723 [IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
600 }, 724 },
601 .me = THIS_MODULE, 725 .me = THIS_MODULE,
602}; 726};
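The create path above selects one of four element layouts (plain, timeout, counter, or counter-plus-timeout) and records where each optional field lives via map->offset[] and offsetof(), so one accessor works for every layout. A minimal userspace sketch of that offset-based extension scheme; the struct map, ext_timeout() and OFF_TIMEOUT names are invented for the demo, not the ipset definitions:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

enum { OFF_TIMEOUT, OFF_COUNTER, OFF_MAX };

struct set_elem  { int id; };
struct sett_elem { struct set_elem e; unsigned long timeout; };

struct map {
	size_t dsize;           /* per-element size for this layout */
	size_t offset[OFF_MAX]; /* where each optional field lives */
	char data[64];          /* elements live in one flat array */
};

/* Generic accessor: element base plus the recorded offset, so the
 * caller never needs to know which concrete element struct is in use.
 */
static unsigned long *ext_timeout(struct map *m, unsigned int i)
{
	return (unsigned long *)(m->data + i * m->dsize +
				 m->offset[OFF_TIMEOUT]);
}

int main(void)
{
	struct map m = { .dsize = sizeof(struct sett_elem) };

	m.offset[OFF_TIMEOUT] = offsetof(struct sett_elem, timeout);
	memset(m.data, 0, sizeof(m.data));
	*ext_timeout(&m, 1) = 42;   /* set timeout of element 1 */
	printf("timeout[1] = %lu\n", *ext_timeout(&m, 1));
	return 0;
}

The indirection is the whole point: the same list-walking code serves all four layouts, and layouts without an extension simply never have its offset consulted.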
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 0b779d7df881..dfd7b65b3d2a 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
 	module_put(app->module);
 }
 
+static void ip_vs_app_inc_destroy(struct ip_vs_app *inc)
+{
+	kfree(inc->timeout_table);
+	kfree(inc);
+}
+
+static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
+{
+	struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head);
+
+	ip_vs_app_inc_destroy(inc);
+}
 
 /*
  *	Allocate/initialize app incarnation and register it in proto apps.
@@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
 	return 0;
 
   out:
-	kfree(inc->timeout_table);
-	kfree(inc);
+	ip_vs_app_inc_destroy(inc);
 	return ret;
 }
 
@@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
 
 	list_del(&inc->a_list);
 
-	kfree(inc->timeout_table);
-	kfree(inc);
+	call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);
 }
 
 
@@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
 {
 	int result;
 
-	atomic_inc(&inc->usecnt);
-	if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
-		atomic_dec(&inc->usecnt);
+	result = ip_vs_app_get(inc->app);
+	if (result)
+		atomic_inc(&inc->usecnt);
 	return result;
 }
 
@@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)
  */
 void ip_vs_app_inc_put(struct ip_vs_app *inc)
 {
-	ip_vs_app_put(inc->app);
 	atomic_dec(&inc->usecnt);
+	ip_vs_app_put(inc->app);
 }
 
 
@@ -218,6 +228,7 @@ out_unlock:
 /*
  *	ip_vs_app unregistration routine
  *	We are sure there are no app incarnations attached to services
+ *	Caller should use synchronize_rcu() or rcu_barrier()
  */
 void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
 {
@@ -341,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
 				 unsigned int flag, __u32 seq, int diff)
 {
 	/* spinlock is to keep updating cp->flags atomic */
-	spin_lock(&cp->lock);
+	spin_lock_bh(&cp->lock);
 	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
 		vseq->previous_delta = vseq->delta;
 		vseq->delta += diff;
 		vseq->init_seq = seq;
 		cp->flags |= flag;
 	}
-	spin_unlock(&cp->lock);
+	spin_unlock_bh(&cp->lock);
}
 
 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
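ip_vs_app_inc_release() now defers the free to an RCU callback, and the callback recovers the full object from the embedded rcu_head with container_of(). A self-contained userspace sketch of just that recovery step; the rcu_head stand-in and app_rcu_free() are illustrative, not the kernel definitions, and the grace period is elided:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head { struct rcu_head *next; };      /* stand-in */
struct app { int id; struct rcu_head rcu_head; };

/* A call_rcu() callback only receives the embedded head; container_of()
 * walks back to the start of the enclosing object so it can be freed.
 */
static void app_rcu_free(struct rcu_head *head)
{
	struct app *a = container_of(head, struct app, rcu_head);

	printf("freeing app %d\n", a->id);
	free(a);
}

int main(void)
{
	struct app *a = malloc(sizeof(*a));

	if (!a)
		return 1;
	a->id = 7;
	app_rcu_free(&a->rcu_head);  /* grace period elided here */
	return 0;
}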
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 704e514e02ab..a083bda322b6 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
 
 struct ip_vs_aligned_lock
 {
-	rwlock_t	l;
+	spinlock_t	l;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 /* lock array for conn table */
 static struct ip_vs_aligned_lock
 __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned;
 
-static inline void ct_read_lock(unsigned int key)
-{
-	read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_unlock(unsigned int key)
-{
-	read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_write_lock(unsigned int key)
-{
-	write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_write_unlock(unsigned int key)
-{
-	write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_lock_bh(unsigned int key)
-{
-	read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
-static inline void ct_read_unlock_bh(unsigned int key)
-{
-	read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
-}
-
 static inline void ct_write_lock_bh(unsigned int key)
 {
-	write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+	spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
 }
 
 static inline void ct_write_unlock_bh(unsigned int key)
 {
-	write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
+	spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);
 }
 
 
@@ -197,13 +167,13 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	/* Hash by protocol, client address and port */
 	hash = ip_vs_conn_hashkey_conn(cp);
 
-	ct_write_lock(hash);
+	ct_write_lock_bh(hash);
 	spin_lock(&cp->lock);
 
 	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
-		hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]);
 		cp->flags |= IP_VS_CONN_F_HASHED;
 		atomic_inc(&cp->refcnt);
+		hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);
 		ret = 1;
 	} else {
 		pr_err("%s(): request for already hashed, called from %pF\n",
@@ -212,7 +182,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	}
 
 	spin_unlock(&cp->lock);
-	ct_write_unlock(hash);
+	ct_write_unlock_bh(hash);
 
 	return ret;
 }
@@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 
 /*
  *	UNhashes ip_vs_conn from ip_vs_conn_tab.
- *	returns bool success.
+ *	returns bool success. Caller should hold conn reference.
  */
 static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 {
@@ -230,11 +200,11 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 	/* unhash it and decrease its reference counter */
 	hash = ip_vs_conn_hashkey_conn(cp);
 
-	ct_write_lock(hash);
+	ct_write_lock_bh(hash);
 	spin_lock(&cp->lock);
 
 	if (cp->flags & IP_VS_CONN_F_HASHED) {
-		hlist_del(&cp->c_list);
+		hlist_del_rcu(&cp->c_list);
 		cp->flags &= ~IP_VS_CONN_F_HASHED;
 		atomic_dec(&cp->refcnt);
 		ret = 1;
@@ -242,7 +212,37 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 		ret = 0;
 
 	spin_unlock(&cp->lock);
-	ct_write_unlock(hash);
+	ct_write_unlock_bh(hash);
+
+	return ret;
+}
+
+/* Try to unlink ip_vs_conn from ip_vs_conn_tab.
+ * returns bool success.
+ */
+static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp)
+{
+	unsigned int hash;
+	bool ret;
+
+	hash = ip_vs_conn_hashkey_conn(cp);
+
+	ct_write_lock_bh(hash);
+	spin_lock(&cp->lock);
+
+	if (cp->flags & IP_VS_CONN_F_HASHED) {
+		ret = false;
+		/* Decrease refcnt and unlink conn only if we are last user */
+		if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) {
+			hlist_del_rcu(&cp->c_list);
+			cp->flags &= ~IP_VS_CONN_F_HASHED;
+			ret = true;
+		}
+	} else
+		ret = atomic_read(&cp->refcnt) ? false : true;
+
+	spin_unlock(&cp->lock);
+	ct_write_unlock_bh(hash);
 
 	return ret;
 }
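The new ip_vs_conn_unlink() above hinges on atomic_cmpxchg(&cp->refcnt, 1, 0): the connection is unlinked only if the expiration path is the last reference holder. A runnable C11 analogue of that test; conn_unlink() and struct conn are invented names for the sketch:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct conn { atomic_int refcnt; };

static bool conn_unlink(struct conn *cp)
{
	int expected = 1;

	/* Succeeds only when nobody else holds a reference: the count
	 * goes 1 -> 0 atomically. A concurrent lookup that already
	 * bumped it to 2 makes the exchange fail, so expiration must
	 * be retried later instead of freeing a live connection.
	 */
	return atomic_compare_exchange_strong(&cp->refcnt, &expected, 0);
}

int main(void)
{
	struct conn c;

	atomic_init(&c.refcnt, 1);
	printf("last user unlink: %d\n", conn_unlink(&c)); /* 1: we won */
	atomic_store(&c.refcnt, 2);
	printf("busy unlink:      %d\n", conn_unlink(&c)); /* 0: in use */
	return 0;
}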
@@ -262,24 +262,25 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
 
 	hash = ip_vs_conn_hashkey_param(p, false);
 
-	ct_read_lock(hash);
+	rcu_read_lock();
 
-	hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (cp->af == p->af &&
-		    p->cport == cp->cport && p->vport == cp->vport &&
+	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (p->cport == cp->cport && p->vport == cp->vport &&
+		    cp->af == p->af &&
 		    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
 		    ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
 		    ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
 		    p->protocol == cp->protocol &&
 		    ip_vs_conn_net_eq(cp, p->net)) {
+			if (!__ip_vs_conn_get(cp))
+				continue;
 			/* HIT */
-			atomic_inc(&cp->refcnt);
-			ct_read_unlock(hash);
+			rcu_read_unlock();
 			return cp;
 		}
 	}
 
-	ct_read_unlock(hash);
+	rcu_read_unlock();
 
 	return NULL;
 }
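Once lookups run under RCU instead of a read lock, a matching entry may already be on its way out, so __ip_vs_conn_get() must take a reference only while the count is still nonzero; a zero count means "skip this entry". A plain C11 sketch of that inc-not-zero idiom (get_if_live() is a made-up name, not a kernel helper):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static bool get_if_live(atomic_int *refcnt)
{
	int old = atomic_load(refcnt);

	/* Retry until we either install old+1 or observe zero. */
	while (old != 0) {
		if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
			return true;   /* reference taken */
	}
	return false;                  /* already dying: skip it */
}

int main(void)
{
	atomic_int live, dying;

	atomic_init(&live, 1);
	atomic_init(&dying, 0);
	printf("live entry:  %d\n", get_if_live(&live));   /* 1 */
	printf("dying entry: %d\n", get_if_live(&dying));  /* 0 */
	return 0;
}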
@@ -346,14 +347,16 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
 
 	hash = ip_vs_conn_hashkey_param(p, false);
 
-	ct_read_lock(hash);
+	rcu_read_lock();
 
-	hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (!ip_vs_conn_net_eq(cp, p->net))
-			continue;
-		if (p->pe_data && p->pe->ct_match) {
-			if (p->pe == cp->pe && p->pe->ct_match(p, cp))
-				goto out;
+	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (unlikely(p->pe_data && p->pe->ct_match)) {
+			if (!ip_vs_conn_net_eq(cp, p->net))
+				continue;
+			if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
+				if (__ip_vs_conn_get(cp))
+					goto out;
+			}
 			continue;
 		}
 
@@ -363,17 +366,18 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
 		     * p->vaddr is a fwmark */
 		    ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC :
 				     p->af, p->vaddr, &cp->vaddr) &&
-		    p->cport == cp->cport && p->vport == cp->vport &&
+		    p->vport == cp->vport && p->cport == cp->cport &&
 		    cp->flags & IP_VS_CONN_F_TEMPLATE &&
-		    p->protocol == cp->protocol)
-			goto out;
+		    p->protocol == cp->protocol &&
+		    ip_vs_conn_net_eq(cp, p->net)) {
+			if (__ip_vs_conn_get(cp))
+				goto out;
+		}
 	}
 	cp = NULL;
 
   out:
-	if (cp)
-		atomic_inc(&cp->refcnt);
-	ct_read_unlock(hash);
+	rcu_read_unlock();
 
 	IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
 		      ip_vs_proto_name(p->protocol),
@@ -398,23 +402,24 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
 	 */
 	hash = ip_vs_conn_hashkey_param(p, true);
 
-	ct_read_lock(hash);
+	rcu_read_lock();
 
-	hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
-		if (cp->af == p->af &&
-		    p->vport == cp->cport && p->cport == cp->dport &&
+	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
+		if (p->vport == cp->cport && p->cport == cp->dport &&
+		    cp->af == p->af &&
 		    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
 		    ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
 		    p->protocol == cp->protocol &&
 		    ip_vs_conn_net_eq(cp, p->net)) {
+			if (!__ip_vs_conn_get(cp))
+				continue;
 			/* HIT */
-			atomic_inc(&cp->refcnt);
 			ret = cp;
 			break;
 		}
 	}
 
-	ct_read_unlock(hash);
+	rcu_read_unlock();
 
 	IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
 		      ip_vs_proto_name(p->protocol),
@@ -457,13 +462,13 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)
 void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)
 {
 	if (ip_vs_conn_unhash(cp)) {
-		spin_lock(&cp->lock);
+		spin_lock_bh(&cp->lock);
 		if (cp->flags & IP_VS_CONN_F_NO_CPORT) {
 			atomic_dec(&ip_vs_conn_no_cport_cnt);
 			cp->flags &= ~IP_VS_CONN_F_NO_CPORT;
 			cp->cport = cport;
 		}
-		spin_unlock(&cp->lock);
+		spin_unlock_bh(&cp->lock);
 
 		/* hash on new dport */
 		ip_vs_conn_hash(cp);
@@ -549,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
 		return;
 
 	/* Increase the refcnt counter of the dest */
-	atomic_inc(&dest->refcnt);
+	ip_vs_dest_hold(dest);
 
 	conn_flags = atomic_read(&dest->conn_flags);
 	if (cp->protocol != IPPROTO_UDP)
@@ -606,20 +611,22 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
  * Check if there is a destination for the connection, if so
  * bind the connection to the destination.
  */
-struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
+void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 {
 	struct ip_vs_dest *dest;
 
+	rcu_read_lock();
 	dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
 			       cp->dport, &cp->vaddr, cp->vport,
 			       cp->protocol, cp->fwmark, cp->flags);
 	if (dest) {
 		struct ip_vs_proto_data *pd;
 
-		spin_lock(&cp->lock);
+		spin_lock_bh(&cp->lock);
 		if (cp->dest) {
-			spin_unlock(&cp->lock);
-			return dest;
+			spin_unlock_bh(&cp->lock);
+			rcu_read_unlock();
+			return;
 		}
 
 		/* Applications work depending on the forwarding method
@@ -628,7 +635,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 			ip_vs_unbind_app(cp);
 
 		ip_vs_bind_dest(cp, dest);
-		spin_unlock(&cp->lock);
+		spin_unlock_bh(&cp->lock);
 
 		/* Update its packet transmitter */
 		cp->packet_xmit = NULL;
@@ -643,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
 		if (pd && atomic_read(&pd->appcnt))
 			ip_vs_bind_app(cp, pd->pp);
 	}
-	return dest;
+	rcu_read_unlock();
 }
 
 
@@ -695,12 +702,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
 			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
 	}
 
-	/*
-	 * Simply decrease the refcnt of the dest, because the
-	 * dest will be either in service's destination list
-	 * or in the trash.
-	 */
-	atomic_dec(&dest->refcnt);
+	ip_vs_dest_put(dest);
 }
 
 static int expire_quiescent_template(struct netns_ipvs *ipvs,
@@ -757,41 +759,36 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
 		 * Simply decrease the refcnt of the template,
 		 * don't restart its timer.
 		 */
-		atomic_dec(&ct->refcnt);
+		__ip_vs_conn_put(ct);
 		return 0;
 	}
 	return 1;
 }
 
+static void ip_vs_conn_rcu_free(struct rcu_head *head)
+{
+	struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn,
+					     rcu_head);
+
+	ip_vs_pe_put(cp->pe);
+	kfree(cp->pe_data);
+	kmem_cache_free(ip_vs_conn_cachep, cp);
+}
+
 static void ip_vs_conn_expire(unsigned long data)
 {
 	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
 	struct net *net = ip_vs_conn_net(cp);
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
-	cp->timeout = 60*HZ;
-
-	/*
-	 *	hey, I'm using it
-	 */
-	atomic_inc(&cp->refcnt);
-
 	/*
 	 *	do I control anybody?
 	 */
 	if (atomic_read(&cp->n_control))
 		goto expire_later;
 
-	/*
-	 *	unhash it if it is hashed in the conn table
-	 */
-	if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
-		goto expire_later;
-
-	/*
-	 *	refcnt==1 implies I'm the only one referrer
-	 */
-	if (likely(atomic_read(&cp->refcnt) == 1)) {
+	/* Unlink conn if not referenced anymore */
+	if (likely(ip_vs_conn_unlink(cp))) {
 		/* delete the timer if it is activated by other users */
 		del_timer(&cp->timer);
 
@@ -810,38 +807,41 @@ static void ip_vs_conn_expire(unsigned long data)
 			ip_vs_conn_drop_conntrack(cp);
 		}
 
-		ip_vs_pe_put(cp->pe);
-		kfree(cp->pe_data);
 		if (unlikely(cp->app != NULL))
 			ip_vs_unbind_app(cp);
 		ip_vs_unbind_dest(cp);
 		if (cp->flags & IP_VS_CONN_F_NO_CPORT)
 			atomic_dec(&ip_vs_conn_no_cport_cnt);
+		call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);
 		atomic_dec(&ipvs->conn_count);
-
-		kmem_cache_free(ip_vs_conn_cachep, cp);
 		return;
 	}
 
-	/* hash it back to the table */
-	ip_vs_conn_hash(cp);
-
   expire_later:
-	IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",
-		  atomic_read(&cp->refcnt)-1,
+	IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n",
		  atomic_read(&cp->refcnt),
 		  atomic_read(&cp->n_control));
 
+	atomic_inc(&cp->refcnt);
+	cp->timeout = 60*HZ;
+
 	if (ipvs->sync_state & IP_VS_STATE_MASTER)
 		ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
 
 	ip_vs_conn_put(cp);
 }
 
-
+/* Modify timer, so that it expires as soon as possible.
+ * Can be called without reference only if under RCU lock.
+ */
 void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
 {
-	if (del_timer(&cp->timer))
-		mod_timer(&cp->timer, jiffies);
+	/* Using mod_timer_pending will ensure the timer is not
	 * modified after the final del_timer in ip_vs_conn_expire.
	 */
+	if (timer_pending(&cp->timer) &&
+	    time_after(cp->timer.expires, jiffies))
+		mod_timer_pending(&cp->timer, jiffies);
 }
 
 
@@ -858,7 +858,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
 							   p->protocol);
 
-	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
+	cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
 	if (cp == NULL) {
 		IP_VS_ERR_RL("%s(): no memory\n", __func__);
 		return NULL;
@@ -869,13 +869,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	ip_vs_conn_net_set(cp, p->net);
 	cp->af = p->af;
 	cp->protocol = p->protocol;
-	ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
+	ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
 	cp->cport = p->cport;
-	ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr);
+	ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr);
 	cp->vport = p->vport;
 	/* proto should only be IPPROTO_IP if d_addr is a fwmark */
-	ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
+	ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
 		       &cp->daddr, daddr);
 	cp->dport = dport;
 	cp->flags = flags;
 	cp->fwmark = fwmark;
@@ -884,6 +884,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 		cp->pe = p->pe;
 		cp->pe_data = p->pe_data;
 		cp->pe_data_len = p->pe_data_len;
+	} else {
+		cp->pe = NULL;
+		cp->pe_data = NULL;
+		cp->pe_data_len = 0;
 	}
 	spin_lock_init(&cp->lock);
 
@@ -894,18 +898,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
 	 */
 	atomic_set(&cp->refcnt, 1);
 
+	cp->control = NULL;
 	atomic_set(&cp->n_control, 0);
 	atomic_set(&cp->in_pkts, 0);
 
+	cp->packet_xmit = NULL;
+	cp->app = NULL;
+	cp->app_data = NULL;
+	/* reset struct ip_vs_seq */
+	cp->in_seq.delta = 0;
+	cp->out_seq.delta = 0;
+
 	atomic_inc(&ipvs->conn_count);
 	if (flags & IP_VS_CONN_F_NO_CPORT)
 		atomic_inc(&ip_vs_conn_no_cport_cnt);
 
 	/* Bind the connection with a destination server */
+	cp->dest = NULL;
 	ip_vs_bind_dest(cp, dest);
 
 	/* Set its state and timeout */
 	cp->state = 0;
+	cp->old_state = 0;
 	cp->timeout = 3*HZ;
 	cp->sync_endtime = jiffies & ~3UL;
 
@@ -952,24 +966,29 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
 	struct ip_vs_iter_state *iter = seq->private;
 
 	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
-		ct_read_lock_bh(idx);
-		hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
+			/* __ip_vs_conn_get() is not needed by
			 * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show
			 */
 			if (pos-- == 0) {
 				iter->l = &ip_vs_conn_tab[idx];
 				return cp;
 			}
 		}
-		ct_read_unlock_bh(idx);
+		rcu_read_unlock();
+		rcu_read_lock();
 	}
 
 	return NULL;
 }
 
 static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
 {
 	struct ip_vs_iter_state *iter = seq->private;
 
 	iter->l = NULL;
+	rcu_read_lock();
 	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
 }
 
@@ -977,6 +996,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ip_vs_conn *cp = v;
 	struct ip_vs_iter_state *iter = seq->private;
+	struct hlist_node *e;
 	struct hlist_head *l = iter->l;
 	int idx;
 
@@ -985,31 +1005,27 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		return ip_vs_conn_array(seq, 0);
 
 	/* more on same hash chain? */
-	if (cp->c_list.next)
-		return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list);
+	e = rcu_dereference(hlist_next_rcu(&cp->c_list));
+	if (e)
+		return hlist_entry(e, struct ip_vs_conn, c_list);
 
 	idx = l - ip_vs_conn_tab;
-	ct_read_unlock_bh(idx);
-
 	while (++idx < ip_vs_conn_tab_size) {
-		ct_read_lock_bh(idx);
-		hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
 			iter->l = &ip_vs_conn_tab[idx];
 			return cp;
 		}
-		ct_read_unlock_bh(idx);
+		rcu_read_unlock();
+		rcu_read_lock();
 	}
 	iter->l = NULL;
 	return NULL;
 }
 
 static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
 {
-	struct ip_vs_iter_state *iter = seq->private;
-	struct hlist_head *l = iter->l;
-
-	if (l)
-		ct_read_unlock_bh(l - ip_vs_conn_tab);
+	rcu_read_unlock();
 }
 
 static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
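The /proc iterator above keeps only a position and the current bucket, which is what lets it drop and re-take the RCU read lock between buckets and still resume where it left off. A small userspace sketch of the same pos-based cursor over a bucketed table; the entry and bucket types are invented for the demo:

#include <stdio.h>

#define NBUCKETS 3

struct entry { int val; struct entry *next; };

/* Return the pos-th entry across all buckets, remembering which
 * bucket it came from so a _next step could continue from there.
 */
static struct entry *seq_start(struct entry *tab[], long pos,
			       int *bucket_out)
{
	for (int b = 0; b < NBUCKETS; b++)
		for (struct entry *e = tab[b]; e; e = e->next)
			if (pos-- == 0) {
				*bucket_out = b;
				return e;
			}
	return NULL;
}

int main(void)
{
	struct entry e3 = { 3, 0 }, e2 = { 2, &e3 }, e1 = { 1, 0 };
	struct entry *tab[NBUCKETS] = { &e1, 0, &e2 };
	int bucket;

	for (long pos = 0; ; pos++) {
		struct entry *e = seq_start(tab, pos, &bucket);

		if (!e)
			break;
		printf("pos %ld -> %d (bucket %d)\n", pos, e->val, bucket);
	}
	return 0;
}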
@@ -1188,7 +1204,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
 void ip_vs_random_dropentry(struct net *net)
 {
 	int idx;
-	struct ip_vs_conn *cp;
+	struct ip_vs_conn *cp, *cp_c;
 
 	/*
 	 * Randomly scan 1/32 of the whole table every second
@@ -1199,9 +1215,9 @@ void ip_vs_random_dropentry(struct net *net)
 		/*
 		 *  Lock is actually needed in this loop.
 		 */
-		ct_write_lock_bh(hash);
+		rcu_read_lock();
 
-		hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
 			if (cp->flags & IP_VS_CONN_F_TEMPLATE)
 				/* connection template */
 				continue;
@@ -1228,12 +1244,15 @@ void ip_vs_random_dropentry(struct net *net)
 
 			IP_VS_DBG(4, "del connection\n");
 			ip_vs_conn_expire_now(cp);
-			if (cp->control) {
+			cp_c = cp->control;
+			/* cp->control is valid only with reference to cp */
+			if (cp_c && __ip_vs_conn_get(cp)) {
 				IP_VS_DBG(4, "del conn template\n");
-				ip_vs_conn_expire_now(cp->control);
+				ip_vs_conn_expire_now(cp_c);
+				__ip_vs_conn_put(cp);
 			}
 		}
-		ct_write_unlock_bh(hash);
+		rcu_read_unlock();
 	}
 }
 
@@ -1244,7 +1263,7 @@ void ip_vs_random_dropentry(struct net *net)
 static void ip_vs_conn_flush(struct net *net)
 {
 	int idx;
-	struct ip_vs_conn *cp;
+	struct ip_vs_conn *cp, *cp_c;
 	struct netns_ipvs *ipvs = net_ipvs(net);
 
 flush_again:
@@ -1252,19 +1271,22 @@ flush_again:
 		/*
 		 *  Lock is actually needed in this loop.
 		 */
-		ct_write_lock_bh(idx);
+		rcu_read_lock();
 
-		hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
+		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
 			if (!ip_vs_conn_net_eq(cp, net))
 				continue;
 			IP_VS_DBG(4, "del connection\n");
 			ip_vs_conn_expire_now(cp);
-			if (cp->control) {
+			cp_c = cp->control;
+			/* cp->control is valid only with reference to cp */
+			if (cp_c && __ip_vs_conn_get(cp)) {
 				IP_VS_DBG(4, "del conn template\n");
-				ip_vs_conn_expire_now(cp->control);
+				ip_vs_conn_expire_now(cp_c);
+				__ip_vs_conn_put(cp);
 			}
 		}
-		ct_write_unlock_bh(idx);
+		rcu_read_unlock();
 	}
 
 	/* the counter may be not NULL, because maybe some conn entries
@@ -1331,7 +1353,7 @@ int __init ip_vs_conn_init(void)
 		INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]);
 
 	for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) {
-		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
+		spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l);
 	}
 
 	/* calculate the random value for connection hash */
@@ -1342,6 +1364,8 @@ int __init ip_vs_conn_init(void)
 
 void ip_vs_conn_cleanup(void)
 {
+	/* Wait all ip_vs_conn_rcu_free() callbacks to complete */
+	rcu_barrier();
 	/* Release the empty cache */
 	kmem_cache_destroy(ip_vs_conn_cachep);
 	vfree(ip_vs_conn_tab);
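The rcu_barrier() added to ip_vs_conn_cleanup() exists because call_rcu() callbacks may still be queued at module unload, and kmem_cache_destroy() must not run before every callback has freed its connection. A pthreads analogue of that drain-before-destroy ordering; the one-element "pending" counter stands in for RCU's callback machinery and is purely illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static int pending = 1;            /* one deferred free in flight */

static void *deferred_free(void *arg)
{
	pthread_mutex_lock(&lock);
	pending--;                 /* the callback has now run */
	pthread_cond_signal(&done);
	pthread_mutex_unlock(&lock);
	return arg;
}

/* The rcu_barrier() step: block until nothing is left in flight. */
static void barrier_then_destroy(void)
{
	pthread_mutex_lock(&lock);
	while (pending)
		pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);
	printf("all callbacks done; safe to destroy cache\n");
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, deferred_free, NULL);
	barrier_then_destroy();
	pthread_join(t, NULL);
	return 0;
}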
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 61f49d241712..085b5880ab0d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);
 EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
 
-int ip_vs_net_id __read_mostly;
-#ifdef IP_VS_GENERIC_NETNS
-EXPORT_SYMBOL(ip_vs_net_id);
-#endif
+static int ip_vs_net_id __read_mostly;
 /* netns cnt used for uniqueness */
 static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
 
@@ -206,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
 {
 	ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
 			      vport, p);
-	p->pe = svc->pe;
+	p->pe = rcu_dereference(svc->pe);
 	if (p->pe && p->pe->fill_param)
 		return p->pe->fill_param(p, skb);
 
@@ -238,7 +235,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/* Mask saddr with the netmask to adjust template granularity */
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6)
-		ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask);
+		ipv6_addr_prefix(&snet.in6, &iph->saddr.in6,
+				 (__force __u32) svc->netmask);
 	else
 #endif
 		snet.ip = iph->saddr.ip & svc->netmask;
@@ -299,12 +297,15 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/* Check if a template already exists */
 	ct = ip_vs_ct_in_get(&param);
 	if (!ct || !ip_vs_check_template(ct)) {
+		struct ip_vs_scheduler *sched;
+
 		/*
 		 * No template found or the dest of the connection
 		 * template is not available.
 		 * return *ignored=0 i.e. ICMP and NF_DROP
 		 */
-		dest = svc->scheduler->schedule(svc, skb);
+		sched = rcu_dereference(svc->scheduler);
+		dest = sched->schedule(svc, skb);
 		if (!dest) {
 			IP_VS_DBG(1, "p-schedule: no dest found.\n");
 			kfree(param.pe_data);
@@ -394,6 +395,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 {
 	struct ip_vs_protocol *pp = pd->pp;
 	struct ip_vs_conn *cp = NULL;
+	struct ip_vs_scheduler *sched;
 	struct ip_vs_dest *dest;
 	__be16 _ports[2], *pptr;
 	unsigned int flags;
@@ -449,7 +451,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
 		return NULL;
 	}
 
-	dest = svc->scheduler->schedule(svc, skb);
+	sched = rcu_dereference(svc->scheduler);
+	dest = sched->schedule(svc, skb);
 	if (dest == NULL) {
 		IP_VS_DBG(1, "Schedule: no dest found.\n");
 		return NULL;
@@ -507,7 +510,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 
 	pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
 	if (pptr == NULL) {
-		ip_vs_service_put(svc);
 		return NF_DROP;
 	}
 
@@ -533,8 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 			IP_VS_CONN_F_ONE_PACKET : 0;
 		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
 
-		ip_vs_service_put(svc);
-
 		/* create a new connection entry */
 		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
 		{
@@ -571,12 +571,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	 * listed in the ipvs table), pass the packets, because it is
 	 * not ipvs job to decide to drop the packets.
 	 */
-	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) {
-		ip_vs_service_put(svc);
+	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
 		return NF_ACCEPT;
-	}
-
-	ip_vs_service_put(svc);
 
 	/*
 	 * Notify the client that the destination is unreachable, and
@@ -588,9 +584,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6) {
 		if (!skb->dev) {
-			struct net *net = dev_net(skb_dst(skb)->dev);
+			struct net *net_ = dev_net(skb_dst(skb)->dev);
 
-			skb->dev = net->loopback_dev;
+			skb->dev = net_->loopback_dev;
 		}
 		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
 	} else
@@ -643,8 +639,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
 
 static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
 {
-	int err = ip_defrag(skb, user);
+	int err;
 
+	local_bh_disable();
+	err = ip_defrag(skb, user);
+	local_bh_enable();
 	if (!err)
 		ip_send_check(ip_hdr(skb));
 
@@ -1164,9 +1163,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 					 sizeof(_ports), _ports, &iph);
 		if (pptr == NULL)
 			return NF_ACCEPT;	/* Not for me */
-		if (ip_vs_lookup_real_service(net, af, iph.protocol,
-					      &iph.saddr,
-					      pptr[0])) {
+		if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
+					   pptr[0])) {
 			/*
 			 * Notify the real server: there is no
 			 * existing entry if it is not RST
@@ -1181,9 +1179,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
 					      iph.len)))) {
 #ifdef CONFIG_IP_VS_IPV6
 				if (af == AF_INET6) {
-					struct net *net =
-						dev_net(skb_dst(skb)->dev);
-
 					if (!skb->dev)
 						skb->dev = net->loopback_dev;
 					icmpv6_send(skb,
@@ -1226,13 +1221,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
-	unsigned int verdict;
-
-	/* Disable BH in LOCAL_OUT until all places are fixed */
-	local_bh_disable();
-	verdict = ip_vs_out(hooknum, skb, AF_INET);
-	local_bh_enable();
-	return verdict;
+	return ip_vs_out(hooknum, skb, AF_INET);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1259,13 +1248,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
 		   const struct net_device *in, const struct net_device *out,
 		   int (*okfn)(struct sk_buff *))
 {
-	unsigned int verdict;
-
-	/* Disable BH in LOCAL_OUT until all places are fixed */
-	local_bh_disable();
-	verdict = ip_vs_out(hooknum, skb, AF_INET6);
-	local_bh_enable();
-	return verdict;
+	return ip_vs_out(hooknum, skb, AF_INET6);
 }
 
 #endif
@@ -1401,10 +1384,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
 			goto ignore_ipip;
 		/* Prefer the resulting PMTU */
 		if (dest) {
-			spin_lock(&dest->dst_lock);
-			if (dest->dst_cache)
-				mtu = dst_mtu(dest->dst_cache);
-			spin_unlock(&dest->dst_lock);
+			struct ip_vs_dest_dst *dest_dst;
+
+			rcu_read_lock();
+			dest_dst = rcu_dereference(dest->dest_dst);
+			if (dest_dst)
+				mtu = dst_mtu(dest_dst->dst_cache);
+			rcu_read_unlock();
 		}
 		if (mtu > 68 + sizeof(struct iphdr))
 			mtu -= sizeof(struct iphdr);
@@ -1720,13 +1706,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	unsigned int verdict;
-
-	/* Disable BH in LOCAL_OUT until all places are fixed */
-	local_bh_disable();
-	verdict = ip_vs_in(hooknum, skb, AF_INET);
-	local_bh_enable();
-	return verdict;
+	return ip_vs_in(hooknum, skb, AF_INET);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -1785,13 +1765,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
 		     const struct net_device *in, const struct net_device *out,
 		     int (*okfn)(struct sk_buff *))
 {
-	unsigned int verdict;
-
-	/* Disable BH in LOCAL_OUT until all places are fixed */
-	local_bh_disable();
-	verdict = ip_vs_in(hooknum, skb, AF_INET6);
-	local_bh_enable();
-	return verdict;
+	return ip_vs_in(hooknum, skb, AF_INET6);
 }
 
 #endif
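Several call sites above switch from svc->scheduler to rcu_dereference(svc->scheduler) and then call through the local copy, so a concurrent scheduler swap can never be observed half-way. A C11 sketch of the same read-once-then-use pattern with acquire/release atomics; struct scheduler and cur_sched are invented for the demo, not the IPVS types:

#include <stdatomic.h>
#include <stdio.h>

struct scheduler {
	const char *name;
	int (*schedule)(void);
};

static int rr(void) { return 1; }

static struct scheduler rr_sched = { "rr", rr };
static _Atomic(struct scheduler *) cur_sched = &rr_sched;

int main(void)
{
	/* Reader side: one acquire load, then only the local copy is
	 * used, so name and schedule always come from the same object.
	 */
	struct scheduler *sched =
		atomic_load_explicit(&cur_sched, memory_order_acquire);

	printf("%s -> %d\n", sched->name, sched->schedule());

	/* An updater would publish with
	 *   atomic_store_explicit(&cur_sched, new, memory_order_release);
	 * and wait out a grace period before freeing the old scheduler.
	 */
	return 0;
}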
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 9e2d1cccd1eb..5b142fb16480 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -55,9 +55,6 @@
55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 55/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56static DEFINE_MUTEX(__ip_vs_mutex); 56static DEFINE_MUTEX(__ip_vs_mutex);
57 57
58/* lock for service table */
59static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61/* sysctl variables */ 58/* sysctl variables */
62 59
63#ifdef CONFIG_IP_VS_DEBUG 60#ifdef CONFIG_IP_VS_DEBUG
@@ -71,7 +68,7 @@ int ip_vs_get_debug_level(void)
71 68
72 69
73/* Protos */ 70/* Protos */
74static void __ip_vs_del_service(struct ip_vs_service *svc); 71static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);
75 72
76 73
77#ifdef CONFIG_IP_VS_IPV6 74#ifdef CONFIG_IP_VS_IPV6
@@ -257,9 +254,9 @@ ip_vs_use_count_dec(void)
257#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 254#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258 255
259/* the service table hashed by <protocol, addr, port> */ 256/* the service table hashed by <protocol, addr, port> */
260static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 257static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261/* the service table hashed by fwmark */ 258/* the service table hashed by fwmark */
262static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 259static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263 260
264 261
265/* 262/*
@@ -271,16 +268,18 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
271{ 268{
272 register unsigned int porth = ntohs(port); 269 register unsigned int porth = ntohs(port);
273 __be32 addr_fold = addr->ip; 270 __be32 addr_fold = addr->ip;
271 __u32 ahash;
274 272
275#ifdef CONFIG_IP_VS_IPV6 273#ifdef CONFIG_IP_VS_IPV6
276 if (af == AF_INET6) 274 if (af == AF_INET6)
277 addr_fold = addr->ip6[0]^addr->ip6[1]^ 275 addr_fold = addr->ip6[0]^addr->ip6[1]^
278 addr->ip6[2]^addr->ip6[3]; 276 addr->ip6[2]^addr->ip6[3];
279#endif 277#endif
280 addr_fold ^= ((size_t)net>>8); 278 ahash = ntohl(addr_fold);
279 ahash ^= ((size_t) net >> 8);
281 280
282 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) 281 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
283 & IP_VS_SVC_TAB_MASK; 282 IP_VS_SVC_TAB_MASK;
284} 283}
285 284
286/* 285/*
@@ -312,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
312 */ 311 */
313 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, 312 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314 &svc->addr, svc->port); 313 &svc->addr, svc->port);
315 list_add(&svc->s_list, &ip_vs_svc_table[hash]); 314 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
316 } else { 315 } else {
317 /* 316 /*
318 * Hash it by fwmark in svc_fwm_table 317 * Hash it by fwmark in svc_fwm_table
319 */ 318 */
320 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); 319 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 320 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322 } 321 }
323 322
324 svc->flags |= IP_VS_SVC_F_HASHED; 323 svc->flags |= IP_VS_SVC_F_HASHED;
@@ -342,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
342 341
343 if (svc->fwmark == 0) { 342 if (svc->fwmark == 0) {
344 /* Remove it from the svc_table table */ 343 /* Remove it from the svc_table table */
345 list_del(&svc->s_list); 344 hlist_del_rcu(&svc->s_list);
346 } else { 345 } else {
347 /* Remove it from the svc_fwm_table table */ 346 /* Remove it from the svc_fwm_table table */
348 list_del(&svc->f_list); 347 hlist_del_rcu(&svc->f_list);
349 } 348 }
350 349
351 svc->flags &= ~IP_VS_SVC_F_HASHED; 350 svc->flags &= ~IP_VS_SVC_F_HASHED;
@@ -367,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
367 /* Check for "full" addressed entries */ 366 /* Check for "full" addressed entries */
368 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); 367 hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369 368
370 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ 369 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
371 if ((svc->af == af) 370 if ((svc->af == af)
372 && ip_vs_addr_equal(af, &svc->addr, vaddr) 371 && ip_vs_addr_equal(af, &svc->addr, vaddr)
373 && (svc->port == vport) 372 && (svc->port == vport)
@@ -394,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
394 /* Check for fwmark addressed entries */ 393 /* Check for fwmark addressed entries */
395 hash = ip_vs_svc_fwm_hashkey(net, fwmark); 394 hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396 395
397 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { 396 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398 if (svc->fwmark == fwmark && svc->af == af 397 if (svc->fwmark == fwmark && svc->af == af
399 && net_eq(svc->net, net)) { 398 && net_eq(svc->net, net)) {
400 /* HIT */ 399 /* HIT */
@@ -405,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
405 return NULL; 404 return NULL;
406} 405}
407 406
407/* Find service, called under RCU lock */
408struct ip_vs_service * 408struct ip_vs_service *
409ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, 409ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
410 const union nf_inet_addr *vaddr, __be16 vport) 410 const union nf_inet_addr *vaddr, __be16 vport)
411{ 411{
412 struct ip_vs_service *svc; 412 struct ip_vs_service *svc;
413 struct netns_ipvs *ipvs = net_ipvs(net); 413 struct netns_ipvs *ipvs = net_ipvs(net);
414 414
415 read_lock(&__ip_vs_svc_lock);
416
417 /* 415 /*
418 * Check the table hashed by fwmark first 416 * Check the table hashed by fwmark first
419 */ 417 */
@@ -449,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
449 } 447 }
450 448
451 out: 449 out:
452 if (svc)
453 atomic_inc(&svc->usecnt);
454 read_unlock(&__ip_vs_svc_lock);
455
456 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 450 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457 fwmark, ip_vs_proto_name(protocol), 451 fwmark, ip_vs_proto_name(protocol),
458 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), 452 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
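With the read_lock and the usecnt bump removed, the rename from ip_vs_service_get() to ip_vs_service_find() signals the new contract: the result is pinned only by the caller's RCU read-side section, and no put is required afterwards. A hypothetical caller:

	rcu_read_lock();
	svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
	if (svc) {
		/* use svc here: read fields, schedule a connection, ... */
	}
	rcu_read_unlock();	/* svc must not be dereferenced past this */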
@@ -469,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
469 dest->svc = svc; 463 dest->svc = svc;
470} 464}
471 465
466static void ip_vs_service_free(struct ip_vs_service *svc)
467{
468 if (svc->stats.cpustats)
469 free_percpu(svc->stats.cpustats);
470 kfree(svc);
471}
472
472static void 473static void
473__ip_vs_unbind_svc(struct ip_vs_dest *dest) 474__ip_vs_unbind_svc(struct ip_vs_dest *dest)
474{ 475{
@@ -476,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)
476 477
477 dest->svc = NULL; 478 dest->svc = NULL;
478 if (atomic_dec_and_test(&svc->refcnt)) { 479 if (atomic_dec_and_test(&svc->refcnt)) {
479 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", 480 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
480 svc->fwmark, 481 svc->fwmark,
481 IP_VS_DBG_ADDR(svc->af, &svc->addr), 482 IP_VS_DBG_ADDR(svc->af, &svc->addr),
482 ntohs(svc->port), atomic_read(&svc->usecnt)); 483 ntohs(svc->port));
483 free_percpu(svc->stats.cpustats); 484 ip_vs_service_free(svc);
484 kfree(svc);
485 } 485 }
486} 486}
487 487
@@ -506,17 +506,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af,
506 & IP_VS_RTAB_MASK; 506 & IP_VS_RTAB_MASK;
507} 507}
508 508
509/* 509/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */
510 * Hashes ip_vs_dest in rs_table by <proto,addr,port>. 510static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
511 * should be called with locked tables.
512 */
513static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514{ 511{
515 unsigned int hash; 512 unsigned int hash;
516 513
517 if (!list_empty(&dest->d_list)) { 514 if (dest->in_rs_table)
518 return 0; 515 return;
519 }
520 516
521 /* 517 /*
522 * Hash by proto,addr,port, 518 * Hash by proto,addr,port,
@@ -524,64 +520,51 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
524 */ 520 */
525 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); 521 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526 522
527 list_add(&dest->d_list, &ipvs->rs_table[hash]); 523 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]);
528 524 dest->in_rs_table = 1;
529 return 1;
530} 525}
531 526
532/* 527/* Unhash ip_vs_dest from rs_table. */
533 * UNhashes ip_vs_dest from rs_table. 528static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
534 * should be called with locked tables.
535 */
536static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537{ 529{
538 /* 530 /*
539 * Remove it from the rs_table table. 531 * Remove it from the rs_table table.
540 */ 532 */
541 if (!list_empty(&dest->d_list)) { 533 if (dest->in_rs_table) {
542 list_del_init(&dest->d_list); 534 hlist_del_rcu(&dest->d_list);
535 dest->in_rs_table = 0;
543 } 536 }
544
545 return 1;
546} 537}
547 538
548/* 539/* Check if real service by <proto,addr,port> is present */
549 * Lookup real service by <proto,addr,port> in the real service table. 540bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
550 */ 541 const union nf_inet_addr *daddr, __be16 dport)
551struct ip_vs_dest *
552ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
553 const union nf_inet_addr *daddr,
554 __be16 dport)
555{ 542{
556 struct netns_ipvs *ipvs = net_ipvs(net); 543 struct netns_ipvs *ipvs = net_ipvs(net);
557 unsigned int hash; 544 unsigned int hash;
558 struct ip_vs_dest *dest; 545 struct ip_vs_dest *dest;
559 546
560 /* 547 /* Check for "full" addressed entries */
561 * Check for "full" addressed entries
562 * Return the first found entry
563 */
564 hash = ip_vs_rs_hashkey(af, daddr, dport); 548 hash = ip_vs_rs_hashkey(af, daddr, dport);
565 549
566 read_lock(&ipvs->rs_lock); 550 rcu_read_lock();
567 list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { 551 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) {
568 if ((dest->af == af) 552 if (dest->port == dport &&
569 && ip_vs_addr_equal(af, &dest->addr, daddr) 553 dest->af == af &&
570 && (dest->port == dport) 554 ip_vs_addr_equal(af, &dest->addr, daddr) &&
571 && ((dest->protocol == protocol) || 555 (dest->protocol == protocol || dest->vfwmark)) {
572 dest->vfwmark)) {
573 /* HIT */ 556 /* HIT */
574 read_unlock(&ipvs->rs_lock); 557 rcu_read_unlock();
575 return dest; 558 return true;
576 } 559 }
577 } 560 }
578 read_unlock(&ipvs->rs_lock); 561 rcu_read_unlock();
579 562
580 return NULL; 563 return false;
581} 564}
582 565
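Returning bool instead of the dest pointer is deliberate: a pointer handed back after rcu_read_unlock() would be unprotected, whereas a fact can safely escape the section. Reusing the illustrative entry_find() from the sketches above:

	/* Only a boolean leaves the RCU section, never the pointer. */
	static bool entry_present(unsigned int key)
	{
		bool found;

		rcu_read_lock();
		found = entry_find(key) != NULL;
		rcu_read_unlock();	/* no pointer escapes the section */
		return found;
	}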
583/* 566/* Lookup destination by {addr,port} in the given service
584 * Lookup destination by {addr,port} in the given service 567 * Called under RCU lock.
585 */ 568 */
586static struct ip_vs_dest * 569static struct ip_vs_dest *
587ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, 570ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
@@ -592,7 +575,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
592 /* 575 /*
593 * Find the destination for the given service 576 * Find the destination for the given service
594 */ 577 */
595 list_for_each_entry(dest, &svc->destinations, n_list) { 578 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
596 if ((dest->af == svc->af) 579 if ((dest->af == svc->af)
597 && ip_vs_addr_equal(svc->af, &dest->addr, daddr) 580 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
598 && (dest->port == dport)) { 581 && (dest->port == dport)) {
@@ -606,13 +589,11 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
606 589
607/* 590/*
608 * Find destination by {daddr,dport,vaddr,protocol} 591 * Find destination by {daddr,dport,vaddr,protocol}
609 * Cretaed to be used in ip_vs_process_message() in 592 * Created to be used in ip_vs_process_message() in
610 * the backup synchronization daemon. It finds the 593 * the backup synchronization daemon. It finds the
611 * destination to be bound to the received connection 594 * destination to be bound to the received connection
612 * on the backup. 595 * on the backup.
613 * 596 * Called under RCU lock, no refcnt is returned.
614 * ip_vs_lookup_real_service() looked promissing, but
615 * seems not working as expected.
616 */ 597 */
617struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, 598struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
618 const union nf_inet_addr *daddr, 599 const union nf_inet_addr *daddr,
@@ -625,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
625 struct ip_vs_service *svc; 606 struct ip_vs_service *svc;
626 __be16 port = dport; 607 __be16 port = dport;
627 608
628 svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); 609 svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);
629 if (!svc) 610 if (!svc)
630 return NULL; 611 return NULL;
631 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) 612 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
@@ -633,12 +614,31 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
633 dest = ip_vs_lookup_dest(svc, daddr, port); 614 dest = ip_vs_lookup_dest(svc, daddr, port);
634 if (!dest) 615 if (!dest)
635 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); 616 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
636 if (dest)
637 atomic_inc(&dest->refcnt);
638 ip_vs_service_put(svc);
639 return dest; 617 return dest;
640} 618}
641 619
620void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
621{
622 struct ip_vs_dest_dst *dest_dst = container_of(head,
623 struct ip_vs_dest_dst,
624 rcu_head);
625
626 dst_release(dest_dst->dst_cache);
627 kfree(dest_dst);
628}
629
630/* Release dest_dst and dst_cache for dest in user context */
631static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
632{
633 struct ip_vs_dest_dst *old;
634
635 old = rcu_dereference_protected(dest->dest_dst, 1);
636 if (old) {
637 RCU_INIT_POINTER(dest->dest_dst, NULL);
638 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
639 }
640}
641
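ip_vs_dest_dst_rcu_free() plus __ip_vs_dst_cache_reset() form the classic call_rcu() retirement path: unpublish the pointer first, then let a callback free it after all pre-existing readers are done. The generic shape, with illustrative names:

	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct blob {
		int payload;			/* illustrative */
		struct rcu_head rcu_head;
	};

	static void blob_rcu_free(struct rcu_head *head)
	{
		struct blob *b = container_of(head, struct blob, rcu_head);

		kfree(b);		/* runs only after the grace period */
	}

	static void blob_retire(struct blob __rcu **slot)
	{
		struct blob *old = rcu_dereference_protected(*slot, 1);

		if (old) {
			RCU_INIT_POINTER(*slot, NULL);	/* unpublish first */
			call_rcu(&old->rcu_head, blob_rcu_free);
		}
	}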
642/* 642/*
643 * Lookup dest by {svc,addr,port} in the destination trash. 643 * Lookup dest by {svc,addr,port} in the destination trash.
644 * The destination trash is used to hold the destinations that are removed 644 * The destination trash is used to hold the destinations that are removed
@@ -653,19 +653,25 @@ static struct ip_vs_dest *
653ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, 653ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
654 __be16 dport) 654 __be16 dport)
655{ 655{
656 struct ip_vs_dest *dest, *nxt; 656 struct ip_vs_dest *dest;
657 struct netns_ipvs *ipvs = net_ipvs(svc->net); 657 struct netns_ipvs *ipvs = net_ipvs(svc->net);
658 658
659 /* 659 /*
660 * Find the destination in trash 660 * Find the destination in trash
661 */ 661 */
662 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { 662 spin_lock_bh(&ipvs->dest_trash_lock);
663 list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
663 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 664 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
664 "dest->refcnt=%d\n", 665 "dest->refcnt=%d\n",
665 dest->vfwmark, 666 dest->vfwmark,
666 IP_VS_DBG_ADDR(svc->af, &dest->addr), 667 IP_VS_DBG_ADDR(svc->af, &dest->addr),
667 ntohs(dest->port), 668 ntohs(dest->port),
668 atomic_read(&dest->refcnt)); 669 atomic_read(&dest->refcnt));
670 /* We can not reuse dest while in grace period
671 * because conns still can use dest->svc
672 */
673 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
674 continue;
669 if (dest->af == svc->af && 675 if (dest->af == svc->af &&
670 ip_vs_addr_equal(svc->af, &dest->addr, daddr) && 676 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
671 dest->port == dport && 677 dest->port == dport &&
@@ -675,29 +681,27 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
675 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && 681 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
676 dest->vport == svc->port))) { 682 dest->vport == svc->port))) {
677 /* HIT */ 683 /* HIT */
678 return dest; 684 list_del(&dest->t_list);
679 } 685 ip_vs_dest_hold(dest);
680 686 goto out;
681 /*
682 * Try to purge the destination from trash if not referenced
683 */
684 if (atomic_read(&dest->refcnt) == 1) {
685 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
686 "from trash\n",
687 dest->vfwmark,
688 IP_VS_DBG_ADDR(svc->af, &dest->addr),
689 ntohs(dest->port));
690 list_del(&dest->n_list);
691 ip_vs_dst_reset(dest);
692 __ip_vs_unbind_svc(dest);
693 free_percpu(dest->stats.cpustats);
694 kfree(dest);
695 } 687 }
696 } 688 }
697 689
698 return NULL; 690 dest = NULL;
691
692out:
693 spin_unlock_bh(&ipvs->dest_trash_lock);
694
695 return dest;
699} 696}
700 697
698static void ip_vs_dest_free(struct ip_vs_dest *dest)
699{
700 __ip_vs_dst_cache_reset(dest);
701 __ip_vs_unbind_svc(dest);
702 free_percpu(dest->stats.cpustats);
703 kfree(dest);
704}
701 705
702/* 706/*
703 * Clean up all the destinations in the trash 707 * Clean up all the destinations in the trash
@@ -706,19 +710,18 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
706 * When the ip_vs_control_clearup is activated by ipvs module exit, 710 * When the ip_vs_control_clearup is activated by ipvs module exit,
707 * the service tables must have been flushed and all the connections 711 * the service tables must have been flushed and all the connections
708 * are expired, and the refcnt of each destination in the trash must 712 * are expired, and the refcnt of each destination in the trash must
709 * be 1, so we simply release them here. 713 * be 0, so we simply release them here.
710 */ 714 */
711static void ip_vs_trash_cleanup(struct net *net) 715static void ip_vs_trash_cleanup(struct net *net)
712{ 716{
713 struct ip_vs_dest *dest, *nxt; 717 struct ip_vs_dest *dest, *nxt;
714 struct netns_ipvs *ipvs = net_ipvs(net); 718 struct netns_ipvs *ipvs = net_ipvs(net);
715 719
716 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { 720 del_timer_sync(&ipvs->dest_trash_timer);
717 list_del(&dest->n_list); 721 /* No need to use dest_trash_lock */
718 ip_vs_dst_reset(dest); 722 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
719 __ip_vs_unbind_svc(dest); 723 list_del(&dest->t_list);
720 free_percpu(dest->stats.cpustats); 724 ip_vs_dest_free(dest);
721 kfree(dest);
722 } 725 }
723} 726}
724 727
@@ -768,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
768 struct ip_vs_dest_user_kern *udest, int add) 771 struct ip_vs_dest_user_kern *udest, int add)
769{ 772{
770 struct netns_ipvs *ipvs = net_ipvs(svc->net); 773 struct netns_ipvs *ipvs = net_ipvs(svc->net);
774 struct ip_vs_scheduler *sched;
771 int conn_flags; 775 int conn_flags;
772 776
773 /* set the weight and the flags */ 777 /* set the weight and the flags */
@@ -783,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
783 * Put the real service in rs_table if not present. 787 * Put the real service in rs_table if not present.
784 * For now only for NAT! 788 * For now only for NAT!
785 */ 789 */
786 write_lock_bh(&ipvs->rs_lock);
787 ip_vs_rs_hash(ipvs, dest); 790 ip_vs_rs_hash(ipvs, dest);
788 write_unlock_bh(&ipvs->rs_lock);
789 } 791 }
790 atomic_set(&dest->conn_flags, conn_flags); 792 atomic_set(&dest->conn_flags, conn_flags);
791 793
@@ -809,27 +811,20 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
809 dest->l_threshold = udest->l_threshold; 811 dest->l_threshold = udest->l_threshold;
810 812
811 spin_lock_bh(&dest->dst_lock); 813 spin_lock_bh(&dest->dst_lock);
812 ip_vs_dst_reset(dest); 814 __ip_vs_dst_cache_reset(dest);
813 spin_unlock_bh(&dest->dst_lock); 815 spin_unlock_bh(&dest->dst_lock);
814 816
815 if (add) 817 sched = rcu_dereference_protected(svc->scheduler, 1);
816 ip_vs_start_estimator(svc->net, &dest->stats);
817
818 write_lock_bh(&__ip_vs_svc_lock);
819
820 /* Wait until all other svc users go away */
821 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
822
823 if (add) { 818 if (add) {
824 list_add(&dest->n_list, &svc->destinations); 819 ip_vs_start_estimator(svc->net, &dest->stats);
820 list_add_rcu(&dest->n_list, &svc->destinations);
825 svc->num_dests++; 821 svc->num_dests++;
822 if (sched->add_dest)
823 sched->add_dest(svc, dest);
824 } else {
825 if (sched->upd_dest)
826 sched->upd_dest(svc, dest);
826 } 827 }
827
828 /* call the update_service, because server weight may be changed */
829 if (svc->scheduler->update_service)
830 svc->scheduler->update_service(svc);
831
832 write_unlock_bh(&__ip_vs_svc_lock);
833} 828}
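Two things changed above: destinations are now published with list_add_rcu(), and the blanket update_service() call is replaced by per-destination add_dest/upd_dest hooks fetched via rcu_dereference_protected(). The accessor's second argument is a lockdep condition; "1" is tolerable there only because every writer is already serialized by __ip_vs_mutex. The annotation in miniature, with illustrative types:

	#include <linux/mutex.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct policy { int weight; };		/* illustrative */

	struct cfg {
		struct policy __rcu *pol;	/* read by RCU readers */
	};

	static DEFINE_MUTEX(cfg_mutex);		/* stands in for __ip_vs_mutex */

	static void cfg_replace(struct cfg *c, struct policy *newp)
	{
		struct policy *old;

		mutex_lock(&cfg_mutex);
		/* Update-side read of an __rcu pointer: no rcu_read_lock(),
		 * but sparse/lockdep want the condition spelled out. */
		old = rcu_dereference_protected(c->pol,
						lockdep_is_held(&cfg_mutex));
		rcu_assign_pointer(c->pol, newp);	/* publish */
		mutex_unlock(&cfg_mutex);

		synchronize_rcu();	/* wait out readers of the old policy */
		kfree(old);
	}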
834 829
835 830
@@ -881,7 +876,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
881 atomic_set(&dest->persistconns, 0); 876 atomic_set(&dest->persistconns, 0);
882 atomic_set(&dest->refcnt, 1); 877 atomic_set(&dest->refcnt, 1);
883 878
884 INIT_LIST_HEAD(&dest->d_list); 879 INIT_HLIST_NODE(&dest->d_list);
885 spin_lock_init(&dest->dst_lock); 880 spin_lock_init(&dest->dst_lock);
886 spin_lock_init(&dest->stats.lock); 881 spin_lock_init(&dest->stats.lock);
887 __ip_vs_update_dest(svc, dest, udest, 1); 882 __ip_vs_update_dest(svc, dest, udest, 1);
@@ -923,10 +918,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
923 918
924 ip_vs_addr_copy(svc->af, &daddr, &udest->addr); 919 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
925 920
926 /* 921 /* We use function that requires RCU lock */
927 * Check if the dest already exists in the list 922 rcu_read_lock();
928 */
929 dest = ip_vs_lookup_dest(svc, &daddr, dport); 923 dest = ip_vs_lookup_dest(svc, &daddr, dport);
924 rcu_read_unlock();
930 925
931 if (dest != NULL) { 926 if (dest != NULL) {
932 IP_VS_DBG(1, "%s(): dest already exists\n", __func__); 927 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
@@ -948,11 +943,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
948 IP_VS_DBG_ADDR(svc->af, &dest->vaddr), 943 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
949 ntohs(dest->vport)); 944 ntohs(dest->vport));
950 945
951 /*
952 * Get the destination from the trash
953 */
954 list_del(&dest->n_list);
955
956 __ip_vs_update_dest(svc, dest, udest, 1); 946 __ip_vs_update_dest(svc, dest, udest, 1);
957 ret = 0; 947 ret = 0;
958 } else { 948 } else {
@@ -992,10 +982,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
992 982
993 ip_vs_addr_copy(svc->af, &daddr, &udest->addr); 983 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
994 984
995 /* 985 /* We use function that requires RCU lock */
996 * Lookup the destination list 986 rcu_read_lock();
997 */
998 dest = ip_vs_lookup_dest(svc, &daddr, dport); 987 dest = ip_vs_lookup_dest(svc, &daddr, dport);
988 rcu_read_unlock();
999 989
1000 if (dest == NULL) { 990 if (dest == NULL) {
1001 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); 991 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
@@ -1008,11 +998,21 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1008 return 0; 998 return 0;
1009} 999}
1010 1000
1001static void ip_vs_dest_wait_readers(struct rcu_head *head)
1002{
1003 struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest,
1004 rcu_head);
1005
1006 /* End of grace period after unlinking */
1007 clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
1008}
1009
1011 1010
1012/* 1011/*
1013 * Delete a destination (must be already unlinked from the service) 1012 * Delete a destination (must be already unlinked from the service)
1014 */ 1013 */
1015static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) 1014static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
1015 bool cleanup)
1016{ 1016{
1017 struct netns_ipvs *ipvs = net_ipvs(net); 1017 struct netns_ipvs *ipvs = net_ipvs(net);
1018 1018
@@ -1021,38 +1021,24 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1021 /* 1021 /*
1022 * Remove it from the d-linked list with the real services. 1022 * Remove it from the d-linked list with the real services.
1023 */ 1023 */
1024 write_lock_bh(&ipvs->rs_lock);
1025 ip_vs_rs_unhash(dest); 1024 ip_vs_rs_unhash(dest);
1026 write_unlock_bh(&ipvs->rs_lock);
1027 1025
1028 /* 1026 if (!cleanup) {
1029 * Decrease the refcnt of the dest, and free the dest 1027 set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
1030 * if nobody refers to it (refcnt=0). Otherwise, throw 1028 call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);
1031 * the destination into the trash.
1032 */
1033 if (atomic_dec_and_test(&dest->refcnt)) {
1034 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1035 dest->vfwmark,
1036 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1037 ntohs(dest->port));
1038 ip_vs_dst_reset(dest);
1039 /* simply decrease svc->refcnt here, let the caller check
1040 and release the service if nobody refers to it.
1041 Only user context can release destination and service,
1042 and only one user context can update virtual service at a
1043 time, so the operation here is OK */
1044 atomic_dec(&dest->svc->refcnt);
1045 free_percpu(dest->stats.cpustats);
1046 kfree(dest);
1047 } else {
1048 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1049 "dest->refcnt=%d\n",
1050 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1051 ntohs(dest->port),
1052 atomic_read(&dest->refcnt));
1053 list_add(&dest->n_list, &ipvs->dest_trash);
1054 atomic_inc(&dest->refcnt);
1055 } 1029 }
1030
1031 spin_lock_bh(&ipvs->dest_trash_lock);
1032 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
1033 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
1034 atomic_read(&dest->refcnt));
1035 if (list_empty(&ipvs->dest_trash) && !cleanup)
1036 mod_timer(&ipvs->dest_trash_timer,
1037 jiffies + IP_VS_DEST_TRASH_PERIOD);
1038 /* dest lives in trash without reference */
1039 list_add(&dest->t_list, &ipvs->dest_trash);
1040 spin_unlock_bh(&ipvs->dest_trash_lock);
1041 ip_vs_dest_put(dest);
1056} 1042}
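Net effect of the rewritten __ip_vs_del_dest(): no path frees a dest synchronously any more. Condensed, using the helpers this patch introduces:

	/* 1) make new lookups miss it */
	ip_vs_rs_unhash(dest);

	/* 2) one grace period in which conns may still touch dest->svc */
	set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state);
	call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers); /* clears bit */

	/* 3) park it; the trash timer reaps it once refcnt reaches 0 */
	spin_lock_bh(&ipvs->dest_trash_lock);
	list_add(&dest->t_list, &ipvs->dest_trash);
	spin_unlock_bh(&ipvs->dest_trash_lock);
	ip_vs_dest_put(dest);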
1057 1043
1058 1044
@@ -1068,14 +1054,16 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1068 /* 1054 /*
1069 * Remove it from the d-linked destination list. 1055 * Remove it from the d-linked destination list.
1070 */ 1056 */
1071 list_del(&dest->n_list); 1057 list_del_rcu(&dest->n_list);
1072 svc->num_dests--; 1058 svc->num_dests--;
1073 1059
1074 /* 1060 if (svcupd) {
1075 * Call the update_service function of its scheduler 1061 struct ip_vs_scheduler *sched;
1076 */ 1062
1077 if (svcupd && svc->scheduler->update_service) 1063 sched = rcu_dereference_protected(svc->scheduler, 1);
1078 svc->scheduler->update_service(svc); 1064 if (sched->del_dest)
1065 sched->del_dest(svc, dest);
1066 }
1079} 1067}
1080 1068
1081 1069
@@ -1090,37 +1078,56 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1090 1078
1091 EnterFunction(2); 1079 EnterFunction(2);
1092 1080
1081 /* We use function that requires RCU lock */
1082 rcu_read_lock();
1093 dest = ip_vs_lookup_dest(svc, &udest->addr, dport); 1083 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1084 rcu_read_unlock();
1094 1085
1095 if (dest == NULL) { 1086 if (dest == NULL) {
1096 IP_VS_DBG(1, "%s(): destination not found!\n", __func__); 1087 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1097 return -ENOENT; 1088 return -ENOENT;
1098 } 1089 }
1099 1090
1100 write_lock_bh(&__ip_vs_svc_lock);
1101
1102 /*
1103 * Wait until all other svc users go away.
1104 */
1105 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1106
1107 /* 1091 /*
1108 * Unlink dest from the service 1092 * Unlink dest from the service
1109 */ 1093 */
1110 __ip_vs_unlink_dest(svc, dest, 1); 1094 __ip_vs_unlink_dest(svc, dest, 1);
1111 1095
1112 write_unlock_bh(&__ip_vs_svc_lock);
1113
1114 /* 1096 /*
1115 * Delete the destination 1097 * Delete the destination
1116 */ 1098 */
1117 __ip_vs_del_dest(svc->net, dest); 1099 __ip_vs_del_dest(svc->net, dest, false);
1118 1100
1119 LeaveFunction(2); 1101 LeaveFunction(2);
1120 1102
1121 return 0; 1103 return 0;
1122} 1104}
1123 1105
1106static void ip_vs_dest_trash_expire(unsigned long data)
1107{
1108 struct net *net = (struct net *) data;
1109 struct netns_ipvs *ipvs = net_ipvs(net);
1110 struct ip_vs_dest *dest, *next;
1111
1112 spin_lock(&ipvs->dest_trash_lock);
1113 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
1114 /* Skip if dest is in grace period */
1115 if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state))
1116 continue;
1117 if (atomic_read(&dest->refcnt) > 0)
1118 continue;
1119 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
1120 dest->vfwmark,
1121 IP_VS_DBG_ADDR(dest->svc->af, &dest->addr),
1122 ntohs(dest->port));
1123 list_del(&dest->t_list);
1124 ip_vs_dest_free(dest);
1125 }
1126 if (!list_empty(&ipvs->dest_trash))
1127 mod_timer(&ipvs->dest_trash_timer,
1128 jiffies + IP_VS_DEST_TRASH_PERIOD);
1129 spin_unlock(&ipvs->dest_trash_lock);
1130}
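The handler above presumes a per-netns timer initialized elsewhere in the patch; with the 3.10-era timer API the arming side would look roughly like this (placement and surrounding init are assumptions, not shown in this hunk):

	#include <linux/timer.h>

	/* In the per-netns control init path (assumed location): */
	INIT_LIST_HEAD(&ipvs->dest_trash);
	spin_lock_init(&ipvs->dest_trash_lock);
	setup_timer(&ipvs->dest_trash_timer,
		    ip_vs_dest_trash_expire, (unsigned long) net);

The timer is re-armed from the handler while the trash stays non-empty, and torn down with del_timer_sync() in ip_vs_trash_cleanup(), as the earlier hunk shows.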
1124 1131
1125/* 1132/*
1126 * Add a service into the service hash table 1133 * Add a service into the service hash table
@@ -1157,9 +1164,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1157 } 1164 }
1158 1165
1159#ifdef CONFIG_IP_VS_IPV6 1166#ifdef CONFIG_IP_VS_IPV6
1160 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { 1167 if (u->af == AF_INET6) {
1161 ret = -EINVAL; 1168 __u32 plen = (__force __u32) u->netmask;
1162 goto out_err; 1169
1170 if (plen < 1 || plen > 128) {
1171 ret = -EINVAL;
1172 goto out_err;
1173 }
1163 } 1174 }
1164#endif 1175#endif
1165 1176
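The restructured check also documents intent: for AF_INET6 the netmask field carries a prefix length, not byte-ordered address data, hence the __force cast out of __be32 before the 1..128 range test. Standalone, as a hypothetical helper:

	#include <linux/errno.h>
	#include <linux/types.h>

	static int validate_v6_prefix(__be32 netmask)
	{
		/* Reinterpret: this field holds a prefix length for IPv6,
		 * so byte order does not apply; __force silences sparse. */
		__u32 plen = (__force __u32) netmask;

		return (plen >= 1 && plen <= 128) ? 0 : -EINVAL;
	}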
@@ -1176,7 +1187,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1176 } 1187 }
1177 1188
1178 /* I'm the first user of the service */ 1189 /* I'm the first user of the service */
1179 atomic_set(&svc->usecnt, 0);
1180 atomic_set(&svc->refcnt, 0); 1190 atomic_set(&svc->refcnt, 0);
1181 1191
1182 svc->af = u->af; 1192 svc->af = u->af;
@@ -1190,7 +1200,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1190 svc->net = net; 1200 svc->net = net;
1191 1201
1192 INIT_LIST_HEAD(&svc->destinations); 1202 INIT_LIST_HEAD(&svc->destinations);
1193 rwlock_init(&svc->sched_lock); 1203 spin_lock_init(&svc->sched_lock);
1194 spin_lock_init(&svc->stats.lock); 1204 spin_lock_init(&svc->stats.lock);
1195 1205
1196 /* Bind the scheduler */ 1206 /* Bind the scheduler */
@@ -1200,7 +1210,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1200 sched = NULL; 1210 sched = NULL;
1201 1211
1202 /* Bind the ct retriever */ 1212 /* Bind the ct retriever */
1203 ip_vs_bind_pe(svc, pe); 1213 RCU_INIT_POINTER(svc->pe, pe);
1204 pe = NULL; 1214 pe = NULL;
1205 1215
1206 /* Update the virtual service counters */ 1216 /* Update the virtual service counters */
@@ -1216,9 +1226,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1216 ipvs->num_services++; 1226 ipvs->num_services++;
1217 1227
1218 /* Hash the service into the service table */ 1228 /* Hash the service into the service table */
1219 write_lock_bh(&__ip_vs_svc_lock);
1220 ip_vs_svc_hash(svc); 1229 ip_vs_svc_hash(svc);
1221 write_unlock_bh(&__ip_vs_svc_lock);
1222 1230
1223 *svc_p = svc; 1231 *svc_p = svc;
1224 /* Now there is a service - full throttle */ 1232 /* Now there is a service - full throttle */
@@ -1228,15 +1236,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1228 1236
1229 out_err: 1237 out_err:
1230 if (svc != NULL) { 1238 if (svc != NULL) {
1231 ip_vs_unbind_scheduler(svc); 1239 ip_vs_unbind_scheduler(svc, sched);
1232 if (svc->inc) { 1240 ip_vs_service_free(svc);
1233 local_bh_disable();
1234 ip_vs_app_inc_put(svc->inc);
1235 local_bh_enable();
1236 }
1237 if (svc->stats.cpustats)
1238 free_percpu(svc->stats.cpustats);
1239 kfree(svc);
1240 } 1241 }
1241 ip_vs_scheduler_put(sched); 1242 ip_vs_scheduler_put(sched);
1242 ip_vs_pe_put(pe); 1243 ip_vs_pe_put(pe);
@@ -1280,18 +1281,27 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1280 } 1281 }
1281 1282
1282#ifdef CONFIG_IP_VS_IPV6 1283#ifdef CONFIG_IP_VS_IPV6
1283 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { 1284 if (u->af == AF_INET6) {
1284 ret = -EINVAL; 1285 __u32 plen = (__force __u32) u->netmask;
1285 goto out; 1286
1287 if (plen < 1 || plen > 128) {
1288 ret = -EINVAL;
1289 goto out;
1290 }
1286 } 1291 }
1287#endif 1292#endif
1288 1293
1289 write_lock_bh(&__ip_vs_svc_lock); 1294 old_sched = rcu_dereference_protected(svc->scheduler, 1);
1290 1295 if (sched != old_sched) {
1291 /* 1296 /* Bind the new scheduler */
1292 * Wait until all other svc users go away. 1297 ret = ip_vs_bind_scheduler(svc, sched);
1293 */ 1298 if (ret) {
1294 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); 1299 old_sched = sched;
1300 goto out;
1301 }
1302 /* Unbind the old scheduler on success */
1303 ip_vs_unbind_scheduler(svc, old_sched);
1304 }
1295 1305
1296 /* 1306 /*
1297 * Set the flags and timeout value 1307 * Set the flags and timeout value
@@ -1300,57 +1310,30 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1300 svc->timeout = u->timeout * HZ; 1310 svc->timeout = u->timeout * HZ;
1301 svc->netmask = u->netmask; 1311 svc->netmask = u->netmask;
1302 1312
1303 old_sched = svc->scheduler; 1313 old_pe = rcu_dereference_protected(svc->pe, 1);
1304 if (sched != old_sched) { 1314 if (pe != old_pe)
1305 /* 1315 rcu_assign_pointer(svc->pe, pe);
1306 * Unbind the old scheduler
1307 */
1308 if ((ret = ip_vs_unbind_scheduler(svc))) {
1309 old_sched = sched;
1310 goto out_unlock;
1311 }
1312 1316
1313 /*
1314 * Bind the new scheduler
1315 */
1316 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1317 /*
1318 * If ip_vs_bind_scheduler fails, restore the old
1319 * scheduler.
1320 * The main reason of failure is out of memory.
1321 *
1322 * The question is if the old scheduler can be
1323 * restored all the time. TODO: if it cannot be
1324 * restored some time, we must delete the service,
1325 * otherwise the system may crash.
1326 */
1327 ip_vs_bind_scheduler(svc, old_sched);
1328 old_sched = sched;
1329 goto out_unlock;
1330 }
1331 }
1332
1333 old_pe = svc->pe;
1334 if (pe != old_pe) {
1335 ip_vs_unbind_pe(svc);
1336 ip_vs_bind_pe(svc, pe);
1337 }
1338
1339out_unlock:
1340 write_unlock_bh(&__ip_vs_svc_lock);
1341out: 1317out:
1342 ip_vs_scheduler_put(old_sched); 1318 ip_vs_scheduler_put(old_sched);
1343 ip_vs_pe_put(old_pe); 1319 ip_vs_pe_put(old_pe);
1344 return ret; 1320 return ret;
1345} 1321}
1346 1322
1323static void ip_vs_service_rcu_free(struct rcu_head *head)
1324{
1325 struct ip_vs_service *svc;
1326
1327 svc = container_of(head, struct ip_vs_service, rcu_head);
1328 ip_vs_service_free(svc);
1329}
1347 1330
1348/* 1331/*
1349 * Delete a service from the service list 1332 * Delete a service from the service list
1350 * - The service must be unlinked, unlocked and not referenced! 1333 * - The service must be unlinked, unlocked and not referenced!
1351 * - We are called under _bh lock 1334 * - We are called under _bh lock
1352 */ 1335 */
1353static void __ip_vs_del_service(struct ip_vs_service *svc) 1336static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
1354{ 1337{
1355 struct ip_vs_dest *dest, *nxt; 1338 struct ip_vs_dest *dest, *nxt;
1356 struct ip_vs_scheduler *old_sched; 1339 struct ip_vs_scheduler *old_sched;
@@ -1366,27 +1349,20 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1366 ip_vs_stop_estimator(svc->net, &svc->stats); 1349 ip_vs_stop_estimator(svc->net, &svc->stats);
1367 1350
1368 /* Unbind scheduler */ 1351 /* Unbind scheduler */
1369 old_sched = svc->scheduler; 1352 old_sched = rcu_dereference_protected(svc->scheduler, 1);
1370 ip_vs_unbind_scheduler(svc); 1353 ip_vs_unbind_scheduler(svc, old_sched);
1371 ip_vs_scheduler_put(old_sched); 1354 ip_vs_scheduler_put(old_sched);
1372 1355
1373 /* Unbind persistence engine */ 1356 /* Unbind persistence engine, keep svc->pe */
1374 old_pe = svc->pe; 1357 old_pe = rcu_dereference_protected(svc->pe, 1);
1375 ip_vs_unbind_pe(svc);
1376 ip_vs_pe_put(old_pe); 1358 ip_vs_pe_put(old_pe);
1377 1359
1378 /* Unbind app inc */
1379 if (svc->inc) {
1380 ip_vs_app_inc_put(svc->inc);
1381 svc->inc = NULL;
1382 }
1383
1384 /* 1360 /*
1385 * Unlink the whole destination list 1361 * Unlink the whole destination list
1386 */ 1362 */
1387 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1363 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1388 __ip_vs_unlink_dest(svc, dest, 0); 1364 __ip_vs_unlink_dest(svc, dest, 0);
1389 __ip_vs_del_dest(svc->net, dest); 1365 __ip_vs_del_dest(svc->net, dest, cleanup);
1390 } 1366 }
1391 1367
1392 /* 1368 /*
@@ -1400,13 +1376,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1400 /* 1376 /*
1401 * Free the service if nobody refers to it 1377 * Free the service if nobody refers to it
1402 */ 1378 */
1403 if (atomic_read(&svc->refcnt) == 0) { 1379 if (atomic_dec_and_test(&svc->refcnt)) {
1404 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", 1380 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",
1405 svc->fwmark, 1381 svc->fwmark,
1406 IP_VS_DBG_ADDR(svc->af, &svc->addr), 1382 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1407 ntohs(svc->port), atomic_read(&svc->usecnt)); 1383 ntohs(svc->port));
1408 free_percpu(svc->stats.cpustats); 1384 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);
1409 kfree(svc);
1410 } 1385 }
1411 1386
1412 /* decrease the module use count */ 1387 /* decrease the module use count */
@@ -1416,23 +1391,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1416/* 1391/*
1417 * Unlink a service from list and try to delete it if its refcnt reached 0 1392 * Unlink a service from list and try to delete it if its refcnt reached 0
1418 */ 1393 */
1419static void ip_vs_unlink_service(struct ip_vs_service *svc) 1394static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)
1420{ 1395{
1396 /* Hold svc to avoid double release from dest_trash */
1397 atomic_inc(&svc->refcnt);
1421 /* 1398 /*
1422 * Unhash it from the service table 1399 * Unhash it from the service table
1423 */ 1400 */
1424 write_lock_bh(&__ip_vs_svc_lock);
1425
1426 ip_vs_svc_unhash(svc); 1401 ip_vs_svc_unhash(svc);
1427 1402
1428 /* 1403 __ip_vs_del_service(svc, cleanup);
1429 * Wait until all the svc users go away.
1430 */
1431 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1432
1433 __ip_vs_del_service(svc);
1434
1435 write_unlock_bh(&__ip_vs_svc_lock);
1436} 1404}
1437 1405
1438/* 1406/*
@@ -1442,7 +1410,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
1442{ 1410{
1443 if (svc == NULL) 1411 if (svc == NULL)
1444 return -EEXIST; 1412 return -EEXIST;
1445 ip_vs_unlink_service(svc); 1413 ip_vs_unlink_service(svc, false);
1446 1414
1447 return 0; 1415 return 0;
1448} 1416}
@@ -1451,19 +1419,20 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
1451/* 1419/*
1452 * Flush all the virtual services 1420 * Flush all the virtual services
1453 */ 1421 */
1454static int ip_vs_flush(struct net *net) 1422static int ip_vs_flush(struct net *net, bool cleanup)
1455{ 1423{
1456 int idx; 1424 int idx;
1457 struct ip_vs_service *svc, *nxt; 1425 struct ip_vs_service *svc;
1426 struct hlist_node *n;
1458 1427
1459 /* 1428 /*
1460 * Flush the service table hashed by <netns,protocol,addr,port> 1429 * Flush the service table hashed by <netns,protocol,addr,port>
1461 */ 1430 */
1462 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1431 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1463 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], 1432 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
1464 s_list) { 1433 s_list) {
1465 if (net_eq(svc->net, net)) 1434 if (net_eq(svc->net, net))
1466 ip_vs_unlink_service(svc); 1435 ip_vs_unlink_service(svc, cleanup);
1467 } 1436 }
1468 } 1437 }
1469 1438
@@ -1471,10 +1440,10 @@ static int ip_vs_flush(struct net *net)
1471 * Flush the service table hashed by fwmark 1440 * Flush the service table hashed by fwmark
1472 */ 1441 */
1473 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1442 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474 list_for_each_entry_safe(svc, nxt, 1443 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
1475 &ip_vs_svc_fwm_table[idx], f_list) { 1444 f_list) {
1476 if (net_eq(svc->net, net)) 1445 if (net_eq(svc->net, net))
1477 ip_vs_unlink_service(svc); 1446 ip_vs_unlink_service(svc, cleanup);
1478 } 1447 }
1479 } 1448 }
1480 1449
@@ -1490,32 +1459,32 @@ void ip_vs_service_net_cleanup(struct net *net)
1490 EnterFunction(2); 1459 EnterFunction(2);
1491 /* Check for "full" addressed entries */ 1460 /* Check for "full" addressed entries */
1492 mutex_lock(&__ip_vs_mutex); 1461 mutex_lock(&__ip_vs_mutex);
1493 ip_vs_flush(net); 1462 ip_vs_flush(net, true);
1494 mutex_unlock(&__ip_vs_mutex); 1463 mutex_unlock(&__ip_vs_mutex);
1495 LeaveFunction(2); 1464 LeaveFunction(2);
1496} 1465}
1497/* 1466
1498 * Release dst hold by dst_cache 1467/* Put all references for device (dst_cache) */
1499 */
1500static inline void 1468static inline void
1501__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) 1469ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
1502{ 1470{
1471 struct ip_vs_dest_dst *dest_dst;
1472
1503 spin_lock_bh(&dest->dst_lock); 1473 spin_lock_bh(&dest->dst_lock);
1504 if (dest->dst_cache && dest->dst_cache->dev == dev) { 1474 dest_dst = rcu_dereference_protected(dest->dest_dst, 1);
1475 if (dest_dst && dest_dst->dst_cache->dev == dev) {
1505 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", 1476 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1506 dev->name, 1477 dev->name,
1507 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1478 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1508 ntohs(dest->port), 1479 ntohs(dest->port),
1509 atomic_read(&dest->refcnt)); 1480 atomic_read(&dest->refcnt));
1510 ip_vs_dst_reset(dest); 1481 __ip_vs_dst_cache_reset(dest);
1511 } 1482 }
1512 spin_unlock_bh(&dest->dst_lock); 1483 spin_unlock_bh(&dest->dst_lock);
1513 1484
1514} 1485}
1515/* 1486/* Netdev event receiver
1516 * Netdev event receiver 1487 * Currently only NETDEV_DOWN is handled to release refs to cached dsts
1517 * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1518 * a device that is "unregister" it must be released.
1519 */ 1488 */
1520static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, 1489static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1521 void *ptr) 1490 void *ptr)
@@ -1527,35 +1496,37 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1527 struct ip_vs_dest *dest; 1496 struct ip_vs_dest *dest;
1528 unsigned int idx; 1497 unsigned int idx;
1529 1498
1530 if (event != NETDEV_UNREGISTER || !ipvs) 1499 if (event != NETDEV_DOWN || !ipvs)
1531 return NOTIFY_DONE; 1500 return NOTIFY_DONE;
1532 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); 1501 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1533 EnterFunction(2); 1502 EnterFunction(2);
1534 mutex_lock(&__ip_vs_mutex); 1503 mutex_lock(&__ip_vs_mutex);
1535 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1504 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1536 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1505 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1537 if (net_eq(svc->net, net)) { 1506 if (net_eq(svc->net, net)) {
1538 list_for_each_entry(dest, &svc->destinations, 1507 list_for_each_entry(dest, &svc->destinations,
1539 n_list) { 1508 n_list) {
1540 __ip_vs_dev_reset(dest, dev); 1509 ip_vs_forget_dev(dest, dev);
1541 } 1510 }
1542 } 1511 }
1543 } 1512 }
1544 1513
1545 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1514 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1546 if (net_eq(svc->net, net)) { 1515 if (net_eq(svc->net, net)) {
1547 list_for_each_entry(dest, &svc->destinations, 1516 list_for_each_entry(dest, &svc->destinations,
1548 n_list) { 1517 n_list) {
1549 __ip_vs_dev_reset(dest, dev); 1518 ip_vs_forget_dev(dest, dev);
1550 } 1519 }
1551 } 1520 }
1552 1521
1553 } 1522 }
1554 } 1523 }
1555 1524
1556 list_for_each_entry(dest, &ipvs->dest_trash, n_list) { 1525 spin_lock_bh(&ipvs->dest_trash_lock);
1557 __ip_vs_dev_reset(dest, dev); 1526 list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
1527 ip_vs_forget_dev(dest, dev);
1558 } 1528 }
1529 spin_unlock_bh(&ipvs->dest_trash_lock);
1559 mutex_unlock(&__ip_vs_mutex); 1530 mutex_unlock(&__ip_vs_mutex);
1560 LeaveFunction(2); 1531 LeaveFunction(2);
1561 return NOTIFY_DONE; 1532 return NOTIFY_DONE;
@@ -1568,12 +1539,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
1568{ 1539{
1569 struct ip_vs_dest *dest; 1540 struct ip_vs_dest *dest;
1570 1541
1571 write_lock_bh(&__ip_vs_svc_lock);
1572 list_for_each_entry(dest, &svc->destinations, n_list) { 1542 list_for_each_entry(dest, &svc->destinations, n_list) {
1573 ip_vs_zero_stats(&dest->stats); 1543 ip_vs_zero_stats(&dest->stats);
1574 } 1544 }
1575 ip_vs_zero_stats(&svc->stats); 1545 ip_vs_zero_stats(&svc->stats);
1576 write_unlock_bh(&__ip_vs_svc_lock);
1577 return 0; 1546 return 0;
1578} 1547}
1579 1548
@@ -1583,14 +1552,14 @@ static int ip_vs_zero_all(struct net *net)
1583 struct ip_vs_service *svc; 1552 struct ip_vs_service *svc;
1584 1553
1585 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1554 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1586 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1555 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1587 if (net_eq(svc->net, net)) 1556 if (net_eq(svc->net, net))
1588 ip_vs_zero_service(svc); 1557 ip_vs_zero_service(svc);
1589 } 1558 }
1590 } 1559 }
1591 1560
1592 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1561 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1593 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1562 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1594 if (net_eq(svc->net, net)) 1563 if (net_eq(svc->net, net))
1595 ip_vs_zero_service(svc); 1564 ip_vs_zero_service(svc);
1596 } 1565 }
@@ -1918,7 +1887,7 @@ static struct ctl_table vs_vars[] = {
1918 1887
1919struct ip_vs_iter { 1888struct ip_vs_iter {
1920 struct seq_net_private p; /* Do not move this, netns depends upon it*/ 1889 struct seq_net_private p; /* Do not move this, netns depends upon it*/
1921 struct list_head *table; 1890 struct hlist_head *table;
1922 int bucket; 1891 int bucket;
1923}; 1892};
1924 1893
@@ -1951,7 +1920,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1951 1920
1952 /* look in hash by protocol */ 1921 /* look in hash by protocol */
1953 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1922 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1954 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1923 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
1955 if (net_eq(svc->net, net) && pos-- == 0) { 1924 if (net_eq(svc->net, net) && pos-- == 0) {
1956 iter->table = ip_vs_svc_table; 1925 iter->table = ip_vs_svc_table;
1957 iter->bucket = idx; 1926 iter->bucket = idx;
@@ -1962,7 +1931,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1962 1931
1963 /* keep looking in fwmark */ 1932 /* keep looking in fwmark */
1964 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1933 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1965 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1934 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
1935 f_list) {
1966 if (net_eq(svc->net, net) && pos-- == 0) { 1936 if (net_eq(svc->net, net) && pos-- == 0) {
1967 iter->table = ip_vs_svc_fwm_table; 1937 iter->table = ip_vs_svc_fwm_table;
1968 iter->bucket = idx; 1938 iter->bucket = idx;
@@ -1975,17 +1945,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1975} 1945}
1976 1946
1977static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) 1947static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1978__acquires(__ip_vs_svc_lock) 1948 __acquires(RCU)
1979{ 1949{
1980 1950 rcu_read_lock();
1981 read_lock_bh(&__ip_vs_svc_lock);
1982 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; 1951 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1983} 1952}
1984 1953
1985 1954
1986static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1955static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1987{ 1956{
1988 struct list_head *e; 1957 struct hlist_node *e;
1989 struct ip_vs_iter *iter; 1958 struct ip_vs_iter *iter;
1990 struct ip_vs_service *svc; 1959 struct ip_vs_service *svc;
1991 1960
@@ -1998,13 +1967,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1998 1967
1999 if (iter->table == ip_vs_svc_table) { 1968 if (iter->table == ip_vs_svc_table) {
2000 /* next service in table hashed by protocol */ 1969 /* next service in table hashed by protocol */
2001 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) 1970 e = rcu_dereference(hlist_next_rcu(&svc->s_list));
2002 return list_entry(e, struct ip_vs_service, s_list); 1971 if (e)
2003 1972 return hlist_entry(e, struct ip_vs_service, s_list);
2004 1973
2005 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1974 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2006 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], 1975 hlist_for_each_entry_rcu(svc,
2007 s_list) { 1976 &ip_vs_svc_table[iter->bucket],
1977 s_list) {
2008 return svc; 1978 return svc;
2009 } 1979 }
2010 } 1980 }
@@ -2015,13 +1985,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2015 } 1985 }
2016 1986
2017 /* next service in hashed by fwmark */ 1987 /* next service in hashed by fwmark */
2018 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) 1988 e = rcu_dereference(hlist_next_rcu(&svc->f_list));
2019 return list_entry(e, struct ip_vs_service, f_list); 1989 if (e)
1990 return hlist_entry(e, struct ip_vs_service, f_list);
2020 1991
2021 scan_fwmark: 1992 scan_fwmark:
2022 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1993 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2023 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], 1994 hlist_for_each_entry_rcu(svc,
2024 f_list) 1995 &ip_vs_svc_fwm_table[iter->bucket],
1996 f_list)
2025 return svc; 1997 return svc;
2026 } 1998 }
2027 1999
@@ -2029,9 +2001,9 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2029} 2001}
2030 2002
2031static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) 2003static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2032__releases(__ip_vs_svc_lock) 2004 __releases(RCU)
2033{ 2005{
2034 read_unlock_bh(&__ip_vs_svc_lock); 2006 rcu_read_unlock();
2035} 2007}
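The seq_file hooks trade the bh-disabling read lock for an RCU section; unlike the old lock this does not block writers, so entries can come and go mid-dump, which is acceptable for /proc output. The __acquires(RCU)/__releases(RCU) markers are sparse-only annotations. The pair as a generic idiom:

	#include <linux/rcupdate.h>
	#include <linux/seq_file.h>

	/* Generic shape of an RCU-protected seq_file iterator. */
	static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
		__acquires(RCU)
	{
		rcu_read_lock();  /* pins every object printed until stop */
		return *pos ? NULL : SEQ_START_TOKEN;
	}

	static void foo_seq_stop(struct seq_file *seq, void *v)
		__releases(RCU)
	{
		rcu_read_unlock();
	}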
2036 2008
2037 2009
@@ -2049,6 +2021,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2049 const struct ip_vs_service *svc = v; 2021 const struct ip_vs_service *svc = v;
2050 const struct ip_vs_iter *iter = seq->private; 2022 const struct ip_vs_iter *iter = seq->private;
2051 const struct ip_vs_dest *dest; 2023 const struct ip_vs_dest *dest;
2024 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
2052 2025
2053 if (iter->table == ip_vs_svc_table) { 2026 if (iter->table == ip_vs_svc_table) {
2054#ifdef CONFIG_IP_VS_IPV6 2027#ifdef CONFIG_IP_VS_IPV6
@@ -2057,18 +2030,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2057 ip_vs_proto_name(svc->protocol), 2030 ip_vs_proto_name(svc->protocol),
2058 &svc->addr.in6, 2031 &svc->addr.in6,
2059 ntohs(svc->port), 2032 ntohs(svc->port),
2060 svc->scheduler->name); 2033 sched->name);
2061 else 2034 else
2062#endif 2035#endif
2063 seq_printf(seq, "%s %08X:%04X %s %s ", 2036 seq_printf(seq, "%s %08X:%04X %s %s ",
2064 ip_vs_proto_name(svc->protocol), 2037 ip_vs_proto_name(svc->protocol),
2065 ntohl(svc->addr.ip), 2038 ntohl(svc->addr.ip),
2066 ntohs(svc->port), 2039 ntohs(svc->port),
2067 svc->scheduler->name, 2040 sched->name,
2068 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2041 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2069 } else { 2042 } else {
2070 seq_printf(seq, "FWM %08X %s %s", 2043 seq_printf(seq, "FWM %08X %s %s",
2071 svc->fwmark, svc->scheduler->name, 2044 svc->fwmark, sched->name,
2072 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2045 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2073 } 2046 }
2074 2047
@@ -2079,7 +2052,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2079 else 2052 else
2080 seq_putc(seq, '\n'); 2053 seq_putc(seq, '\n');
2081 2054
2082 list_for_each_entry(dest, &svc->destinations, n_list) { 2055 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
2083#ifdef CONFIG_IP_VS_IPV6 2056#ifdef CONFIG_IP_VS_IPV6
2084 if (dest->af == AF_INET6) 2057 if (dest->af == AF_INET6)
2085 seq_printf(seq, 2058 seq_printf(seq,
@@ -2173,7 +2146,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2173{ 2146{
2174 struct net *net = seq_file_single_net(seq); 2147 struct net *net = seq_file_single_net(seq);
2175 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; 2148 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2176 struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats; 2149 struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
2177 struct ip_vs_stats_user rates; 2150 struct ip_vs_stats_user rates;
2178 int i; 2151 int i;
2179 2152
@@ -2389,7 +2362,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2389 2362
2390 if (cmd == IP_VS_SO_SET_FLUSH) { 2363 if (cmd == IP_VS_SO_SET_FLUSH) {
2391 /* Flush the virtual service */ 2364 /* Flush the virtual service */
2392 ret = ip_vs_flush(net); 2365 ret = ip_vs_flush(net, false);
2393 goto out_unlock; 2366 goto out_unlock;
2394 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 2367 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2395 /* Set timeout values for (tcp tcpfin udp) */ 2368 /* Set timeout values for (tcp tcpfin udp) */
@@ -2424,11 +2397,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2424 } 2397 }
2425 2398
2426 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2399 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2400 rcu_read_lock();
2427 if (usvc.fwmark == 0) 2401 if (usvc.fwmark == 0)
2428 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, 2402 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2429 &usvc.addr, usvc.port); 2403 &usvc.addr, usvc.port);
2430 else 2404 else
2431 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); 2405 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2406 rcu_read_unlock();
2432 2407
2433 if (cmd != IP_VS_SO_SET_ADD 2408 if (cmd != IP_VS_SO_SET_ADD
2434 && (svc == NULL || svc->protocol != usvc.protocol)) { 2409 && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2480,11 +2455,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2480static void 2455static void
2481ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2456ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2482{ 2457{
2458 struct ip_vs_scheduler *sched;
2459
2460 sched = rcu_dereference_protected(src->scheduler, 1);
2483 dst->protocol = src->protocol; 2461 dst->protocol = src->protocol;
2484 dst->addr = src->addr.ip; 2462 dst->addr = src->addr.ip;
2485 dst->port = src->port; 2463 dst->port = src->port;
2486 dst->fwmark = src->fwmark; 2464 dst->fwmark = src->fwmark;
2487 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); 2465 strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
2488 dst->flags = src->flags; 2466 dst->flags = src->flags;
2489 dst->timeout = src->timeout / HZ; 2467 dst->timeout = src->timeout / HZ;
2490 dst->netmask = src->netmask; 2468 dst->netmask = src->netmask;
@@ -2503,7 +2481,7 @@ __ip_vs_get_service_entries(struct net *net,
2503 int ret = 0; 2481 int ret = 0;
2504 2482
2505 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2483 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2506 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2484 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2507 /* Only expose IPv4 entries to old interface */ 2485 /* Only expose IPv4 entries to old interface */
2508 if (svc->af != AF_INET || !net_eq(svc->net, net)) 2486 if (svc->af != AF_INET || !net_eq(svc->net, net))
2509 continue; 2487 continue;
@@ -2522,7 +2500,7 @@ __ip_vs_get_service_entries(struct net *net,
2522 } 2500 }
2523 2501
2524 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2502 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2525 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2503 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2526 /* Only expose IPv4 entries to old interface */ 2504 /* Only expose IPv4 entries to old interface */
2527 if (svc->af != AF_INET || !net_eq(svc->net, net)) 2505 if (svc->af != AF_INET || !net_eq(svc->net, net))
2528 continue; 2506 continue;
@@ -2551,11 +2529,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2551 union nf_inet_addr addr = { .ip = get->addr }; 2529 union nf_inet_addr addr = { .ip = get->addr };
2552 int ret = 0; 2530 int ret = 0;
2553 2531
2532 rcu_read_lock();
2554 if (get->fwmark) 2533 if (get->fwmark)
2555 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); 2534 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2556 else 2535 else
2557 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, 2536 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2558 get->port); 2537 get->port);
2538 rcu_read_unlock();
2559 2539
2560 if (svc) { 2540 if (svc) {
2561 int count = 0; 2541 int count = 0;
@@ -2738,12 +2718,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2738 2718
2739 entry = (struct ip_vs_service_entry *)arg; 2719 entry = (struct ip_vs_service_entry *)arg;
2740 addr.ip = entry->addr; 2720 addr.ip = entry->addr;
2721 rcu_read_lock();
2741 if (entry->fwmark) 2722 if (entry->fwmark)
2742 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); 2723 svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2743 else 2724 else
2744 svc = __ip_vs_service_find(net, AF_INET, 2725 svc = __ip_vs_service_find(net, AF_INET,
2745 entry->protocol, &addr, 2726 entry->protocol, &addr,
2746 entry->port); 2727 entry->port);
2728 rcu_read_unlock();
2747 if (svc) { 2729 if (svc) {
2748 ip_vs_copy_service(entry, svc); 2730 ip_vs_copy_service(entry, svc);
2749 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2731 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2900,6 +2882,8 @@ nla_put_failure:
2900static int ip_vs_genl_fill_service(struct sk_buff *skb, 2882static int ip_vs_genl_fill_service(struct sk_buff *skb,
2901 struct ip_vs_service *svc) 2883 struct ip_vs_service *svc)
2902{ 2884{
2885 struct ip_vs_scheduler *sched;
2886 struct ip_vs_pe *pe;
2903 struct nlattr *nl_service; 2887 struct nlattr *nl_service;
2904 struct ip_vs_flags flags = { .flags = svc->flags, 2888 struct ip_vs_flags flags = { .flags = svc->flags,
2905 .mask = ~0 }; 2889 .mask = ~0 };
@@ -2916,16 +2900,17 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
2916 } else { 2900 } else {
2917 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || 2901 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2918 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || 2902 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2919 nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port)) 2903 nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2920 goto nla_put_failure; 2904 goto nla_put_failure;
2921 } 2905 }
2922 2906
2923 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) || 2907 sched = rcu_dereference_protected(svc->scheduler, 1);
2924 (svc->pe && 2908 pe = rcu_dereference_protected(svc->pe, 1);
2925 nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) || 2909 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
2910 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
2926 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || 2911 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2927 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || 2912 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2928 nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) 2913 nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2929 goto nla_put_failure; 2914 goto nla_put_failure;
2930 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) 2915 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2931 goto nla_put_failure; 2916 goto nla_put_failure;
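The nla_put_u16 to nla_put_be16 changes are sparse hygiene rather than wire-format changes: svc->port is __be16, and the _be16 helpers keep the byte-order annotation intact end to end, with no ntohs()/htons() round trip. A minimal round trip with a hypothetical attribute:

	#include <net/netlink.h>

	enum { MY_ATTR_PORT = 1 };	/* hypothetical attribute id */

	static int put_port(struct sk_buff *skb, __be16 port)
	{
		return nla_put_be16(skb, MY_ATTR_PORT, port); /* no ntohs() */
	}

	static __be16 get_port(const struct nlattr *nla)
	{
		return nla_get_be16(nla);	/* stays network order */
	}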
@@ -2971,7 +2956,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
2971 2956
2972 mutex_lock(&__ip_vs_mutex); 2957 mutex_lock(&__ip_vs_mutex);
2973 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2958 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2974 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 2959 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2975 if (++idx <= start || !net_eq(svc->net, net)) 2960 if (++idx <= start || !net_eq(svc->net, net))
2976 continue; 2961 continue;
2977 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2962 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
@@ -2982,7 +2967,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
2982 } 2967 }
2983 2968
2984 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2969 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2985 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 2970 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2986 if (++idx <= start || !net_eq(svc->net, net)) 2971 if (++idx <= start || !net_eq(svc->net, net))
2987 continue; 2972 continue;
2988 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2973 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
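[note] The dump loops follow the service tables from list_head to hlist_head (one pointer per bucket head instead of two). The hlist_for_each_entry() form used here is the 3.9+ variant that dropped the separate struct hlist_node cursor. A hypothetical condensed walker in the same shape:

    #include <linux/list.h>
    #include <linux/printk.h>

    struct svc { int id; struct hlist_node s_list; };

    static void dump_all(struct hlist_head *table, int size)
    {
            struct svc *svc;
            int i;

            for (i = 0; i < size; i++)
                    hlist_for_each_entry(svc, &table[i], s_list)
                            pr_info("svc %d\n", svc->id);
    }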
@@ -3038,15 +3023,17 @@ static int ip_vs_genl_parse_service(struct net *net,
3038 } else { 3023 } else {
3039 usvc->protocol = nla_get_u16(nla_protocol); 3024 usvc->protocol = nla_get_u16(nla_protocol);
3040 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); 3025 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3041 usvc->port = nla_get_u16(nla_port); 3026 usvc->port = nla_get_be16(nla_port);
3042 usvc->fwmark = 0; 3027 usvc->fwmark = 0;
3043 } 3028 }
3044 3029
3030 rcu_read_lock();
3045 if (usvc->fwmark) 3031 if (usvc->fwmark)
3046 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); 3032 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
3047 else 3033 else
3048 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, 3034 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
3049 &usvc->addr, usvc->port); 3035 &usvc->addr, usvc->port);
3036 rcu_read_unlock();
3050 *ret_svc = svc; 3037 *ret_svc = svc;
3051 3038
3052 /* If a full entry was requested, check for the additional fields */ 3039 /* If a full entry was requested, check for the additional fields */
@@ -3076,7 +3063,7 @@ static int ip_vs_genl_parse_service(struct net *net,
3076 usvc->sched_name = nla_data(nla_sched); 3063 usvc->sched_name = nla_data(nla_sched);
3077 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL; 3064 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3078 usvc->timeout = nla_get_u32(nla_timeout); 3065 usvc->timeout = nla_get_u32(nla_timeout);
3079 usvc->netmask = nla_get_u32(nla_netmask); 3066 usvc->netmask = nla_get_be32(nla_netmask);
3080 } 3067 }
3081 3068
3082 return 0; 3069 return 0;
@@ -3102,7 +3089,7 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3102 return -EMSGSIZE; 3089 return -EMSGSIZE;
3103 3090
3104 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || 3091 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3105 nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) || 3092 nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3106 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, 3093 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3107 (atomic_read(&dest->conn_flags) & 3094 (atomic_read(&dest->conn_flags) &
3108 IP_VS_CONN_F_FWD_MASK)) || 3095 IP_VS_CONN_F_FWD_MASK)) ||
@@ -3211,7 +3198,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3211 memset(udest, 0, sizeof(*udest)); 3198 memset(udest, 0, sizeof(*udest));
3212 3199
3213 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); 3200 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3214 udest->port = nla_get_u16(nla_port); 3201 udest->port = nla_get_be16(nla_port);
3215 3202
3216 /* If a full entry was requested, check for the additional fields */ 3203 /* If a full entry was requested, check for the additional fields */
3217 if (full_entry) { 3204 if (full_entry) {
@@ -3236,8 +3223,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3236 return 0; 3223 return 0;
3237} 3224}
3238 3225
3239static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, 3226static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
3240 const char *mcast_ifn, __be32 syncid) 3227 const char *mcast_ifn, __u32 syncid)
3241{ 3228{
3242 struct nlattr *nl_daemon; 3229 struct nlattr *nl_daemon;
3243 3230
@@ -3258,8 +3245,8 @@ nla_put_failure:
3258 return -EMSGSIZE; 3245 return -EMSGSIZE;
3259} 3246}
3260 3247
3261static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, 3248static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
3262 const char *mcast_ifn, __be32 syncid, 3249 const char *mcast_ifn, __u32 syncid,
3263 struct netlink_callback *cb) 3250 struct netlink_callback *cb)
3264{ 3251{
3265 void *hdr; 3252 void *hdr;
@@ -3398,7 +3385,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3398 mutex_lock(&__ip_vs_mutex); 3385 mutex_lock(&__ip_vs_mutex);
3399 3386
3400 if (cmd == IPVS_CMD_FLUSH) { 3387 if (cmd == IPVS_CMD_FLUSH) {
3401 ret = ip_vs_flush(net); 3388 ret = ip_vs_flush(net, false);
3402 goto out; 3389 goto out;
3403 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3390 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3404 ret = ip_vs_genl_set_config(net, info->attrs); 3391 ret = ip_vs_genl_set_config(net, info->attrs);
@@ -3790,13 +3777,14 @@ int __net_init ip_vs_control_net_init(struct net *net)
3790 int idx; 3777 int idx;
3791 struct netns_ipvs *ipvs = net_ipvs(net); 3778 struct netns_ipvs *ipvs = net_ipvs(net);
3792 3779
3793 rwlock_init(&ipvs->rs_lock);
3794
3795 /* Initialize rs_table */ 3780 /* Initialize rs_table */
3796 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 3781 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3797 INIT_LIST_HEAD(&ipvs->rs_table[idx]); 3782 INIT_HLIST_HEAD(&ipvs->rs_table[idx]);
3798 3783
3799 INIT_LIST_HEAD(&ipvs->dest_trash); 3784 INIT_LIST_HEAD(&ipvs->dest_trash);
3785 spin_lock_init(&ipvs->dest_trash_lock);
3786 setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
3787 (unsigned long) net);
3800 atomic_set(&ipvs->ftpsvc_counter, 0); 3788 atomic_set(&ipvs->ftpsvc_counter, 0);
3801 atomic_set(&ipvs->nullsvc_counter, 0); 3789 atomic_set(&ipvs->nullsvc_counter, 0);
3802 3790
@@ -3826,6 +3814,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)
3826{ 3814{
3827 struct netns_ipvs *ipvs = net_ipvs(net); 3815 struct netns_ipvs *ipvs = net_ipvs(net);
3828 3816
3817 /* Some dest can be in grace period even before cleanup, we have to
3818 * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called.
3819 */
3820 rcu_barrier();
3829 ip_vs_trash_cleanup(net); 3821 ip_vs_trash_cleanup(net);
3830 ip_vs_stop_estimator(net, &ipvs->tot_stats); 3822 ip_vs_stop_estimator(net, &ipvs->tot_stats);
3831 ip_vs_control_net_cleanup_sysctl(net); 3823 ip_vs_control_net_cleanup_sysctl(net);
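[note] The rcu_barrier() added before ip_vs_trash_cleanup() is not the same thing as synchronize_rcu(): the latter only waits for readers, while rcu_barrier() also waits for every already-queued call_rcu()/kfree_rcu() callback to finish, so a dest still in its RCU grace period cannot land on the trash after the trash has been emptied. A hedged illustration of the distinction (names hypothetical):

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct obj { struct rcu_head rcu; };

    static void obj_release(struct obj *o)
    {
            kfree_rcu(o, rcu);   /* queued; runs after a grace period elapses */
    }

    static void subsystem_teardown(void)
    {
            /* synchronize_rcu() would only wait for readers; rcu_barrier()
             * additionally drains all pending kfree_rcu() callbacks.
             */
            rcu_barrier();
    }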
@@ -3871,10 +3863,10 @@ int __init ip_vs_control_init(void)
3871 3863
3872 EnterFunction(2); 3864 EnterFunction(2);
3873 3865
3874 /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ 3866 /* Initialize svc_table, ip_vs_svc_fwm_table */
3875 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 3867 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3876 INIT_LIST_HEAD(&ip_vs_svc_table[idx]); 3868 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]);
3877 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); 3869 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3878 } 3870 }
3879 3871
3880 smp_wmb(); /* Do we really need it now ? */ 3872 smp_wmb(); /* Do we really need it now ? */
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 7f3b0cc00b7a..ccab120df45e 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -51,7 +51,7 @@
51 * IPVS DH bucket 51 * IPVS DH bucket
52 */ 52 */
53struct ip_vs_dh_bucket { 53struct ip_vs_dh_bucket {
54 struct ip_vs_dest *dest; /* real server (cache) */ 54 struct ip_vs_dest __rcu *dest; /* real server (cache) */
55}; 55};
56 56
57/* 57/*
@@ -64,6 +64,10 @@ struct ip_vs_dh_bucket {
64#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS) 64#define IP_VS_DH_TAB_SIZE (1 << IP_VS_DH_TAB_BITS)
65#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1) 65#define IP_VS_DH_TAB_MASK (IP_VS_DH_TAB_SIZE - 1)
66 66
67struct ip_vs_dh_state {
68 struct ip_vs_dh_bucket buckets[IP_VS_DH_TAB_SIZE];
69 struct rcu_head rcu_head;
70};
67 71
68/* 72/*
69 * Returns hash value for IPVS DH entry 73 * Returns hash value for IPVS DH entry
@@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad
85 * Get ip_vs_dest associated with supplied parameters. 89 * Get ip_vs_dest associated with supplied parameters.
86 */ 90 */
87static inline struct ip_vs_dest * 91static inline struct ip_vs_dest *
88ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, 92ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)
89 const union nf_inet_addr *addr)
90{ 93{
91 return (tbl[ip_vs_dh_hashkey(af, addr)]).dest; 94 return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);
92} 95}
93 96
94 97
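[note] Marking the cached dest pointer __rcu makes sparse reject any access that bypasses the RCU accessors. The packet path reads it with rcu_dereference() inside the scheduler, which already runs under rcu_read_lock(). A sketch of the accessor shape, with generic names:

    #include <linux/rcupdate.h>

    struct dest;                                   /* opaque for the sketch */
    struct bucket { struct dest __rcu *dest; };    /* mirrors ip_vs_dh_bucket */

    /* packet path: caller already holds rcu_read_lock() */
    static struct dest *bucket_get(struct bucket *tbl, unsigned int hash)
    {
            return rcu_dereference(tbl[hash].dest);
    }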
@@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,
96 * Assign all the hash buckets of the specified table with the service. 99 * Assign all the hash buckets of the specified table with the service.
97 */ 100 */
98static int 101static int
99ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) 102ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)
100{ 103{
101 int i; 104 int i;
102 struct ip_vs_dh_bucket *b; 105 struct ip_vs_dh_bucket *b;
103 struct list_head *p; 106 struct list_head *p;
104 struct ip_vs_dest *dest; 107 struct ip_vs_dest *dest;
108 bool empty;
105 109
106 b = tbl; 110 b = &s->buckets[0];
107 p = &svc->destinations; 111 p = &svc->destinations;
112 empty = list_empty(p);
108 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { 113 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
109 if (list_empty(p)) { 114 dest = rcu_dereference_protected(b->dest, 1);
110 b->dest = NULL; 115 if (dest)
111 } else { 116 ip_vs_dest_put(dest);
117 if (empty)
118 RCU_INIT_POINTER(b->dest, NULL);
119 else {
112 if (p == &svc->destinations) 120 if (p == &svc->destinations)
113 p = p->next; 121 p = p->next;
114 122
115 dest = list_entry(p, struct ip_vs_dest, n_list); 123 dest = list_entry(p, struct ip_vs_dest, n_list);
116 atomic_inc(&dest->refcnt); 124 ip_vs_dest_hold(dest);
117 b->dest = dest; 125 RCU_INIT_POINTER(b->dest, dest);
118 126
119 p = p->next; 127 p = p->next;
120 } 128 }
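[note] The reassign loop shows the updater side of the same pointer: drop the reference on the old dest, take one on the new dest, then publish. RCU_INIT_POINTER() is used instead of rcu_assign_pointer() because the dest being published is already globally visible, so no memory barrier is needed before the store (the diff's own comment: no ordering constraints for the refcnt). A self-contained sketch of that replace-under-exclusion pattern, with ip_vs_dest_hold()/ip_vs_dest_put() modeled as bare atomics:

    #include <linux/atomic.h>
    #include <linux/rcupdate.h>

    struct dest { atomic_t refcnt; };
    struct bucket { struct dest __rcu *dest; };

    /* caller serializes all updates (e.g. holds the service mutex) */
    static void bucket_replace(struct bucket *b, struct dest *new)
    {
            struct dest *old = rcu_dereference_protected(b->dest, 1);

            if (old)
                    atomic_dec(&old->refcnt);        /* stand-in for ip_vs_dest_put() */
            if (new) {
                    atomic_inc(&new->refcnt);        /* stand-in for ip_vs_dest_hold() */
                    RCU_INIT_POINTER(b->dest, new);  /* already-visible object: no barrier */
            } else {
                    RCU_INIT_POINTER(b->dest, NULL);
            }
    }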
@@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)
127/* 135/*
128 * Flush all the hash buckets of the specified table. 136 * Flush all the hash buckets of the specified table.
129 */ 137 */
130static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) 138static void ip_vs_dh_flush(struct ip_vs_dh_state *s)
131{ 139{
132 int i; 140 int i;
133 struct ip_vs_dh_bucket *b; 141 struct ip_vs_dh_bucket *b;
142 struct ip_vs_dest *dest;
134 143
135 b = tbl; 144 b = &s->buckets[0];
136 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { 145 for (i=0; i<IP_VS_DH_TAB_SIZE; i++) {
137 if (b->dest) { 146 dest = rcu_dereference_protected(b->dest, 1);
138 atomic_dec(&b->dest->refcnt); 147 if (dest) {
139 b->dest = NULL; 148 ip_vs_dest_put(dest);
149 RCU_INIT_POINTER(b->dest, NULL);
140 } 150 }
141 b++; 151 b++;
142 } 152 }
@@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)
145 155
146static int ip_vs_dh_init_svc(struct ip_vs_service *svc) 156static int ip_vs_dh_init_svc(struct ip_vs_service *svc)
147{ 157{
148 struct ip_vs_dh_bucket *tbl; 158 struct ip_vs_dh_state *s;
149 159
150 /* allocate the DH table for this service */ 160 /* allocate the DH table for this service */
151 tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, 161 s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL);
152 GFP_KERNEL); 162 if (s == NULL)
153 if (tbl == NULL)
154 return -ENOMEM; 163 return -ENOMEM;
155 164
156 svc->sched_data = tbl; 165 svc->sched_data = s;
157 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for " 166 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "
158 "current service\n", 167 "current service\n",
159 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); 168 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
160 169
161 /* assign the hash buckets with the updated service */ 170 /* assign the hash buckets with current dests */
162 ip_vs_dh_assign(tbl, svc); 171 ip_vs_dh_reassign(s, svc);
163 172
164 return 0; 173 return 0;
165} 174}
166 175
167 176
168static int ip_vs_dh_done_svc(struct ip_vs_service *svc) 177static void ip_vs_dh_done_svc(struct ip_vs_service *svc)
169{ 178{
170 struct ip_vs_dh_bucket *tbl = svc->sched_data; 179 struct ip_vs_dh_state *s = svc->sched_data;
171 180
172 /* got to clean up hash buckets here */ 181 /* got to clean up hash buckets here */
173 ip_vs_dh_flush(tbl); 182 ip_vs_dh_flush(s);
174 183
175 /* release the table itself */ 184 /* release the table itself */
176 kfree(svc->sched_data); 185 kfree_rcu(s, rcu_head);
177 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n", 186 IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
178 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); 187 sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);
179
180 return 0;
181} 188}
182 189
183 190
184static int ip_vs_dh_update_svc(struct ip_vs_service *svc) 191static int ip_vs_dh_dest_changed(struct ip_vs_service *svc,
192 struct ip_vs_dest *dest)
185{ 193{
186 struct ip_vs_dh_bucket *tbl = svc->sched_data; 194 struct ip_vs_dh_state *s = svc->sched_data;
187
188 /* got to clean up hash buckets here */
189 ip_vs_dh_flush(tbl);
190 195
191 /* assign the hash buckets with the updated service */ 196 /* assign the hash buckets with the updated service */
192 ip_vs_dh_assign(tbl, svc); 197 ip_vs_dh_reassign(s, svc);
193 198
194 return 0; 199 return 0;
195} 200}
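[note] Three API points in this hunk: done_service can no longer fail, so its return type becomes void; the update_service hook is split into add_dest/del_dest events wired to one handler, so the table is rebuilt only when the destination set actually changes; and the scheduler state is released with kfree_rcu() rather than kfree(), because packet-path readers that found the table under rcu_read_lock() may still be walking it. A minimal sketch of the deferred free, with a hypothetical state struct:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct state {
            struct rcu_head rcu_head;
            /* ... hash buckets ... */
    };

    static void state_destroy(struct state *s)
    {
            /* defer the actual kfree() past the next grace period so
             * in-flight rcu_read_lock() readers finish first
             */
            kfree_rcu(s, rcu_head);
    }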
@@ -212,19 +217,20 @@ static struct ip_vs_dest *
212ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 217ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
213{ 218{
214 struct ip_vs_dest *dest; 219 struct ip_vs_dest *dest;
215 struct ip_vs_dh_bucket *tbl; 220 struct ip_vs_dh_state *s;
216 struct ip_vs_iphdr iph; 221 struct ip_vs_iphdr iph;
217 222
218 ip_vs_fill_iph_addr_only(svc->af, skb, &iph); 223 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
219 224
220 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 225 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
221 226
222 tbl = (struct ip_vs_dh_bucket *)svc->sched_data; 227 s = (struct ip_vs_dh_state *) svc->sched_data;
223 dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); 228 dest = ip_vs_dh_get(svc->af, s, &iph.daddr);
224 if (!dest 229 if (!dest
225 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 230 || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
226 || atomic_read(&dest->weight) <= 0 231 || atomic_read(&dest->weight) <= 0
227 || is_overloaded(dest)) { 232 || is_overloaded(dest)) {
233 ip_vs_scheduler_err(svc, "no destination available");
228 return NULL; 234 return NULL;
229 } 235 }
230 236
@@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =
248 .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), 254 .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
249 .init_service = ip_vs_dh_init_svc, 255 .init_service = ip_vs_dh_init_svc,
250 .done_service = ip_vs_dh_done_svc, 256 .done_service = ip_vs_dh_done_svc,
251 .update_service = ip_vs_dh_update_svc, 257 .add_dest = ip_vs_dh_dest_changed,
258 .del_dest = ip_vs_dh_dest_changed,
252 .schedule = ip_vs_dh_schedule, 259 .schedule = ip_vs_dh_schedule,
253}; 260};
254 261
@@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void)
262static void __exit ip_vs_dh_cleanup(void) 269static void __exit ip_vs_dh_cleanup(void)
263{ 270{
264 unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); 271 unregister_ip_vs_scheduler(&ip_vs_dh_scheduler);
272 synchronize_rcu();
265} 273}
266 274
267 275
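[note] The synchronize_rcu() on module exit, repeated for every scheduler below, is the standard unload fence: once the scheduler is unregistered no new readers can find it, but readers already inside its callbacks must drain before the module text is freed. A hedged skeleton of that ordering (the scheduler name is hypothetical):

    #include <linux/module.h>
    #include <linux/rcupdate.h>

    static int __init example_init(void)
    {
            return 0;   /* register_ip_vs_scheduler(&my_sched) would go here */
    }

    static void __exit example_exit(void)
    {
            /* unregister_ip_vs_scheduler(&my_sched); -- no new RCU readers */
            synchronize_rcu();   /* wait out readers already in our callbacks */
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");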
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 0fac6017b6fb..6bee6d0c73a5 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -56,7 +56,7 @@
56 * Make a summary from each cpu 56 * Make a summary from each cpu
57 */ 57 */
58static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, 58static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
59 struct ip_vs_cpu_stats *stats) 59 struct ip_vs_cpu_stats __percpu *stats)
60{ 60{
61 int i; 61 int i;
62 62
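[note] The __percpu annotation is a sparse fix in the same spirit as __rcu: the stats argument is a per-cpu allocation and must only be touched through the per-cpu accessors. A hedged sketch of summing such a structure (the field name is hypothetical; the real code also uses u64_stats sequence counters, omitted here):

    #include <linux/cpumask.h>
    #include <linux/percpu.h>
    #include <linux/types.h>

    struct cpu_stats { u64 inpkts; };

    static u64 sum_inpkts(struct cpu_stats __percpu *stats)
    {
            u64 total = 0;
            int cpu;

            for_each_possible_cpu(cpu)
                    total += per_cpu_ptr(stats, cpu)->inpkts;
            return total;
    }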
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 4f53a5f04437..77c173282f38 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
267 * hopefully it will succeed on the retransmitted 267 * hopefully it will succeed on the retransmitted
268 * packet. 268 * packet.
269 */ 269 */
270 rcu_read_lock();
270 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, 271 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
271 iph->ihl * 4, 272 iph->ihl * 4,
272 start-data, end-start, 273 start-data, end-start,
273 buf, buf_len); 274 buf, buf_len);
275 rcu_read_unlock();
274 if (ret) { 276 if (ret) {
275 ip_vs_nfct_expect_related(skb, ct, n_cp, 277 ip_vs_nfct_expect_related(skb, ct, n_cp,
276 IPPROTO_TCP, 0, 0); 278 IPPROTO_TCP, 0, 0);
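[note] The rcu_read_lock() around nf_nat_mangle_tcp_packet() appears to be needed because the NAT mangling path dereferences RCU-managed protocol/helper state, so the caller pins it for the duration of the call. A condensed analogue of invoking an RCU-managed hook (this is not the nf_nat API, just the shape of the pattern):

    #include <linux/rcupdate.h>

    struct hook { int (*mangle)(void *pkt); };

    static struct hook __rcu *active_hook;   /* set elsewhere via rcu_assign_pointer() */

    static int call_mangler(void *pkt)
    {
            struct hook *h;
            int ret = 0;

            rcu_read_lock();   /* hook owner unloads only after a grace period */
            h = rcu_dereference(active_hook);
            if (h)
                    ret = h->mangle(pkt);
            rcu_read_unlock();
            return ret;
    }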
@@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void)
480 int rv; 482 int rv;
481 483
482 rv = register_pernet_subsys(&ip_vs_ftp_ops); 484 rv = register_pernet_subsys(&ip_vs_ftp_ops);
485 /* rcu_barrier() is called by netns on error */
483 return rv; 486 return rv;
484} 487}
485 488
@@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void)
489static void __exit ip_vs_ftp_exit(void) 492static void __exit ip_vs_ftp_exit(void)
490{ 493{
491 unregister_pernet_subsys(&ip_vs_ftp_ops); 494 unregister_pernet_subsys(&ip_vs_ftp_ops);
495 /* rcu_barrier() is called by netns */
492} 496}
493 497
494 498
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index fdd89b9564ea..5ea26bd87743 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -90,11 +90,12 @@
90 * IP address and its destination server 90 * IP address and its destination server
91 */ 91 */
92struct ip_vs_lblc_entry { 92struct ip_vs_lblc_entry {
93 struct list_head list; 93 struct hlist_node list;
94 int af; /* address family */ 94 int af; /* address family */
95 union nf_inet_addr addr; /* destination IP address */ 95 union nf_inet_addr addr; /* destination IP address */
96 struct ip_vs_dest *dest; /* real server (cache) */ 96 struct ip_vs_dest __rcu *dest; /* real server (cache) */
97 unsigned long lastuse; /* last used time */ 97 unsigned long lastuse; /* last used time */
98 struct rcu_head rcu_head;
98}; 99};
99 100
100 101
@@ -102,12 +103,14 @@ struct ip_vs_lblc_entry {
102 * IPVS lblc hash table 103 * IPVS lblc hash table
103 */ 104 */
104struct ip_vs_lblc_table { 105struct ip_vs_lblc_table {
105 struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ 106 struct rcu_head rcu_head;
107 struct hlist_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */
108 struct timer_list periodic_timer; /* collect stale entries */
106 atomic_t entries; /* number of entries */ 109 atomic_t entries; /* number of entries */
107 int max_size; /* maximum size of entries */ 110 int max_size; /* maximum size of entries */
108 struct timer_list periodic_timer; /* collect stale entries */
109 int rover; /* rover for expire check */ 111 int rover; /* rover for expire check */
110 int counter; /* counter for no expire */ 112 int counter; /* counter for no expire */
113 bool dead;
111}; 114};
112 115
113 116
@@ -129,13 +132,16 @@ static ctl_table vs_vars_table[] = {
129 132
130static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) 133static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
131{ 134{
132 list_del(&en->list); 135 struct ip_vs_dest *dest;
136
137 hlist_del_rcu(&en->list);
133 /* 138 /*
134 * We don't kfree dest because it is referred either by its service 139 * We don't kfree dest because it is referred either by its service
135 * or the trash dest list. 140 * or the trash dest list.
136 */ 141 */
137 atomic_dec(&en->dest->refcnt); 142 dest = rcu_dereference_protected(en->dest, 1);
138 kfree(en); 143 ip_vs_dest_put(dest);
144 kfree_rcu(en, rcu_head);
139} 145}
140 146
141 147
@@ -165,15 +171,12 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
165{ 171{
166 unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr); 172 unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr);
167 173
168 list_add(&en->list, &tbl->bucket[hash]); 174 hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
169 atomic_inc(&tbl->entries); 175 atomic_inc(&tbl->entries);
170} 176}
171 177
172 178
173/* 179/* Get ip_vs_lblc_entry associated with supplied parameters. */
174 * Get ip_vs_lblc_entry associated with supplied parameters. Called under read
175 * lock
176 */
177static inline struct ip_vs_lblc_entry * 180static inline struct ip_vs_lblc_entry *
178ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl, 181ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
179 const union nf_inet_addr *addr) 182 const union nf_inet_addr *addr)
@@ -181,7 +184,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
181 unsigned int hash = ip_vs_lblc_hashkey(af, addr); 184 unsigned int hash = ip_vs_lblc_hashkey(af, addr);
182 struct ip_vs_lblc_entry *en; 185 struct ip_vs_lblc_entry *en;
183 186
184 list_for_each_entry(en, &tbl->bucket[hash], list) 187 hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
185 if (ip_vs_addr_equal(af, &en->addr, addr)) 188 if (ip_vs_addr_equal(af, &en->addr, addr))
186 return en; 189 return en;
187 190
@@ -191,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,
191 194
192/* 195/*
193 * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP 196 * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
194 * address to a server. Called under write lock. 197 * address to a server. Called under spin lock.
195 */ 198 */
196static inline struct ip_vs_lblc_entry * 199static inline struct ip_vs_lblc_entry *
197ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, 200ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
@@ -209,14 +212,20 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
209 ip_vs_addr_copy(dest->af, &en->addr, daddr); 212 ip_vs_addr_copy(dest->af, &en->addr, daddr);
210 en->lastuse = jiffies; 213 en->lastuse = jiffies;
211 214
212 atomic_inc(&dest->refcnt); 215 ip_vs_dest_hold(dest);
213 en->dest = dest; 216 RCU_INIT_POINTER(en->dest, dest);
214 217
215 ip_vs_lblc_hash(tbl, en); 218 ip_vs_lblc_hash(tbl, en);
216 } else if (en->dest != dest) { 219 } else {
217 atomic_dec(&en->dest->refcnt); 220 struct ip_vs_dest *old_dest;
218 atomic_inc(&dest->refcnt); 221
219 en->dest = dest; 222 old_dest = rcu_dereference_protected(en->dest, 1);
223 if (old_dest != dest) {
224 ip_vs_dest_put(old_dest);
225 ip_vs_dest_hold(dest);
226 /* No ordering constraints for refcnt */
227 RCU_INIT_POINTER(en->dest, dest);
228 }
220 } 229 }
221 230
222 return en; 231 return en;
@@ -226,17 +235,22 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,
226/* 235/*
227 * Flush all the entries of the specified table. 236 * Flush all the entries of the specified table.
228 */ 237 */
229static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) 238static void ip_vs_lblc_flush(struct ip_vs_service *svc)
230{ 239{
231 struct ip_vs_lblc_entry *en, *nxt; 240 struct ip_vs_lblc_table *tbl = svc->sched_data;
241 struct ip_vs_lblc_entry *en;
242 struct hlist_node *next;
232 int i; 243 int i;
233 244
245 spin_lock_bh(&svc->sched_lock);
246 tbl->dead = 1;
234 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { 247 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
235 list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { 248 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
236 ip_vs_lblc_free(en); 249 ip_vs_lblc_free(en);
237 atomic_dec(&tbl->entries); 250 atomic_dec(&tbl->entries);
238 } 251 }
239 } 252 }
253 spin_unlock_bh(&svc->sched_lock);
240} 254}
241 255
242static int sysctl_lblc_expiration(struct ip_vs_service *svc) 256static int sysctl_lblc_expiration(struct ip_vs_service *svc)
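[note] tbl->dead closes a race between done_service() tearing the table down and a concurrent ip_vs_lblc_schedule() inserting a fresh entry: both sides now take sched_lock (a plain spinlock, replacing the old rwlock), and the insert path bails out once dead is set. A condensed sketch of the two sides, with generic names (sched_lock is assumed to be initialized by the core when the service is created):

    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct table { bool dead; /* ... hash buckets ... */ };
    struct service { spinlock_t sched_lock; struct table *sched_data; };

    static void table_flush(struct service *svc)     /* done_service() side */
    {
            struct table *tbl = svc->sched_data;

            spin_lock_bh(&svc->sched_lock);
            tbl->dead = true;                /* no inserts may follow */
            /* ... free every cache entry ... */
            spin_unlock_bh(&svc->sched_lock);
    }

    static bool table_insert(struct service *svc)    /* schedule() side */
    {
            struct table *tbl = svc->sched_data;
            bool ok;

            spin_lock_bh(&svc->sched_lock);
            ok = !tbl->dead;
            if (ok) {
                    /* ... allocate and hash the new cache entry ... */
            }
            spin_unlock_bh(&svc->sched_lock);
            return ok;
    }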
@@ -252,15 +266,16 @@ static int sysctl_lblc_expiration(struct ip_vs_service *svc)
252static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) 266static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
253{ 267{
254 struct ip_vs_lblc_table *tbl = svc->sched_data; 268 struct ip_vs_lblc_table *tbl = svc->sched_data;
255 struct ip_vs_lblc_entry *en, *nxt; 269 struct ip_vs_lblc_entry *en;
270 struct hlist_node *next;
256 unsigned long now = jiffies; 271 unsigned long now = jiffies;
257 int i, j; 272 int i, j;
258 273
259 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { 274 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
260 j = (j + 1) & IP_VS_LBLC_TAB_MASK; 275 j = (j + 1) & IP_VS_LBLC_TAB_MASK;
261 276
262 write_lock(&svc->sched_lock); 277 spin_lock(&svc->sched_lock);
263 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 278 hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
264 if (time_before(now, 279 if (time_before(now,
265 en->lastuse + 280 en->lastuse +
266 sysctl_lblc_expiration(svc))) 281 sysctl_lblc_expiration(svc)))
@@ -269,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
269 ip_vs_lblc_free(en); 284 ip_vs_lblc_free(en);
270 atomic_dec(&tbl->entries); 285 atomic_dec(&tbl->entries);
271 } 286 }
272 write_unlock(&svc->sched_lock); 287 spin_unlock(&svc->sched_lock);
273 } 288 }
274 tbl->rover = j; 289 tbl->rover = j;
275} 290}
@@ -293,7 +308,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)
293 unsigned long now = jiffies; 308 unsigned long now = jiffies;
294 int goal; 309 int goal;
295 int i, j; 310 int i, j;
296 struct ip_vs_lblc_entry *en, *nxt; 311 struct ip_vs_lblc_entry *en;
312 struct hlist_node *next;
297 313
298 if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { 314 if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
299 /* do full expiration check */ 315 /* do full expiration check */
@@ -314,8 +330,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)
314 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { 330 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
315 j = (j + 1) & IP_VS_LBLC_TAB_MASK; 331 j = (j + 1) & IP_VS_LBLC_TAB_MASK;
316 332
317 write_lock(&svc->sched_lock); 333 spin_lock(&svc->sched_lock);
318 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 334 hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
319 if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) 335 if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
320 continue; 336 continue;
321 337
@@ -323,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
323 atomic_dec(&tbl->entries); 339 atomic_dec(&tbl->entries);
324 goal--; 340 goal--;
325 } 341 }
326 write_unlock(&svc->sched_lock); 342 spin_unlock(&svc->sched_lock);
327 if (goal <= 0) 343 if (goal <= 0)
328 break; 344 break;
329 } 345 }
@@ -354,11 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
354 * Initialize the hash buckets 370 * Initialize the hash buckets
355 */ 371 */
356 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { 372 for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
357 INIT_LIST_HEAD(&tbl->bucket[i]); 373 INIT_HLIST_HEAD(&tbl->bucket[i]);
358 } 374 }
359 tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; 375 tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
360 tbl->rover = 0; 376 tbl->rover = 0;
361 tbl->counter = 1; 377 tbl->counter = 1;
378 tbl->dead = 0;
362 379
363 /* 380 /*
364 * Hook periodic timer for garbage collection 381 * Hook periodic timer for garbage collection
@@ -371,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
371} 388}
372 389
373 390
374static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) 391static void ip_vs_lblc_done_svc(struct ip_vs_service *svc)
375{ 392{
376 struct ip_vs_lblc_table *tbl = svc->sched_data; 393 struct ip_vs_lblc_table *tbl = svc->sched_data;
377 394
@@ -379,14 +396,12 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
379 del_timer_sync(&tbl->periodic_timer); 396 del_timer_sync(&tbl->periodic_timer);
380 397
381 /* got to clean up table entries here */ 398 /* got to clean up table entries here */
382 ip_vs_lblc_flush(tbl); 399 ip_vs_lblc_flush(svc);
383 400
384 /* release the table itself */ 401 /* release the table itself */
385 kfree(tbl); 402 kfree_rcu(tbl, rcu_head);
386 IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", 403 IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
387 sizeof(*tbl)); 404 sizeof(*tbl));
388
389 return 0;
390} 405}
391 406
392 407
@@ -408,7 +423,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
408 * The server with weight=0 is quiesced and will not receive any 423 * The server with weight=0 is quiesced and will not receive any
409 * new connection. 424 * new connection.
410 */ 425 */
411 list_for_each_entry(dest, &svc->destinations, n_list) { 426 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
412 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 427 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
413 continue; 428 continue;
414 if (atomic_read(&dest->weight) > 0) { 429 if (atomic_read(&dest->weight) > 0) {
@@ -423,7 +438,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)
423 * Find the destination with the least load. 438 * Find the destination with the least load.
424 */ 439 */
425 nextstage: 440 nextstage:
426 list_for_each_entry_continue(dest, &svc->destinations, n_list) { 441 list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
427 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 442 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
428 continue; 443 continue;
429 444
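[note] With the destination list RCU-ified, every scheduler walk over svc->destinations becomes list_for_each_entry_rcu() and runs in the packet path without taking the service lock at all. A minimal sketch of such a lockless pick (weight shown as a plain int; the real code reads an atomic_t):

    #include <linux/rculist.h>

    struct dest { int weight; struct list_head n_list; };

    /* caller is in an RCU read-side section (the packet path) */
    static struct dest *pick_first_weighted(struct list_head *destinations)
    {
            struct dest *d;

            list_for_each_entry_rcu(d, destinations, n_list)
                    if (d->weight > 0)
                            return d;   /* valid only until rcu_read_unlock() */
            return NULL;
    }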
@@ -457,7 +472,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
457 if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { 472 if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
458 struct ip_vs_dest *d; 473 struct ip_vs_dest *d;
459 474
460 list_for_each_entry(d, &svc->destinations, n_list) { 475 list_for_each_entry_rcu(d, &svc->destinations, n_list) {
461 if (atomic_read(&d->activeconns)*2 476 if (atomic_read(&d->activeconns)*2
462 < atomic_read(&d->weight)) { 477 < atomic_read(&d->weight)) {
463 return 1; 478 return 1;
@@ -484,7 +499,6 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
484 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 499 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
485 500
486 /* First look in our cache */ 501 /* First look in our cache */
487 read_lock(&svc->sched_lock);
488 en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); 502 en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr);
489 if (en) { 503 if (en) {
490 /* We only hold a read lock, but this is atomic */ 504 /* We only hold a read lock, but this is atomic */
@@ -499,14 +513,11 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
499 * free up entries from the trash at any time. 513 * free up entries from the trash at any time.
500 */ 514 */
501 515
502 if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) 516 dest = rcu_dereference(en->dest);
503 dest = en->dest; 517 if ((dest->flags & IP_VS_DEST_F_AVAILABLE) &&
518 atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
519 goto out;
504 } 520 }
505 read_unlock(&svc->sched_lock);
506
507 /* If the destination has a weight and is not overloaded, use it */
508 if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc))
509 goto out;
510 521
511 /* No cache entry or it is invalid, time to schedule */ 522 /* No cache entry or it is invalid, time to schedule */
512 dest = __ip_vs_lblc_schedule(svc); 523 dest = __ip_vs_lblc_schedule(svc);
@@ -516,9 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
516 } 527 }
517 528
518 /* If we fail to create a cache entry, we'll just use the valid dest */ 529 /* If we fail to create a cache entry, we'll just use the valid dest */
519 write_lock(&svc->sched_lock); 530 spin_lock_bh(&svc->sched_lock);
520 ip_vs_lblc_new(tbl, &iph.daddr, dest); 531 if (!tbl->dead)
521 write_unlock(&svc->sched_lock); 532 ip_vs_lblc_new(tbl, &iph.daddr, dest);
533 spin_unlock_bh(&svc->sched_lock);
522 534
523out: 535out:
524 IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", 536 IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n",
@@ -621,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void)
621{ 633{
622 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); 634 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
623 unregister_pernet_subsys(&ip_vs_lblc_ops); 635 unregister_pernet_subsys(&ip_vs_lblc_ops);
636 synchronize_rcu();
624} 637}
625 638
626 639
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index c03b6a3ade2f..50123c2ab484 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,40 +89,44 @@
89 */ 89 */
90struct ip_vs_dest_set_elem { 90struct ip_vs_dest_set_elem {
91 struct list_head list; /* list link */ 91 struct list_head list; /* list link */
92 struct ip_vs_dest *dest; /* destination server */ 92 struct ip_vs_dest __rcu *dest; /* destination server */
93 struct rcu_head rcu_head;
93}; 94};
94 95
95struct ip_vs_dest_set { 96struct ip_vs_dest_set {
96 atomic_t size; /* set size */ 97 atomic_t size; /* set size */
97 unsigned long lastmod; /* last modified time */ 98 unsigned long lastmod; /* last modified time */
98 struct list_head list; /* destination list */ 99 struct list_head list; /* destination list */
99 rwlock_t lock; /* lock for this list */
100}; 100};
101 101
102 102
103static struct ip_vs_dest_set_elem * 103static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
104ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) 104 struct ip_vs_dest *dest, bool check)
105{ 105{
106 struct ip_vs_dest_set_elem *e; 106 struct ip_vs_dest_set_elem *e;
107 107
108 list_for_each_entry(e, &set->list, list) { 108 if (check) {
109 if (e->dest == dest) 109 list_for_each_entry(e, &set->list, list) {
110 /* already existed */ 110 struct ip_vs_dest *d;
111 return NULL; 111
112 d = rcu_dereference_protected(e->dest, 1);
113 if (d == dest)
114 /* already existed */
115 return;
116 }
112 } 117 }
113 118
114 e = kmalloc(sizeof(*e), GFP_ATOMIC); 119 e = kmalloc(sizeof(*e), GFP_ATOMIC);
115 if (e == NULL) 120 if (e == NULL)
116 return NULL; 121 return;
117 122
118 atomic_inc(&dest->refcnt); 123 ip_vs_dest_hold(dest);
119 e->dest = dest; 124 RCU_INIT_POINTER(e->dest, dest);
120 125
121 list_add(&e->list, &set->list); 126 list_add_rcu(&e->list, &set->list);
122 atomic_inc(&set->size); 127 atomic_inc(&set->size);
123 128
124 set->lastmod = jiffies; 129 set->lastmod = jiffies;
125 return e;
126} 130}
127 131
128static void 132static void
@@ -131,13 +135,16 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
131 struct ip_vs_dest_set_elem *e; 135 struct ip_vs_dest_set_elem *e;
132 136
133 list_for_each_entry(e, &set->list, list) { 137 list_for_each_entry(e, &set->list, list) {
134 if (e->dest == dest) { 138 struct ip_vs_dest *d;
139
140 d = rcu_dereference_protected(e->dest, 1);
141 if (d == dest) {
135 /* HIT */ 142 /* HIT */
136 atomic_dec(&set->size); 143 atomic_dec(&set->size);
137 set->lastmod = jiffies; 144 set->lastmod = jiffies;
138 atomic_dec(&e->dest->refcnt); 145 ip_vs_dest_put(dest);
139 list_del(&e->list); 146 list_del_rcu(&e->list);
140 kfree(e); 147 kfree_rcu(e, rcu_head);
141 break; 148 break;
142 } 149 }
143 } 150 }
@@ -147,17 +154,18 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
147{ 154{
148 struct ip_vs_dest_set_elem *e, *ep; 155 struct ip_vs_dest_set_elem *e, *ep;
149 156
150 write_lock(&set->lock);
151 list_for_each_entry_safe(e, ep, &set->list, list) { 157 list_for_each_entry_safe(e, ep, &set->list, list) {
158 struct ip_vs_dest *d;
159
160 d = rcu_dereference_protected(e->dest, 1);
152 /* 161 /*
153 * We don't kfree dest because it is referred either 162 * We don't kfree dest because it is referred either
154 * by its service or by the trash dest list. 163 * by its service or by the trash dest list.
155 */ 164 */
156 atomic_dec(&e->dest->refcnt); 165 ip_vs_dest_put(d);
157 list_del(&e->list); 166 list_del_rcu(&e->list);
158 kfree(e); 167 kfree_rcu(e, rcu_head);
159 } 168 }
160 write_unlock(&set->lock);
161} 169}
162 170
163/* get weighted least-connection node in the destination set */ 171/* get weighted least-connection node in the destination set */
@@ -171,8 +179,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
171 return NULL; 179 return NULL;
172 180
173 /* select the first destination server, whose weight > 0 */ 181 /* select the first destination server, whose weight > 0 */
174 list_for_each_entry(e, &set->list, list) { 182 list_for_each_entry_rcu(e, &set->list, list) {
175 least = e->dest; 183 least = rcu_dereference(e->dest);
176 if (least->flags & IP_VS_DEST_F_OVERLOAD) 184 if (least->flags & IP_VS_DEST_F_OVERLOAD)
177 continue; 185 continue;
178 186
@@ -186,8 +194,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
186 194
187 /* find the destination with the weighted least load */ 195 /* find the destination with the weighted least load */
188 nextstage: 196 nextstage:
189 list_for_each_entry(e, &set->list, list) { 197 list_for_each_entry_continue_rcu(e, &set->list, list) {
190 dest = e->dest; 198 dest = rcu_dereference(e->dest);
191 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 199 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
192 continue; 200 continue;
193 201
@@ -224,7 +232,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
224 232
225 /* select the first destination server, whose weight > 0 */ 233 /* select the first destination server, whose weight > 0 */
226 list_for_each_entry(e, &set->list, list) { 234 list_for_each_entry(e, &set->list, list) {
227 most = e->dest; 235 most = rcu_dereference_protected(e->dest, 1);
228 if (atomic_read(&most->weight) > 0) { 236 if (atomic_read(&most->weight) > 0) {
229 moh = ip_vs_dest_conn_overhead(most); 237 moh = ip_vs_dest_conn_overhead(most);
230 goto nextstage; 238 goto nextstage;
@@ -234,8 +242,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
234 242
235 /* find the destination with the weighted most load */ 243 /* find the destination with the weighted most load */
236 nextstage: 244 nextstage:
237 list_for_each_entry(e, &set->list, list) { 245 list_for_each_entry_continue(e, &set->list, list) {
238 dest = e->dest; 246 dest = rcu_dereference_protected(e->dest, 1);
239 doh = ip_vs_dest_conn_overhead(dest); 247 doh = ip_vs_dest_conn_overhead(dest);
240 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ 248 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
241 if ((moh * atomic_read(&dest->weight) < 249 if ((moh * atomic_read(&dest->weight) <
@@ -262,11 +270,12 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
262 * IP address and its destination server set 270 * IP address and its destination server set
263 */ 271 */
264struct ip_vs_lblcr_entry { 272struct ip_vs_lblcr_entry {
265 struct list_head list; 273 struct hlist_node list;
266 int af; /* address family */ 274 int af; /* address family */
267 union nf_inet_addr addr; /* destination IP address */ 275 union nf_inet_addr addr; /* destination IP address */
268 struct ip_vs_dest_set set; /* destination server set */ 276 struct ip_vs_dest_set set; /* destination server set */
269 unsigned long lastuse; /* last used time */ 277 unsigned long lastuse; /* last used time */
278 struct rcu_head rcu_head;
270}; 279};
271 280
272 281
@@ -274,12 +283,14 @@ struct ip_vs_lblcr_entry {
274 * IPVS lblcr hash table 283 * IPVS lblcr hash table
275 */ 284 */
276struct ip_vs_lblcr_table { 285struct ip_vs_lblcr_table {
277 struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ 286 struct rcu_head rcu_head;
287 struct hlist_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */
278 atomic_t entries; /* number of entries */ 288 atomic_t entries; /* number of entries */
279 int max_size; /* maximum size of entries */ 289 int max_size; /* maximum size of entries */
280 struct timer_list periodic_timer; /* collect stale entries */ 290 struct timer_list periodic_timer; /* collect stale entries */
281 int rover; /* rover for expire check */ 291 int rover; /* rover for expire check */
282 int counter; /* counter for no expire */ 292 int counter; /* counter for no expire */
293 bool dead;
283}; 294};
284 295
285 296
@@ -302,9 +313,9 @@ static ctl_table vs_vars_table[] = {
302 313
303static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) 314static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
304{ 315{
305 list_del(&en->list); 316 hlist_del_rcu(&en->list);
306 ip_vs_dest_set_eraseall(&en->set); 317 ip_vs_dest_set_eraseall(&en->set);
307 kfree(en); 318 kfree_rcu(en, rcu_head);
308} 319}
309 320
310 321
@@ -334,15 +345,12 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
334{ 345{
335 unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr); 346 unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr);
336 347
337 list_add(&en->list, &tbl->bucket[hash]); 348 hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);
338 atomic_inc(&tbl->entries); 349 atomic_inc(&tbl->entries);
339} 350}
340 351
341 352
342/* 353/* Get ip_vs_lblcr_entry associated with supplied parameters. */
343 * Get ip_vs_lblcr_entry associated with supplied parameters. Called under
344 * read lock.
345 */
346static inline struct ip_vs_lblcr_entry * 354static inline struct ip_vs_lblcr_entry *
347ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl, 355ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
348 const union nf_inet_addr *addr) 356 const union nf_inet_addr *addr)
@@ -350,7 +358,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
350 unsigned int hash = ip_vs_lblcr_hashkey(af, addr); 358 unsigned int hash = ip_vs_lblcr_hashkey(af, addr);
351 struct ip_vs_lblcr_entry *en; 359 struct ip_vs_lblcr_entry *en;
352 360
353 list_for_each_entry(en, &tbl->bucket[hash], list) 361 hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)
354 if (ip_vs_addr_equal(af, &en->addr, addr)) 362 if (ip_vs_addr_equal(af, &en->addr, addr))
355 return en; 363 return en;
356 364
@@ -360,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
360 368
361/* 369/*
362 * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination 370 * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
363 * IP address to a server. Called under write lock. 371 * IP address to a server. Called under spin lock.
364 */ 372 */
365static inline struct ip_vs_lblcr_entry * 373static inline struct ip_vs_lblcr_entry *
366ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, 374ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
@@ -381,14 +389,14 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
381 /* initialize its dest set */ 389 /* initialize its dest set */
382 atomic_set(&(en->set.size), 0); 390 atomic_set(&(en->set.size), 0);
383 INIT_LIST_HEAD(&en->set.list); 391 INIT_LIST_HEAD(&en->set.list);
384 rwlock_init(&en->set.lock); 392
393 ip_vs_dest_set_insert(&en->set, dest, false);
385 394
386 ip_vs_lblcr_hash(tbl, en); 395 ip_vs_lblcr_hash(tbl, en);
396 return en;
387 } 397 }
388 398
389 write_lock(&en->set.lock); 399 ip_vs_dest_set_insert(&en->set, dest, true);
390 ip_vs_dest_set_insert(&en->set, dest);
391 write_unlock(&en->set.lock);
392 400
393 return en; 401 return en;
394} 402}
@@ -397,17 +405,21 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
397/* 405/*
398 * Flush all the entries of the specified table. 406 * Flush all the entries of the specified table.
399 */ 407 */
400static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) 408static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
401{ 409{
410 struct ip_vs_lblcr_table *tbl = svc->sched_data;
402 int i; 411 int i;
403 struct ip_vs_lblcr_entry *en, *nxt; 412 struct ip_vs_lblcr_entry *en;
413 struct hlist_node *next;
404 414
405 /* No locking required, only called during cleanup. */ 415 spin_lock_bh(&svc->sched_lock);
416 tbl->dead = 1;
406 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { 417 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
407 list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { 418 hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {
408 ip_vs_lblcr_free(en); 419 ip_vs_lblcr_free(en);
409 } 420 }
410 } 421 }
422 spin_unlock_bh(&svc->sched_lock);
411} 423}
412 424
413static int sysctl_lblcr_expiration(struct ip_vs_service *svc) 425static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
@@ -425,13 +437,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
425 struct ip_vs_lblcr_table *tbl = svc->sched_data; 437 struct ip_vs_lblcr_table *tbl = svc->sched_data;
426 unsigned long now = jiffies; 438 unsigned long now = jiffies;
427 int i, j; 439 int i, j;
428 struct ip_vs_lblcr_entry *en, *nxt; 440 struct ip_vs_lblcr_entry *en;
441 struct hlist_node *next;
429 442
430 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 443 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
431 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 444 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
432 445
433 write_lock(&svc->sched_lock); 446 spin_lock(&svc->sched_lock);
434 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 447 hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
435 if (time_after(en->lastuse + 448 if (time_after(en->lastuse +
436 sysctl_lblcr_expiration(svc), now)) 449 sysctl_lblcr_expiration(svc), now))
437 continue; 450 continue;
@@ -439,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
439 ip_vs_lblcr_free(en); 452 ip_vs_lblcr_free(en);
440 atomic_dec(&tbl->entries); 453 atomic_dec(&tbl->entries);
441 } 454 }
442 write_unlock(&svc->sched_lock); 455 spin_unlock(&svc->sched_lock);
443 } 456 }
444 tbl->rover = j; 457 tbl->rover = j;
445} 458}
@@ -463,7 +476,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
463 unsigned long now = jiffies; 476 unsigned long now = jiffies;
464 int goal; 477 int goal;
465 int i, j; 478 int i, j;
466 struct ip_vs_lblcr_entry *en, *nxt; 479 struct ip_vs_lblcr_entry *en;
480 struct hlist_node *next;
467 481
468 if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { 482 if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
469 /* do full expiration check */ 483 /* do full expiration check */
@@ -484,8 +498,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
484 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 498 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
485 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 499 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
486 500
487 write_lock(&svc->sched_lock); 501 spin_lock(&svc->sched_lock);
488 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 502 hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {
489 if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) 503 if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
490 continue; 504 continue;
491 505
@@ -493,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)
493 atomic_dec(&tbl->entries); 507 atomic_dec(&tbl->entries);
494 goal--; 508 goal--;
495 } 509 }
496 write_unlock(&svc->sched_lock); 510 spin_unlock(&svc->sched_lock);
497 if (goal <= 0) 511 if (goal <= 0)
498 break; 512 break;
499 } 513 }
@@ -523,11 +537,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
523 * Initialize the hash buckets 537 * Initialize the hash buckets
524 */ 538 */
525 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { 539 for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
526 INIT_LIST_HEAD(&tbl->bucket[i]); 540 INIT_HLIST_HEAD(&tbl->bucket[i]);
527 } 541 }
528 tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; 542 tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
529 tbl->rover = 0; 543 tbl->rover = 0;
530 tbl->counter = 1; 544 tbl->counter = 1;
545 tbl->dead = 0;
531 546
532 /* 547 /*
533 * Hook periodic timer for garbage collection 548 * Hook periodic timer for garbage collection
@@ -540,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
540} 555}
541 556
542 557
543static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) 558static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
544{ 559{
545 struct ip_vs_lblcr_table *tbl = svc->sched_data; 560 struct ip_vs_lblcr_table *tbl = svc->sched_data;
546 561
@@ -548,14 +563,12 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
548 del_timer_sync(&tbl->periodic_timer); 563 del_timer_sync(&tbl->periodic_timer);
549 564
550 /* got to clean up table entries here */ 565 /* got to clean up table entries here */
551 ip_vs_lblcr_flush(tbl); 566 ip_vs_lblcr_flush(svc);
552 567
553 /* release the table itself */ 568 /* release the table itself */
554 kfree(tbl); 569 kfree_rcu(tbl, rcu_head);
555 IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", 570 IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
556 sizeof(*tbl)); 571 sizeof(*tbl));
557
558 return 0;
559} 572}
560 573
561 574
@@ -577,7 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
577 * The server with weight=0 is quiesced and will not receive any 590 * The server with weight=0 is quiesced and will not receive any
578 * new connection. 591 * new connection.
579 */ 592 */
580 list_for_each_entry(dest, &svc->destinations, n_list) { 593 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
581 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 594 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
582 continue; 595 continue;
583 596
@@ -593,7 +606,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)
593 * Find the destination with the least load. 606 * Find the destination with the least load.
594 */ 607 */
595 nextstage: 608 nextstage:
596 list_for_each_entry_continue(dest, &svc->destinations, n_list) { 609 list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
597 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 610 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
598 continue; 611 continue;
599 612
@@ -627,7 +640,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
627 if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) { 640 if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
628 struct ip_vs_dest *d; 641 struct ip_vs_dest *d;
629 642
630 list_for_each_entry(d, &svc->destinations, n_list) { 643 list_for_each_entry_rcu(d, &svc->destinations, n_list) {
631 if (atomic_read(&d->activeconns)*2 644 if (atomic_read(&d->activeconns)*2
632 < atomic_read(&d->weight)) { 645 < atomic_read(&d->weight)) {
633 return 1; 646 return 1;
@@ -646,7 +659,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
646{ 659{
647 struct ip_vs_lblcr_table *tbl = svc->sched_data; 660 struct ip_vs_lblcr_table *tbl = svc->sched_data;
648 struct ip_vs_iphdr iph; 661 struct ip_vs_iphdr iph;
649 struct ip_vs_dest *dest = NULL; 662 struct ip_vs_dest *dest;
650 struct ip_vs_lblcr_entry *en; 663 struct ip_vs_lblcr_entry *en;
651 664
652 ip_vs_fill_iph_addr_only(svc->af, skb, &iph); 665 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
@@ -654,53 +667,46 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
654 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 667 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
655 668
656 /* First look in our cache */ 669 /* First look in our cache */
657 read_lock(&svc->sched_lock);
658 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); 670 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
659 if (en) { 671 if (en) {
660 /* We only hold a read lock, but this is atomic */
661 en->lastuse = jiffies; 672 en->lastuse = jiffies;
662 673
663 /* Get the least loaded destination */ 674 /* Get the least loaded destination */
664 read_lock(&en->set.lock);
665 dest = ip_vs_dest_set_min(&en->set); 675 dest = ip_vs_dest_set_min(&en->set);
666 read_unlock(&en->set.lock);
667 676
668 /* More than one destination + enough time passed by, cleanup */ 677 /* More than one destination + enough time passed by, cleanup */
669 if (atomic_read(&en->set.size) > 1 && 678 if (atomic_read(&en->set.size) > 1 &&
670 time_after(jiffies, en->set.lastmod + 679 time_after(jiffies, en->set.lastmod +
671 sysctl_lblcr_expiration(svc))) { 680 sysctl_lblcr_expiration(svc))) {
672 struct ip_vs_dest *m; 681 spin_lock_bh(&svc->sched_lock);
682 if (atomic_read(&en->set.size) > 1) {
683 struct ip_vs_dest *m;
673 684
674 write_lock(&en->set.lock); 685 m = ip_vs_dest_set_max(&en->set);
675 m = ip_vs_dest_set_max(&en->set); 686 if (m)
676 if (m) 687 ip_vs_dest_set_erase(&en->set, m);
677 ip_vs_dest_set_erase(&en->set, m); 688 }
678 write_unlock(&en->set.lock); 689 spin_unlock_bh(&svc->sched_lock);
679 } 690 }
680 691
681 /* If the destination is not overloaded, use it */ 692 /* If the destination is not overloaded, use it */
682 if (dest && !is_overloaded(dest, svc)) { 693 if (dest && !is_overloaded(dest, svc))
683 read_unlock(&svc->sched_lock);
684 goto out; 694 goto out;
685 }
686 695
687 /* The cache entry is invalid, time to schedule */ 696 /* The cache entry is invalid, time to schedule */
688 dest = __ip_vs_lblcr_schedule(svc); 697 dest = __ip_vs_lblcr_schedule(svc);
689 if (!dest) { 698 if (!dest) {
690 ip_vs_scheduler_err(svc, "no destination available"); 699 ip_vs_scheduler_err(svc, "no destination available");
691 read_unlock(&svc->sched_lock);
692 return NULL; 700 return NULL;
693 } 701 }
694 702
695 /* Update our cache entry */ 703 /* Update our cache entry */
696 write_lock(&en->set.lock); 704 spin_lock_bh(&svc->sched_lock);
697 ip_vs_dest_set_insert(&en->set, dest); 705 if (!tbl->dead)
698 write_unlock(&en->set.lock); 706 ip_vs_dest_set_insert(&en->set, dest, true);
699 } 707 spin_unlock_bh(&svc->sched_lock);
700 read_unlock(&svc->sched_lock);
701
702 if (dest)
703 goto out; 708 goto out;
709 }
704 710
705 /* No cache entry, time to schedule */ 711 /* No cache entry, time to schedule */
706 dest = __ip_vs_lblcr_schedule(svc); 712 dest = __ip_vs_lblcr_schedule(svc);
@@ -710,9 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
710 } 716 }
711 717
712 /* If we fail to create a cache entry, we'll just use the valid dest */ 718 /* If we fail to create a cache entry, we'll just use the valid dest */
713 write_lock(&svc->sched_lock); 719 spin_lock_bh(&svc->sched_lock);
714 ip_vs_lblcr_new(tbl, &iph.daddr, dest); 720 if (!tbl->dead)
715 write_unlock(&svc->sched_lock); 721 ip_vs_lblcr_new(tbl, &iph.daddr, dest);
722 spin_unlock_bh(&svc->sched_lock);
716 723
717out: 724out:
718 IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", 725 IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
@@ -814,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
814{ 821{
815 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 822 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
816 unregister_pernet_subsys(&ip_vs_lblcr_ops); 823 unregister_pernet_subsys(&ip_vs_lblcr_ops);
824 synchronize_rcu();
817} 825}
818 826
819 827
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index f391819c0cca..5128e338a749 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
42 * served, but no new connection is assigned to the server. 42 * served, but no new connection is assigned to the server.
43 */ 43 */
44 44
45 list_for_each_entry(dest, &svc->destinations, n_list) { 45 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
46 if ((dest->flags & IP_VS_DEST_F_OVERLOAD) || 46 if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||
47 atomic_read(&dest->weight) == 0) 47 atomic_read(&dest->weight) == 0)
48 continue; 48 continue;
@@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void)
84static void __exit ip_vs_lc_cleanup(void) 84static void __exit ip_vs_lc_cleanup(void)
85{ 85{
86 unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); 86 unregister_ip_vs_scheduler(&ip_vs_lc_scheduler);
87 synchronize_rcu();
87} 88}
88 89
89module_init(ip_vs_lc_init); 90module_init(ip_vs_lc_init);
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index 984d9c137d84..646cfd4baa73 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
75 * new connections. 75 * new connections.
76 */ 76 */
77 77
78 list_for_each_entry(dest, &svc->destinations, n_list) { 78 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
79 79
80 if (dest->flags & IP_VS_DEST_F_OVERLOAD || 80 if (dest->flags & IP_VS_DEST_F_OVERLOAD ||
81 !atomic_read(&dest->weight)) 81 !atomic_read(&dest->weight))
@@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void)
133static void __exit ip_vs_nq_cleanup(void) 133static void __exit ip_vs_nq_cleanup(void)
134{ 134{
135 unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); 135 unregister_ip_vs_scheduler(&ip_vs_nq_scheduler);
136 synchronize_rcu();
136} 137}
137 138
138module_init(ip_vs_nq_init); 139module_init(ip_vs_nq_init);
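
The synchronize_rcu() added to each scheduler's exit routine is what makes module unload safe: after unregistering, no new RCU reader can find the scheduler, and the grace period waits out readers that already did before the module text goes away. The shape, with a hypothetical my_scheduler:

static void __exit my_sched_cleanup(void)
{
	unregister_ip_vs_scheduler(&my_scheduler);
	/* Readers reached us through RCU-protected pointers; wait for
	 * all of them to finish before this module can be unloaded.
	 */
	synchronize_rcu();
}
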
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 5cf859ccb31b..1a82b29ce8ea 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -13,20 +13,8 @@
13/* IPVS pe list */ 13/* IPVS pe list */
14static LIST_HEAD(ip_vs_pe); 14static LIST_HEAD(ip_vs_pe);
15 15
16/* lock for service table */ 16/* mutex for IPVS PEs. */
17static DEFINE_SPINLOCK(ip_vs_pe_lock); 17static DEFINE_MUTEX(ip_vs_pe_mutex);
18
19/* Bind a service with a pe */
20void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
21{
22 svc->pe = pe;
23}
24
25/* Unbind a service from its pe */
26void ip_vs_unbind_pe(struct ip_vs_service *svc)
27{
28 svc->pe = NULL;
29}
30 18
31/* Get pe in the pe list by name */ 19/* Get pe in the pe list by name */
32struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) 20struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
@@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
36 IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, 24 IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
37 pe_name); 25 pe_name);
38 26
39 spin_lock_bh(&ip_vs_pe_lock); 27 rcu_read_lock();
40 28 list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) {
41 list_for_each_entry(pe, &ip_vs_pe, n_list) {
42 /* Test and get the modules atomically */ 29 /* Test and get the modules atomically */
43 if (pe->module && 30 if (pe->module &&
44 !try_module_get(pe->module)) { 31 !try_module_get(pe->module)) {
@@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
47 } 34 }
48 if (strcmp(pe_name, pe->name)==0) { 35 if (strcmp(pe_name, pe->name)==0) {
49 /* HIT */ 36 /* HIT */
50 spin_unlock_bh(&ip_vs_pe_lock); 37 rcu_read_unlock();
51 return pe; 38 return pe;
52 } 39 }
53 if (pe->module) 40 if (pe->module)
54 module_put(pe->module); 41 module_put(pe->module);
55 } 42 }
43 rcu_read_unlock();
56 44
57 spin_unlock_bh(&ip_vs_pe_lock);
58 return NULL; 45 return NULL;
59} 46}
60 47
@@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
83 /* increase the module use count */ 70 /* increase the module use count */
84 ip_vs_use_count_inc(); 71 ip_vs_use_count_inc();
85 72
86 spin_lock_bh(&ip_vs_pe_lock); 73 mutex_lock(&ip_vs_pe_mutex);
87
88 if (!list_empty(&pe->n_list)) {
89 spin_unlock_bh(&ip_vs_pe_lock);
90 ip_vs_use_count_dec();
91 pr_err("%s(): [%s] pe already linked\n",
92 __func__, pe->name);
93 return -EINVAL;
94 }
95
96 /* Make sure that the pe with this name doesn't exist 74 /* Make sure that the pe with this name doesn't exist
97 * in the pe list. 75 * in the pe list.
98 */ 76 */
99 list_for_each_entry(tmp, &ip_vs_pe, n_list) { 77 list_for_each_entry(tmp, &ip_vs_pe, n_list) {
100 if (strcmp(tmp->name, pe->name) == 0) { 78 if (strcmp(tmp->name, pe->name) == 0) {
101 spin_unlock_bh(&ip_vs_pe_lock); 79 mutex_unlock(&ip_vs_pe_mutex);
102 ip_vs_use_count_dec(); 80 ip_vs_use_count_dec();
103 pr_err("%s(): [%s] pe already existed " 81 pr_err("%s(): [%s] pe already existed "
104 "in the system\n", __func__, pe->name); 82 "in the system\n", __func__, pe->name);
@@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
106 } 84 }
107 } 85 }
108 /* Add it into the d-linked pe list */ 86 /* Add it into the d-linked pe list */
109 list_add(&pe->n_list, &ip_vs_pe); 87 list_add_rcu(&pe->n_list, &ip_vs_pe);
110 spin_unlock_bh(&ip_vs_pe_lock); 88 mutex_unlock(&ip_vs_pe_mutex);
111 89
112 pr_info("[%s] pe registered.\n", pe->name); 90 pr_info("[%s] pe registered.\n", pe->name);
113 91
@@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe);
118/* Unregister a pe from the pe list */ 96/* Unregister a pe from the pe list */
119int unregister_ip_vs_pe(struct ip_vs_pe *pe) 97int unregister_ip_vs_pe(struct ip_vs_pe *pe)
120{ 98{
121 spin_lock_bh(&ip_vs_pe_lock); 99 mutex_lock(&ip_vs_pe_mutex);
122 if (list_empty(&pe->n_list)) {
123 spin_unlock_bh(&ip_vs_pe_lock);
124 pr_err("%s(): [%s] pe is not in the list. failed\n",
125 __func__, pe->name);
126 return -EINVAL;
127 }
128
129 /* Remove it from the d-linked pe list */ 100 /* Remove it from the d-linked pe list */
130 list_del(&pe->n_list); 101 list_del_rcu(&pe->n_list);
131 spin_unlock_bh(&ip_vs_pe_lock); 102 mutex_unlock(&ip_vs_pe_mutex);
132 103
133 /* decrease the module use count */ 104 /* decrease the module use count */
134 ip_vs_use_count_dec(); 105 ip_vs_use_count_dec();
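
ip_vs_pe.c now follows the classic RCU split: a mutex serializes the rare writers (register/unregister), while lookups run lock-free under rcu_read_lock() and pin the owning module before leaving the read-side section. A condensed sketch with a hypothetical struct my_pe:

#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/string.h>

struct my_pe {
	struct list_head n_list;
	struct module *module;
	const char *name;
};

static LIST_HEAD(my_pe_list);
static DEFINE_MUTEX(my_pe_mutex);	/* serializes writers only */

void my_register_pe(struct my_pe *pe)
{
	mutex_lock(&my_pe_mutex);
	list_add_rcu(&pe->n_list, &my_pe_list);
	mutex_unlock(&my_pe_mutex);
}

void my_unregister_pe(struct my_pe *pe)
{
	mutex_lock(&my_pe_mutex);
	list_del_rcu(&pe->n_list);
	mutex_unlock(&my_pe_mutex);
	/* the caller still needs synchronize_rcu() before freeing pe */
}

struct my_pe *my_pe_lookup(const char *name)
{
	struct my_pe *pe;

	rcu_read_lock();
	list_for_each_entry_rcu(pe, &my_pe_list, n_list) {
		/* pin the owning module before leaving the RCU section */
		if (pe->module && !try_module_get(pe->module))
			continue;
		if (strcmp(pe->name, name) == 0) {
			rcu_read_unlock();
			return pe;
		}
		if (pe->module)
			module_put(pe->module);
	}
	rcu_read_unlock();
	return NULL;
}
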
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 12475ef88daf..9ef22bdce9f1 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -13,7 +13,8 @@ static const char *ip_vs_dbg_callid(char *buf, size_t buf_len,
13 const char *callid, size_t callid_len, 13 const char *callid, size_t callid_len,
14 int *idx) 14 int *idx)
15{ 15{
16 size_t len = min(min(callid_len, (size_t)64), buf_len - *idx - 1); 16 size_t max_len = 64;
17 size_t len = min3(max_len, callid_len, buf_len - *idx - 1);
17 memcpy(buf + *idx, callid, len); 18 memcpy(buf + *idx, callid, len);
18 buf[*idx+len] = '\0'; 19 buf[*idx+len] = '\0';
19 *idx += len + 1; 20 *idx += len + 1;
@@ -37,14 +38,10 @@ static int get_callid(const char *dptr, unsigned int dataoff,
37 if (ret > 0) 38 if (ret > 0)
38 break; 39 break;
39 if (!ret) 40 if (!ret)
40 return 0; 41 return -EINVAL;
41 dataoff += *matchoff; 42 dataoff += *matchoff;
42 } 43 }
43 44
44 /* Empty callid is useless */
45 if (!*matchlen)
46 return -EINVAL;
47
48 /* Too large is useless */ 45 /* Too large is useless */
49 if (*matchlen > IP_VS_PEDATA_MAXLEN) 46 if (*matchlen > IP_VS_PEDATA_MAXLEN)
50 return -EINVAL; 47 return -EINVAL;
@@ -172,6 +169,7 @@ static int __init ip_vs_sip_init(void)
172static void __exit ip_vs_sip_cleanup(void) 169static void __exit ip_vs_sip_cleanup(void)
173{ 170{
174 unregister_ip_vs_pe(&ip_vs_sip_pe); 171 unregister_ip_vs_pe(&ip_vs_sip_pe);
172 synchronize_rcu();
175} 173}
176 174
177module_init(ip_vs_sip_init); 175module_init(ip_vs_sip_init);
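
The ip_vs_dbg_callid() change is a type fix: min() and min3() are type-strict macros, so comparing a bare integer literal against size_t operands forces a cast. Hoisting the constant into a size_t variable lets min3() take three operands of the same type. Isolated, the idiom is:

#include <linux/kernel.h>	/* min3() */

size_t max_len = 64;	/* was: (size_t)64 inside a nested min(min(..)) */
size_t len = min3(max_len, callid_len, buf_len - *idx - 1);

The get_callid() hunk also changes a missing Call-ID header to report -EINVAL at the point it is detected, which appears to make the separate empty-Call-ID test redundant.
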
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index cd1d7298f7ba..86464881cd20 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
27 if (sch == NULL) 27 if (sch == NULL)
28 return 0; 28 return 0;
29 net = skb_net(skb); 29 net = skb_net(skb);
30 rcu_read_lock();
30 if ((sch->type == SCTP_CID_INIT) && 31 if ((sch->type == SCTP_CID_INIT) &&
31 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, 32 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
32 &iph->daddr, sh->dest))) { 33 &iph->daddr, sh->dest))) {
33 int ignored; 34 int ignored;
34 35
35 if (ip_vs_todrop(net_ipvs(net))) { 36 if (ip_vs_todrop(net_ipvs(net))) {
@@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
37 * It seems that we are very loaded. 38 * It seems that we are very loaded.
38 * We have to drop this packet :( 39 * We have to drop this packet :(
39 */ 40 */
40 ip_vs_service_put(svc); 41 rcu_read_unlock();
41 *verdict = NF_DROP; 42 *verdict = NF_DROP;
42 return 0; 43 return 0;
43 } 44 }
@@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
49 if (!*cpp && ignored <= 0) { 50 if (!*cpp && ignored <= 0) {
50 if (!ignored) 51 if (!ignored)
51 *verdict = ip_vs_leave(svc, skb, pd, iph); 52 *verdict = ip_vs_leave(svc, skb, pd, iph);
52 else { 53 else
53 ip_vs_service_put(svc);
54 *verdict = NF_DROP; 54 *verdict = NF_DROP;
55 } 55 rcu_read_unlock();
56 return 0; 56 return 0;
57 } 57 }
58 ip_vs_service_put(svc);
59 } 58 }
59 rcu_read_unlock();
60 /* NF_ACCEPT */ 60 /* NF_ACCEPT */
61 return 1; 61 return 1;
62} 62}
@@ -208,7 +208,7 @@ enum ipvs_sctp_event_t {
208 IP_VS_SCTP_EVE_LAST 208 IP_VS_SCTP_EVE_LAST
209}; 209};
210 210
211static enum ipvs_sctp_event_t sctp_events[255] = { 211static enum ipvs_sctp_event_t sctp_events[256] = {
212 IP_VS_SCTP_EVE_DATA_CLI, 212 IP_VS_SCTP_EVE_DATA_CLI,
213 IP_VS_SCTP_EVE_INIT_CLI, 213 IP_VS_SCTP_EVE_INIT_CLI,
214 IP_VS_SCTP_EVE_INIT_ACK_CLI, 214 IP_VS_SCTP_EVE_INIT_ACK_CLI,
@@ -994,9 +994,9 @@ static void
994sctp_state_transition(struct ip_vs_conn *cp, int direction, 994sctp_state_transition(struct ip_vs_conn *cp, int direction,
995 const struct sk_buff *skb, struct ip_vs_proto_data *pd) 995 const struct sk_buff *skb, struct ip_vs_proto_data *pd)
996{ 996{
997 spin_lock(&cp->lock); 997 spin_lock_bh(&cp->lock);
998 set_sctp_state(pd, cp, direction, skb); 998 set_sctp_state(pd, cp, direction, skb);
999 spin_unlock(&cp->lock); 999 spin_unlock_bh(&cp->lock);
1000} 1000}
1001 1001
1002static inline __u16 sctp_app_hashkey(__be16 port) 1002static inline __u16 sctp_app_hashkey(__be16 port)
@@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
1016 1016
1017 hash = sctp_app_hashkey(port); 1017 hash = sctp_app_hashkey(port);
1018 1018
1019 spin_lock_bh(&ipvs->sctp_app_lock);
1020 list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { 1019 list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
1021 if (i->port == port) { 1020 if (i->port == port) {
1022 ret = -EEXIST; 1021 ret = -EEXIST;
1023 goto out; 1022 goto out;
1024 } 1023 }
1025 } 1024 }
1026 list_add(&inc->p_list, &ipvs->sctp_apps[hash]); 1025 list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
1027 atomic_inc(&pd->appcnt); 1026 atomic_inc(&pd->appcnt);
1028out: 1027out:
1029 spin_unlock_bh(&ipvs->sctp_app_lock);
1030 1028
1031 return ret; 1029 return ret;
1032} 1030}
1033 1031
1034static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) 1032static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
1035{ 1033{
1036 struct netns_ipvs *ipvs = net_ipvs(net);
1037 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); 1034 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
1038 1035
1039 spin_lock_bh(&ipvs->sctp_app_lock);
1040 atomic_dec(&pd->appcnt); 1036 atomic_dec(&pd->appcnt);
1041 list_del(&inc->p_list); 1037 list_del_rcu(&inc->p_list);
1042 spin_unlock_bh(&ipvs->sctp_app_lock);
1043} 1038}
1044 1039
1045static int sctp_app_conn_bind(struct ip_vs_conn *cp) 1040static int sctp_app_conn_bind(struct ip_vs_conn *cp)
@@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
1055 /* Lookup application incarnations and bind the right one */ 1050 /* Lookup application incarnations and bind the right one */
1056 hash = sctp_app_hashkey(cp->vport); 1051 hash = sctp_app_hashkey(cp->vport);
1057 1052
1058 spin_lock(&ipvs->sctp_app_lock); 1053 rcu_read_lock();
1059 list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { 1054 list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
1060 if (inc->port == cp->vport) { 1055 if (inc->port == cp->vport) {
1061 if (unlikely(!ip_vs_app_inc_get(inc))) 1056 if (unlikely(!ip_vs_app_inc_get(inc)))
1062 break; 1057 break;
1063 spin_unlock(&ipvs->sctp_app_lock); 1058 rcu_read_unlock();
1064 1059
1065 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" 1060 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
1066 "%s:%u to app %s on port %u\n", 1061 "%s:%u to app %s on port %u\n",
@@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)
1076 goto out; 1071 goto out;
1077 } 1072 }
1078 } 1073 }
1079 spin_unlock(&ipvs->sctp_app_lock); 1074 rcu_read_unlock();
1080out: 1075out:
1081 return result; 1076 return result;
1082} 1077}
@@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
1090 struct netns_ipvs *ipvs = net_ipvs(net); 1085 struct netns_ipvs *ipvs = net_ipvs(net);
1091 1086
1092 ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); 1087 ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
1093 spin_lock_init(&ipvs->sctp_app_lock);
1094 pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, 1088 pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
1095 sizeof(sctp_timeouts)); 1089 sizeof(sctp_timeouts));
1096 if (!pd->timeout_table) 1090 if (!pd->timeout_table)
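
Throughout the protocol handlers, conn_schedule no longer takes and drops a reference on the service; the whole lookup-and-schedule window sits inside rcu_read_lock()/rcu_read_unlock(), and each early return has to drop the read lock first. A sketch of the control flow (my_service_find() and my_should_drop() are placeholders standing in for ip_vs_service_find() and the ip_vs_todrop() test):

static int my_conn_schedule(struct net *net, struct sk_buff *skb,
			    int *verdict)
{
	struct ip_vs_service *svc;

	rcu_read_lock();
	svc = my_service_find(net, skb);	/* no refcount taken */
	if (svc) {
		if (my_should_drop(net)) {
			rcu_read_unlock();	/* every exit unlocks */
			*verdict = NF_DROP;
			return 0;
		}
		/* ... schedule a new connection against svc ... */
	}
	rcu_read_unlock();	/* svc must not be touched past this */
	return 1;		/* NF_ACCEPT */
}
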
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 9af653a75825..50a15944c6c1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
47 } 47 }
48 net = skb_net(skb); 48 net = skb_net(skb);
49 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 49 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
50 rcu_read_lock();
50 if (th->syn && 51 if (th->syn &&
51 (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, 52 (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
52 &iph->daddr, th->dest))) { 53 &iph->daddr, th->dest))) {
53 int ignored; 54 int ignored;
54 55
55 if (ip_vs_todrop(net_ipvs(net))) { 56 if (ip_vs_todrop(net_ipvs(net))) {
@@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
57 * It seems that we are very loaded. 58 * It seems that we are very loaded.
58 * We have to drop this packet :( 59 * We have to drop this packet :(
59 */ 60 */
60 ip_vs_service_put(svc); 61 rcu_read_unlock();
61 *verdict = NF_DROP; 62 *verdict = NF_DROP;
62 return 0; 63 return 0;
63 } 64 }
@@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
70 if (!*cpp && ignored <= 0) { 71 if (!*cpp && ignored <= 0) {
71 if (!ignored) 72 if (!ignored)
72 *verdict = ip_vs_leave(svc, skb, pd, iph); 73 *verdict = ip_vs_leave(svc, skb, pd, iph);
73 else { 74 else
74 ip_vs_service_put(svc);
75 *verdict = NF_DROP; 75 *verdict = NF_DROP;
76 } 76 rcu_read_unlock();
77 return 0; 77 return 0;
78 } 78 }
79 ip_vs_service_put(svc);
80 } 79 }
80 rcu_read_unlock();
81 /* NF_ACCEPT */ 81 /* NF_ACCEPT */
82 return 1; 82 return 1;
83} 83}
@@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
557 if (th == NULL) 557 if (th == NULL)
558 return; 558 return;
559 559
560 spin_lock(&cp->lock); 560 spin_lock_bh(&cp->lock);
561 set_tcp_state(pd, cp, direction, th); 561 set_tcp_state(pd, cp, direction, th);
562 spin_unlock(&cp->lock); 562 spin_unlock_bh(&cp->lock);
563} 563}
564 564
565static inline __u16 tcp_app_hashkey(__be16 port) 565static inline __u16 tcp_app_hashkey(__be16 port)
@@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
580 580
581 hash = tcp_app_hashkey(port); 581 hash = tcp_app_hashkey(port);
582 582
583 spin_lock_bh(&ipvs->tcp_app_lock);
584 list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { 583 list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
585 if (i->port == port) { 584 if (i->port == port) {
586 ret = -EEXIST; 585 ret = -EEXIST;
587 goto out; 586 goto out;
588 } 587 }
589 } 588 }
590 list_add(&inc->p_list, &ipvs->tcp_apps[hash]); 589 list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]);
591 atomic_inc(&pd->appcnt); 590 atomic_inc(&pd->appcnt);
592 591
593 out: 592 out:
594 spin_unlock_bh(&ipvs->tcp_app_lock);
595 return ret; 593 return ret;
596} 594}
597 595
@@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
599static void 597static void
600tcp_unregister_app(struct net *net, struct ip_vs_app *inc) 598tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
601{ 599{
602 struct netns_ipvs *ipvs = net_ipvs(net);
603 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 600 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
604 601
605 spin_lock_bh(&ipvs->tcp_app_lock);
606 atomic_dec(&pd->appcnt); 602 atomic_dec(&pd->appcnt);
607 list_del(&inc->p_list); 603 list_del_rcu(&inc->p_list);
608 spin_unlock_bh(&ipvs->tcp_app_lock);
609} 604}
610 605
611 606
@@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
624 /* Lookup application incarnations and bind the right one */ 619 /* Lookup application incarnations and bind the right one */
625 hash = tcp_app_hashkey(cp->vport); 620 hash = tcp_app_hashkey(cp->vport);
626 621
627 spin_lock(&ipvs->tcp_app_lock); 622 rcu_read_lock();
628 list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { 623 list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {
629 if (inc->port == cp->vport) { 624 if (inc->port == cp->vport) {
630 if (unlikely(!ip_vs_app_inc_get(inc))) 625 if (unlikely(!ip_vs_app_inc_get(inc)))
631 break; 626 break;
632 spin_unlock(&ipvs->tcp_app_lock); 627 rcu_read_unlock();
633 628
634 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 629 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
635 "%s:%u to app %s on port %u\n", 630 "%s:%u to app %s on port %u\n",
@@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
646 goto out; 641 goto out;
647 } 642 }
648 } 643 }
649 spin_unlock(&ipvs->tcp_app_lock); 644 rcu_read_unlock();
650 645
651 out: 646 out:
652 return result; 647 return result;
@@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
660{ 655{
661 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 656 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
662 657
663 spin_lock(&cp->lock); 658 spin_lock_bh(&cp->lock);
664 cp->state = IP_VS_TCP_S_LISTEN; 659 cp->state = IP_VS_TCP_S_LISTEN;
665 cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] 660 cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
666 : tcp_timeouts[IP_VS_TCP_S_LISTEN]); 661 : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
667 spin_unlock(&cp->lock); 662 spin_unlock_bh(&cp->lock);
668} 663}
669 664
670/* --------------------------------------------- 665/* ---------------------------------------------
@@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
676 struct netns_ipvs *ipvs = net_ipvs(net); 671 struct netns_ipvs *ipvs = net_ipvs(net);
677 672
678 ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); 673 ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
679 spin_lock_init(&ipvs->tcp_app_lock);
680 pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, 674 pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
681 sizeof(tcp_timeouts)); 675 sizeof(tcp_timeouts));
682 if (!pd->timeout_table) 676 if (!pd->timeout_table)
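
The cp->lock sites switch from spin_lock() to spin_lock_bh(). The reason shows up at the end of this series in ip_vs_sync.c: the backup thread used to wrap ip_vs_process_message() in local_bh_disable(), and that blanket is removed, so any lock shared between process context (the sync thread) and softirq (the packet path) must now disable bottom halves itself:

/* Without _bh, a softirq arriving on the same CPU while the lock is
 * held would spin on it forever; _bh closes that window.
 */
spin_lock_bh(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;		/* example state update */
spin_unlock_bh(&cp->lock);
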
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 503a842c90d2..b62a3c0ff9bf 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
44 return 0; 44 return 0;
45 } 45 }
46 net = skb_net(skb); 46 net = skb_net(skb);
47 svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, 47 rcu_read_lock();
48 &iph->daddr, uh->dest); 48 svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
49 &iph->daddr, uh->dest);
49 if (svc) { 50 if (svc) {
50 int ignored; 51 int ignored;
51 52
@@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
54 * It seems that we are very loaded. 55 * It seems that we are very loaded.
55 * We have to drop this packet :( 56 * We have to drop this packet :(
56 */ 57 */
57 ip_vs_service_put(svc); 58 rcu_read_unlock();
58 *verdict = NF_DROP; 59 *verdict = NF_DROP;
59 return 0; 60 return 0;
60 } 61 }
@@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
67 if (!*cpp && ignored <= 0) { 68 if (!*cpp && ignored <= 0) {
68 if (!ignored) 69 if (!ignored)
69 *verdict = ip_vs_leave(svc, skb, pd, iph); 70 *verdict = ip_vs_leave(svc, skb, pd, iph);
70 else { 71 else
71 ip_vs_service_put(svc);
72 *verdict = NF_DROP; 72 *verdict = NF_DROP;
73 } 73 rcu_read_unlock();
74 return 0; 74 return 0;
75 } 75 }
76 ip_vs_service_put(svc);
77 } 76 }
77 rcu_read_unlock();
78 /* NF_ACCEPT */ 78 /* NF_ACCEPT */
79 return 1; 79 return 1;
80} 80}
@@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)
359 359
360 hash = udp_app_hashkey(port); 360 hash = udp_app_hashkey(port);
361 361
362
363 spin_lock_bh(&ipvs->udp_app_lock);
364 list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { 362 list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
365 if (i->port == port) { 363 if (i->port == port) {
366 ret = -EEXIST; 364 ret = -EEXIST;
367 goto out; 365 goto out;
368 } 366 }
369 } 367 }
370 list_add(&inc->p_list, &ipvs->udp_apps[hash]); 368 list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);
371 atomic_inc(&pd->appcnt); 369 atomic_inc(&pd->appcnt);
372 370
373 out: 371 out:
374 spin_unlock_bh(&ipvs->udp_app_lock);
375 return ret; 372 return ret;
376} 373}
377 374
@@ -380,12 +377,9 @@ static void
380udp_unregister_app(struct net *net, struct ip_vs_app *inc) 377udp_unregister_app(struct net *net, struct ip_vs_app *inc)
381{ 378{
382 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 379 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
383 struct netns_ipvs *ipvs = net_ipvs(net);
384 380
385 spin_lock_bh(&ipvs->udp_app_lock);
386 atomic_dec(&pd->appcnt); 381 atomic_dec(&pd->appcnt);
387 list_del(&inc->p_list); 382 list_del_rcu(&inc->p_list);
388 spin_unlock_bh(&ipvs->udp_app_lock);
389} 383}
390 384
391 385
@@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
403 /* Lookup application incarnations and bind the right one */ 397 /* Lookup application incarnations and bind the right one */
404 hash = udp_app_hashkey(cp->vport); 398 hash = udp_app_hashkey(cp->vport);
405 399
406 spin_lock(&ipvs->udp_app_lock); 400 rcu_read_lock();
407 list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { 401 list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {
408 if (inc->port == cp->vport) { 402 if (inc->port == cp->vport) {
409 if (unlikely(!ip_vs_app_inc_get(inc))) 403 if (unlikely(!ip_vs_app_inc_get(inc)))
410 break; 404 break;
411 spin_unlock(&ipvs->udp_app_lock); 405 rcu_read_unlock();
412 406
413 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 407 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
414 "%s:%u to app %s on port %u\n", 408 "%s:%u to app %s on port %u\n",
@@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
425 goto out; 419 goto out;
426 } 420 }
427 } 421 }
428 spin_unlock(&ipvs->udp_app_lock); 422 rcu_read_unlock();
429 423
430 out: 424 out:
431 return result; 425 return result;
@@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
467 struct netns_ipvs *ipvs = net_ipvs(net); 461 struct netns_ipvs *ipvs = net_ipvs(net);
468 462
469 ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); 463 ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
470 spin_lock_init(&ipvs->udp_app_lock);
471 pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, 464 pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
472 sizeof(udp_timeouts)); 465 sizeof(udp_timeouts));
473 if (!pd->timeout_table) 466 if (!pd->timeout_table)
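
The app_conn_bind() functions show how to carry an RCU-found object out of the read-side section: take a real reference (ip_vs_app_inc_get()) while still under rcu_read_lock(), then unlock and keep using the object. A sketch of that find-and-pin step (my_find_and_pin() is illustrative; the list fields match the real code):

static struct ip_vs_app *my_find_and_pin(struct list_head *head,
					 __be16 port)
{
	struct ip_vs_app *inc;

	rcu_read_lock();
	list_for_each_entry_rcu(inc, head, p_list) {
		if (inc->port != port)
			continue;
		if (unlikely(!ip_vs_app_inc_get(inc)))
			break;			/* dying, give up */
		rcu_read_unlock();
		return inc;	/* the refcount keeps inc alive now */
	}
	rcu_read_unlock();
	return NULL;
}
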
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index c49b388d1085..c35986c793d9 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc)
35} 35}
36 36
37 37
38static int ip_vs_rr_update_svc(struct ip_vs_service *svc) 38static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)
39{ 39{
40 svc->sched_data = &svc->destinations; 40 struct list_head *p;
41
42 spin_lock_bh(&svc->sched_lock);
43 p = (struct list_head *) svc->sched_data;
44 /* dest is already unlinked, so p->prev is not valid but
45 * p->next is valid, use it to reach previous entry.
46 */
47 if (p == &dest->n_list)
48 svc->sched_data = p->next->prev;
49 spin_unlock_bh(&svc->sched_lock);
41 return 0; 50 return 0;
42} 51}
43 52
@@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc)
48static struct ip_vs_dest * 57static struct ip_vs_dest *
49ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 58ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
50{ 59{
51 struct list_head *p, *q; 60 struct list_head *p;
52 struct ip_vs_dest *dest; 61 struct ip_vs_dest *dest, *last;
62 int pass = 0;
53 63
54 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); 64 IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
55 65
56 write_lock(&svc->sched_lock); 66 spin_lock_bh(&svc->sched_lock);
57 p = (struct list_head *)svc->sched_data; 67 p = (struct list_head *) svc->sched_data;
58 p = p->next; 68 last = dest = list_entry(p, struct ip_vs_dest, n_list);
59 q = p; 69
60 do { 70 do {
61 /* skip list head */ 71 list_for_each_entry_continue_rcu(dest,
62 if (q == &svc->destinations) { 72 &svc->destinations,
63 q = q->next; 73 n_list) {
64 continue; 74 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
75 atomic_read(&dest->weight) > 0)
76 /* HIT */
77 goto out;
78 if (dest == last)
79 goto stop;
65 } 80 }
66 81 pass++;
67 dest = list_entry(q, struct ip_vs_dest, n_list); 82 /* Previous dest could be unlinked, do not loop forever.
68 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && 83 * If we stay at head there is no need for 2nd pass.
69 atomic_read(&dest->weight) > 0) 84 */
70 /* HIT */ 85 } while (pass < 2 && p != &svc->destinations);
71 goto out; 86
72 q = q->next; 87stop:
73 } while (q != p); 88 spin_unlock_bh(&svc->sched_lock);
74 write_unlock(&svc->sched_lock);
75 ip_vs_scheduler_err(svc, "no destination available"); 89 ip_vs_scheduler_err(svc, "no destination available");
76 return NULL; 90 return NULL;
77 91
78 out: 92 out:
79 svc->sched_data = q; 93 svc->sched_data = &dest->n_list;
80 write_unlock(&svc->sched_lock); 94 spin_unlock_bh(&svc->sched_lock);
81 IP_VS_DBG_BUF(6, "RR: server %s:%u " 95 IP_VS_DBG_BUF(6, "RR: server %s:%u "
82 "activeconns %d refcnt %d weight %d\n", 96 "activeconns %d refcnt %d weight %d\n",
83 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), 97 IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
@@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {
94 .module = THIS_MODULE, 108 .module = THIS_MODULE,
95 .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), 109 .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
96 .init_service = ip_vs_rr_init_svc, 110 .init_service = ip_vs_rr_init_svc,
97 .update_service = ip_vs_rr_update_svc, 111 .add_dest = NULL,
112 .del_dest = ip_vs_rr_del_dest,
98 .schedule = ip_vs_rr_schedule, 113 .schedule = ip_vs_rr_schedule,
99}; 114};
100 115
@@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void)
106static void __exit ip_vs_rr_cleanup(void) 121static void __exit ip_vs_rr_cleanup(void)
107{ 122{
108 unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); 123 unregister_ip_vs_scheduler(&ip_vs_rr_scheduler);
124 synchronize_rcu();
109} 125}
110 126
111module_init(ip_vs_rr_init); 127module_init(ip_vs_rr_init);
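
Round robin keeps a cursor into the destination list, so with RCU removal a freed entry could be left as the cursor. The new del_dest hook repairs it using a property of list_del_rcu(): the unlinked node's next pointer is deliberately left intact for concurrent readers, so next->prev names the surviving predecessor:

/* Called with dest already unlinked via list_del_rcu(): dest's
 * ->prev is poisoned but ->next still points into the live list.
 */
spin_lock_bh(&svc->sched_lock);
p = (struct list_head *)svc->sched_data;
if (p == &dest->n_list)
	svc->sched_data = p->next->prev;	/* step back to a live node */
spin_unlock_bh(&svc->sched_lock);

The two-pass loop in ip_vs_rr_schedule() is the matching read side: because the remembered cursor may have just been unlinked, one pass is no longer guaranteed to visit every destination, so a bounded second pass is allowed before giving up.
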
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index d6bf20d6cdbe..4dbcda6258bc 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -35,8 +35,8 @@ EXPORT_SYMBOL(ip_vs_scheduler_err);
35 */ 35 */
36static LIST_HEAD(ip_vs_schedulers); 36static LIST_HEAD(ip_vs_schedulers);
37 37
38/* lock for service table */ 38/* mutex for schedulers */
39static DEFINE_SPINLOCK(ip_vs_sched_lock); 39static DEFINE_MUTEX(ip_vs_sched_mutex);
40 40
41 41
42/* 42/*
@@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
47{ 47{
48 int ret; 48 int ret;
49 49
50 svc->scheduler = scheduler;
51
52 if (scheduler->init_service) { 50 if (scheduler->init_service) {
53 ret = scheduler->init_service(svc); 51 ret = scheduler->init_service(svc);
54 if (ret) { 52 if (ret) {
@@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
56 return ret; 54 return ret;
57 } 55 }
58 } 56 }
59 57 rcu_assign_pointer(svc->scheduler, scheduler);
60 return 0; 58 return 0;
61} 59}
62 60
@@ -64,22 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
64/* 62/*
65 * Unbind a service with its scheduler 63 * Unbind a service with its scheduler
66 */ 64 */
67int ip_vs_unbind_scheduler(struct ip_vs_service *svc) 65void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
66 struct ip_vs_scheduler *sched)
68{ 67{
69 struct ip_vs_scheduler *sched = svc->scheduler; 68 struct ip_vs_scheduler *cur_sched;
70 69
71 if (!sched) 70 cur_sched = rcu_dereference_protected(svc->scheduler, 1);
72 return 0; 71 /* This check proves that old 'sched' was installed */
72 if (!cur_sched)
73 return;
73 74
74 if (sched->done_service) { 75 if (sched->done_service)
75 if (sched->done_service(svc) != 0) { 76 sched->done_service(svc);
76 pr_err("%s(): done error\n", __func__); 77 /* svc->scheduler can not be set to NULL */
77 return -EINVAL;
78 }
79 }
80
81 svc->scheduler = NULL;
82 return 0;
83} 78}
84 79
85 80
@@ -92,7 +87,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
92 87
93 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); 88 IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
94 89
95 spin_lock_bh(&ip_vs_sched_lock); 90 mutex_lock(&ip_vs_sched_mutex);
96 91
97 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 92 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
98 /* 93 /*
@@ -106,14 +101,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
106 } 101 }
107 if (strcmp(sched_name, sched->name)==0) { 102 if (strcmp(sched_name, sched->name)==0) {
108 /* HIT */ 103 /* HIT */
109 spin_unlock_bh(&ip_vs_sched_lock); 104 mutex_unlock(&ip_vs_sched_mutex);
110 return sched; 105 return sched;
111 } 106 }
112 if (sched->module) 107 if (sched->module)
113 module_put(sched->module); 108 module_put(sched->module);
114 } 109 }
115 110
116 spin_unlock_bh(&ip_vs_sched_lock); 111 mutex_unlock(&ip_vs_sched_mutex);
117 return NULL; 112 return NULL;
118} 113}
119 114
@@ -153,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
153 148
154void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) 149void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
155{ 150{
151 struct ip_vs_scheduler *sched;
152
153 sched = rcu_dereference(svc->scheduler);
156 if (svc->fwmark) { 154 if (svc->fwmark) {
157 IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", 155 IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
158 svc->scheduler->name, svc->fwmark, 156 sched->name, svc->fwmark, svc->fwmark, msg);
159 svc->fwmark, msg);
160#ifdef CONFIG_IP_VS_IPV6 157#ifdef CONFIG_IP_VS_IPV6
161 } else if (svc->af == AF_INET6) { 158 } else if (svc->af == AF_INET6) {
162 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", 159 IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
163 svc->scheduler->name, 160 sched->name, ip_vs_proto_name(svc->protocol),
164 ip_vs_proto_name(svc->protocol),
165 &svc->addr.in6, ntohs(svc->port), msg); 161 &svc->addr.in6, ntohs(svc->port), msg);
166#endif 162#endif
167 } else { 163 } else {
168 IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", 164 IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
169 svc->scheduler->name, 165 sched->name, ip_vs_proto_name(svc->protocol),
170 ip_vs_proto_name(svc->protocol),
171 &svc->addr.ip, ntohs(svc->port), msg); 166 &svc->addr.ip, ntohs(svc->port), msg);
172 } 167 }
173} 168}
@@ -192,10 +187,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
192 /* increase the module use count */ 187 /* increase the module use count */
193 ip_vs_use_count_inc(); 188 ip_vs_use_count_inc();
194 189
195 spin_lock_bh(&ip_vs_sched_lock); 190 mutex_lock(&ip_vs_sched_mutex);
196 191
197 if (!list_empty(&scheduler->n_list)) { 192 if (!list_empty(&scheduler->n_list)) {
198 spin_unlock_bh(&ip_vs_sched_lock); 193 mutex_unlock(&ip_vs_sched_mutex);
199 ip_vs_use_count_dec(); 194 ip_vs_use_count_dec();
200 pr_err("%s(): [%s] scheduler already linked\n", 195 pr_err("%s(): [%s] scheduler already linked\n",
201 __func__, scheduler->name); 196 __func__, scheduler->name);
@@ -208,7 +203,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
208 */ 203 */
209 list_for_each_entry(sched, &ip_vs_schedulers, n_list) { 204 list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
210 if (strcmp(scheduler->name, sched->name) == 0) { 205 if (strcmp(scheduler->name, sched->name) == 0) {
211 spin_unlock_bh(&ip_vs_sched_lock); 206 mutex_unlock(&ip_vs_sched_mutex);
212 ip_vs_use_count_dec(); 207 ip_vs_use_count_dec();
213 pr_err("%s(): [%s] scheduler already existed " 208 pr_err("%s(): [%s] scheduler already existed "
214 "in the system\n", __func__, scheduler->name); 209 "in the system\n", __func__, scheduler->name);
@@ -219,7 +214,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
219 * Add it into the d-linked scheduler list 214 * Add it into the d-linked scheduler list
220 */ 215 */
221 list_add(&scheduler->n_list, &ip_vs_schedulers); 216 list_add(&scheduler->n_list, &ip_vs_schedulers);
222 spin_unlock_bh(&ip_vs_sched_lock); 217 mutex_unlock(&ip_vs_sched_mutex);
223 218
224 pr_info("[%s] scheduler registered.\n", scheduler->name); 219 pr_info("[%s] scheduler registered.\n", scheduler->name);
225 220
@@ -237,9 +232,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
237 return -EINVAL; 232 return -EINVAL;
238 } 233 }
239 234
240 spin_lock_bh(&ip_vs_sched_lock); 235 mutex_lock(&ip_vs_sched_mutex);
241 if (list_empty(&scheduler->n_list)) { 236 if (list_empty(&scheduler->n_list)) {
242 spin_unlock_bh(&ip_vs_sched_lock); 237 mutex_unlock(&ip_vs_sched_mutex);
243 pr_err("%s(): [%s] scheduler is not in the list. failed\n", 238 pr_err("%s(): [%s] scheduler is not in the list. failed\n",
244 __func__, scheduler->name); 239 __func__, scheduler->name);
245 return -EINVAL; 240 return -EINVAL;
@@ -249,7 +244,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
249 * Remove it from the d-linked scheduler list 244 * Remove it from the d-linked scheduler list
250 */ 245 */
251 list_del(&scheduler->n_list); 246 list_del(&scheduler->n_list);
252 spin_unlock_bh(&ip_vs_sched_lock); 247 mutex_unlock(&ip_vs_sched_mutex);
253 248
254 /* decrease the module use count */ 249 /* decrease the module use count */
255 ip_vs_use_count_dec(); 250 ip_vs_use_count_dec();
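
svc->scheduler becomes an RCU-managed pointer: it is published with rcu_assign_pointer() only after init_service() has finished, so a reader that sees the new scheduler also sees fully initialized sched_data, and readers fetch it with rcu_dereference() inside their RCU section. The bind side in miniature:

static int my_bind_scheduler(struct ip_vs_service *svc,
			     struct ip_vs_scheduler *sched)
{
	if (sched->init_service) {
		int ret = sched->init_service(svc);

		if (ret)
			return ret;
	}
	/* publish last: pairs with rcu_dereference() in readers */
	rcu_assign_pointer(svc->scheduler, sched);
	return 0;
}
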
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 89ead246ed3d..f3205925359a 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -79,7 +79,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
79 * new connections. 79 * new connections.
80 */ 80 */
81 81
82 list_for_each_entry(dest, &svc->destinations, n_list) { 82 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
83 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && 83 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
84 atomic_read(&dest->weight) > 0) { 84 atomic_read(&dest->weight) > 0) {
85 least = dest; 85 least = dest;
@@ -94,7 +94,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
94 * Find the destination with the least load. 94 * Find the destination with the least load.
95 */ 95 */
96 nextstage: 96 nextstage:
97 list_for_each_entry_continue(dest, &svc->destinations, n_list) { 97 list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
98 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 98 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
99 continue; 99 continue;
100 doh = ip_vs_sed_dest_overhead(dest); 100 doh = ip_vs_sed_dest_overhead(dest);
@@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void)
134static void __exit ip_vs_sed_cleanup(void) 134static void __exit ip_vs_sed_cleanup(void)
135{ 135{
136 unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); 136 unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
137 synchronize_rcu();
137} 138}
138 139
139module_init(ip_vs_sed_init); 140module_init(ip_vs_sed_init);
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index e33126994628..0df269d7c99f 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -53,7 +53,7 @@
53 * IPVS SH bucket 53 * IPVS SH bucket
54 */ 54 */
55struct ip_vs_sh_bucket { 55struct ip_vs_sh_bucket {
56 struct ip_vs_dest *dest; /* real server (cache) */ 56 struct ip_vs_dest __rcu *dest; /* real server (cache) */
57}; 57};
58 58
59/* 59/*
@@ -66,6 +66,10 @@ struct ip_vs_sh_bucket {
66#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS) 66#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS)
67#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) 67#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)
68 68
69struct ip_vs_sh_state {
70 struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
71 struct rcu_head rcu_head;
72};
69 73
70/* 74/*
71 * Returns hash value for IPVS SH entry 75 * Returns hash value for IPVS SH entry
@@ -87,10 +91,9 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
87 * Get ip_vs_dest associated with supplied parameters. 91 * Get ip_vs_dest associated with supplied parameters.
88 */ 92 */
89static inline struct ip_vs_dest * 93static inline struct ip_vs_dest *
90ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl, 94ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)
91 const union nf_inet_addr *addr)
92{ 95{
93 return (tbl[ip_vs_sh_hashkey(af, addr)]).dest; 96 return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);
94} 97}
95 98
96 99
@@ -98,27 +101,32 @@ ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl,
98 * Assign all the hash buckets of the specified table with the service. 101 * Assign all the hash buckets of the specified table with the service.
99 */ 102 */
100static int 103static int
101ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) 104ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
102{ 105{
103 int i; 106 int i;
104 struct ip_vs_sh_bucket *b; 107 struct ip_vs_sh_bucket *b;
105 struct list_head *p; 108 struct list_head *p;
106 struct ip_vs_dest *dest; 109 struct ip_vs_dest *dest;
107 int d_count; 110 int d_count;
111 bool empty;
108 112
109 b = tbl; 113 b = &s->buckets[0];
110 p = &svc->destinations; 114 p = &svc->destinations;
115 empty = list_empty(p);
111 d_count = 0; 116 d_count = 0;
112 for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { 117 for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
113 if (list_empty(p)) { 118 dest = rcu_dereference_protected(b->dest, 1);
114 b->dest = NULL; 119 if (dest)
115 } else { 120 ip_vs_dest_put(dest);
121 if (empty)
122 RCU_INIT_POINTER(b->dest, NULL);
123 else {
116 if (p == &svc->destinations) 124 if (p == &svc->destinations)
117 p = p->next; 125 p = p->next;
118 126
119 dest = list_entry(p, struct ip_vs_dest, n_list); 127 dest = list_entry(p, struct ip_vs_dest, n_list);
120 atomic_inc(&dest->refcnt); 128 ip_vs_dest_hold(dest);
121 b->dest = dest; 129 RCU_INIT_POINTER(b->dest, dest);
122 130
123 IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", 131 IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
124 i, IP_VS_DBG_ADDR(svc->af, &dest->addr), 132 i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
@@ -140,16 +148,18 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
140/* 148/*
141 * Flush all the hash buckets of the specified table. 149 * Flush all the hash buckets of the specified table.
142 */ 150 */
143static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) 151static void ip_vs_sh_flush(struct ip_vs_sh_state *s)
144{ 152{
145 int i; 153 int i;
146 struct ip_vs_sh_bucket *b; 154 struct ip_vs_sh_bucket *b;
155 struct ip_vs_dest *dest;
147 156
148 b = tbl; 157 b = &s->buckets[0];
149 for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { 158 for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
150 if (b->dest) { 159 dest = rcu_dereference_protected(b->dest, 1);
151 atomic_dec(&b->dest->refcnt); 160 if (dest) {
152 b->dest = NULL; 161 ip_vs_dest_put(dest);
162 RCU_INIT_POINTER(b->dest, NULL);
153 } 163 }
154 b++; 164 b++;
155 } 165 }
@@ -158,51 +168,46 @@ static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)
158 168
159static int ip_vs_sh_init_svc(struct ip_vs_service *svc) 169static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
160{ 170{
161 struct ip_vs_sh_bucket *tbl; 171 struct ip_vs_sh_state *s;
162 172
163 /* allocate the SH table for this service */ 173 /* allocate the SH table for this service */
164 tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, 174 s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL);
165 GFP_KERNEL); 175 if (s == NULL)
166 if (tbl == NULL)
167 return -ENOMEM; 176 return -ENOMEM;
168 177
169 svc->sched_data = tbl; 178 svc->sched_data = s;
170 IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for " 179 IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
171 "current service\n", 180 "current service\n",
172 sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); 181 sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
173 182
174 /* assign the hash buckets with the updated service */ 183 /* assign the hash buckets with current dests */
175 ip_vs_sh_assign(tbl, svc); 184 ip_vs_sh_reassign(s, svc);
176 185
177 return 0; 186 return 0;
178} 187}
179 188
180 189
181static int ip_vs_sh_done_svc(struct ip_vs_service *svc) 190static void ip_vs_sh_done_svc(struct ip_vs_service *svc)
182{ 191{
183 struct ip_vs_sh_bucket *tbl = svc->sched_data; 192 struct ip_vs_sh_state *s = svc->sched_data;
184 193
185 /* got to clean up hash buckets here */ 194 /* got to clean up hash buckets here */
186 ip_vs_sh_flush(tbl); 195 ip_vs_sh_flush(s);
187 196
188 /* release the table itself */ 197 /* release the table itself */
189 kfree(svc->sched_data); 198 kfree_rcu(s, rcu_head);
190 IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", 199 IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
191 sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); 200 sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
192
193 return 0;
194} 201}
195 202
196 203
197static int ip_vs_sh_update_svc(struct ip_vs_service *svc) 204static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
205 struct ip_vs_dest *dest)
198{ 206{
199 struct ip_vs_sh_bucket *tbl = svc->sched_data; 207 struct ip_vs_sh_state *s = svc->sched_data;
200
201 /* got to clean up hash buckets here */
202 ip_vs_sh_flush(tbl);
203 208
204 /* assign the hash buckets with the updated service */ 209 /* assign the hash buckets with the updated service */
205 ip_vs_sh_assign(tbl, svc); 210 ip_vs_sh_reassign(s, svc);
206 211
207 return 0; 212 return 0;
208} 213}
@@ -225,15 +230,15 @@ static struct ip_vs_dest *
225ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 230ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
226{ 231{
227 struct ip_vs_dest *dest; 232 struct ip_vs_dest *dest;
228 struct ip_vs_sh_bucket *tbl; 233 struct ip_vs_sh_state *s;
229 struct ip_vs_iphdr iph; 234 struct ip_vs_iphdr iph;
230 235
231 ip_vs_fill_iph_addr_only(svc->af, skb, &iph); 236 ip_vs_fill_iph_addr_only(svc->af, skb, &iph);
232 237
233 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); 238 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
234 239
235 tbl = (struct ip_vs_sh_bucket *)svc->sched_data; 240 s = (struct ip_vs_sh_state *) svc->sched_data;
236 dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr); 241 dest = ip_vs_sh_get(svc->af, s, &iph.saddr);
237 if (!dest 242 if (!dest
238 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 243 || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
239 || atomic_read(&dest->weight) <= 0 244 || atomic_read(&dest->weight) <= 0
@@ -262,7 +267,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =
262 .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), 267 .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
263 .init_service = ip_vs_sh_init_svc, 268 .init_service = ip_vs_sh_init_svc,
264 .done_service = ip_vs_sh_done_svc, 269 .done_service = ip_vs_sh_done_svc,
265 .update_service = ip_vs_sh_update_svc, 270 .add_dest = ip_vs_sh_dest_changed,
271 .del_dest = ip_vs_sh_dest_changed,
272 .upd_dest = ip_vs_sh_dest_changed,
266 .schedule = ip_vs_sh_schedule, 273 .schedule = ip_vs_sh_schedule,
267}; 274};
268 275
@@ -276,6 +283,7 @@ static int __init ip_vs_sh_init(void)
276static void __exit ip_vs_sh_cleanup(void) 283static void __exit ip_vs_sh_cleanup(void)
277{ 284{
278 unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); 285 unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
286 synchronize_rcu();
279} 287}
280 288
281 289
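
The source-hash table becomes RCU all the way down: each bucket holds an __rcu destination pointer (written with RCU_INIT_POINTER() under sched_lock, read with rcu_dereference()), and the table itself is freed with kfree_rcu() so lookups still in flight finish before the memory disappears. Compressed to its skeleton (my_flush() is assumed to drop the per-bucket dest references, as ip_vs_sh_flush() does):

struct my_bucket {
	struct ip_vs_dest __rcu *dest;
};

struct my_state {
	struct my_bucket buckets[IP_VS_SH_TAB_SIZE];
	struct rcu_head rcu_head;
};

static struct ip_vs_dest *my_get(struct my_state *s, unsigned int hash)
{
	return rcu_dereference(s->buckets[hash].dest);	/* reader side */
}

static void my_done_svc(struct ip_vs_service *svc)
{
	struct my_state *s = svc->sched_data;

	my_flush(s);		/* drop the per-bucket dest references */
	kfree_rcu(s, rcu_head);	/* free only after a grace period */
}
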
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 44fd10c539ac..f6046d9af8d3 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -246,7 +246,7 @@ struct ip_vs_sync_thread_data {
246struct ip_vs_sync_mesg_v0 { 246struct ip_vs_sync_mesg_v0 {
247 __u8 nr_conns; 247 __u8 nr_conns;
248 __u8 syncid; 248 __u8 syncid;
249 __u16 size; 249 __be16 size;
250 250
251 /* ip_vs_sync_conn entries start here */ 251 /* ip_vs_sync_conn entries start here */
252}; 252};
@@ -255,7 +255,7 @@ struct ip_vs_sync_mesg_v0 {
255struct ip_vs_sync_mesg { 255struct ip_vs_sync_mesg {
256 __u8 reserved; /* must be zero */ 256 __u8 reserved; /* must be zero */
257 __u8 syncid; 257 __u8 syncid;
258 __u16 size; 258 __be16 size;
259 __u8 nr_conns; 259 __u8 nr_conns;
260 __s8 version; /* SYNC_PROTO_VER */ 260 __s8 version; /* SYNC_PROTO_VER */
261 __u16 spare; 261 __u16 spare;
@@ -335,7 +335,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
335 sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */ 335 sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */
336 sb->mesg->version = SYNC_PROTO_VER; 336 sb->mesg->version = SYNC_PROTO_VER;
337 sb->mesg->syncid = ipvs->master_syncid; 337 sb->mesg->syncid = ipvs->master_syncid;
338 sb->mesg->size = sizeof(struct ip_vs_sync_mesg); 338 sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));
339 sb->mesg->nr_conns = 0; 339 sb->mesg->nr_conns = 0;
340 sb->mesg->spare = 0; 340 sb->mesg->spare = 0;
341 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); 341 sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
@@ -418,7 +418,7 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
418 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; 418 mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
419 mesg->nr_conns = 0; 419 mesg->nr_conns = 0;
420 mesg->syncid = ipvs->master_syncid; 420 mesg->syncid = ipvs->master_syncid;
421 mesg->size = sizeof(struct ip_vs_sync_mesg_v0); 421 mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));
422 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); 422 sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
423 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; 423 sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
424 sb->firstuse = jiffies; 424 sb->firstuse = jiffies;
@@ -531,9 +531,9 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
531 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts)) 531 if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))
532 return; 532 return;
533 533
534 spin_lock(&ipvs->sync_buff_lock); 534 spin_lock_bh(&ipvs->sync_buff_lock);
535 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { 535 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
536 spin_unlock(&ipvs->sync_buff_lock); 536 spin_unlock_bh(&ipvs->sync_buff_lock);
537 return; 537 return;
538 } 538 }
539 539
@@ -552,7 +552,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
552 if (!buff) { 552 if (!buff) {
553 buff = ip_vs_sync_buff_create_v0(ipvs); 553 buff = ip_vs_sync_buff_create_v0(ipvs);
554 if (!buff) { 554 if (!buff) {
555 spin_unlock(&ipvs->sync_buff_lock); 555 spin_unlock_bh(&ipvs->sync_buff_lock);
556 pr_err("ip_vs_sync_buff_create failed.\n"); 556 pr_err("ip_vs_sync_buff_create failed.\n");
557 return; 557 return;
558 } 558 }
@@ -582,7 +582,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
582 } 582 }
583 583
584 m->nr_conns++; 584 m->nr_conns++;
585 m->size += len; 585 m->size = htons(ntohs(m->size) + len);
586 buff->head += len; 586 buff->head += len;
587 587
588 /* check if there is a space for next one */ 588 /* check if there is a space for next one */
@@ -590,7 +590,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
590 sb_queue_tail(ipvs, ms); 590 sb_queue_tail(ipvs, ms);
591 ms->sync_buff = NULL; 591 ms->sync_buff = NULL;
592 } 592 }
593 spin_unlock(&ipvs->sync_buff_lock); 593 spin_unlock_bh(&ipvs->sync_buff_lock);
594 594
595 /* synchronize its controller if it has */ 595 /* synchronize its controller if it has */
596 cp = cp->control; 596 cp = cp->control;
@@ -641,9 +641,9 @@ sloop:
641 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); 641 pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
642 } 642 }
643 643
644 spin_lock(&ipvs->sync_buff_lock); 644 spin_lock_bh(&ipvs->sync_buff_lock);
645 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { 645 if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {
646 spin_unlock(&ipvs->sync_buff_lock); 646 spin_unlock_bh(&ipvs->sync_buff_lock);
647 return; 647 return;
648 } 648 }
649 649
@@ -683,7 +683,7 @@ sloop:
683 if (!buff) { 683 if (!buff) {
684 buff = ip_vs_sync_buff_create(ipvs); 684 buff = ip_vs_sync_buff_create(ipvs);
685 if (!buff) { 685 if (!buff) {
686 spin_unlock(&ipvs->sync_buff_lock); 686 spin_unlock_bh(&ipvs->sync_buff_lock);
687 pr_err("ip_vs_sync_buff_create failed.\n"); 687 pr_err("ip_vs_sync_buff_create failed.\n");
688 return; 688 return;
689 } 689 }
@@ -693,7 +693,7 @@ sloop:
693 693
694 p = buff->head; 694 p = buff->head;
695 buff->head += pad + len; 695 buff->head += pad + len;
696 m->size += pad + len; 696 m->size = htons(ntohs(m->size) + pad + len);
697 /* Add ev. padding from prev. sync_conn */ 697 /* Add ev. padding from prev. sync_conn */
698 while (pad--) 698 while (pad--)
699 *(p++) = 0; 699 *(p++) = 0;
@@ -750,7 +750,7 @@ sloop:
750 } 750 }
751 } 751 }
752 752
753 spin_unlock(&ipvs->sync_buff_lock); 753 spin_unlock_bh(&ipvs->sync_buff_lock);
754 754
755control: 755control:
756 /* synchronize its controller if it has */ 756 /* synchronize its controller if it has */
@@ -843,7 +843,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
843 kfree(param->pe_data); 843 kfree(param->pe_data);
844 844
845 dest = cp->dest; 845 dest = cp->dest;
846 spin_lock(&cp->lock); 846 spin_lock_bh(&cp->lock);
847 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE && 847 if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&
848 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) { 848 !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {
849 if (flags & IP_VS_CONN_F_INACTIVE) { 849 if (flags & IP_VS_CONN_F_INACTIVE) {
@@ -857,24 +857,21 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
857 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK; 857 flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;
858 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK; 858 flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;
859 cp->flags = flags; 859 cp->flags = flags;
860 spin_unlock(&cp->lock); 860 spin_unlock_bh(&cp->lock);
861 if (!dest) { 861 if (!dest)
862 dest = ip_vs_try_bind_dest(cp); 862 ip_vs_try_bind_dest(cp);
863 if (dest)
864 atomic_dec(&dest->refcnt);
865 }
866 } else { 863 } else {
867 /* 864 /*
868 * Find the appropriate destination for the connection. 865 * Find the appropriate destination for the connection.
869 * If it is not found the connection will remain unbound 866 * If it is not found the connection will remain unbound
870 * but still handled. 867 * but still handled.
871 */ 868 */
869 rcu_read_lock();
872 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, 870 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
873 param->vport, protocol, fwmark, flags); 871 param->vport, protocol, fwmark, flags);
874 872
875 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); 873 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
876 if (dest) 874 rcu_read_unlock();
877 atomic_dec(&dest->refcnt);
878 if (!cp) { 875 if (!cp) {
879 if (param->pe_data) 876 if (param->pe_data)
880 kfree(param->pe_data); 877 kfree(param->pe_data);
@@ -1178,10 +1175,8 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
1178 IP_VS_DBG(2, "BACKUP, message header too short\n"); 1175 IP_VS_DBG(2, "BACKUP, message header too short\n");
1179 return; 1176 return;
1180 } 1177 }
1181 /* Convert size back to host byte order */
1182 m2->size = ntohs(m2->size);
1183 1178
1184 if (buflen != m2->size) { 1179 if (buflen != ntohs(m2->size)) {
1185 IP_VS_DBG(2, "BACKUP, bogus message size\n"); 1180 IP_VS_DBG(2, "BACKUP, bogus message size\n");
1186 return; 1181 return;
1187 } 1182 }
@@ -1547,10 +1542,7 @@ ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)
1547 int msize; 1542 int msize;
1548 int ret; 1543 int ret;
1549 1544
1550 msize = msg->size; 1545 msize = ntohs(msg->size);
1551
1552 /* Put size in network byte order */
1553 msg->size = htons(msg->size);
1554 1546
1555 ret = ip_vs_send_async(sock, (char *)msg, msize); 1547 ret = ip_vs_send_async(sock, (char *)msg, msize);
1556 if (ret >= 0 || ret == -EAGAIN) 1548 if (ret >= 0 || ret == -EAGAIN)
@@ -1692,11 +1684,7 @@ static int sync_thread_backup(void *data)
1692 break; 1684 break;
1693 } 1685 }
1694 1686
1695 /* disable bottom half, because it accesses the data
1696 shared by softirq while getting/creating conns */
1697 local_bh_disable();
1698 ip_vs_process_message(tinfo->net, tinfo->buf, len); 1687 ip_vs_process_message(tinfo->net, tinfo->buf, len);
1699 local_bh_enable();
1700 } 1688 }
1701 } 1689 }
1702 1690
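
The sync message size field is retyped to __be16 and now lives in network byte order for its entire lifetime, which eliminates the old in-place ntohs()/htons() flips (the send path used to convert the header right inside the buffer). Conversions happen only at the point of use:

/* append len bytes to a message whose header stays in wire order */
m->size = htons(ntohs(m->size) + len);

/* receive-side validation */
if (buflen != ntohs(m->size))
	return;		/* bogus message */

Keeping the field annotated as __be16 also lets sparse flag any access that forgets the conversion.
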
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index bc1bfc48a17f..c60a81c4ce9a 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -51,7 +51,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
51 * new connections. 51 * new connections.
52 */ 52 */
53 53
54 list_for_each_entry(dest, &svc->destinations, n_list) { 54 list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
55 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && 55 if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
56 atomic_read(&dest->weight) > 0) { 56 atomic_read(&dest->weight) > 0) {
57 least = dest; 57 least = dest;
@@ -66,7 +66,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
66 * Find the destination with the least load. 66 * Find the destination with the least load.
67 */ 67 */
68 nextstage: 68 nextstage:
69 list_for_each_entry_continue(dest, &svc->destinations, n_list) { 69 list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
70 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 70 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
71 continue; 71 continue;
72 doh = ip_vs_dest_conn_overhead(dest); 72 doh = ip_vs_dest_conn_overhead(dest);
@@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void)
106static void __exit ip_vs_wlc_cleanup(void) 106static void __exit ip_vs_wlc_cleanup(void)
107{ 107{
108 unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); 108 unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler);
109 synchronize_rcu();
109} 110}
110 111
111module_init(ip_vs_wlc_init); 112module_init(ip_vs_wlc_init);
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 231be7dd547a..0e68555bceb9 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -29,14 +29,45 @@
29 29
30#include <net/ip_vs.h> 30#include <net/ip_vs.h>
31 31
32/* The WRR algorithm depends on some calculations:
33 * - mw: maximum weight
34 * - di: weight step, greatest common divisor from all weights
35 * - cw: current required weight
36 * As result, all weights are in the [di..mw] range with a step=di.
37 *
38 * First, we start with cw = mw and select dests with weight >= cw.
39 * Then cw is reduced with di and all dests are checked again.
40 * Last pass should be with cw = di. We have mw/di passes in total:
41 *
42 * pass 1: cw = max weight
43 * pass 2: cw = max weight - di
44 * pass 3: cw = max weight - 2 * di
45 * ...
46 * last pass: cw = di
47 *
48 * Weights are supposed to be >= di, but we run in parallel with
49 * weight changes, so it is possible for some dest weight to drop
50 * below di, which is bad if it is the only available dest.
51 *
52 * So, we modify how mw is calculated: it is reduced by (di - 1),
53 * so that the last cw is 1, to catch such dests with weight below di:
54 * pass 1: cw = max weight - (di - 1)
55 * pass 2: cw = max weight - di - (di - 1)
56 * pass 3: cw = max weight - 2 * di - (di - 1)
57 * ...
58 * last pass: cw = 1
59 *
60 */
61
32/* 62/*
33 * current destination pointer for weighted round-robin scheduling 63 * current destination pointer for weighted round-robin scheduling
34 */ 64 */
35struct ip_vs_wrr_mark { 65struct ip_vs_wrr_mark {
36 struct list_head *cl; /* current list head */ 66 struct ip_vs_dest *cl; /* current dest or head */
37 int cw; /* current weight */ 67 int cw; /* current weight */
38 int mw; /* maximum weight */ 68 int mw; /* maximum weight */
39 int di; /* decreasing interval */ 69 int di; /* decreasing interval */
70 struct rcu_head rcu_head;
40}; 71};
41 72
42 73
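
As a concrete instance of the arithmetic in the comment above: weights {2, 4, 6} give di = gcd = 2 and mw = 6 - (2 - 1) = 5, so the passes run with cw = 5, 3, 1, and a dest whose weight was concurrently lowered to 1 is still caught on the last pass. A small userspace sketch, illustrative only:

/* Userspace sketch of the WRR pass sequence described above. */
#include <stdio.h>

static int gcd(int a, int b)
{
	while (b) {
		int t = a % b;
		a = b;
		b = t;
	}
	return a;
}

int main(void)
{
	int w[] = { 2, 4, 6 };
	int i, cw, di = w[0], max = w[0];

	for (i = 1; i < 3; i++) {
		di = gcd(di, w[i]);
		if (w[i] > max)
			max = w[i];
	}
	/* mw is reduced by (di - 1) so the last pass uses cw = 1 */
	for (cw = max - (di - 1); cw > 0; cw -= di)
		printf("pass: cw = %d\n", cw);	/* prints 5, 3, 1 */
	return 0;
}
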
@@ -88,36 +119,41 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
 	if (mark == NULL)
 		return -ENOMEM;
 
-	mark->cl = &svc->destinations;
-	mark->cw = 0;
-	mark->mw = ip_vs_wrr_max_weight(svc);
+	mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
 	mark->di = ip_vs_wrr_gcd_weight(svc);
+	mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
+	mark->cw = mark->mw;
 	svc->sched_data = mark;
 
 	return 0;
 }
 
 
-static int ip_vs_wrr_done_svc(struct ip_vs_service *svc)
+static void ip_vs_wrr_done_svc(struct ip_vs_service *svc)
 {
+	struct ip_vs_wrr_mark *mark = svc->sched_data;
+
 	/*
 	 *    Release the mark variable
 	 */
-	kfree(svc->sched_data);
-
-	return 0;
+	kfree_rcu(mark, rcu_head);
 }
 
 
-static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
+static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc,
+				  struct ip_vs_dest *dest)
 {
 	struct ip_vs_wrr_mark *mark = svc->sched_data;
 
-	mark->cl = &svc->destinations;
-	mark->mw = ip_vs_wrr_max_weight(svc);
+	spin_lock_bh(&svc->sched_lock);
+	mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);
 	mark->di = ip_vs_wrr_gcd_weight(svc);
-	if (mark->cw > mark->mw)
-		mark->cw = 0;
+	mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1);
+	if (mark->cw > mark->mw || !mark->cw)
+		mark->cw = mark->mw;
+	else if (mark->di > 1)
+		mark->cw = (mark->cw / mark->di) * mark->di + 1;
+	spin_unlock_bh(&svc->sched_lock);
 	return 0;
 }
 
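
The done_svc change above frees the mark through kfree_rcu() instead of kfree(), so a schedule call racing with service teardown can still dereference svc->sched_data until a grace period elapses; the struct only needs an embedded rcu_head. A generic sketch of that idiom:

/* Sketch: deferred free via kfree_rcu(), as in ip_vs_wrr_done_svc. */
#include <linux/slab.h>
#include <linux/rcupdate.h>

struct mark {
	int cw;
	struct rcu_head rcu_head;	/* storage for the deferred free */
};

static void release_mark(struct mark *m)
{
	/* freed only after all current RCU readers are done with 'm' */
	kfree_rcu(m, rcu_head);
}
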
@@ -128,80 +164,79 @@ static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)
 static struct ip_vs_dest *
 ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 {
-	struct ip_vs_dest *dest;
+	struct ip_vs_dest *dest, *last, *stop = NULL;
 	struct ip_vs_wrr_mark *mark = svc->sched_data;
-	struct list_head *p;
+	bool last_pass = false, restarted = false;
 
 	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
 
-	/*
-	 * This loop will always terminate, because mark->cw in (0, max_weight]
-	 * and at least one server has its weight equal to max_weight.
-	 */
-	write_lock(&svc->sched_lock);
-	p = mark->cl;
+	spin_lock_bh(&svc->sched_lock);
+	dest = mark->cl;
+	/* No available dests? */
+	if (mark->mw == 0)
+		goto err_noavail;
+	last = dest;
+	/* Stop only after all dests were checked for weight >= 1 (last pass) */
 	while (1) {
-		if (mark->cl == &svc->destinations) {
-			/* it is at the head of the destination list */
-
-			if (mark->cl == mark->cl->next) {
-				/* no dest entry */
-				ip_vs_scheduler_err(svc,
-					"no destination available: "
-					"no destinations present");
-				dest = NULL;
-				goto out;
-			}
-
-			mark->cl = svc->destinations.next;
-			mark->cw -= mark->di;
-			if (mark->cw <= 0) {
-				mark->cw = mark->mw;
-				/*
-				 * Still zero, which means no available servers.
-				 */
-				if (mark->cw == 0) {
-					mark->cl = &svc->destinations;
-					ip_vs_scheduler_err(svc,
-						"no destination available");
-					dest = NULL;
-					goto out;
-				}
-			}
-		} else
-			mark->cl = mark->cl->next;
-
-		if (mark->cl != &svc->destinations) {
-			/* not at the head of the list */
-			dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
+		list_for_each_entry_continue_rcu(dest,
+						 &svc->destinations,
+						 n_list) {
 			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
-			    atomic_read(&dest->weight) >= mark->cw) {
-				/* got it */
-				break;
-			}
+			    atomic_read(&dest->weight) >= mark->cw)
+				goto found;
+			if (dest == stop)
+				goto err_over;
 		}
-
-		if (mark->cl == p && mark->cw == mark->di) {
-			/* back to the start, and no dest is found.
-			   It is only possible when all dests are OVERLOADED */
-			dest = NULL;
-			ip_vs_scheduler_err(svc,
-				"no destination available: "
-				"all destinations are overloaded");
-			goto out;
+		mark->cw -= mark->di;
+		if (mark->cw <= 0) {
+			mark->cw = mark->mw;
+			/* Stop if we tried last pass from first dest:
+			 * 1. last_pass: we started checks when cw > di but
+			 *	then all dests were checked for w >= 1
+			 * 2. last was head: the first and only traversal
+			 *	was for weight >= 1, for all dests.
+			 */
+			if (last_pass ||
+			    &last->n_list == &svc->destinations)
+				goto err_over;
+			restarted = true;
+		}
+		last_pass = mark->cw <= mark->di;
+		if (last_pass && restarted &&
+		    &last->n_list != &svc->destinations) {
+			/* First traversal was for w >= 1 but only
+			 * for dests after 'last', now do the same
+			 * for all dests up to 'last'.
+			 */
+			stop = last;
 		}
 	}
 
+found:
 	IP_VS_DBG_BUF(6, "WRR: server %s:%u "
 		      "activeconns %d refcnt %d weight %d\n",
 		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
 		      atomic_read(&dest->activeconns),
 		      atomic_read(&dest->refcnt),
 		      atomic_read(&dest->weight));
+	mark->cl = dest;
 
   out:
-	write_unlock(&svc->sched_lock);
+	spin_unlock_bh(&svc->sched_lock);
 	return dest;
+
+err_noavail:
+	mark->cl = dest;
+	dest = NULL;
+	ip_vs_scheduler_err(svc, "no destination available");
+	goto out;
+
+err_over:
+	mark->cl = dest;
+	dest = NULL;
+	ip_vs_scheduler_err(svc, "no destination available: "
+				 "all destinations are overloaded");
+	goto out;
 }
 
 
@@ -212,7 +247,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {
 	.n_list =		LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
 	.init_service =		ip_vs_wrr_init_svc,
 	.done_service =		ip_vs_wrr_done_svc,
-	.update_service =	ip_vs_wrr_update_svc,
+	.add_dest =		ip_vs_wrr_dest_changed,
+	.del_dest =		ip_vs_wrr_dest_changed,
+	.upd_dest =		ip_vs_wrr_dest_changed,
 	.schedule =		ip_vs_wrr_schedule,
 };
 
@@ -224,6 +261,7 @@ static int __init ip_vs_wrr_init(void)
 static void __exit ip_vs_wrr_cleanup(void)
 {
 	unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler);
+	synchronize_rcu();
 }
 
 module_init(ip_vs_wrr_init);
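
Both cleanup paths now call synchronize_rcu() after unregistering: once it returns, no CPU can still be executing this module's callbacks under rcu_read_lock(), so the module text may safely go away. A schematic exit path (the scheduler name is hypothetical; the shape matches both cleanups above):

/* Sketch: tear down an RCU-visible registration before unload. */
static void __exit example_cleanup(void)
{
	unregister_ip_vs_scheduler(&example_scheduler);
	/* wait out readers that may still run our schedule() */
	synchronize_rcu();
}
module_exit(example_cleanup);
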
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index ee6b7a9f1ec2..b75ff6429a04 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -17,6 +17,8 @@
  * - not all connections have destination server, for example,
  *   connections in backup server when fwmark is used
  * - bypass connections use daddr from packet
+ * - we can use dst without ref while sending in RCU section, we use
+ *   ref when returning NF_ACCEPT for NAT-ed packet via loopback
  * LOCAL_OUT rules:
  * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
  * - skb->pkt_type is not set yet
@@ -51,39 +53,54 @@ enum {
  */
 	IP_VS_RT_MODE_CONNECT	= 8, /* Always bind route to saddr */
 	IP_VS_RT_MODE_KNOWN_NH	= 16,/* Route via remote addr */
+	IP_VS_RT_MODE_TUNNEL	= 32,/* Tunnel mode */
 };
 
+static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void)
+{
+	return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC);
+}
+
+static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst)
+{
+	kfree(dest_dst);
+}
+
 /*
  *	Destination cache to speed up outgoing route lookup
  */
 static inline void
-__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
-		u32 dst_cookie)
+__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst,
+		struct dst_entry *dst, u32 dst_cookie)
 {
-	struct dst_entry *old_dst;
+	struct ip_vs_dest_dst *old;
+
+	old = rcu_dereference_protected(dest->dest_dst,
+					lockdep_is_held(&dest->dst_lock));
 
-	old_dst = dest->dst_cache;
-	dest->dst_cache = dst;
-	dest->dst_rtos = rtos;
-	dest->dst_cookie = dst_cookie;
-	dst_release(old_dst);
+	if (dest_dst) {
+		dest_dst->dst_cache = dst;
+		dest_dst->dst_cookie = dst_cookie;
+	}
+	rcu_assign_pointer(dest->dest_dst, dest_dst);
+
+	if (old)
+		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
 }
 
-static inline struct dst_entry *
-__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
+static inline struct ip_vs_dest_dst *
+__ip_vs_dst_check(struct ip_vs_dest *dest)
 {
-	struct dst_entry *dst = dest->dst_cache;
+	struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst);
+	struct dst_entry *dst;
 
-	if (!dst)
+	if (!dest_dst)
 		return NULL;
-	if ((dst->obsolete || rtos != dest->dst_rtos) &&
-	    dst->ops->check(dst, dest->dst_cookie) == NULL) {
-		dest->dst_cache = NULL;
-		dst_release(dst);
+	dst = dest_dst->dst_cache;
+	if (dst->obsolete &&
+	    dst->ops->check(dst, dest_dst->dst_cookie) == NULL)
 		return NULL;
-	}
-	dst_hold(dst);
-	return dst;
+	return dest_dst;
 }
 
@@ -104,7 +121,7 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)
 
 /* Get route to daddr, update *saddr, optionally bind route to saddr */
 static struct rtable *do_output_route4(struct net *net, __be32 daddr,
-				       u32 rtos, int rt_mode, __be32 *saddr)
+				       int rt_mode, __be32 *saddr)
 {
 	struct flowi4 fl4;
 	struct rtable *rt;
@@ -113,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
 	memset(&fl4, 0, sizeof(fl4));
 	fl4.daddr = daddr;
 	fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
-	fl4.flowi4_tos = rtos;
 	fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
 			   FLOWI_FLAG_KNOWN_NH : 0;
 
@@ -124,7 +140,7 @@ retry:
 		if (PTR_ERR(rt) == -EINVAL && *saddr &&
 		    rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
 			*saddr = 0;
-			flowi4_update_output(&fl4, 0, rtos, daddr, 0);
+			flowi4_update_output(&fl4, 0, 0, daddr, 0);
 			goto retry;
 		}
 		IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
@@ -132,7 +148,7 @@ retry:
 	} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
 		ip_rt_put(rt);
 		*saddr = fl4.saddr;
-		flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
+		flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
 		loop++;
 		goto retry;
 	}
@@ -141,113 +157,140 @@ retry:
 }
 
 /* Get route to destination or remote server */
-static struct rtable *
+static int
 __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
-		   __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
+		   __be32 daddr, int rt_mode, __be32 *ret_saddr)
 {
 	struct net *net = dev_net(skb_dst(skb)->dev);
+	struct netns_ipvs *ipvs = net_ipvs(net);
+	struct ip_vs_dest_dst *dest_dst;
 	struct rtable *rt;			/* Route to the other host */
 	struct rtable *ort;			/* Original route */
-	int local;
+	struct iphdr *iph;
+	__be16 df;
+	int mtu;
+	int local, noref = 1;
 
 	if (dest) {
-		spin_lock(&dest->dst_lock);
-		if (!(rt = (struct rtable *)
-		      __ip_vs_dst_check(dest, rtos))) {
-			rt = do_output_route4(net, dest->addr.ip, rtos,
-					      rt_mode, &dest->dst_saddr.ip);
+		dest_dst = __ip_vs_dst_check(dest);
+		if (likely(dest_dst))
+			rt = (struct rtable *) dest_dst->dst_cache;
+		else {
+			dest_dst = ip_vs_dest_dst_alloc();
+			spin_lock_bh(&dest->dst_lock);
+			if (!dest_dst) {
+				__ip_vs_dst_set(dest, NULL, NULL, 0);
+				spin_unlock_bh(&dest->dst_lock);
+				goto err_unreach;
+			}
+			rt = do_output_route4(net, dest->addr.ip, rt_mode,
+					      &dest_dst->dst_saddr.ip);
 			if (!rt) {
-				spin_unlock(&dest->dst_lock);
-				return NULL;
+				__ip_vs_dst_set(dest, NULL, NULL, 0);
+				spin_unlock_bh(&dest->dst_lock);
+				ip_vs_dest_dst_free(dest_dst);
+				goto err_unreach;
 			}
-			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
-			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
-				  "rtos=%X\n",
-				  &dest->addr.ip, &dest->dst_saddr.ip,
-				  atomic_read(&rt->dst.__refcnt), rtos);
+			__ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
+			spin_unlock_bh(&dest->dst_lock);
+			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
+				  &dest->addr.ip, &dest_dst->dst_saddr.ip,
+				  atomic_read(&rt->dst.__refcnt));
 		}
 		daddr = dest->addr.ip;
 		if (ret_saddr)
-			*ret_saddr = dest->dst_saddr.ip;
-		spin_unlock(&dest->dst_lock);
+			*ret_saddr = dest_dst->dst_saddr.ip;
 	} else {
 		__be32 saddr = htonl(INADDR_ANY);
 
+		noref = 0;
+
 		/* For such unconfigured boxes avoid many route lookups
 		 * for performance reasons because we do not remember saddr
 		 */
 		rt_mode &= ~IP_VS_RT_MODE_CONNECT;
-		rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
+		rt = do_output_route4(net, daddr, rt_mode, &saddr);
 		if (!rt)
-			return NULL;
+			goto err_unreach;
 		if (ret_saddr)
 			*ret_saddr = saddr;
 	}
 
-	local = rt->rt_flags & RTCF_LOCAL;
+	local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
 	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
 	      rt_mode)) {
 		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
 			     (rt->rt_flags & RTCF_LOCAL) ?
 			     "local":"non-local", &daddr);
-		ip_rt_put(rt);
-		return NULL;
-	}
-	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
-	    !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
-		IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
-			     "requires NAT method, dest: %pI4\n",
-			     &ip_hdr(skb)->daddr, &daddr);
-		ip_rt_put(rt);
-		return NULL;
+		goto err_put;
 	}
-	if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
-		IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
-			     "to non-local address, dest: %pI4\n",
-			     &ip_hdr(skb)->saddr, &daddr);
-		ip_rt_put(rt);
-		return NULL;
+	iph = ip_hdr(skb);
+	if (likely(!local)) {
+		if (unlikely(ipv4_is_loopback(iph->saddr))) {
+			IP_VS_DBG_RL("Stopping traffic from loopback address "
+				     "%pI4 to non-local address, dest: %pI4\n",
+				     &iph->saddr, &daddr);
+			goto err_put;
+		}
+	} else {
+		ort = skb_rtable(skb);
+		if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
+		    !(ort->rt_flags & RTCF_LOCAL)) {
+			IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
+				     "local requires NAT method, dest: %pI4\n",
+				     &iph->daddr, &daddr);
+			goto err_put;
+		}
+		/* skb to local stack, preserve old route */
+		if (!noref)
+			ip_rt_put(rt);
+		return local;
 	}
 
-	return rt;
-}
-
-/* Reroute packet to local IPv4 stack after DNAT */
-static int
-__ip_vs_reroute_locally(struct sk_buff *skb)
-{
-	struct rtable *rt = skb_rtable(skb);
-	struct net_device *dev = rt->dst.dev;
-	struct net *net = dev_net(dev);
-	struct iphdr *iph = ip_hdr(skb);
-
-	if (rt_is_input_route(rt)) {
-		unsigned long orefdst = skb->_skb_refdst;
-
-		if (ip_route_input(skb, iph->daddr, iph->saddr,
-				   iph->tos, skb->dev))
-			return 0;
-		refdst_drop(orefdst);
+	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
+		mtu = dst_mtu(&rt->dst);
+		df = iph->frag_off & htons(IP_DF);
 	} else {
-		struct flowi4 fl4 = {
-			.daddr = iph->daddr,
-			.saddr = iph->saddr,
-			.flowi4_tos = RT_TOS(iph->tos),
-			.flowi4_mark = skb->mark,
-		};
-
-		rt = ip_route_output_key(net, &fl4);
-		if (IS_ERR(rt))
-			return 0;
-		if (!(rt->rt_flags & RTCF_LOCAL)) {
-			ip_rt_put(rt);
-			return 0;
+		struct sock *sk = skb->sk;
+
+		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
+		if (mtu < 68) {
+			IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
+			goto err_put;
 		}
-		/* Drop old route. */
-		skb_dst_drop(skb);
-		skb_dst_set(skb, &rt->dst);
+		ort = skb_rtable(skb);
+		if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+			ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+		/* MTU check allowed? */
+		df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
 	}
-	return 1;
+
+	/* MTU checking */
+	if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
+		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+		IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
+		goto err_put;
+	}
+
+	skb_dst_drop(skb);
+	if (noref) {
+		if (!local)
+			skb_dst_set_noref_force(skb, &rt->dst);
+		else
+			skb_dst_set(skb, dst_clone(&rt->dst));
+	} else
+		skb_dst_set(skb, &rt->dst);
+
+	return local;
+
+err_put:
+	if (!noref)
+		ip_rt_put(rt);
+	return -1;
+
+err_unreach:
+	dst_link_failure(skb);
+	return -1;
 }
 
 #ifdef CONFIG_IP_VS_IPV6
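
After this rewrite __ip_vs_get_out_rt() no longer hands back a referenced rtable: it attaches the route to the skb itself and returns 1 (local), 0 (non-local) or -1 (error), using skb_dst_set_noref_force() when the dst is only guaranteed to live for the enclosing RCU read side. The caller contract, condensed from the xmit paths later in this patch into one illustrative function:

/* Sketch: caller pattern for the reworked __ip_vs_get_out_rt(). */
static int example_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	int local;

	rcu_read_lock();
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL, NULL);
	if (local < 0) {		/* route lookup/validation failed */
		kfree_skb(skb);
		rcu_read_unlock();
		return NF_STOLEN;
	}
	if (local) {			/* deliver to the local stack */
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}
	/* non-local: skb_dst() stays valid while rcu_read_lock() is held */
	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
	rcu_read_unlock();
	return NF_STOLEN;
}
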
@@ -294,44 +337,57 @@ out_err:
 /*
  * Get route to destination or remote server
  */
-static struct rt6_info *
+static int
 __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
 		      struct in6_addr *daddr, struct in6_addr *ret_saddr,
-		      int do_xfrm, int rt_mode)
+		      struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
 {
 	struct net *net = dev_net(skb_dst(skb)->dev);
+	struct ip_vs_dest_dst *dest_dst;
 	struct rt6_info *rt;			/* Route to the other host */
 	struct rt6_info *ort;			/* Original route */
 	struct dst_entry *dst;
-	int local;
+	int mtu;
+	int local, noref = 1;
 
 	if (dest) {
-		spin_lock(&dest->dst_lock);
-		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
-		if (!rt) {
+		dest_dst = __ip_vs_dst_check(dest);
+		if (likely(dest_dst))
+			rt = (struct rt6_info *) dest_dst->dst_cache;
+		else {
 			u32 cookie;
 
+			dest_dst = ip_vs_dest_dst_alloc();
+			spin_lock_bh(&dest->dst_lock);
+			if (!dest_dst) {
+				__ip_vs_dst_set(dest, NULL, NULL, 0);
+				spin_unlock_bh(&dest->dst_lock);
+				goto err_unreach;
+			}
 			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
-						      &dest->dst_saddr.in6,
+						      &dest_dst->dst_saddr.in6,
 						      do_xfrm);
 			if (!dst) {
-				spin_unlock(&dest->dst_lock);
-				return NULL;
+				__ip_vs_dst_set(dest, NULL, NULL, 0);
+				spin_unlock_bh(&dest->dst_lock);
+				ip_vs_dest_dst_free(dest_dst);
+				goto err_unreach;
 			}
 			rt = (struct rt6_info *) dst;
 			cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
-			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
+			__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
+			spin_unlock_bh(&dest->dst_lock);
 			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
-				  &dest->addr.in6, &dest->dst_saddr.in6,
+				  &dest->addr.in6, &dest_dst->dst_saddr.in6,
 				  atomic_read(&rt->dst.__refcnt));
 		}
 		if (ret_saddr)
-			*ret_saddr = dest->dst_saddr.in6;
-		spin_unlock(&dest->dst_lock);
+			*ret_saddr = dest_dst->dst_saddr.in6;
 	} else {
+		noref = 0;
 		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
 		if (!dst)
-			return NULL;
+			goto err_unreach;
 		rt = (struct rt6_info *) dst;
 	}
 
@@ -340,86 +396,137 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
 	      rt_mode)) {
 		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
 			     local ? "local":"non-local", daddr);
-		dst_release(&rt->dst);
-		return NULL;
+		goto err_put;
 	}
-	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
-	    !((ort = (struct rt6_info *) skb_dst(skb)) &&
-	      __ip_vs_is_local_route6(ort))) {
-		IP_VS_DBG_RL("Redirect from non-local address %pI6c to local "
-			     "requires NAT method, dest: %pI6c\n",
-			     &ipv6_hdr(skb)->daddr, daddr);
-		dst_release(&rt->dst);
-		return NULL;
+	if (likely(!local)) {
+		if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+			     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
+			     IPV6_ADDR_LOOPBACK)) {
+			IP_VS_DBG_RL("Stopping traffic from loopback address "
+				     "%pI6c to non-local address, "
+				     "dest: %pI6c\n",
+				     &ipv6_hdr(skb)->saddr, daddr);
+			goto err_put;
+		}
+	} else {
+		ort = (struct rt6_info *) skb_dst(skb);
+		if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
+		    !__ip_vs_is_local_route6(ort)) {
+			IP_VS_DBG_RL("Redirect from non-local address %pI6c "
+				     "to local requires NAT method, "
+				     "dest: %pI6c\n",
+				     &ipv6_hdr(skb)->daddr, daddr);
+			goto err_put;
+		}
+		/* skb to local stack, preserve old route */
+		if (!noref)
+			dst_release(&rt->dst);
+		return local;
 	}
-	if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
-		     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
-		     IPV6_ADDR_LOOPBACK)) {
-		IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c "
-			     "to non-local address, dest: %pI6c\n",
-			     &ipv6_hdr(skb)->saddr, daddr);
-		dst_release(&rt->dst);
-		return NULL;
+
+	/* MTU checking */
+	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
+		mtu = dst_mtu(&rt->dst);
+	else {
+		struct sock *sk = skb->sk;
+
+		mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
+		if (mtu < IPV6_MIN_MTU) {
+			IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
+				     IPV6_MIN_MTU);
+			goto err_put;
+		}
+		ort = (struct rt6_info *) skb_dst(skb);
+		if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
+			ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
 	}
 
-	return rt;
+	if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
+		if (!skb->dev)
+			skb->dev = net->loopback_dev;
+		/* only send ICMP too big on first fragment */
+		if (!ipvsh->fragoffs)
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
+		goto err_put;
+	}
+
+	skb_dst_drop(skb);
+	if (noref) {
+		if (!local)
+			skb_dst_set_noref_force(skb, &rt->dst);
+		else
+			skb_dst_set(skb, dst_clone(&rt->dst));
+	} else
+		skb_dst_set(skb, &rt->dst);
+
+	return local;
+
+err_put:
+	if (!noref)
+		dst_release(&rt->dst);
+	return -1;
+
+err_unreach:
+	dst_link_failure(skb);
+	return -1;
 }
 #endif
 
 
-/*
- *	Release dest->dst_cache before a dest is removed
- */
-void
-ip_vs_dst_reset(struct ip_vs_dest *dest)
+/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */
+static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
+					    struct ip_vs_conn *cp)
 {
-	struct dst_entry *old_dst;
+	int ret = NF_ACCEPT;
+
+	skb->ipvs_property = 1;
+	if (unlikely(cp->flags & IP_VS_CONN_F_NFCT))
+		ret = ip_vs_confirm_conntrack(skb);
+	if (ret == NF_ACCEPT) {
+		nf_reset(skb);
+		skb_forward_csum(skb);
+	}
+	return ret;
+}
+
+/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
+static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
+					 struct ip_vs_conn *cp, int local)
+{
+	int ret = NF_STOLEN;
 
-	old_dst = dest->dst_cache;
-	dest->dst_cache = NULL;
-	dst_release(old_dst);
-	dest->dst_saddr.ip = 0;
+	skb->ipvs_property = 1;
+	if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
+		ip_vs_notrack(skb);
+	else
+		ip_vs_update_conntrack(skb, cp, 1);
+	if (!local) {
+		skb_forward_csum(skb);
+		NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
+			dst_output);
+	} else
+		ret = NF_ACCEPT;
+	return ret;
 }
 
-#define IP_VS_XMIT_TUNNEL(skb, cp)				\
-({								\
-	int __ret = NF_ACCEPT;					\
-								\
-	(skb)->ipvs_property = 1;				\
-	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
-		__ret = ip_vs_confirm_conntrack(skb);		\
-	if (__ret == NF_ACCEPT) {				\
-		nf_reset(skb);					\
-		skb_forward_csum(skb);				\
-	}							\
-	__ret;							\
-})
-
-#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\
-do {							\
-	(skb)->ipvs_property = 1;			\
-	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
-		ip_vs_notrack(skb);			\
-	else						\
-		ip_vs_update_conntrack(skb, cp, 1);	\
-	if (local)					\
-		return NF_ACCEPT;			\
-	skb_forward_csum(skb);				\
-	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
-		skb_dst(skb)->dev, dst_output);		\
-} while (0)
-
-#define IP_VS_XMIT(pf, skb, cp, local)			\
-do {							\
-	(skb)->ipvs_property = 1;			\
-	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
-		ip_vs_notrack(skb);			\
-	if (local)					\
-		return NF_ACCEPT;			\
-	skb_forward_csum(skb);				\
-	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
-		skb_dst(skb)->dev, dst_output);		\
-} while (0)
+/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
+static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
+				     struct ip_vs_conn *cp, int local)
+{
+	int ret = NF_STOLEN;
+
+	skb->ipvs_property = 1;
+	if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
+		ip_vs_notrack(skb);
+	if (!local) {
+		skb_forward_csum(skb);
+		NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
+			dst_output);
+	} else
+		ret = NF_ACCEPT;
+	return ret;
+}
 
 
 /*
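
These helpers replace the old return-from-macro trick: instead of IP_VS_XMIT() executing `return NF_ACCEPT` from inside its caller, each helper hands back a verdict that the caller forwards. A condensed illustration of the shape used by the xmit functions below:

/* Sketch: verdict handling with the new helpers. */
static int example_send(struct sk_buff *skb, struct ip_vs_conn *cp, int local)
{
	int rc;

	rcu_read_lock();
	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
	rcu_read_unlock();
	return rc;	/* NF_STOLEN if sent, NF_ACCEPT when local == 1 */
}
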
@@ -430,7 +537,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
 {
 	/* we do not touch skb and do not need pskb ptr */
-	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
+	return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
 }
 
 
@@ -443,52 +550,29 @@ int
 ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
 {
-	struct rtable *rt;			/* Route to the other host */
 	struct iphdr  *iph = ip_hdr(skb);
-	int    mtu;
 
 	EnterFunction(10);
 
-	if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos),
-				      IP_VS_RT_MODE_NON_LOCAL, NULL)))
-		goto tx_error_icmp;
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->dst);
-	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
-	    !skb_is_gso(skb)) {
-		ip_rt_put(rt);
-		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
-		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+	rcu_read_lock();
+	if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
+			       NULL) < 0)
 		goto tx_error;
-	}
 
-	/*
-	 * Call ip_send_check because we are not sure it is called
-	 * after ip_defrag. Is copy-on-write needed?
-	 */
-	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
-		ip_rt_put(rt);
-		return NF_STOLEN;
-	}
-	ip_send_check(ip_hdr(skb));
-
-	/* drop old route */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
+	ip_send_check(iph);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
+	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
+	rcu_read_unlock();
 
 	LeaveFunction(10);
 	return NF_STOLEN;
 
- tx_error_icmp:
-	dst_link_failure(skb);
  tx_error:
 	kfree_skb(skb);
+	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -496,60 +580,27 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 #ifdef CONFIG_IP_VS_IPV6
 int
 ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
-		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
+		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
 {
-	struct rt6_info *rt;			/* Route to the other host */
-	int    mtu;
-
 	EnterFunction(10);
 
-	rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0,
-				   IP_VS_RT_MODE_NON_LOCAL);
-	if (!rt)
-		goto tx_error_icmp;
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->dst);
-	if (__mtu_check_toobig_v6(skb, mtu)) {
-		if (!skb->dev) {
-			struct net *net = dev_net(skb_dst(skb)->dev);
-
-			skb->dev = net->loopback_dev;
-		}
-		/* only send ICMP too big on first fragment */
-		if (!iph->fragoffs)
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->dst);
-		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+	rcu_read_lock();
+	if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL,
+				  ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
 		goto tx_error;
-	}
-
-	/*
-	 * Call ip_send_check because we are not sure it is called
-	 * after ip_defrag. Is copy-on-write needed?
-	 */
-	skb = skb_share_check(skb, GFP_ATOMIC);
-	if (unlikely(skb == NULL)) {
-		dst_release(&rt->dst);
-		return NF_STOLEN;
-	}
-
-	/* drop old route */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
+	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
+	rcu_read_unlock();
 
 	LeaveFunction(10);
 	return NF_STOLEN;
 
- tx_error_icmp:
-	dst_link_failure(skb);
  tx_error:
 	kfree_skb(skb);
+	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -564,29 +615,30 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	       struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
 {
 	struct rtable *rt;		/* Route to the other host */
-	int mtu;
-	struct iphdr *iph = ip_hdr(skb);
-	int local;
+	int local, rc, was_input;
 
 	EnterFunction(10);
 
+	rcu_read_lock();
 	/* check if it is a connection of no-client-port */
 	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
 		__be16 _pt, *p;
-		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
+
+		p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
 		if (p == NULL)
 			goto tx_error;
 		ip_vs_conn_fill_cport(cp, *p);
 		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
 	}
 
-	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
-				      RT_TOS(iph->tos),
-				      IP_VS_RT_MODE_LOCAL |
-				      IP_VS_RT_MODE_NON_LOCAL |
-				      IP_VS_RT_MODE_RDR, NULL)))
-		goto tx_error_icmp;
-	local = rt->rt_flags & RTCF_LOCAL;
+	was_input = rt_is_input_route(skb_rtable(skb));
+	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+				   IP_VS_RT_MODE_LOCAL |
+				   IP_VS_RT_MODE_NON_LOCAL |
+				   IP_VS_RT_MODE_RDR, NULL);
+	if (local < 0)
+		goto tx_error;
+	rt = skb_rtable(skb);
 	/*
 	 * Avoid duplicate tuple in reply direction for NAT traffic
 	 * to local address when connection is sync-ed
@@ -600,57 +652,31 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 			IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
 					 "ip_vs_nat_xmit(): "
 					 "stopping DNAT to local address");
-			goto tx_error_put;
+			goto tx_error;
 		}
 	}
 #endif
 
 	/* From world but DNAT to loopback address? */
-	if (local && ipv4_is_loopback(cp->daddr.ip) &&
-	    rt_is_input_route(skb_rtable(skb))) {
+	if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
 		IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
 				 "stopping DNAT to loopback address");
-		goto tx_error_put;
-	}
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->dst);
-	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
-	    !skb_is_gso(skb)) {
-		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
-		IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
-				 "ip_vs_nat_xmit(): frag needed for");
-		goto tx_error_put;
+		goto tx_error;
 	}
 
 	/* copy-on-write the packet before mangling it */
 	if (!skb_make_writable(skb, sizeof(struct iphdr)))
-		goto tx_error_put;
+		goto tx_error;
 
 	if (skb_cow(skb, rt->dst.dev->hard_header_len))
-		goto tx_error_put;
+		goto tx_error;
 
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
-		goto tx_error_put;
+		goto tx_error;
 	ip_hdr(skb)->daddr = cp->daddr.ip;
 	ip_send_check(ip_hdr(skb));
 
-	if (!local) {
-		/* drop old route */
-		skb_dst_drop(skb);
-		skb_dst_set(skb, &rt->dst);
-	} else {
-		ip_rt_put(rt);
-		/*
-		 * Some IPv4 replies get local address from routes,
-		 * not from iph, so while we DNAT after routing
-		 * we need this second input/output route.
-		 */
-		if (!__ip_vs_reroute_locally(skb))
-			goto tx_error;
-	}
-
 	IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
 
 	/* FIXME: when application helper enlarges the packet and the length
@@ -660,49 +686,48 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
+	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
+	rcu_read_unlock();
 
 	LeaveFunction(10);
-	return NF_STOLEN;
+	return rc;
 
-  tx_error_icmp:
-	dst_link_failure(skb);
   tx_error:
 	kfree_skb(skb);
+	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
-  tx_error_put:
-	ip_rt_put(rt);
-	goto tx_error;
 }
 
 #ifdef CONFIG_IP_VS_IPV6
 int
 ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
-		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
+		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
 {
 	struct rt6_info *rt;		/* Route to the other host */
-	int mtu;
-	int local;
+	int local, rc;
 
 	EnterFunction(10);
 
+	rcu_read_lock();
 	/* check if it is a connection of no-client-port */
-	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) {
+	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
 		__be16 _pt, *p;
-		p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt);
+		p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
 		if (p == NULL)
 			goto tx_error;
 		ip_vs_conn_fill_cport(cp, *p);
 		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
 	}
 
-	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
-					 0, (IP_VS_RT_MODE_LOCAL |
-					     IP_VS_RT_MODE_NON_LOCAL |
-					     IP_VS_RT_MODE_RDR))))
-		goto tx_error_icmp;
-	local = __ip_vs_is_local_route6(rt);
+	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
+				      ipvsh, 0,
+				      IP_VS_RT_MODE_LOCAL |
+				      IP_VS_RT_MODE_NON_LOCAL |
+				      IP_VS_RT_MODE_RDR);
+	if (local < 0)
+		goto tx_error;
+	rt = (struct rt6_info *) skb_dst(skb);
 	/*
 	 * Avoid duplicate tuple in reply direction for NAT traffic
 	 * to local address when connection is sync-ed
@@ -716,7 +741,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 			IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
 					 "ip_vs_nat_xmit_v6(): "
 					 "stopping DNAT to local address");
-			goto tx_error_put;
+			goto tx_error;
 		}
 	}
 #endif
@@ -727,46 +752,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
 				 "ip_vs_nat_xmit_v6(): "
 				 "stopping DNAT to loopback address");
-		goto tx_error_put;
-	}
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->dst);
-	if (__mtu_check_toobig_v6(skb, mtu)) {
-		if (!skb->dev) {
-			struct net *net = dev_net(skb_dst(skb)->dev);
-
-			skb->dev = net->loopback_dev;
-		}
-		/* only send ICMP too big on first fragment */
-		if (!iph->fragoffs)
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
-				 "ip_vs_nat_xmit_v6(): frag needed for");
-		goto tx_error_put;
+		goto tx_error;
 	}
 
 	/* copy-on-write the packet before mangling it */
 	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
-		goto tx_error_put;
+		goto tx_error;
 
 	if (skb_cow(skb, rt->dst.dev->hard_header_len))
-		goto tx_error_put;
+		goto tx_error;
 
 	/* mangle the packet */
-	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph))
+	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
 		goto tx_error;
 	ipv6_hdr(skb)->daddr = cp->daddr.in6;
 
-	if (!local || !skb->dev) {
-		/* drop the old route when skb is not shared */
-		skb_dst_drop(skb);
-		skb_dst_set(skb, &rt->dst);
-	} else {
-		/* destined to loopback, do we need to change route? */
-		dst_release(&rt->dst);
-	}
-
 	IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
 
 	/* FIXME: when application helper enlarges the packet and the length
@@ -776,20 +776,17 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
+	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
+	rcu_read_unlock();
 
 	LeaveFunction(10);
-	return NF_STOLEN;
+	return rc;
 
-tx_error_icmp:
-	dst_link_failure(skb);
 tx_error:
 	LeaveFunction(10);
 	kfree_skb(skb);
+	rcu_read_unlock();
 	return NF_STOLEN;
-tx_error_put:
-	dst_release(&rt->dst);
-	goto tx_error;
 }
 #endif
 
@@ -826,56 +823,40 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	__be16 df;
 	struct iphdr  *iph;			/* Our new IP header */
 	unsigned int max_headroom;		/* The extra header space needed */
-	int    mtu;
-	int ret;
+	int ret, local;
 
 	EnterFunction(10);
 
-	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
-				      RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
-				      IP_VS_RT_MODE_NON_LOCAL |
-				      IP_VS_RT_MODE_CONNECT,
-				      &saddr)))
-		goto tx_error_icmp;
-	if (rt->rt_flags & RTCF_LOCAL) {
-		ip_rt_put(rt);
-		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
+	rcu_read_lock();
+	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+				   IP_VS_RT_MODE_LOCAL |
+				   IP_VS_RT_MODE_NON_LOCAL |
+				   IP_VS_RT_MODE_CONNECT |
+				   IP_VS_RT_MODE_TUNNEL, &saddr);
+	if (local < 0)
+		goto tx_error;
+	if (local) {
+		rcu_read_unlock();
+		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
 	}
 
+	rt = skb_rtable(skb);
 	tdev = rt->dst.dev;
 
-	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
-	if (mtu < 68) {
-		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
-		goto tx_error_put;
-	}
-	if (rt_is_output_route(skb_rtable(skb)))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-
 	/* Copy DF, reset fragment offset and MF */
 	df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;
 
-	if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) {
-		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
-		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
-		goto tx_error_put;
-	}
-
 	/*
 	 * Okay, now see if we can stuff it in the buffer as-is.
 	 */
 	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);
 
-	if (skb_headroom(skb) < max_headroom
-	    || skb_cloned(skb) || skb_shared(skb)) {
+	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
 		struct sk_buff *new_skb =
 			skb_realloc_headroom(skb, max_headroom);
-		if (!new_skb) {
-			ip_rt_put(rt);
-			kfree_skb(skb);
-			IP_VS_ERR_RL("%s(): no memory\n", __func__);
-			return NF_STOLEN;
-		}
+
+		if (!new_skb)
+			goto tx_error;
 		consume_skb(skb);
 		skb = new_skb;
 		old_iph = ip_hdr(skb);
890 skb_reset_network_header(skb); 871 skb_reset_network_header(skb);
891 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); 872 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
892 873
893 /* drop old route */
894 skb_dst_drop(skb);
895 skb_dst_set(skb, &rt->dst);
896
897 /* 874 /*
898 * Push down and install the IPIP header. 875 * Push down and install the IPIP header.
899 */ 876 */
@@ -911,25 +888,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	ret = IP_VS_XMIT_TUNNEL(skb, cp);
+	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
 	if (ret == NF_ACCEPT)
 		ip_local_out(skb);
 	else if (ret == NF_DROP)
 		kfree_skb(skb);
+	rcu_read_unlock();
 
 	LeaveFunction(10);
 
 	return NF_STOLEN;
 
-  tx_error_icmp:
-	dst_link_failure(skb);
   tx_error:
 	kfree_skb(skb);
+	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
-tx_error_put:
-	ip_rt_put(rt);
-	goto tx_error;
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -943,60 +917,37 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
 	struct ipv6hdr  *iph;		/* Our new IP header */
 	unsigned int max_headroom;	/* The extra header space needed */
-	int    mtu;
-	int ret;
+	int ret, local;
 
 	EnterFunction(10);
 
-	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
-					 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
-						     IP_VS_RT_MODE_NON_LOCAL))))
-		goto tx_error_icmp;
-	if (__ip_vs_is_local_route6(rt)) {
-		dst_release(&rt->dst);
-		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
+	rcu_read_lock();
+	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
+				      &saddr, ipvsh, 1,
+				      IP_VS_RT_MODE_LOCAL |
+				      IP_VS_RT_MODE_NON_LOCAL |
+				      IP_VS_RT_MODE_TUNNEL);
+	if (local < 0)
+		goto tx_error;
+	if (local) {
+		rcu_read_unlock();
+		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
 	}
 
+	rt = (struct rt6_info *) skb_dst(skb);
 	tdev = rt->dst.dev;
 
-	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
-	if (mtu < IPV6_MIN_MTU) {
-		IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
-			     IPV6_MIN_MTU);
-		goto tx_error_put;
-	}
-	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-
-	/* MTU checking: Notice that 'mtu' have been adjusted before hand */
-	if (__mtu_check_toobig_v6(skb, mtu)) {
-		if (!skb->dev) {
-			struct net *net = dev_net(skb_dst(skb)->dev);
-
-			skb->dev = net->loopback_dev;
-		}
-		/* only send ICMP too big on first fragment */
-		if (!ipvsh->fragoffs)
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
-		goto tx_error_put;
-	}
-
 	/*
 	 * Okay, now see if we can stuff it in the buffer as-is.
 	 */
 	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);
 
-	if (skb_headroom(skb) < max_headroom
-	    || skb_cloned(skb) || skb_shared(skb)) {
+	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
 		struct sk_buff *new_skb =
 			skb_realloc_headroom(skb, max_headroom);
-		if (!new_skb) {
-			dst_release(&rt->dst);
-			kfree_skb(skb);
-			IP_VS_ERR_RL("%s(): no memory\n", __func__);
-			return NF_STOLEN;
-		}
+
+		if (!new_skb)
+			goto tx_error;
 		consume_skb(skb);
 		skb = new_skb;
 		old_iph = ipv6_hdr(skb);
@@ -1008,10 +959,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	skb_reset_network_header(skb);
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 
-	/* drop old route */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
 	/*
 	 *	Push down and install the IPIP header.
 	 */
@@ -1029,25 +976,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	ret = IP_VS_XMIT_TUNNEL(skb, cp);
+	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
 	if (ret == NF_ACCEPT)
 		ip6_local_out(skb);
 	else if (ret == NF_DROP)
 		kfree_skb(skb);
+	rcu_read_unlock();
 
 	LeaveFunction(10);
 
 	return NF_STOLEN;
 
-tx_error_icmp:
-	dst_link_failure(skb);
 tx_error:
 	kfree_skb(skb);
+	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
-tx_error_put:
-	dst_release(&rt->dst);
-	goto tx_error;
 }
 #endif
 
@@ -1060,59 +1004,36 @@ int
 ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	      struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
 {
-	struct rtable *rt;			/* Route to the other host */
-	struct iphdr  *iph = ip_hdr(skb);
-	int    mtu;
+	int local;
 
 	EnterFunction(10);
 
-	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
-				      RT_TOS(iph->tos),
-				      IP_VS_RT_MODE_LOCAL |
-				      IP_VS_RT_MODE_NON_LOCAL |
-				      IP_VS_RT_MODE_KNOWN_NH, NULL)))
-		goto tx_error_icmp;
-	if (rt->rt_flags & RTCF_LOCAL) {
-		ip_rt_put(rt);
-		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
-	}
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->dst);
-	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
-	    !skb_is_gso(skb)) {
-		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
-		ip_rt_put(rt);
-		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+	rcu_read_lock();
+	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+				   IP_VS_RT_MODE_LOCAL |
+				   IP_VS_RT_MODE_NON_LOCAL |
+				   IP_VS_RT_MODE_KNOWN_NH, NULL);
+	if (local < 0)
 		goto tx_error;
+	if (local) {
+		rcu_read_unlock();
+		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
 	}
 
-	/*
-	 * Call ip_send_check because we are not sure it is called
-	 * after ip_defrag. Is copy-on-write needed?
-	 */
-	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
-		ip_rt_put(rt);
-		return NF_STOLEN;
-	}
 	ip_send_check(ip_hdr(skb));
 
-	/* drop old route */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
+	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
+	rcu_read_unlock();
 
 	LeaveFunction(10);
 	return NF_STOLEN;
 
-  tx_error_icmp:
-	dst_link_failure(skb);
   tx_error:
 	kfree_skb(skb);
+	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -1120,64 +1041,36 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 #ifdef CONFIG_IP_VS_IPV6
 int
 ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
-		 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph)
+		 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
 {
-	struct rt6_info *rt;			/* Route to the other host */
-	int    mtu;
+	int local;
 
 	EnterFunction(10);
 
-	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
-					 0, (IP_VS_RT_MODE_LOCAL |
-					     IP_VS_RT_MODE_NON_LOCAL))))
-		goto tx_error_icmp;
-	if (__ip_vs_is_local_route6(rt)) {
-		dst_release(&rt->dst);
-		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
-	}
-
-	/* MTU checking */
-	mtu = dst_mtu(&rt->dst);
-	if (__mtu_check_toobig_v6(skb, mtu)) {
-		if (!skb->dev) {
-			struct net *net = dev_net(skb_dst(skb)->dev);
-
-			skb->dev = net->loopback_dev;
-		}
-		/* only send ICMP too big on first fragment */
-		if (!iph->fragoffs)
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->dst);
-		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
+	rcu_read_lock();
+	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
+				      ipvsh, 0,
+				      IP_VS_RT_MODE_LOCAL |
+				      IP_VS_RT_MODE_NON_LOCAL);
+	if (local < 0)
 		goto tx_error;
+	if (local) {
+		rcu_read_unlock();
+		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);
 	}
 
-	/*
-	 * Call ip_send_check because we are not sure it is called
-	 * after ip_defrag. Is copy-on-write needed?
-	 */
-	skb = skb_share_check(skb, GFP_ATOMIC);
-	if (unlikely(skb == NULL)) {
-		dst_release(&rt->dst);
-		return NF_STOLEN;
-	}
-
-	/* drop old route */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
+	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
+	rcu_read_unlock();
 
 	LeaveFunction(10);
 	return NF_STOLEN;
 
-tx_error_icmp:
-	dst_link_failure(skb);
 tx_error:
 	kfree_skb(skb);
+	rcu_read_unlock();
 	LeaveFunction(10);
 	return NF_STOLEN;
 }
@@ -1194,10 +1087,9 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1194 struct ip_vs_iphdr *iph) 1087 struct ip_vs_iphdr *iph)
1195{ 1088{
1196 struct rtable *rt; /* Route to the other host */ 1089 struct rtable *rt; /* Route to the other host */
1197 int mtu;
1198 int rc; 1090 int rc;
1199 int local; 1091 int local;
1200 int rt_mode; 1092 int rt_mode, was_input;
1201 1093
1202 EnterFunction(10); 1094 EnterFunction(10);
1203 1095
@@ -1217,16 +1109,17 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1217 /* 1109 /*
1218 * mangle and send the packet here (only for VS/NAT) 1110 * mangle and send the packet here (only for VS/NAT)
1219 */ 1111 */
1112 was_input = rt_is_input_route(skb_rtable(skb));
1220 1113
1221 /* LOCALNODE from FORWARD hook is not supported */ 1114 /* LOCALNODE from FORWARD hook is not supported */
1222 rt_mode = (hooknum != NF_INET_FORWARD) ? 1115 rt_mode = (hooknum != NF_INET_FORWARD) ?
1223 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1116 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1224 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1117 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1225 if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, 1118 rcu_read_lock();
1226 RT_TOS(ip_hdr(skb)->tos), 1119 local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
1227 rt_mode, NULL))) 1120 if (local < 0)
1228 goto tx_error_icmp; 1121 goto tx_error;
1229 local = rt->rt_flags & RTCF_LOCAL; 1122 rt = skb_rtable(skb);
1230 1123
1231 /* 1124 /*
1232 * Avoid duplicate tuple in reply direction for NAT traffic 1125 * Avoid duplicate tuple in reply direction for NAT traffic
@@ -1241,82 +1134,51 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
1241 IP_VS_DBG(10, "%s(): " 1134 IP_VS_DBG(10, "%s(): "
1242 "stopping DNAT to local address %pI4\n", 1135 "stopping DNAT to local address %pI4\n",
1243 __func__, &cp->daddr.ip); 1136 __func__, &cp->daddr.ip);
1244 goto tx_error_put; 1137 goto tx_error;
1245 } 1138 }
1246 } 1139 }
1247#endif 1140#endif
1248 1141
1249 /* From world but DNAT to loopback address? */ 1142 /* From world but DNAT to loopback address? */
1250 if (local && ipv4_is_loopback(cp->daddr.ip) && 1143 if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
1251 rt_is_input_route(skb_rtable(skb))) {
1252 IP_VS_DBG(1, "%s(): " 1144 IP_VS_DBG(1, "%s(): "
1253 "stopping DNAT to loopback %pI4\n", 1145 "stopping DNAT to loopback %pI4\n",
1254 __func__, &cp->daddr.ip); 1146 __func__, &cp->daddr.ip);
1255 goto tx_error_put; 1147 goto tx_error;
1256 }
1257
1258 /* MTU checking */
1259 mtu = dst_mtu(&rt->dst);
1260 if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
1261 !skb_is_gso(skb)) {
1262 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1263 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1264 goto tx_error_put;
1265 } 1148 }
1266 1149
1267 /* copy-on-write the packet before mangling it */ 1150 /* copy-on-write the packet before mangling it */
1268 if (!skb_make_writable(skb, offset)) 1151 if (!skb_make_writable(skb, offset))
1269 goto tx_error_put; 1152 goto tx_error;
1270 1153
1271 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1154 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1272 goto tx_error_put; 1155 goto tx_error;
1273 1156
1274 ip_vs_nat_icmp(skb, pp, cp, 0); 1157 ip_vs_nat_icmp(skb, pp, cp, 0);
1275 1158
1276 if (!local) {
1277 /* drop the old route when skb is not shared */
1278 skb_dst_drop(skb);
1279 skb_dst_set(skb, &rt->dst);
1280 } else {
1281 ip_rt_put(rt);
1282 /*
1283 * Some IPv4 replies get local address from routes,
1284 * not from iph, so while we DNAT after routing
1285 * we need this second input/output route.
1286 */
1287 if (!__ip_vs_reroute_locally(skb))
1288 goto tx_error;
1289 }
1290
1291 /* Another hack: avoid icmp_send in ip_fragment */ 1159 /* Another hack: avoid icmp_send in ip_fragment */
1292 skb->local_df = 1; 1160 skb->local_df = 1;
1293 1161
1294 IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); 1162 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
1295 1163 rcu_read_unlock();
1296 rc = NF_STOLEN;
1297 goto out; 1164 goto out;
1298 1165
1299 tx_error_icmp:
1300 dst_link_failure(skb);
1301 tx_error: 1166 tx_error:
1302 dev_kfree_skb(skb); 1167 kfree_skb(skb);
1168 rcu_read_unlock();
1303 rc = NF_STOLEN; 1169 rc = NF_STOLEN;
1304 out: 1170 out:
1305 LeaveFunction(10); 1171 LeaveFunction(10);
1306 return rc; 1172 return rc;
1307 tx_error_put:
1308 ip_rt_put(rt);
1309 goto tx_error;
1310} 1173}
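
One subtle ordering point in the hunk above: was_input has to be latched before the route lookup, because __ip_vs_get_out_rt() replaces the skb's dst with the freshly resolved output route, after which rt_is_input_route() would no longer describe where the packet came from. An annotated excerpt, using the same symbols as the hunk:

    /* Latch the input-route property first... */
    was_input = rt_is_input_route(skb_rtable(skb));
    rcu_read_lock();
    /* ...because this call swaps skb's dst for the output route... */
    local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
    if (local < 0)
        goto tx_error;
    /* ...so skb_rtable() now yields the route the helper attached. */
    rt = skb_rtable(skb);
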
1311 1174
1312#ifdef CONFIG_IP_VS_IPV6 1175#ifdef CONFIG_IP_VS_IPV6
1313int 1176int
1314ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, 1177ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1315 struct ip_vs_protocol *pp, int offset, unsigned int hooknum, 1178 struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
1316 struct ip_vs_iphdr *iph) 1179 struct ip_vs_iphdr *ipvsh)
1317{ 1180{
1318 struct rt6_info *rt; /* Route to the other host */ 1181 struct rt6_info *rt; /* Route to the other host */
1319 int mtu;
1320 int rc; 1182 int rc;
1321 int local; 1183 int local;
1322 int rt_mode; 1184 int rt_mode;
@@ -1328,7 +1190,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1328 translate address/port back */ 1190 translate address/port back */
1329 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { 1191 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
1330 if (cp->packet_xmit) 1192 if (cp->packet_xmit)
1331 rc = cp->packet_xmit(skb, cp, pp, iph); 1193 rc = cp->packet_xmit(skb, cp, pp, ipvsh);
1332 else 1194 else
1333 rc = NF_ACCEPT; 1195 rc = NF_ACCEPT;
1334 /* do not touch skb anymore */ 1196 /* do not touch skb anymore */
@@ -1344,11 +1206,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1344 rt_mode = (hooknum != NF_INET_FORWARD) ? 1206 rt_mode = (hooknum != NF_INET_FORWARD) ?
1345 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 1207 IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
1346 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; 1208 IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
1347 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 1209 rcu_read_lock();
1348 0, rt_mode))) 1210 local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
1349 goto tx_error_icmp; 1211 ipvsh, 0, rt_mode);
1350 1212 if (local < 0)
1351 local = __ip_vs_is_local_route6(rt); 1213 goto tx_error;
1214 rt = (struct rt6_info *) skb_dst(skb);
1352 /* 1215 /*
1353 * Avoid duplicate tuple in reply direction for NAT traffic 1216 * Avoid duplicate tuple in reply direction for NAT traffic
1354 * to local address when connection is sync-ed 1217 * to local address when connection is sync-ed
@@ -1362,7 +1225,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1362 IP_VS_DBG(10, "%s(): " 1225 IP_VS_DBG(10, "%s(): "
1363 "stopping DNAT to local address %pI6\n", 1226 "stopping DNAT to local address %pI6\n",
1364 __func__, &cp->daddr.in6); 1227 __func__, &cp->daddr.in6);
1365 goto tx_error_put; 1228 goto tx_error;
1366 } 1229 }
1367 } 1230 }
1368#endif 1231#endif
@@ -1373,60 +1236,31 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1373 IP_VS_DBG(1, "%s(): " 1236 IP_VS_DBG(1, "%s(): "
1374 "stopping DNAT to loopback %pI6\n", 1237 "stopping DNAT to loopback %pI6\n",
1375 __func__, &cp->daddr.in6); 1238 __func__, &cp->daddr.in6);
1376 goto tx_error_put; 1239 goto tx_error;
1377 }
1378
1379 /* MTU checking */
1380 mtu = dst_mtu(&rt->dst);
1381 if (__mtu_check_toobig_v6(skb, mtu)) {
1382 if (!skb->dev) {
1383 struct net *net = dev_net(skb_dst(skb)->dev);
1384
1385 skb->dev = net->loopback_dev;
1386 }
1387 /* only send ICMP too big on first fragment */
1388 if (!iph->fragoffs)
1389 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1390 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1391 goto tx_error_put;
1392 } 1240 }
1393 1241
1394 /* copy-on-write the packet before mangling it */ 1242 /* copy-on-write the packet before mangling it */
1395 if (!skb_make_writable(skb, offset)) 1243 if (!skb_make_writable(skb, offset))
1396 goto tx_error_put; 1244 goto tx_error;
1397 1245
1398 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1246 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1399 goto tx_error_put; 1247 goto tx_error;
1400 1248
1401 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1249 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1402 1250
1403 if (!local || !skb->dev) {
1404 /* drop the old route when skb is not shared */
1405 skb_dst_drop(skb);
1406 skb_dst_set(skb, &rt->dst);
1407 } else {
1408 /* destined to loopback, do we need to change route? */
1409 dst_release(&rt->dst);
1410 }
1411
1412 /* Another hack: avoid icmp_send in ip_fragment */ 1251 /* Another hack: avoid icmp_send in ip_fragment */
1413 skb->local_df = 1; 1252 skb->local_df = 1;
1414 1253
1415 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); 1254 rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
1416 1255 rcu_read_unlock();
1417 rc = NF_STOLEN;
1418 goto out; 1256 goto out;
1419 1257
1420tx_error_icmp:
1421 dst_link_failure(skb);
1422tx_error: 1258tx_error:
1423 dev_kfree_skb(skb); 1259 kfree_skb(skb);
1260 rcu_read_unlock();
1424 rc = NF_STOLEN; 1261 rc = NF_STOLEN;
1425out: 1262out:
1426 LeaveFunction(10); 1263 LeaveFunction(10);
1427 return rc; 1264 return rc;
1428tx_error_put:
1429 dst_release(&rt->dst);
1430 goto tx_error;
1431} 1265}
1432#endif 1266#endif
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index dbdaa1149260..b8b95f4027ca 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -2,6 +2,7 @@
2 * 2 *
3 * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> 3 * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
4 * based on HW's ip_conntrack_irc.c as well as other modules 4 * based on HW's ip_conntrack_irc.c as well as other modules
5 * (C) 2006 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * This program is free software; you can redistribute it and/or 7 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License 8 * modify it under the terms of the GNU General Public License
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c8e001a9c45b..0283baedcdfb 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -5,6 +5,7 @@
5/* (C) 1999-2001 Paul `Rusty' Russell 5/* (C) 1999-2001 Paul `Rusty' Russell
6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 6 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 7 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
8 * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
8 * 9 *
9 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as 11 * it under the terms of the GNU General Public License version 2 as
@@ -48,6 +49,7 @@
48#include <net/netfilter/nf_conntrack_labels.h> 49#include <net/netfilter/nf_conntrack_labels.h>
49#include <net/netfilter/nf_nat.h> 50#include <net/netfilter/nf_nat.h>
50#include <net/netfilter/nf_nat_core.h> 51#include <net/netfilter/nf_nat_core.h>
52#include <net/netfilter/nf_nat_helper.h>
51 53
52#define NF_CONNTRACK_VERSION "0.5.0" 54#define NF_CONNTRACK_VERSION "0.5.0"
53 55
@@ -264,7 +266,7 @@ static void death_by_event(unsigned long ul_conntrack)
264 if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { 266 if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
265 /* bad luck, let's retry again */ 267 /* bad luck, let's retry again */
266 ecache->timeout.expires = jiffies + 268 ecache->timeout.expires = jiffies +
267 (random32() % net->ct.sysctl_events_retry_timeout); 269 (prandom_u32() % net->ct.sysctl_events_retry_timeout);
268 add_timer(&ecache->timeout); 270 add_timer(&ecache->timeout);
269 return; 271 return;
270 } 272 }
@@ -283,7 +285,7 @@ void nf_ct_dying_timeout(struct nf_conn *ct)
283 /* set a new timer to retry event delivery */ 285 /* set a new timer to retry event delivery */
284 setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct); 286 setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
285 ecache->timeout.expires = jiffies + 287 ecache->timeout.expires = jiffies +
286 (random32() % net->ct.sysctl_events_retry_timeout); 288 (prandom_u32() % net->ct.sysctl_events_retry_timeout);
287 add_timer(&ecache->timeout); 289 add_timer(&ecache->timeout);
288} 290}
289EXPORT_SYMBOL_GPL(nf_ct_dying_timeout); 291EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
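
random32() to prandom_u32() in the two hunks above is a mechanical rename into the prandom_ namespace, which separates the fast non-cryptographic PRNG from get_random_bytes(); the returned 32-bit value and its use as timer jitter are unchanged. A minimal self-contained sketch of the retry pattern both call sites implement, with a hypothetical helper name:

    #include <linux/jiffies.h>
    #include <linux/random.h>	/* prandom_u32() */
    #include <linux/timer.h>

    /* Hypothetical helper mirroring the hunks above: re-arm an idle
     * timer with a random backoff strictly below "window" jiffies, so
     * many conntracks retrying event delivery do not fire in lockstep. */
    static void rearm_with_jitter(struct timer_list *t, u32 window)
    {
        t->expires = jiffies + (prandom_u32() % window);
        add_timer(t);
    }
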
@@ -1259,7 +1261,7 @@ void nf_ct_iterate_cleanup(struct net *net,
1259EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); 1261EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
1260 1262
1261struct __nf_ct_flush_report { 1263struct __nf_ct_flush_report {
1262 u32 pid; 1264 u32 portid;
1263 int report; 1265 int report;
1264}; 1266};
1265 1267
@@ -1274,7 +1276,7 @@ static int kill_report(struct nf_conn *i, void *data)
1274 1276
1275 /* If we fail to deliver the event, death_by_timeout() will retry */ 1277 /* If we fail to deliver the event, death_by_timeout() will retry */
1276 if (nf_conntrack_event_report(IPCT_DESTROY, i, 1278 if (nf_conntrack_event_report(IPCT_DESTROY, i,
1277 fr->pid, fr->report) < 0) 1279 fr->portid, fr->report) < 0)
1278 return 1; 1280 return 1;
1279 1281
1280 /* Avoid the delivery of the destroy event in death_by_timeout(). */ 1282 /* Avoid the delivery of the destroy event in death_by_timeout(). */
@@ -1297,10 +1299,10 @@ void nf_ct_free_hashtable(void *hash, unsigned int size)
1297} 1299}
1298EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); 1300EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
1299 1301
1300void nf_conntrack_flush_report(struct net *net, u32 pid, int report) 1302void nf_conntrack_flush_report(struct net *net, u32 portid, int report)
1301{ 1303{
1302 struct __nf_ct_flush_report fr = { 1304 struct __nf_ct_flush_report fr = {
1303 .pid = pid, 1305 .portid = portid,
1304 .report = report, 1306 .report = report,
1305 }; 1307 };
1306 nf_ct_iterate_cleanup(net, kill_report, &fr); 1308 nf_ct_iterate_cleanup(net, kill_report, &fr);
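
The pid to portid rename here is semantic rather than functional: the value was never a process id but the netlink socket port id of the requester. A hedged sketch of a call site attributing a table flush to that socket (the wrapper name is hypothetical):

    #include <linux/netlink.h>
    #include <net/netlink.h>	/* nlmsg_report() */
    #include <net/netfilter/nf_conntrack.h>

    /* Hypothetical wrapper: flush on behalf of a netlink request so
     * the resulting IPCT_DESTROY events carry the requester's port id;
     * portid 0 would mean the kernel itself initiated the flush. */
    static void flush_for_request(struct net *net, struct sk_buff *skb,
                                  const struct nlmsghdr *nlh)
    {
        nf_conntrack_flush_report(net, NETLINK_CB(skb).portid,
                                  nlmsg_report(nlh));
    }
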
@@ -1364,30 +1366,48 @@ void nf_conntrack_cleanup_end(void)
1364 */ 1366 */
1365void nf_conntrack_cleanup_net(struct net *net) 1367void nf_conntrack_cleanup_net(struct net *net)
1366{ 1368{
1369 LIST_HEAD(single);
1370
1371 list_add(&net->exit_list, &single);
1372 nf_conntrack_cleanup_net_list(&single);
1373}
1374
1375void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
1376{
1377 int busy;
1378 struct net *net;
1379
1367 /* 1380 /*
1368 * This makes sure all current packets have passed through 1381 * This makes sure all current packets have passed through
1369 * netfilter framework. Roll on, two-stage module 1382 * netfilter framework. Roll on, two-stage module
1370 * delete... 1383 * delete...
1371 */ 1384 */
1372 synchronize_net(); 1385 synchronize_net();
1373 i_see_dead_people: 1386i_see_dead_people:
1374 nf_ct_iterate_cleanup(net, kill_all, NULL); 1387 busy = 0;
1375 nf_ct_release_dying_list(net); 1388 list_for_each_entry(net, net_exit_list, exit_list) {
1376 if (atomic_read(&net->ct.count) != 0) { 1389 nf_ct_iterate_cleanup(net, kill_all, NULL);
1390 nf_ct_release_dying_list(net);
1391 if (atomic_read(&net->ct.count) != 0)
1392 busy = 1;
1393 }
1394 if (busy) {
1377 schedule(); 1395 schedule();
1378 goto i_see_dead_people; 1396 goto i_see_dead_people;
1379 } 1397 }
1380 1398
1381 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); 1399 list_for_each_entry(net, net_exit_list, exit_list) {
1382 nf_conntrack_proto_pernet_fini(net); 1400 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
1383 nf_conntrack_helper_pernet_fini(net); 1401 nf_conntrack_proto_pernet_fini(net);
1384 nf_conntrack_ecache_pernet_fini(net); 1402 nf_conntrack_helper_pernet_fini(net);
1385 nf_conntrack_tstamp_pernet_fini(net); 1403 nf_conntrack_ecache_pernet_fini(net);
1386 nf_conntrack_acct_pernet_fini(net); 1404 nf_conntrack_tstamp_pernet_fini(net);
1387 nf_conntrack_expect_pernet_fini(net); 1405 nf_conntrack_acct_pernet_fini(net);
1388 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1406 nf_conntrack_expect_pernet_fini(net);
1389 kfree(net->ct.slabname); 1407 kmem_cache_destroy(net->ct.nf_conntrack_cachep);
1390 free_percpu(net->ct.stat); 1408 kfree(net->ct.slabname);
1409 free_percpu(net->ct.stat);
1410 }
1391} 1411}
1392 1412
1393void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) 1413void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
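
nf_conntrack_cleanup_net_list() folds the old per-namespace i_see_dead_people loop into one pass that aggregates a busy flag, so a single schedule() backoff covers every namespace still draining instead of serializing a full wait per netns. A condensed sketch of that shape, with the kill_all/dying-list work collapsed into a hypothetical drain_one():

    #include <linux/atomic.h>
    #include <linux/list.h>
    #include <linux/sched.h>
    #include <net/net_namespace.h>

    static void drain_one(struct net *net)
    {
        /* stand-in for nf_ct_iterate_cleanup(net, kill_all, NULL)
         * plus nf_ct_release_dying_list(net) */
    }

    static void drain_batch(struct list_head *net_exit_list)
    {
        struct net *net;
        int busy;
    again:
        busy = 0;
        list_for_each_entry(net, net_exit_list, exit_list) {
            drain_one(net);
            if (atomic_read(&net->ct.count) != 0)
                busy = 1;	/* conntracks still dying */
        }
        if (busy) {
            schedule();		/* yield, then retry the whole batch */
            goto again;
        }
    }
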
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index b5d2eb8bf0d5..1df176146567 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -1,8 +1,10 @@
1/* Event cache for netfilter. */ 1/* Event cache for netfilter. */
2 2
3/* (C) 1999-2001 Paul `Rusty' Russell 3/*
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 4 * (C) 2005 Harald Welte <laforge@gnumonks.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 5 * (C) 2005 Patrick McHardy <kaber@trash.net>
6 * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org>
7 * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>
6 * 8 *
7 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 10 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 8c10e3db3d9b..c63b618cd619 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -3,6 +3,7 @@
3/* (C) 1999-2001 Paul `Rusty' Russell 3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6 * (c) 2005-2012 Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
@@ -40,7 +41,7 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
40 41
41/* nf_conntrack_expect helper functions */ 42/* nf_conntrack_expect helper functions */
42void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, 43void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
43 u32 pid, int report) 44 u32 portid, int report)
44{ 45{
45 struct nf_conn_help *master_help = nfct_help(exp->master); 46 struct nf_conn_help *master_help = nfct_help(exp->master);
46 struct net *net = nf_ct_exp_net(exp); 47 struct net *net = nf_ct_exp_net(exp);
@@ -54,7 +55,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
54 hlist_del(&exp->lnode); 55 hlist_del(&exp->lnode);
55 master_help->expecting[exp->class]--; 56 master_help->expecting[exp->class]--;
56 57
57 nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report); 58 nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);
58 nf_ct_expect_put(exp); 59 nf_ct_expect_put(exp);
59 60
60 NF_CT_STAT_INC(net, expect_delete); 61 NF_CT_STAT_INC(net, expect_delete);
@@ -412,7 +413,7 @@ out:
412} 413}
413 414
414int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 415int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
415 u32 pid, int report) 416 u32 portid, int report)
416{ 417{
417 int ret; 418 int ret;
418 419
@@ -425,7 +426,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
425 if (ret < 0) 426 if (ret < 0)
426 goto out; 427 goto out;
427 spin_unlock_bh(&nf_conntrack_lock); 428 spin_unlock_bh(&nf_conntrack_lock);
428 nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); 429 nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
429 return ret; 430 return ret;
430out: 431out:
431 spin_unlock_bh(&nf_conntrack_lock); 432 spin_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 62fb8faedb80..6b217074237b 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -3,6 +3,7 @@
3/* (C) 1999-2001 Paul `Rusty' Russell 3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 4 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 7df7b36d2e24..bdebd03bc8cd 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -2,6 +2,7 @@
2 * H.323 connection tracking helper 2 * H.323 connection tracking helper
3 * 3 *
4 * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> 4 * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
5 * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * This source code is licensed under General Public License version 2. 7 * This source code is licensed under General Public License version 2.
7 * 8 *
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 94b4b9853f60..974a2a4adefa 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -3,6 +3,7 @@
3/* (C) 1999-2001 Paul `Rusty' Russell 3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
@@ -353,7 +354,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,
353 /* rcu_read_lock()ed by nf_hook_slow */ 354 /* rcu_read_lock()ed by nf_hook_slow */
354 helper = rcu_dereference(help->helper); 355 helper = rcu_dereference(help->helper);
355 356
356 nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, 357 nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
357 "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); 358 "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf);
358 359
359 va_end(args); 360 va_end(args);
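
nf_log_packet() gaining a struct net * as its first argument is the visible edge of the per-namespace logger table introduced by the nf_log.c hunks further down. A hedged sketch of an updated call site, resolving the namespace from the conntrack exactly as the hunk above does:

    #include <net/netfilter/nf_conntrack.h>
    #include <net/netfilter/nf_log.h>

    /* Sketch only: log against the namespace the conntrack belongs to,
     * so the message reaches that netns's bound logger instead of a
     * global default. */
    static void log_drop(const struct sk_buff *skb,
                         const struct nf_conn *ct, const char *why)
    {
        nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb,
                      NULL, NULL, NULL,
                      "nf_ct: dropping packet: %s ", why);
    }
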
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 70985c5d0ffa..0fd2976db7ee 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -1,6 +1,7 @@
1/* IRC extension for IP connection tracking, Version 1.21 1/* IRC extension for IP connection tracking, Version 1.21
2 * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org> 2 * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
3 * based on RR's ip_conntrack_ftp.c 3 * based on RR's ip_conntrack_ftp.c
4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9904b15f600e..6d0f8a17c5b7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2409,6 +2409,92 @@ out:
2409 return skb->len; 2409 return skb->len;
2410} 2410}
2411 2411
2412static int
2413ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
2414{
2415 struct nf_conntrack_expect *exp, *last;
2416 struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
2417 struct nf_conn *ct = cb->data;
2418 struct nf_conn_help *help = nfct_help(ct);
2419 u_int8_t l3proto = nfmsg->nfgen_family;
2420
2421 if (cb->args[0])
2422 return 0;
2423
2424 rcu_read_lock();
2425 last = (struct nf_conntrack_expect *)cb->args[1];
2426restart:
2427 hlist_for_each_entry(exp, &help->expectations, lnode) {
2428 if (l3proto && exp->tuple.src.l3num != l3proto)
2429 continue;
2430 if (cb->args[1]) {
2431 if (exp != last)
2432 continue;
2433 cb->args[1] = 0;
2434 }
2435 if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid,
2436 cb->nlh->nlmsg_seq,
2437 IPCTNL_MSG_EXP_NEW,
2438 exp) < 0) {
2439 if (!atomic_inc_not_zero(&exp->use))
2440 continue;
2441 cb->args[1] = (unsigned long)exp;
2442 goto out;
2443 }
2444 }
2445 if (cb->args[1]) {
2446 cb->args[1] = 0;
2447 goto restart;
2448 }
2449 cb->args[0] = 1;
2450out:
2451 rcu_read_unlock();
2452 if (last)
2453 nf_ct_expect_put(last);
2454
2455 return skb->len;
2456}
2457
2458static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
2459 const struct nlmsghdr *nlh,
2460 const struct nlattr * const cda[])
2461{
2462 int err;
2463 struct net *net = sock_net(ctnl);
2464 struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2465 u_int8_t u3 = nfmsg->nfgen_family;
2466 struct nf_conntrack_tuple tuple;
2467 struct nf_conntrack_tuple_hash *h;
2468 struct nf_conn *ct;
2469 u16 zone = 0;
2470 struct netlink_dump_control c = {
2471 .dump = ctnetlink_exp_ct_dump_table,
2472 .done = ctnetlink_exp_done,
2473 };
2474
2475 err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
2476 if (err < 0)
2477 return err;
2478
2479 if (cda[CTA_EXPECT_ZONE]) {
2480 err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
2481 if (err < 0)
2482 return err;
2483 }
2484
2485 h = nf_conntrack_find_get(net, zone, &tuple);
2486 if (!h)
2487 return -ENOENT;
2488
2489 ct = nf_ct_tuplehash_to_ctrack(h);
2490 c.data = ct;
2491
2492 err = netlink_dump_start(ctnl, skb, nlh, &c);
2493 nf_ct_put(ct);
2494
2495 return err;
2496}
2497
2412static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { 2498static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
2413 [CTA_EXPECT_MASTER] = { .type = NLA_NESTED }, 2499 [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
2414 [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED }, 2500 [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
@@ -2439,11 +2525,15 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
2439 int err; 2525 int err;
2440 2526
2441 if (nlh->nlmsg_flags & NLM_F_DUMP) { 2527 if (nlh->nlmsg_flags & NLM_F_DUMP) {
2442 struct netlink_dump_control c = { 2528 if (cda[CTA_EXPECT_MASTER])
2443 .dump = ctnetlink_exp_dump_table, 2529 return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda);
2444 .done = ctnetlink_exp_done, 2530 else {
2445 }; 2531 struct netlink_dump_control c = {
2446 return netlink_dump_start(ctnl, skb, nlh, &c); 2532 .dump = ctnetlink_exp_dump_table,
2533 .done = ctnetlink_exp_done,
2534 };
2535 return netlink_dump_start(ctnl, skb, nlh, &c);
2536 }
2447 } 2537 }
2448 2538
2449 err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); 2539 err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
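
The new ctnetlink_exp_ct_dump_table() follows the standard netlink dump resume protocol: cb->args[0] flags completion, and cb->args[1] carries a cursor that the real code pins with atomic_inc_not_zero() so it cannot be freed between passes (the reused ctnetlink_exp_done callback drops it). A self-contained sketch of the convention under hypothetical names, where item and fill_one stand in for the expectation type and fill routine:

    #include <linux/list.h>
    #include <linux/netlink.h>
    #include <linux/skbuff.h>

    struct item { struct list_head node; };	/* hypothetical */
    static LIST_HEAD(table);

    /* Stand-in for ctnetlink_exp_fill_info(): append one record,
     * return < 0 when the skb has no room left. */
    static int fill_one(struct sk_buff *skb, struct item *it)
    {
        return 0;
    }

    static int dump_sketch(struct sk_buff *skb, struct netlink_callback *cb)
    {
        struct item *it, *last = (struct item *)cb->args[1];

        if (cb->args[0])		/* an earlier pass finished */
            return 0;
    restart:
        list_for_each_entry(it, &table, node) {
            if (cb->args[1]) {		/* skip to the resume point */
                if (it != last)
                    continue;
                cb->args[1] = 0;
            }
            if (fill_one(skb, it) < 0) {
                cb->args[1] = (unsigned long)it;	/* resume here */
                goto out;		/* skb full; core calls again */
            }
        }
        if (cb->args[1]) {		/* cursor was deleted: rescan once */
            cb->args[1] = 0;
            goto restart;
        }
        cb->args[0] = 1;		/* done */
    out:
        return skb->len;
    }
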
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index e6678d2b624e..7bd03decd36c 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -11,6 +11,8 @@
11 * 11 *
12 * Development of this code funded by Astaro AG (http://www.astaro.com/) 12 * Development of this code funded by Astaro AG (http://www.astaro.com/)
13 * 13 *
14 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
15 *
14 * Limitations: 16 * Limitations:
15 * - We blindly assume that control connections are always 17 * - We blindly assume that control connections are always
16 * established in PNS->PAC direction. This is a violation 18 * established in PNS->PAC direction. This is a violation
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 58ab4050830c..0ab9636ac57e 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -3,6 +3,7 @@
3/* (C) 1999-2001 Paul `Rusty' Russell 3/* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index ba65b2041eb4..a99b6c3427b0 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
456 456
457out_invalid: 457out_invalid:
458 if (LOG_INVALID(net, IPPROTO_DCCP)) 458 if (LOG_INVALID(net, IPPROTO_DCCP))
459 nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); 459 nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL,
460 NULL, msg);
460 return false; 461 return false;
461} 462}
462 463
@@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
542 543
543 spin_unlock_bh(&ct->lock); 544 spin_unlock_bh(&ct->lock);
544 if (LOG_INVALID(net, IPPROTO_DCCP)) 545 if (LOG_INVALID(net, IPPROTO_DCCP))
545 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 546 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
546 "nf_ct_dccp: invalid packet ignored "); 547 "nf_ct_dccp: invalid packet ignored ");
547 return NF_ACCEPT; 548 return NF_ACCEPT;
548 case CT_DCCP_INVALID: 549 case CT_DCCP_INVALID:
549 spin_unlock_bh(&ct->lock); 550 spin_unlock_bh(&ct->lock);
550 if (LOG_INVALID(net, IPPROTO_DCCP)) 551 if (LOG_INVALID(net, IPPROTO_DCCP))
551 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 552 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
552 "nf_ct_dccp: invalid state transition "); 553 "nf_ct_dccp: invalid state transition ");
553 return -NF_ACCEPT; 554 return -NF_ACCEPT;
554 } 555 }
@@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
613 614
614out_invalid: 615out_invalid:
615 if (LOG_INVALID(net, IPPROTO_DCCP)) 616 if (LOG_INVALID(net, IPPROTO_DCCP))
616 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); 617 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg);
617 return -NF_ACCEPT; 618 return -NF_ACCEPT;
618} 619}
619 620
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 155ce9f8a0db..9d9c0dade602 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -21,6 +21,7 @@
21 * 21 *
22 * Development of this code funded by Astaro AG (http://www.astaro.com/) 22 * Development of this code funded by Astaro AG (http://www.astaro.com/)
23 * 23 *
24 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
24 */ 25 */
25 26
26#include <linux/module.h> 27#include <linux/module.h>
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index ec83536def9a..1314d33f6bcf 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -1,6 +1,9 @@
1/* 1/*
2 * Connection tracking protocol helper module for SCTP. 2 * Connection tracking protocol helper module for SCTP.
3 * 3 *
4 * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com>
5 * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net>
6 *
4 * SCTP is defined in RFC 2960. References to various sections in this code 7 * SCTP is defined in RFC 2960. References to various sections in this code
5 * are to this RFC. 8 * are to this RFC.
6 * 9 *
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 83876e9877f1..4d4d8f1d01fc 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1,5 +1,7 @@
1/* (C) 1999-2001 Paul `Rusty' Russell 1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
3 * 5 *
4 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -720,7 +722,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
720 tn->tcp_be_liberal) 722 tn->tcp_be_liberal)
721 res = true; 723 res = true;
722 if (!res && LOG_INVALID(net, IPPROTO_TCP)) 724 if (!res && LOG_INVALID(net, IPPROTO_TCP))
723 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 725 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
724 "nf_ct_tcp: %s ", 726 "nf_ct_tcp: %s ",
725 before(seq, sender->td_maxend + 1) ? 727 before(seq, sender->td_maxend + 1) ?
726 after(end, sender->td_end - receiver->td_maxwin - 1) ? 728 after(end, sender->td_end - receiver->td_maxwin - 1) ?
@@ -772,7 +774,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
772 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 774 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
773 if (th == NULL) { 775 if (th == NULL) {
774 if (LOG_INVALID(net, IPPROTO_TCP)) 776 if (LOG_INVALID(net, IPPROTO_TCP))
775 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 777 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
776 "nf_ct_tcp: short packet "); 778 "nf_ct_tcp: short packet ");
777 return -NF_ACCEPT; 779 return -NF_ACCEPT;
778 } 780 }
@@ -780,7 +782,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
780 /* Not whole TCP header or malformed packet */ 782 /* Not whole TCP header or malformed packet */
781 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { 783 if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
782 if (LOG_INVALID(net, IPPROTO_TCP)) 784 if (LOG_INVALID(net, IPPROTO_TCP))
783 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 785 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
784 "nf_ct_tcp: truncated/malformed packet "); 786 "nf_ct_tcp: truncated/malformed packet ");
785 return -NF_ACCEPT; 787 return -NF_ACCEPT;
786 } 788 }
@@ -793,7 +795,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
793 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 795 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
794 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { 796 nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
795 if (LOG_INVALID(net, IPPROTO_TCP)) 797 if (LOG_INVALID(net, IPPROTO_TCP))
796 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 798 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
797 "nf_ct_tcp: bad TCP checksum "); 799 "nf_ct_tcp: bad TCP checksum ");
798 return -NF_ACCEPT; 800 return -NF_ACCEPT;
799 } 801 }
@@ -802,7 +804,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
802 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); 804 tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
803 if (!tcp_valid_flags[tcpflags]) { 805 if (!tcp_valid_flags[tcpflags]) {
804 if (LOG_INVALID(net, IPPROTO_TCP)) 806 if (LOG_INVALID(net, IPPROTO_TCP))
805 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 807 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
806 "nf_ct_tcp: invalid TCP flag combination "); 808 "nf_ct_tcp: invalid TCP flag combination ");
807 return -NF_ACCEPT; 809 return -NF_ACCEPT;
808 } 810 }
@@ -949,7 +951,7 @@ static int tcp_packet(struct nf_conn *ct,
949 } 951 }
950 spin_unlock_bh(&ct->lock); 952 spin_unlock_bh(&ct->lock);
951 if (LOG_INVALID(net, IPPROTO_TCP)) 953 if (LOG_INVALID(net, IPPROTO_TCP))
952 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 954 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
953 "nf_ct_tcp: invalid packet ignored in " 955 "nf_ct_tcp: invalid packet ignored in "
954 "state %s ", tcp_conntrack_names[old_state]); 956 "state %s ", tcp_conntrack_names[old_state]);
955 return NF_ACCEPT; 957 return NF_ACCEPT;
@@ -959,7 +961,7 @@ static int tcp_packet(struct nf_conn *ct,
959 dir, get_conntrack_index(th), old_state); 961 dir, get_conntrack_index(th), old_state);
960 spin_unlock_bh(&ct->lock); 962 spin_unlock_bh(&ct->lock);
961 if (LOG_INVALID(net, IPPROTO_TCP)) 963 if (LOG_INVALID(net, IPPROTO_TCP))
962 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 964 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
963 "nf_ct_tcp: invalid state "); 965 "nf_ct_tcp: invalid state ");
964 return -NF_ACCEPT; 966 return -NF_ACCEPT;
965 case TCP_CONNTRACK_CLOSE: 967 case TCP_CONNTRACK_CLOSE:
@@ -969,8 +971,8 @@ static int tcp_packet(struct nf_conn *ct,
969 /* Invalid RST */ 971 /* Invalid RST */
970 spin_unlock_bh(&ct->lock); 972 spin_unlock_bh(&ct->lock);
971 if (LOG_INVALID(net, IPPROTO_TCP)) 973 if (LOG_INVALID(net, IPPROTO_TCP))
972 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 974 nf_log_packet(net, pf, 0, skb, NULL, NULL,
973 "nf_ct_tcp: invalid RST "); 975 NULL, "nf_ct_tcp: invalid RST ");
974 return -NF_ACCEPT; 976 return -NF_ACCEPT;
975 } 977 }
976 if (index == TCP_RST_SET 978 if (index == TCP_RST_SET
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 59623cc56e8d..9d7721cbce4b 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -1,5 +1,6 @@
1/* (C) 1999-2001 Paul `Rusty' Russell 1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -119,7 +120,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
119 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 120 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
120 if (hdr == NULL) { 121 if (hdr == NULL) {
121 if (LOG_INVALID(net, IPPROTO_UDP)) 122 if (LOG_INVALID(net, IPPROTO_UDP))
122 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 123 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
123 "nf_ct_udp: short packet "); 124 "nf_ct_udp: short packet ");
124 return -NF_ACCEPT; 125 return -NF_ACCEPT;
125 } 126 }
@@ -127,7 +128,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
127 /* Truncated/malformed packets */ 128 /* Truncated/malformed packets */
128 if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { 129 if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
129 if (LOG_INVALID(net, IPPROTO_UDP)) 130 if (LOG_INVALID(net, IPPROTO_UDP))
130 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 131 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
131 "nf_ct_udp: truncated/malformed packet "); 132 "nf_ct_udp: truncated/malformed packet ");
132 return -NF_ACCEPT; 133 return -NF_ACCEPT;
133 } 134 }
@@ -143,7 +144,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
143 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && 144 if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
144 nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { 145 nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {
145 if (LOG_INVALID(net, IPPROTO_UDP)) 146 if (LOG_INVALID(net, IPPROTO_UDP))
146 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 147 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
147 "nf_ct_udp: bad UDP checksum "); 148 "nf_ct_udp: bad UDP checksum ");
148 return -NF_ACCEPT; 149 return -NF_ACCEPT;
149 } 150 }
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index ca969f6273f7..2750e6c69f82 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
131 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); 131 hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
132 if (hdr == NULL) { 132 if (hdr == NULL) {
133 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 133 if (LOG_INVALID(net, IPPROTO_UDPLITE))
134 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 134 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
135 "nf_ct_udplite: short packet "); 135 "nf_ct_udplite: short packet ");
136 return -NF_ACCEPT; 136 return -NF_ACCEPT;
137 } 137 }
@@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
141 cscov = udplen; 141 cscov = udplen;
142 else if (cscov < sizeof(*hdr) || cscov > udplen) { 142 else if (cscov < sizeof(*hdr) || cscov > udplen) {
143 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 143 if (LOG_INVALID(net, IPPROTO_UDPLITE))
144 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 144 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
145 "nf_ct_udplite: invalid checksum coverage "); 145 "nf_ct_udplite: invalid checksum coverage ");
146 return -NF_ACCEPT; 146 return -NF_ACCEPT;
147 } 147 }
@@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
149 /* UDPLITE mandates checksums */ 149 /* UDPLITE mandates checksums */
150 if (!hdr->check) { 150 if (!hdr->check) {
151 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 151 if (LOG_INVALID(net, IPPROTO_UDPLITE))
152 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 152 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
153 "nf_ct_udplite: checksum missing "); 153 "nf_ct_udplite: checksum missing ");
154 return -NF_ACCEPT; 154 return -NF_ACCEPT;
155 } 155 }
@@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,
159 nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, 159 nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,
160 pf)) { 160 pf)) {
161 if (LOG_INVALID(net, IPPROTO_UDPLITE)) 161 if (LOG_INVALID(net, IPPROTO_UDPLITE))
162 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 162 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
163 "nf_ct_udplite: bad UDPLite checksum "); 163 "nf_ct_udplite: bad UDPLite checksum ");
164 return -NF_ACCEPT; 164 return -NF_ACCEPT;
165 } 165 }
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 0e7d423324c3..e0c4373b4747 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1593,10 +1593,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
1593 end += strlen("\r\n\r\n") + clen; 1593 end += strlen("\r\n\r\n") + clen;
1594 1594
1595 msglen = origlen = end - dptr; 1595 msglen = origlen = end - dptr;
1596 if (msglen > datalen) { 1596 if (msglen > datalen)
1597 nf_ct_helper_log(skb, ct, "incomplete/bad SIP message"); 1597 return NF_ACCEPT;
1598 return NF_DROP;
1599 }
1600 1598
1601 ret = process_sip_msg(skb, ct, protoff, dataoff, 1599 ret = process_sip_msg(skb, ct, protoff, dataoff,
1602 &dptr, &msglen); 1600 &dptr, &msglen);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index fedee3943661..bd700b4013c1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1,5 +1,6 @@
1/* (C) 1999-2001 Paul `Rusty' Russell 1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2005-2012 Patrick McHardy <kaber@trash.net>
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -545,16 +546,20 @@ out_init:
545 return ret; 546 return ret;
546} 547}
547 548
548static void nf_conntrack_pernet_exit(struct net *net) 549static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
549{ 550{
550 nf_conntrack_standalone_fini_sysctl(net); 551 struct net *net;
551 nf_conntrack_standalone_fini_proc(net); 552
552 nf_conntrack_cleanup_net(net); 553 list_for_each_entry(net, net_exit_list, exit_list) {
554 nf_conntrack_standalone_fini_sysctl(net);
555 nf_conntrack_standalone_fini_proc(net);
556 }
557 nf_conntrack_cleanup_net_list(net_exit_list);
553} 558}
554 559
555static struct pernet_operations nf_conntrack_net_ops = { 560static struct pernet_operations nf_conntrack_net_ops = {
556 .init = nf_conntrack_pernet_init, 561 .init = nf_conntrack_pernet_init,
557 .exit = nf_conntrack_pernet_exit, 562 .exit_batch = nf_conntrack_pernet_exit,
558}; 563};
559 564
560static int __init nf_conntrack_standalone_init(void) 565static int __init nf_conntrack_standalone_init(void)
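
Switching pernet_operations from .exit to .exit_batch is what lets the batched conntrack cleanup pay off: the netns core hands over every namespace dying in one cleanup cycle at once, so synchronize_net() and the retry loop run once per batch rather than once per namespace. A minimal sketch of the registration pattern for a hypothetical subsystem:

    #include <linux/list.h>
    #include <linux/module.h>
    #include <linux/netdevice.h>	/* synchronize_net() */
    #include <net/net_namespace.h>

    static int __net_init foo_net_init(struct net *net)
    {
        return 0;			/* per-net setup elided */
    }

    static void __net_exit foo_net_exit_batch(struct list_head *net_exit_list)
    {
        struct net *net;

        synchronize_net();		/* one grace period for the batch */
        list_for_each_entry(net, net_exit_list, exit_list) {
            /* per-net teardown for "net" goes here */
        }
    }

    static struct pernet_operations foo_net_ops = {
        .init	    = foo_net_init,
        .exit_batch = foo_net_exit_batch,
    };

    /* registered with register_pernet_subsys(&foo_net_ops) as usual */
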
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index e9936c830208..e68ab4fbd71f 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -1,5 +1,5 @@
1/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> 1/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
2 * 2 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
3 * This program is free software; you can redistribute it and/or modify 3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as 4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation. 5 * published by the Free Software Foundation.
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 9e312695c818..388656d5a9ec 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -16,7 +16,6 @@
16#define NF_LOG_PREFIXLEN 128 16#define NF_LOG_PREFIXLEN 128
17#define NFLOGGER_NAME_LEN 64 17#define NFLOGGER_NAME_LEN 64
18 18
19static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
20static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; 19static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
21static DEFINE_MUTEX(nf_log_mutex); 20static DEFINE_MUTEX(nf_log_mutex);
22 21
@@ -32,13 +31,46 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
32 return NULL; 31 return NULL;
33} 32}
34 33
34void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
35{
36 const struct nf_logger *log;
37
38 if (pf == NFPROTO_UNSPEC)
39 return;
40
41 mutex_lock(&nf_log_mutex);
42 log = rcu_dereference_protected(net->nf.nf_loggers[pf],
43 lockdep_is_held(&nf_log_mutex));
44 if (log == NULL)
45 rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
46
47 mutex_unlock(&nf_log_mutex);
48}
49EXPORT_SYMBOL(nf_log_set);
50
51void nf_log_unset(struct net *net, const struct nf_logger *logger)
52{
53 int i;
54 const struct nf_logger *log;
55
56 mutex_lock(&nf_log_mutex);
57 for (i = 0; i < NFPROTO_NUMPROTO; i++) {
58 log = rcu_dereference_protected(net->nf.nf_loggers[i],
59 lockdep_is_held(&nf_log_mutex));
60 if (log == logger)
61 RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL);
62 }
63 mutex_unlock(&nf_log_mutex);
64 synchronize_rcu();
65}
66EXPORT_SYMBOL(nf_log_unset);
67
35/* return EEXIST if the same logger is registered, 0 on success. */ 68/* return EEXIST if the same logger is registered, 0 on success. */
36int nf_log_register(u_int8_t pf, struct nf_logger *logger) 69int nf_log_register(u_int8_t pf, struct nf_logger *logger)
37{ 70{
38 const struct nf_logger *llog;
39 int i; 71 int i;
40 72
41 if (pf >= ARRAY_SIZE(nf_loggers)) 73 if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers))
42 return -EINVAL; 74 return -EINVAL;
43 75
44 for (i = 0; i < ARRAY_SIZE(logger->list); i++) 76 for (i = 0; i < ARRAY_SIZE(logger->list); i++)
@@ -52,10 +84,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
52 } else { 84 } else {
53 /* register at end of list to honor first register win */ 85 /* register at end of list to honor first register win */
54 list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); 86 list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
55 llog = rcu_dereference_protected(nf_loggers[pf],
56 lockdep_is_held(&nf_log_mutex));
57 if (llog == NULL)
58 rcu_assign_pointer(nf_loggers[pf], logger);
59 } 87 }
60 88
61 mutex_unlock(&nf_log_mutex); 89 mutex_unlock(&nf_log_mutex);
@@ -66,49 +94,43 @@ EXPORT_SYMBOL(nf_log_register);
66 94
67void nf_log_unregister(struct nf_logger *logger) 95void nf_log_unregister(struct nf_logger *logger)
68{ 96{
69 const struct nf_logger *c_logger;
70 int i; 97 int i;
71 98
72 mutex_lock(&nf_log_mutex); 99 mutex_lock(&nf_log_mutex);
73 for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { 100 for (i = 0; i < NFPROTO_NUMPROTO; i++)
74 c_logger = rcu_dereference_protected(nf_loggers[i],
75 lockdep_is_held(&nf_log_mutex));
76 if (c_logger == logger)
77 RCU_INIT_POINTER(nf_loggers[i], NULL);
78 list_del(&logger->list[i]); 101 list_del(&logger->list[i]);
79 }
80 mutex_unlock(&nf_log_mutex); 102 mutex_unlock(&nf_log_mutex);
81
82 synchronize_rcu();
83} 103}
84EXPORT_SYMBOL(nf_log_unregister); 104EXPORT_SYMBOL(nf_log_unregister);
85 105
86int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) 106int nf_log_bind_pf(struct net *net, u_int8_t pf,
107 const struct nf_logger *logger)
87{ 108{
88 if (pf >= ARRAY_SIZE(nf_loggers)) 109 if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
89 return -EINVAL; 110 return -EINVAL;
90 mutex_lock(&nf_log_mutex); 111 mutex_lock(&nf_log_mutex);
91 if (__find_logger(pf, logger->name) == NULL) { 112 if (__find_logger(pf, logger->name) == NULL) {
92 mutex_unlock(&nf_log_mutex); 113 mutex_unlock(&nf_log_mutex);
93 return -ENOENT; 114 return -ENOENT;
94 } 115 }
95 rcu_assign_pointer(nf_loggers[pf], logger); 116 rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
96 mutex_unlock(&nf_log_mutex); 117 mutex_unlock(&nf_log_mutex);
97 return 0; 118 return 0;
98} 119}
99EXPORT_SYMBOL(nf_log_bind_pf); 120EXPORT_SYMBOL(nf_log_bind_pf);
100 121
101void nf_log_unbind_pf(u_int8_t pf) 122void nf_log_unbind_pf(struct net *net, u_int8_t pf)
102{ 123{
103 if (pf >= ARRAY_SIZE(nf_loggers)) 124 if (pf >= ARRAY_SIZE(net->nf.nf_loggers))
104 return; 125 return;
105 mutex_lock(&nf_log_mutex); 126 mutex_lock(&nf_log_mutex);
106 RCU_INIT_POINTER(nf_loggers[pf], NULL); 127 RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL);
107 mutex_unlock(&nf_log_mutex); 128 mutex_unlock(&nf_log_mutex);
108} 129}
109EXPORT_SYMBOL(nf_log_unbind_pf); 130EXPORT_SYMBOL(nf_log_unbind_pf);
110 131
111void nf_log_packet(u_int8_t pf, 132void nf_log_packet(struct net *net,
133 u_int8_t pf,
112 unsigned int hooknum, 134 unsigned int hooknum,
113 const struct sk_buff *skb, 135 const struct sk_buff *skb,
114 const struct net_device *in, 136 const struct net_device *in,
@@ -121,7 +143,7 @@ void nf_log_packet(u_int8_t pf,
121 const struct nf_logger *logger; 143 const struct nf_logger *logger;
122 144
123 rcu_read_lock(); 145 rcu_read_lock();
124 logger = rcu_dereference(nf_loggers[pf]); 146 logger = rcu_dereference(net->nf.nf_loggers[pf]);
125 if (logger) { 147 if (logger) {
126 va_start(args, fmt); 148 va_start(args, fmt);
127 vsnprintf(prefix, sizeof(prefix), fmt, args); 149 vsnprintf(prefix, sizeof(prefix), fmt, args);
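
With the logger table moved into struct netns_nf, the bind-on-first-register side effect is gone from nf_log_register(); backends opt each namespace in explicitly through the new nf_log_set()/nf_log_unset() pair from their own pernet hooks. A hedged sketch of a logger module wired that way (logger body elided; this mirrors how the in-tree backends were converted in this series, but is not quoted from them):

    #include <linux/module.h>
    #include <linux/netfilter.h>
    #include <net/net_namespace.h>
    #include <net/netfilter/nf_log.h>

    static struct nf_logger sketch_logger;	/* .name/.logfn etc. elided */

    static int __net_init sketch_net_init(struct net *net)
    {
        /* Become this namespace's IPv4 default only if no logger is
         * bound yet; nf_log_set() keeps first-wins semantics. */
        nf_log_set(net, NFPROTO_IPV4, &sketch_logger);
        return 0;
    }

    static void __net_exit sketch_net_exit(struct net *net)
    {
        /* Clear every per-family slot still pointing at us. */
        nf_log_unset(net, &sketch_logger);
    }

    static struct pernet_operations sketch_net_ops = {
        .init = sketch_net_init,
        .exit = sketch_net_exit,
    };

nf_log_register()/nf_log_unregister() still maintain the global candidate list; only the per-namespace binding moved.
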
@@ -135,9 +157,11 @@ EXPORT_SYMBOL(nf_log_packet);
135#ifdef CONFIG_PROC_FS 157#ifdef CONFIG_PROC_FS
136static void *seq_start(struct seq_file *seq, loff_t *pos) 158static void *seq_start(struct seq_file *seq, loff_t *pos)
137{ 159{
160 struct net *net = seq_file_net(seq);
161
138 mutex_lock(&nf_log_mutex); 162 mutex_lock(&nf_log_mutex);
139 163
140 if (*pos >= ARRAY_SIZE(nf_loggers)) 164 if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
141 return NULL; 165 return NULL;
142 166
143 return pos; 167 return pos;
@@ -145,9 +169,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos)
145 169
146static void *seq_next(struct seq_file *s, void *v, loff_t *pos) 170static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
147{ 171{
172 struct net *net = seq_file_net(s);
173
148 (*pos)++; 174 (*pos)++;
149 175
150 if (*pos >= ARRAY_SIZE(nf_loggers)) 176 if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))
151 return NULL; 177 return NULL;
152 178
153 return pos; 179 return pos;
@@ -164,8 +190,9 @@ static int seq_show(struct seq_file *s, void *v)
164 const struct nf_logger *logger; 190 const struct nf_logger *logger;
165 struct nf_logger *t; 191 struct nf_logger *t;
166 int ret; 192 int ret;
193 struct net *net = seq_file_net(s);
167 194
168 logger = rcu_dereference_protected(nf_loggers[*pos], 195 logger = rcu_dereference_protected(net->nf.nf_loggers[*pos],
169 lockdep_is_held(&nf_log_mutex)); 196 lockdep_is_held(&nf_log_mutex));
170 197
171 if (!logger) 198 if (!logger)
@@ -199,7 +226,8 @@ static const struct seq_operations nflog_seq_ops = {
199 226
200static int nflog_open(struct inode *inode, struct file *file) 227static int nflog_open(struct inode *inode, struct file *file)
201{ 228{
202 return seq_open(file, &nflog_seq_ops); 229 return seq_open_net(inode, file, &nflog_seq_ops,
230 sizeof(struct seq_net_private));
203} 231}
204 232
205static const struct file_operations nflog_file_ops = { 233static const struct file_operations nflog_file_ops = {
@@ -207,7 +235,7 @@ static const struct file_operations nflog_file_ops = {
207 .open = nflog_open, 235 .open = nflog_open,
208 .read = seq_read, 236 .read = seq_read,
209 .llseek = seq_lseek, 237 .llseek = seq_lseek,
210 .release = seq_release, 238 .release = seq_release_net,
211}; 239};
212 240
213 241
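
The proc plumbing above switches to the namespace-aware seq_file helpers so each reader of /proc/net/netfilter/nf_log sees its own namespace's bindings. A compact sketch of that pairing under hypothetical names (it assumes the entry is created under a per-net /proc/net directory, which is what lets seq_open_net() find the namespace):

    #include <linux/fs.h>
    #include <linux/module.h>
    #include <linux/seq_file.h>
    #include <linux/seq_file_net.h>

    static void *sketch_start(struct seq_file *s, loff_t *pos)
    {
        return *pos == 0 ? SEQ_START_TOKEN : NULL;
    }

    static void *sketch_next(struct seq_file *s, void *v, loff_t *pos)
    {
        ++*pos;
        return NULL;		/* single-record table */
    }

    static void sketch_stop(struct seq_file *s, void *v)
    {
    }

    static int sketch_show(struct seq_file *s, void *v)
    {
        /* seq_file_net() recovers the opener's namespace from the
         * seq_net_private that seq_open_net() stashed. */
        seq_printf(s, "netns %p\n", seq_file_net(s));
        return 0;
    }

    static const struct seq_operations sketch_seq_ops = {
        .start = sketch_start,
        .next  = sketch_next,
        .stop  = sketch_stop,
        .show  = sketch_show,
    };

    static int sketch_open(struct inode *inode, struct file *file)
    {
        return seq_open_net(inode, file, &sketch_seq_ops,
                            sizeof(struct seq_net_private));
    }

    static const struct file_operations sketch_fops = {
        .owner	 = THIS_MODULE,
        .open	 = sketch_open,
        .read	 = seq_read,
        .llseek	 = seq_lseek,
        .release = seq_release_net,	/* pairs with seq_open_net */
    };
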
@@ -216,7 +244,6 @@ static const struct file_operations nflog_file_ops = {
216#ifdef CONFIG_SYSCTL 244#ifdef CONFIG_SYSCTL
217static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; 245static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
218static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; 246static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
219static struct ctl_table_header *nf_log_dir_header;
220 247
221static int nf_log_proc_dostring(ctl_table *table, int write, 248static int nf_log_proc_dostring(ctl_table *table, int write,
222 void __user *buffer, size_t *lenp, loff_t *ppos) 249 void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -226,6 +253,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
226 size_t size = *lenp; 253 size_t size = *lenp;
227 int r = 0; 254 int r = 0;
228 int tindex = (unsigned long)table->extra1; 255 int tindex = (unsigned long)table->extra1;
256 struct net *net = current->nsproxy->net_ns;
229 257
230 if (write) { 258 if (write) {
231 if (size > sizeof(buf)) 259 if (size > sizeof(buf))
@@ -234,7 +262,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
234 return -EFAULT; 262 return -EFAULT;
235 263
236 if (!strcmp(buf, "NONE")) { 264 if (!strcmp(buf, "NONE")) {
237 nf_log_unbind_pf(tindex); 265 nf_log_unbind_pf(net, tindex);
238 return 0; 266 return 0;
239 } 267 }
240 mutex_lock(&nf_log_mutex); 268 mutex_lock(&nf_log_mutex);
@@ -243,11 +271,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
243 mutex_unlock(&nf_log_mutex); 271 mutex_unlock(&nf_log_mutex);
244 return -ENOENT; 272 return -ENOENT;
245 } 273 }
246 rcu_assign_pointer(nf_loggers[tindex], logger); 274 rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
247 mutex_unlock(&nf_log_mutex); 275 mutex_unlock(&nf_log_mutex);
248 } else { 276 } else {
249 mutex_lock(&nf_log_mutex); 277 mutex_lock(&nf_log_mutex);
250 logger = rcu_dereference_protected(nf_loggers[tindex], 278 logger = rcu_dereference_protected(net->nf.nf_loggers[tindex],
251 lockdep_is_held(&nf_log_mutex)); 279 lockdep_is_held(&nf_log_mutex));
252 if (!logger) 280 if (!logger)
253 table->data = "NONE"; 281 table->data = "NONE";
@@ -260,49 +288,111 @@ static int nf_log_proc_dostring(ctl_table *table, int write,
260 return r; 288 return r;
261} 289}
262 290
263static __init int netfilter_log_sysctl_init(void) 291static int netfilter_log_sysctl_init(struct net *net)
264{ 292{
265 int i; 293 int i;
266 294 struct ctl_table *table;
267 for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { 295
268 snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); 296 table = nf_log_sysctl_table;
269 nf_log_sysctl_table[i].procname = 297 if (!net_eq(net, &init_net)) {
270 nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; 298 table = kmemdup(nf_log_sysctl_table,
271 nf_log_sysctl_table[i].data = NULL; 299 sizeof(nf_log_sysctl_table),
272 nf_log_sysctl_table[i].maxlen = 300 GFP_KERNEL);
273 NFLOGGER_NAME_LEN * sizeof(char); 301 if (!table)
274 nf_log_sysctl_table[i].mode = 0644; 302 goto err_alloc;
275 nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; 303 } else {
276 nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; 304 for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
305 snprintf(nf_log_sysctl_fnames[i],
306 3, "%d", i);
307 nf_log_sysctl_table[i].procname =
308 nf_log_sysctl_fnames[i];
309 nf_log_sysctl_table[i].data = NULL;
310 nf_log_sysctl_table[i].maxlen =
311 NFLOGGER_NAME_LEN * sizeof(char);
312 nf_log_sysctl_table[i].mode = 0644;
313 nf_log_sysctl_table[i].proc_handler =
314 nf_log_proc_dostring;
315 nf_log_sysctl_table[i].extra1 =
316 (void *)(unsigned long) i;
317 }
277 } 318 }
278 319
279 nf_log_dir_header = register_net_sysctl(&init_net, "net/netfilter/nf_log", 320 net->nf.nf_log_dir_header = register_net_sysctl(net,
280 nf_log_sysctl_table); 321 "net/netfilter/nf_log",
281 if (!nf_log_dir_header) 322 table);
282 return -ENOMEM; 323 if (!net->nf.nf_log_dir_header)
324 goto err_reg;
283 325
284 return 0; 326 return 0;
327
328err_reg:
329 if (!net_eq(net, &init_net))
330 kfree(table);
331err_alloc:
332 return -ENOMEM;
333}
334
335static void netfilter_log_sysctl_exit(struct net *net)
336{
337 struct ctl_table *table;
338
339 table = net->nf.nf_log_dir_header->ctl_table_arg;
340 unregister_net_sysctl_table(net->nf.nf_log_dir_header);
341 if (!net_eq(net, &init_net))
342 kfree(table);
285} 343}
286#else 344#else
287static __init int netfilter_log_sysctl_init(void) 345static int netfilter_log_sysctl_init(struct net *net)
288{ 346{
289 return 0; 347 return 0;
290} 348}
349
350static void netfilter_log_sysctl_exit(struct net *net)
351{
352}
291#endif /* CONFIG_SYSCTL */ 353#endif /* CONFIG_SYSCTL */
292 354
293int __init netfilter_log_init(void) 355static int __net_init nf_log_net_init(struct net *net)
294{ 356{
295 int i, r; 357 int ret = -ENOMEM;
358
296#ifdef CONFIG_PROC_FS 359#ifdef CONFIG_PROC_FS
297 if (!proc_create("nf_log", S_IRUGO, 360 if (!proc_create("nf_log", S_IRUGO,
298 proc_net_netfilter, &nflog_file_ops)) 361 net->nf.proc_netfilter, &nflog_file_ops))
299 return -1; 362 return ret;
300#endif 363#endif
364 ret = netfilter_log_sysctl_init(net);
365 if (ret < 0)
366 goto out_sysctl;
367
368 return 0;
301 369
302 /* Errors will trigger panic, unroll on error is unnecessary. */ 370out_sysctl:
303 r = netfilter_log_sysctl_init(); 371 /* For init_net: errors will trigger panic, don't unroll on error. */
304 if (r < 0) 372 if (!net_eq(net, &init_net))
305 return r; 373 remove_proc_entry("nf_log", net->nf.proc_netfilter);
374
375 return ret;
376}
377
378static void __net_exit nf_log_net_exit(struct net *net)
379{
380 netfilter_log_sysctl_exit(net);
381 remove_proc_entry("nf_log", net->nf.proc_netfilter);
382}
383
384static struct pernet_operations nf_log_net_ops = {
385 .init = nf_log_net_init,
386 .exit = nf_log_net_exit,
387};
388
389int __init netfilter_log_init(void)
390{
391 int i, ret;
392
393 ret = register_pernet_subsys(&nf_log_net_ops);
394 if (ret < 0)
395 return ret;
306 396
307 for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) 397 for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
308 INIT_LIST_HEAD(&(nf_loggers_l[i])); 398 INIT_LIST_HEAD(&(nf_loggers_l[i]));
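
The nf_log.c changes above are a textbook pernet conversion: the global logger array and sysctl header move into per-namespace storage (net->nf.nf_loggers, net->nf.nf_log_dir_header), the sysctl table is kmemdup()ed for every namespace except init_net, and a pernet_operations pair ties init/exit to namespace lifetime. A minimal sketch of that registration pattern, assuming a hypothetical subsystem (the my_* names are illustrative, not from this patch):

#include <linux/module.h>
#include <net/net_namespace.h>

static int __net_init my_net_init(struct net *net)
{
	/* set up per-namespace state; for members embedded in
	 * struct net (like net->nf.nf_loggers) this is plain
	 * initialization, no allocation needed */
	return 0;
}

static void __net_exit my_net_exit(struct net *net)
{
	/* undo my_net_init() for this one namespace */
}

static struct pernet_operations my_net_ops = {
	.init = my_net_init,
	.exit = my_net_exit,
};

static int __init my_init(void)
{
	/* .init runs for init_net and every namespace that already
	 * exists, then again for each namespace created later */
	return register_pernet_subsys(&my_net_ops);
}

static void __exit my_fini(void)
{
	unregister_pernet_subsys(&my_net_ops);
}

module_init(my_init);
module_exit(my_fini);
MODULE_LICENSE("GPL");
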
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index 3b67c9d11273..eb772380a202 100644
--- a/net/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -1,6 +1,7 @@
1/* Amanda extension for TCP NAT alteration. 1/* Amanda extension for TCP NAT alteration.
2 * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> 2 * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
3 * based on a copy of HW's ip_nat_irc.c as well as other modules 3 * based on a copy of HW's ip_nat_irc.c as well as other modules
4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License 7 * modify it under the terms of the GNU General Public License
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 8d5769c6d16e..038eee5c8f85 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -87,9 +87,11 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
87 struct flowi fl; 87 struct flowi fl;
88 unsigned int hh_len; 88 unsigned int hh_len;
89 struct dst_entry *dst; 89 struct dst_entry *dst;
90 int err;
90 91
91 if (xfrm_decode_session(skb, &fl, family) < 0) 92 err = xfrm_decode_session(skb, &fl, family);
92 return -1; 93 if (err < 0)
94 return err;
93 95
94 dst = skb_dst(skb); 96 dst = skb_dst(skb);
95 if (dst->xfrm) 97 if (dst->xfrm)
@@ -98,7 +100,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
98 100
99 dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); 101 dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
100 if (IS_ERR(dst)) 102 if (IS_ERR(dst))
101 return -1; 103 return PTR_ERR(dst);
102 104
103 skb_dst_drop(skb); 105 skb_dst_drop(skb);
104 skb_dst_set(skb, dst); 106 skb_dst_set(skb, dst);
@@ -107,7 +109,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
107 hh_len = skb_dst(skb)->dev->hard_header_len; 109 hh_len = skb_dst(skb)->dev->hard_header_len;
108 if (skb_headroom(skb) < hh_len && 110 if (skb_headroom(skb) < hh_len &&
109 pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) 111 pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC))
110 return -1; 112 return -ENOMEM;
111 return 0; 113 return 0;
112} 114}
113EXPORT_SYMBOL(nf_xfrm_me_harder); 115EXPORT_SYMBOL(nf_xfrm_me_harder);
@@ -467,33 +469,22 @@ EXPORT_SYMBOL_GPL(nf_nat_packet);
467struct nf_nat_proto_clean { 469struct nf_nat_proto_clean {
468 u8 l3proto; 470 u8 l3proto;
469 u8 l4proto; 471 u8 l4proto;
470 bool hash;
471}; 472};
472 473
473/* Clear NAT section of all conntracks, in case we're loaded again. */ 474/* kill conntracks with affected NAT section */
474static int nf_nat_proto_clean(struct nf_conn *i, void *data) 475static int nf_nat_proto_remove(struct nf_conn *i, void *data)
475{ 476{
476 const struct nf_nat_proto_clean *clean = data; 477 const struct nf_nat_proto_clean *clean = data;
477 struct nf_conn_nat *nat = nfct_nat(i); 478 struct nf_conn_nat *nat = nfct_nat(i);
478 479
479 if (!nat) 480 if (!nat)
480 return 0; 481 return 0;
481 if (!(i->status & IPS_SRC_NAT_DONE)) 482
482 return 0;
483 if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || 483 if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
484 (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) 484 (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
485 return 0; 485 return 0;
486 486
487 if (clean->hash) { 487 return i->status & IPS_NAT_MASK ? 1 : 0;
488 spin_lock_bh(&nf_nat_lock);
489 hlist_del_rcu(&nat->bysource);
490 spin_unlock_bh(&nf_nat_lock);
491 } else {
492 memset(nat, 0, sizeof(*nat));
493 i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK |
494 IPS_SEQ_ADJUST);
495 }
496 return 0;
497} 488}
498 489
499static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) 490static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
@@ -505,16 +496,8 @@ static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
505 struct net *net; 496 struct net *net;
506 497
507 rtnl_lock(); 498 rtnl_lock();
508 /* Step 1 - remove from bysource hash */
509 clean.hash = true;
510 for_each_net(net) 499 for_each_net(net)
511 nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); 500 nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
512 synchronize_rcu();
513
514 /* Step 2 - clean NAT section */
515 clean.hash = false;
516 for_each_net(net)
517 nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
518 rtnl_unlock(); 501 rtnl_unlock();
519} 502}
520 503
@@ -526,16 +509,9 @@ static void nf_nat_l3proto_clean(u8 l3proto)
526 struct net *net; 509 struct net *net;
527 510
528 rtnl_lock(); 511 rtnl_lock();
529 /* Step 1 - remove from bysource hash */
530 clean.hash = true;
531 for_each_net(net)
532 nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean);
533 synchronize_rcu();
534 512
535 /* Step 2 - clean NAT section */
536 clean.hash = false;
537 for_each_net(net) 513 for_each_net(net)
538 nf_ct_iterate_cleanup(net, nf_nat_proto_clean, &clean); 514 nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean);
539 rtnl_unlock(); 515 rtnl_unlock();
540} 516}
541 517
@@ -773,7 +749,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
773{ 749{
774 struct nf_nat_proto_clean clean = {}; 750 struct nf_nat_proto_clean clean = {};
775 751
776 nf_ct_iterate_cleanup(net, &nf_nat_proto_clean, &clean); 752 nf_ct_iterate_cleanup(net, &nf_nat_proto_remove, &clean);
777 synchronize_rcu(); 753 synchronize_rcu();
778 nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size); 754 nf_ct_free_hashtable(net->ct.nat_bysource, net->ct.nat_htable_size);
779} 755}
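
The nf_xfrm_me_harder() hunk stops flattening every failure to -1: the xfrm_decode_session() error is passed through, a failed xfrm_lookup() yields PTR_ERR(dst), and a failed headroom expansion yields -ENOMEM, so callers can tell a policy or routing failure from an allocation failure. A hedged userspace sketch of the same idiom (decode() and expand() are stand-ins, not kernel functions):

#include <errno.h>

static int decode(int ok) { return ok ? 0 : -EINVAL; } /* stand-in for xfrm_decode_session() */
static int expand(int ok) { return ok ? 0 : -ENOMEM; } /* stand-in for pskb_expand_head() */

/* propagate the callee's error instead of returning -1, so the
 * caller can distinguish -EINVAL from -ENOMEM */
static int harder(int a, int b)
{
	int err;

	err = decode(a);
	if (err < 0)
		return err;

	return expand(b);
}
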
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 23c2b38676a6..5fea563afe30 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -2,6 +2,7 @@
2 * 2 *
3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org> 3 * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org> 4 * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2007-2012 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index e64faa5ca893..396e55d46f90 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb,
36{ 36{
37 struct sk_buff *frag; 37 struct sk_buff *frag;
38 sctp_sctphdr_t *hdr; 38 sctp_sctphdr_t *hdr;
39 __be32 crc32; 39 __u32 crc32;
40 40
41 if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) 41 if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
42 return false; 42 return false;
@@ -55,8 +55,7 @@ sctp_manip_pkt(struct sk_buff *skb,
55 skb_walk_frags(skb, frag) 55 skb_walk_frags(skb, frag)
56 crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag), 56 crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),
57 crc32); 57 crc32);
58 crc32 = sctp_end_cksum(crc32); 58 hdr->checksum = sctp_end_cksum(crc32);
59 hdr->checksum = crc32;
60 59
61 return true; 60 return true;
62} 61}
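
The SCTP hunk is an endianness-annotation fix: the running CRC32c value stays a host-order __u32 while sctp_update_cksum() folds it over the header and each fragment; only the final sctp_end_cksum() result is in wire format and belongs in hdr->checksum. Declaring the accumulator __be32 was simply wrong. A hedged userspace sketch of the update/finalize split (a bitwise CRC32c, slow but self-contained):

#include <stdint.h>
#include <stddef.h>

/* reflected CRC32c (Castagnoli), polynomial 0x1EDC6F41; seed the
 * accumulator with ~0u before the first update */
static uint32_t crc32c_update(uint32_t crc, const uint8_t *buf, size_t len)
{
	while (len--) {
		crc ^= *buf++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;	/* still host order: plain uint32_t */
}

/* finalize: complement the accumulator; the kernel's sctp_end_cksum()
 * additionally converts to little-endian for the wire */
static uint32_t crc32c_end(uint32_t crc)
{
	return ~crc;
}
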
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index d812c1235b30..5d24b1fdb593 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -1,3 +1,8 @@
1/*
2 * Rusty Russell (C)2000 -- This code is GPL.
3 * Patrick McHardy (c) 2006-2012
4 */
5
1#include <linux/kernel.h> 6#include <linux/kernel.h>
2#include <linux/slab.h> 7#include <linux/slab.h>
3#include <linux/init.h> 8#include <linux/init.h>
@@ -40,7 +45,7 @@ void nf_unregister_queue_handler(void)
40} 45}
41EXPORT_SYMBOL(nf_unregister_queue_handler); 46EXPORT_SYMBOL(nf_unregister_queue_handler);
42 47
43static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) 48void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
44{ 49{
45 /* Release those devices we held, or Alexey will kill me. */ 50 /* Release those devices we held, or Alexey will kill me. */
46 if (entry->indev) 51 if (entry->indev)
@@ -60,12 +65,41 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
60 /* Drop reference to owner of hook which queued us. */ 65 /* Drop reference to owner of hook which queued us. */
61 module_put(entry->elem->owner); 66 module_put(entry->elem->owner);
62} 67}
68EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
69
70/* Bump dev refs so they don't vanish while packet is out */
71bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
72{
73 if (!try_module_get(entry->elem->owner))
74 return false;
75
76 if (entry->indev)
77 dev_hold(entry->indev);
78 if (entry->outdev)
79 dev_hold(entry->outdev);
80#ifdef CONFIG_BRIDGE_NETFILTER
81 if (entry->skb->nf_bridge) {
82 struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
83 struct net_device *physdev;
84
85 physdev = nf_bridge->physindev;
86 if (physdev)
87 dev_hold(physdev);
88 physdev = nf_bridge->physoutdev;
89 if (physdev)
90 dev_hold(physdev);
91 }
92#endif
93
94 return true;
95}
96EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
63 97
64/* 98/*
65 * Any packet that leaves via this function must come back 99 * Any packet that leaves via this function must come back
66 * through nf_reinject(). 100 * through nf_reinject().
67 */ 101 */
68static int __nf_queue(struct sk_buff *skb, 102int nf_queue(struct sk_buff *skb,
69 struct nf_hook_ops *elem, 103 struct nf_hook_ops *elem,
70 u_int8_t pf, unsigned int hook, 104 u_int8_t pf, unsigned int hook,
71 struct net_device *indev, 105 struct net_device *indev,
@@ -75,10 +109,6 @@ static int __nf_queue(struct sk_buff *skb,
75{ 109{
76 int status = -ENOENT; 110 int status = -ENOENT;
77 struct nf_queue_entry *entry = NULL; 111 struct nf_queue_entry *entry = NULL;
78#ifdef CONFIG_BRIDGE_NETFILTER
79 struct net_device *physindev;
80 struct net_device *physoutdev;
81#endif
82 const struct nf_afinfo *afinfo; 112 const struct nf_afinfo *afinfo;
83 const struct nf_queue_handler *qh; 113 const struct nf_queue_handler *qh;
84 114
@@ -109,28 +139,13 @@ static int __nf_queue(struct sk_buff *skb,
109 .indev = indev, 139 .indev = indev,
110 .outdev = outdev, 140 .outdev = outdev,
111 .okfn = okfn, 141 .okfn = okfn,
142 .size = sizeof(*entry) + afinfo->route_key_size,
112 }; 143 };
113 144
114 /* If it's going away, ignore hook. */ 145 if (!nf_queue_entry_get_refs(entry)) {
115 if (!try_module_get(entry->elem->owner)) {
116 status = -ECANCELED; 146 status = -ECANCELED;
117 goto err_unlock; 147 goto err_unlock;
118 } 148 }
119 /* Bump dev refs so they don't vanish while packet is out */
120 if (indev)
121 dev_hold(indev);
122 if (outdev)
123 dev_hold(outdev);
124#ifdef CONFIG_BRIDGE_NETFILTER
125 if (skb->nf_bridge) {
126 physindev = skb->nf_bridge->physindev;
127 if (physindev)
128 dev_hold(physindev);
129 physoutdev = skb->nf_bridge->physoutdev;
130 if (physoutdev)
131 dev_hold(physoutdev);
132 }
133#endif
134 skb_dst_force(skb); 149 skb_dst_force(skb);
135 afinfo->saveroute(skb, entry); 150 afinfo->saveroute(skb, entry);
136 status = qh->outfn(entry, queuenum); 151 status = qh->outfn(entry, queuenum);
@@ -151,87 +166,6 @@ err:
151 return status; 166 return status;
152} 167}
153 168
154#ifdef CONFIG_BRIDGE_NETFILTER
155/* When called from bridge netfilter, skb->data must point to MAC header
156 * before calling skb_gso_segment(). Otherwise, the original MAC header
157 * is lost and segmented skbs will be sent to the wrong destination.
158 */
159static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
160{
161 if (skb->nf_bridge)
162 __skb_push(skb, skb->network_header - skb->mac_header);
163}
164
165static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
166{
167 if (skb->nf_bridge)
168 __skb_pull(skb, skb->network_header - skb->mac_header);
169}
170#else
171#define nf_bridge_adjust_skb_data(s) do {} while (0)
172#define nf_bridge_adjust_segmented_data(s) do {} while (0)
173#endif
174
175int nf_queue(struct sk_buff *skb,
176 struct nf_hook_ops *elem,
177 u_int8_t pf, unsigned int hook,
178 struct net_device *indev,
179 struct net_device *outdev,
180 int (*okfn)(struct sk_buff *),
181 unsigned int queuenum)
182{
183 struct sk_buff *segs;
184 int err = -EINVAL;
185 unsigned int queued;
186
187 if (!skb_is_gso(skb))
188 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
189 queuenum);
190
191 switch (pf) {
192 case NFPROTO_IPV4:
193 skb->protocol = htons(ETH_P_IP);
194 break;
195 case NFPROTO_IPV6:
196 skb->protocol = htons(ETH_P_IPV6);
197 break;
198 }
199
200 nf_bridge_adjust_skb_data(skb);
201 segs = skb_gso_segment(skb, 0);
202 /* Does not use PTR_ERR to limit the number of error codes that can be
203 * returned by nf_queue. For instance, callers rely on -ECANCELED to mean
204 * 'ignore this hook'.
205 */
206 if (IS_ERR(segs))
207 goto out_err;
208 queued = 0;
209 err = 0;
210 do {
211 struct sk_buff *nskb = segs->next;
212
213 segs->next = NULL;
214 if (err == 0) {
215 nf_bridge_adjust_segmented_data(segs);
216 err = __nf_queue(segs, elem, pf, hook, indev,
217 outdev, okfn, queuenum);
218 }
219 if (err == 0)
220 queued++;
221 else
222 kfree_skb(segs);
223 segs = nskb;
224 } while (segs);
225
226 if (queued) {
227 kfree_skb(skb);
228 return 0;
229 }
230 out_err:
231 nf_bridge_adjust_segmented_data(skb);
232 return err;
233}
234
235void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) 169void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
236{ 170{
237 struct sk_buff *skb = entry->skb; 171 struct sk_buff *skb = entry->skb;
@@ -271,9 +205,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
271 local_bh_enable(); 205 local_bh_enable();
272 break; 206 break;
273 case NF_QUEUE: 207 case NF_QUEUE:
274 err = __nf_queue(skb, elem, entry->pf, entry->hook, 208 err = nf_queue(skb, elem, entry->pf, entry->hook,
275 entry->indev, entry->outdev, entry->okfn, 209 entry->indev, entry->outdev, entry->okfn,
276 verdict >> NF_VERDICT_QBITS); 210 verdict >> NF_VERDICT_QBITS);
277 if (err < 0) { 211 if (err < 0) {
278 if (err == -ECANCELED) 212 if (err == -ECANCELED)
279 goto next_hook; 213 goto next_hook;
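
The nf_queue.c rework promotes the reference juggling into exported helpers: nf_queue_entry_get_refs() pins the hook owner's module and every device the entry points at (including the bridge physdevs) before the packet leaves for userspace, and nf_queue_entry_release_refs() mirrors it on reinjection; the GSO segmentation loop is deleted here because segmenting now happens inside the nfnetlink_queue handler. A toy sketch of the strict get/release pairing (plain counters stand in for module and device refcounts):

#include <stdbool.h>
#include <stddef.h>

struct entry {
	int *owner_refs;	/* stands in for try_module_get()/module_put() */
	int *indev_refs;	/* stands in for dev_hold()/dev_put() */
};

/* take every reference the queued packet depends on, or none at all */
static bool entry_get_refs(struct entry *e)
{
	if (!e->owner_refs)
		return false;	/* owner is going away: refuse to queue */
	(*e->owner_refs)++;
	if (e->indev_refs)
		(*e->indev_refs)++;
	return true;
}

/* exact mirror of entry_get_refs(), run once the verdict comes back */
static void entry_release_refs(struct entry *e)
{
	if (e->indev_refs)
		(*e->indev_refs)--;
	(*e->owner_refs)--;
}
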
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 0b1b32cda307..572d87dc116f 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -24,10 +24,9 @@
24#include <linux/skbuff.h> 24#include <linux/skbuff.h>
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <net/sock.h> 26#include <net/sock.h>
27#include <net/netlink.h>
28#include <linux/init.h> 27#include <linux/init.h>
29 28
30#include <linux/netlink.h> 29#include <net/netlink.h>
31#include <linux/netfilter/nfnetlink.h> 30#include <linux/netfilter/nfnetlink.h>
32 31
33MODULE_LICENSE("GPL"); 32MODULE_LICENSE("GPL");
@@ -113,22 +112,30 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group)
113} 112}
114EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); 113EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
115 114
116int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, 115struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size,
116 u32 dst_portid, gfp_t gfp_mask)
117{
118 return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask);
119}
120EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb);
121
122int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,
117 unsigned int group, int echo, gfp_t flags) 123 unsigned int group, int echo, gfp_t flags)
118{ 124{
119 return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); 125 return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags);
120} 126}
121EXPORT_SYMBOL_GPL(nfnetlink_send); 127EXPORT_SYMBOL_GPL(nfnetlink_send);
122 128
123int nfnetlink_set_err(struct net *net, u32 pid, u32 group, int error) 129int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error)
124{ 130{
125 return netlink_set_err(net->nfnl, pid, group, error); 131 return netlink_set_err(net->nfnl, portid, group, error);
126} 132}
127EXPORT_SYMBOL_GPL(nfnetlink_set_err); 133EXPORT_SYMBOL_GPL(nfnetlink_set_err);
128 134
129int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) 135int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
136 int flags)
130{ 137{
131 return netlink_unicast(net->nfnl, skb, pid, flags); 138 return netlink_unicast(net->nfnl, skb, portid, flags);
132} 139}
133EXPORT_SYMBOL_GPL(nfnetlink_unicast); 140EXPORT_SYMBOL_GPL(nfnetlink_unicast);
134 141
@@ -144,7 +151,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
144 return -EPERM; 151 return -EPERM;
145 152
146 /* All the messages must at least contain nfgenmsg */ 153 /* All the messages must at least contain nfgenmsg */
147 if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg))) 154 if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
148 return 0; 155 return 0;
149 156
150 type = nlh->nlmsg_type; 157 type = nlh->nlmsg_type;
@@ -172,7 +179,7 @@ replay:
172 } 179 }
173 180
174 { 181 {
175 int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); 182 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
176 u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); 183 u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
177 struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; 184 struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
178 struct nlattr *attr = (void *)nlh + min_len; 185 struct nlattr *attr = (void *)nlh + min_len;
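
The nfnetlink.c hunks retire the legacy NLMSG_LENGTH/NLMSG_SPACE macros in favour of the nlmsg_len()/nlmsg_total_size() helpers from <net/netlink.h>, and rename pid parameters to portid to match what the value actually carries. The sanity check also reads better: nlmsg_len(nlh) already subtracts the header, so it compares directly against sizeof(struct nfgenmsg). A sketch of the two size calculations, mirroring the uapi netlink layout (the _of suffixes avoid clashing with the real helpers):

#include <stddef.h>
#include <stdint.h>

#define NLMSG_ALIGNTO	4u
#define NLMSG_ALIGN(len) (((len) + NLMSG_ALIGNTO - 1) & ~(NLMSG_ALIGNTO - 1))
#define NLMSG_HDRLEN	((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))

struct nlmsghdr {		/* same layout as <linux/netlink.h> */
	uint32_t nlmsg_len;	/* total length, header included */
	uint16_t nlmsg_type;
	uint16_t nlmsg_flags;
	uint32_t nlmsg_seq;
	uint32_t nlmsg_pid;
};

/* payload length: total length minus the (aligned) header */
static int nlmsg_len_of(const struct nlmsghdr *nlh)
{
	return nlh->nlmsg_len - NLMSG_HDRLEN;
}

/* header plus payload, padded to the next 4-byte boundary */
static size_t nlmsg_total_size_of(size_t payload)
{
	return NLMSG_ALIGN(NLMSG_HDRLEN + payload);
}
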
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index f248db572972..faf1e9300d8a 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -3,6 +3,7 @@
3 * nfnetlink. 3 * nfnetlink.
4 * 4 *
5 * (C) 2005 by Harald Welte <laforge@netfilter.org> 5 * (C) 2005 by Harald Welte <laforge@netfilter.org>
6 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * Based on the old ipv4-only ipt_ULOG.c: 8 * Based on the old ipv4-only ipt_ULOG.c:
8 * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> 9 * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
@@ -19,7 +20,7 @@
19#include <linux/ipv6.h> 20#include <linux/ipv6.h>
20#include <linux/netdevice.h> 21#include <linux/netdevice.h>
21#include <linux/netfilter.h> 22#include <linux/netfilter.h>
22#include <linux/netlink.h> 23#include <net/netlink.h>
23#include <linux/netfilter/nfnetlink.h> 24#include <linux/netfilter/nfnetlink.h>
24#include <linux/netfilter/nfnetlink_log.h> 25#include <linux/netfilter/nfnetlink_log.h>
25#include <linux/spinlock.h> 26#include <linux/spinlock.h>
@@ -32,6 +33,7 @@
32#include <linux/slab.h> 33#include <linux/slab.h>
33#include <net/sock.h> 34#include <net/sock.h>
34#include <net/netfilter/nf_log.h> 35#include <net/netfilter/nf_log.h>
36#include <net/netns/generic.h>
35#include <net/netfilter/nfnetlink_log.h> 37#include <net/netfilter/nfnetlink_log.h>
36 38
37#include <linux/atomic.h> 39#include <linux/atomic.h>
@@ -56,6 +58,7 @@ struct nfulnl_instance {
56 unsigned int qlen; /* number of nlmsgs in skb */ 58 unsigned int qlen; /* number of nlmsgs in skb */
57 struct sk_buff *skb; /* pre-allocated skb */ 59 struct sk_buff *skb; /* pre-allocated skb */
58 struct timer_list timer; 60 struct timer_list timer;
61 struct net *net;
59 struct user_namespace *peer_user_ns; /* User namespace of the peer process */ 62 struct user_namespace *peer_user_ns; /* User namespace of the peer process */
60 int peer_portid; /* PORTID of the peer process */ 63 int peer_portid; /* PORTID of the peer process */
61 64
@@ -71,25 +74,34 @@ struct nfulnl_instance {
71 struct rcu_head rcu; 74 struct rcu_head rcu;
72}; 75};
73 76
74static DEFINE_SPINLOCK(instances_lock);
75static atomic_t global_seq;
76
77#define INSTANCE_BUCKETS 16 77#define INSTANCE_BUCKETS 16
78static struct hlist_head instance_table[INSTANCE_BUCKETS];
79static unsigned int hash_init; 78static unsigned int hash_init;
80 79
80static int nfnl_log_net_id __read_mostly;
81
82struct nfnl_log_net {
83 spinlock_t instances_lock;
84 struct hlist_head instance_table[INSTANCE_BUCKETS];
85 atomic_t global_seq;
86};
87
88static struct nfnl_log_net *nfnl_log_pernet(struct net *net)
89{
90 return net_generic(net, nfnl_log_net_id);
91}
92
81static inline u_int8_t instance_hashfn(u_int16_t group_num) 93static inline u_int8_t instance_hashfn(u_int16_t group_num)
82{ 94{
83 return ((group_num & 0xff) % INSTANCE_BUCKETS); 95 return ((group_num & 0xff) % INSTANCE_BUCKETS);
84} 96}
85 97
86static struct nfulnl_instance * 98static struct nfulnl_instance *
87__instance_lookup(u_int16_t group_num) 99__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)
88{ 100{
89 struct hlist_head *head; 101 struct hlist_head *head;
90 struct nfulnl_instance *inst; 102 struct nfulnl_instance *inst;
91 103
92 head = &instance_table[instance_hashfn(group_num)]; 104 head = &log->instance_table[instance_hashfn(group_num)];
93 hlist_for_each_entry_rcu(inst, head, hlist) { 105 hlist_for_each_entry_rcu(inst, head, hlist) {
94 if (inst->group_num == group_num) 106 if (inst->group_num == group_num)
95 return inst; 107 return inst;
@@ -104,12 +116,12 @@ instance_get(struct nfulnl_instance *inst)
104} 116}
105 117
106static struct nfulnl_instance * 118static struct nfulnl_instance *
107instance_lookup_get(u_int16_t group_num) 119instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)
108{ 120{
109 struct nfulnl_instance *inst; 121 struct nfulnl_instance *inst;
110 122
111 rcu_read_lock_bh(); 123 rcu_read_lock_bh();
112 inst = __instance_lookup(group_num); 124 inst = __instance_lookup(log, group_num);
113 if (inst && !atomic_inc_not_zero(&inst->use)) 125 if (inst && !atomic_inc_not_zero(&inst->use))
114 inst = NULL; 126 inst = NULL;
115 rcu_read_unlock_bh(); 127 rcu_read_unlock_bh();
@@ -119,7 +131,11 @@ instance_lookup_get(u_int16_t group_num)
119 131
120static void nfulnl_instance_free_rcu(struct rcu_head *head) 132static void nfulnl_instance_free_rcu(struct rcu_head *head)
121{ 133{
122 kfree(container_of(head, struct nfulnl_instance, rcu)); 134 struct nfulnl_instance *inst =
135 container_of(head, struct nfulnl_instance, rcu);
136
137 put_net(inst->net);
138 kfree(inst);
123 module_put(THIS_MODULE); 139 module_put(THIS_MODULE);
124} 140}
125 141
@@ -133,13 +149,15 @@ instance_put(struct nfulnl_instance *inst)
133static void nfulnl_timer(unsigned long data); 149static void nfulnl_timer(unsigned long data);
134 150
135static struct nfulnl_instance * 151static struct nfulnl_instance *
136instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) 152instance_create(struct net *net, u_int16_t group_num,
153 int portid, struct user_namespace *user_ns)
137{ 154{
138 struct nfulnl_instance *inst; 155 struct nfulnl_instance *inst;
156 struct nfnl_log_net *log = nfnl_log_pernet(net);
139 int err; 157 int err;
140 158
141 spin_lock_bh(&instances_lock); 159 spin_lock_bh(&log->instances_lock);
142 if (__instance_lookup(group_num)) { 160 if (__instance_lookup(log, group_num)) {
143 err = -EEXIST; 161 err = -EEXIST;
144 goto out_unlock; 162 goto out_unlock;
145 } 163 }
@@ -163,6 +181,7 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
163 181
164 setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); 182 setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst);
165 183
184 inst->net = get_net(net);
166 inst->peer_user_ns = user_ns; 185 inst->peer_user_ns = user_ns;
167 inst->peer_portid = portid; 186 inst->peer_portid = portid;
168 inst->group_num = group_num; 187 inst->group_num = group_num;
@@ -174,14 +193,15 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)
174 inst->copy_range = NFULNL_COPY_RANGE_MAX; 193 inst->copy_range = NFULNL_COPY_RANGE_MAX;
175 194
176 hlist_add_head_rcu(&inst->hlist, 195 hlist_add_head_rcu(&inst->hlist,
177 &instance_table[instance_hashfn(group_num)]); 196 &log->instance_table[instance_hashfn(group_num)]);
197
178 198
179 spin_unlock_bh(&instances_lock); 199 spin_unlock_bh(&log->instances_lock);
180 200
181 return inst; 201 return inst;
182 202
183out_unlock: 203out_unlock:
184 spin_unlock_bh(&instances_lock); 204 spin_unlock_bh(&log->instances_lock);
185 return ERR_PTR(err); 205 return ERR_PTR(err);
186} 206}
187 207
@@ -210,11 +230,12 @@ __instance_destroy(struct nfulnl_instance *inst)
210} 230}
211 231
212static inline void 232static inline void
213instance_destroy(struct nfulnl_instance *inst) 233instance_destroy(struct nfnl_log_net *log,
234 struct nfulnl_instance *inst)
214{ 235{
215 spin_lock_bh(&instances_lock); 236 spin_lock_bh(&log->instances_lock);
216 __instance_destroy(inst); 237 __instance_destroy(inst);
217 spin_unlock_bh(&instances_lock); 238 spin_unlock_bh(&log->instances_lock);
218} 239}
219 240
220static int 241static int
@@ -298,7 +319,7 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags)
298} 319}
299 320
300static struct sk_buff * 321static struct sk_buff *
301nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) 322nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size)
302{ 323{
303 struct sk_buff *skb; 324 struct sk_buff *skb;
304 unsigned int n; 325 unsigned int n;
@@ -307,13 +328,14 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
307 * message. WARNING: has to be <= 128k due to slab restrictions */ 328 * message. WARNING: has to be <= 128k due to slab restrictions */
308 329
309 n = max(inst_size, pkt_size); 330 n = max(inst_size, pkt_size);
310 skb = alloc_skb(n, GFP_ATOMIC); 331 skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC);
311 if (!skb) { 332 if (!skb) {
312 if (n > pkt_size) { 333 if (n > pkt_size) {
313 /* try to allocate only as much as we need for current 334 /* try to allocate only as much as we need for current
314 * packet */ 335 * packet */
315 336
316 skb = alloc_skb(pkt_size, GFP_ATOMIC); 337 skb = nfnetlink_alloc_skb(&init_net, pkt_size,
338 peer_portid, GFP_ATOMIC);
317 if (!skb) 339 if (!skb)
318 pr_err("nfnetlink_log: can't even alloc %u bytes\n", 340 pr_err("nfnetlink_log: can't even alloc %u bytes\n",
319 pkt_size); 341 pkt_size);
@@ -336,7 +358,7 @@ __nfulnl_send(struct nfulnl_instance *inst)
336 if (!nlh) 358 if (!nlh)
337 goto out; 359 goto out;
338 } 360 }
339 status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, 361 status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,
340 MSG_DONTWAIT); 362 MSG_DONTWAIT);
341 363
342 inst->qlen = 0; 364 inst->qlen = 0;
@@ -370,7 +392,8 @@ nfulnl_timer(unsigned long data)
370/* This is an inline function, we don't really care about a long 392/* This is an inline function, we don't really care about a long
371 * list of arguments */ 393 * list of arguments */
372static inline int 394static inline int
373__build_packet_message(struct nfulnl_instance *inst, 395__build_packet_message(struct nfnl_log_net *log,
396 struct nfulnl_instance *inst,
374 const struct sk_buff *skb, 397 const struct sk_buff *skb,
375 unsigned int data_len, 398 unsigned int data_len,
376 u_int8_t pf, 399 u_int8_t pf,
@@ -536,7 +559,7 @@ __build_packet_message(struct nfulnl_instance *inst,
536 /* global sequence number */ 559 /* global sequence number */
537 if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) && 560 if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
538 nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL, 561 nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
539 htonl(atomic_inc_return(&global_seq)))) 562 htonl(atomic_inc_return(&log->global_seq))))
540 goto nla_put_failure; 563 goto nla_put_failure;
541 564
542 if (data_len) { 565 if (data_len) {
@@ -592,13 +615,15 @@ nfulnl_log_packet(u_int8_t pf,
592 const struct nf_loginfo *li; 615 const struct nf_loginfo *li;
593 unsigned int qthreshold; 616 unsigned int qthreshold;
594 unsigned int plen; 617 unsigned int plen;
618 struct net *net = dev_net(in ? in : out);
619 struct nfnl_log_net *log = nfnl_log_pernet(net);
595 620
596 if (li_user && li_user->type == NF_LOG_TYPE_ULOG) 621 if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
597 li = li_user; 622 li = li_user;
598 else 623 else
599 li = &default_loginfo; 624 li = &default_loginfo;
600 625
601 inst = instance_lookup_get(li->u.ulog.group); 626 inst = instance_lookup_get(log, li->u.ulog.group);
602 if (!inst) 627 if (!inst)
603 return; 628 return;
604 629
@@ -609,7 +634,7 @@ nfulnl_log_packet(u_int8_t pf,
609 /* FIXME: do we want to make the size calculation conditional based on 634 /* FIXME: do we want to make the size calculation conditional based on
610 * what is actually present? way more branches and checks, but more 635 * what is actually present? way more branches and checks, but more
611 * memory efficient... */ 636 * memory efficient... */
612 size = NLMSG_SPACE(sizeof(struct nfgenmsg)) 637 size = nlmsg_total_size(sizeof(struct nfgenmsg))
613 + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) 638 + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))
614 + nla_total_size(sizeof(u_int32_t)) /* ifindex */ 639 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
615 + nla_total_size(sizeof(u_int32_t)) /* ifindex */ 640 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
@@ -673,14 +698,15 @@ nfulnl_log_packet(u_int8_t pf,
673 } 698 }
674 699
675 if (!inst->skb) { 700 if (!inst->skb) {
676 inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size); 701 inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz,
702 size);
677 if (!inst->skb) 703 if (!inst->skb)
678 goto alloc_failure; 704 goto alloc_failure;
679 } 705 }
680 706
681 inst->qlen++; 707 inst->qlen++;
682 708
683 __build_packet_message(inst, skb, data_len, pf, 709 __build_packet_message(log, inst, skb, data_len, pf,
684 hooknum, in, out, prefix, plen); 710 hooknum, in, out, prefix, plen);
685 711
686 if (inst->qlen >= qthreshold) 712 if (inst->qlen >= qthreshold)
@@ -709,24 +735,24 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
709 unsigned long event, void *ptr) 735 unsigned long event, void *ptr)
710{ 736{
711 struct netlink_notify *n = ptr; 737 struct netlink_notify *n = ptr;
738 struct nfnl_log_net *log = nfnl_log_pernet(n->net);
712 739
713 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { 740 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
714 int i; 741 int i;
715 742
716 /* destroy all instances for this portid */ 743 /* destroy all instances for this portid */
717 spin_lock_bh(&instances_lock); 744 spin_lock_bh(&log->instances_lock);
718 for (i = 0; i < INSTANCE_BUCKETS; i++) { 745 for (i = 0; i < INSTANCE_BUCKETS; i++) {
719 struct hlist_node *t2; 746 struct hlist_node *t2;
720 struct nfulnl_instance *inst; 747 struct nfulnl_instance *inst;
721 struct hlist_head *head = &instance_table[i]; 748 struct hlist_head *head = &log->instance_table[i];
722 749
723 hlist_for_each_entry_safe(inst, t2, head, hlist) { 750 hlist_for_each_entry_safe(inst, t2, head, hlist) {
724 if ((net_eq(n->net, &init_net)) && 751 if (n->portid == inst->peer_portid)
725 (n->portid == inst->peer_portid))
726 __instance_destroy(inst); 752 __instance_destroy(inst);
727 } 753 }
728 } 754 }
729 spin_unlock_bh(&instances_lock); 755 spin_unlock_bh(&log->instances_lock);
730 } 756 }
731 return NOTIFY_DONE; 757 return NOTIFY_DONE;
732} 758}
@@ -767,6 +793,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
767 u_int16_t group_num = ntohs(nfmsg->res_id); 793 u_int16_t group_num = ntohs(nfmsg->res_id);
768 struct nfulnl_instance *inst; 794 struct nfulnl_instance *inst;
769 struct nfulnl_msg_config_cmd *cmd = NULL; 795 struct nfulnl_msg_config_cmd *cmd = NULL;
796 struct net *net = sock_net(ctnl);
797 struct nfnl_log_net *log = nfnl_log_pernet(net);
770 int ret = 0; 798 int ret = 0;
771 799
772 if (nfula[NFULA_CFG_CMD]) { 800 if (nfula[NFULA_CFG_CMD]) {
@@ -776,14 +804,14 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
776 /* Commands without queue context */ 804 /* Commands without queue context */
777 switch (cmd->command) { 805 switch (cmd->command) {
778 case NFULNL_CFG_CMD_PF_BIND: 806 case NFULNL_CFG_CMD_PF_BIND:
779 return nf_log_bind_pf(pf, &nfulnl_logger); 807 return nf_log_bind_pf(net, pf, &nfulnl_logger);
780 case NFULNL_CFG_CMD_PF_UNBIND: 808 case NFULNL_CFG_CMD_PF_UNBIND:
781 nf_log_unbind_pf(pf); 809 nf_log_unbind_pf(net, pf);
782 return 0; 810 return 0;
783 } 811 }
784 } 812 }
785 813
786 inst = instance_lookup_get(group_num); 814 inst = instance_lookup_get(log, group_num);
787 if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { 815 if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {
788 ret = -EPERM; 816 ret = -EPERM;
789 goto out_put; 817 goto out_put;
@@ -797,9 +825,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
797 goto out_put; 825 goto out_put;
798 } 826 }
799 827
800 inst = instance_create(group_num, 828 inst = instance_create(net, group_num,
801 NETLINK_CB(skb).portid, 829 NETLINK_CB(skb).portid,
802 sk_user_ns(NETLINK_CB(skb).ssk)); 830 sk_user_ns(NETLINK_CB(skb).sk));
803 if (IS_ERR(inst)) { 831 if (IS_ERR(inst)) {
804 ret = PTR_ERR(inst); 832 ret = PTR_ERR(inst);
805 goto out; 833 goto out;
@@ -811,7 +839,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
811 goto out; 839 goto out;
812 } 840 }
813 841
814 instance_destroy(inst); 842 instance_destroy(log, inst);
815 goto out_put; 843 goto out_put;
816 default: 844 default:
817 ret = -ENOTSUPP; 845 ret = -ENOTSUPP;
@@ -894,55 +922,68 @@ static const struct nfnetlink_subsystem nfulnl_subsys = {
894 922
895#ifdef CONFIG_PROC_FS 923#ifdef CONFIG_PROC_FS
896struct iter_state { 924struct iter_state {
925 struct seq_net_private p;
897 unsigned int bucket; 926 unsigned int bucket;
898}; 927};
899 928
900static struct hlist_node *get_first(struct iter_state *st) 929static struct hlist_node *get_first(struct net *net, struct iter_state *st)
901{ 930{
931 struct nfnl_log_net *log;
902 if (!st) 932 if (!st)
903 return NULL; 933 return NULL;
904 934
935 log = nfnl_log_pernet(net);
936
905 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { 937 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
906 if (!hlist_empty(&instance_table[st->bucket])) 938 struct hlist_head *head = &log->instance_table[st->bucket];
907 return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); 939
940 if (!hlist_empty(head))
941 return rcu_dereference_bh(hlist_first_rcu(head));
908 } 942 }
909 return NULL; 943 return NULL;
910} 944}
911 945
912static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) 946static struct hlist_node *get_next(struct net *net, struct iter_state *st,
947 struct hlist_node *h)
913{ 948{
914 h = rcu_dereference_bh(hlist_next_rcu(h)); 949 h = rcu_dereference_bh(hlist_next_rcu(h));
915 while (!h) { 950 while (!h) {
951 struct nfnl_log_net *log;
952 struct hlist_head *head;
953
916 if (++st->bucket >= INSTANCE_BUCKETS) 954 if (++st->bucket >= INSTANCE_BUCKETS)
917 return NULL; 955 return NULL;
918 956
919 h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); 957 log = nfnl_log_pernet(net);
958 head = &log->instance_table[st->bucket];
959 h = rcu_dereference_bh(hlist_first_rcu(head));
920 } 960 }
921 return h; 961 return h;
922} 962}
923 963
924static struct hlist_node *get_idx(struct iter_state *st, loff_t pos) 964static struct hlist_node *get_idx(struct net *net, struct iter_state *st,
965 loff_t pos)
925{ 966{
926 struct hlist_node *head; 967 struct hlist_node *head;
927 head = get_first(st); 968 head = get_first(net, st);
928 969
929 if (head) 970 if (head)
930 while (pos && (head = get_next(st, head))) 971 while (pos && (head = get_next(net, st, head)))
931 pos--; 972 pos--;
932 return pos ? NULL : head; 973 return pos ? NULL : head;
933} 974}
934 975
935static void *seq_start(struct seq_file *seq, loff_t *pos) 976static void *seq_start(struct seq_file *s, loff_t *pos)
936 __acquires(rcu_bh) 977 __acquires(rcu_bh)
937{ 978{
938 rcu_read_lock_bh(); 979 rcu_read_lock_bh();
939 return get_idx(seq->private, *pos); 980 return get_idx(seq_file_net(s), s->private, *pos);
940} 981}
941 982
942static void *seq_next(struct seq_file *s, void *v, loff_t *pos) 983static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
943{ 984{
944 (*pos)++; 985 (*pos)++;
945 return get_next(s->private, v); 986 return get_next(seq_file_net(s), s->private, v);
946} 987}
947 988
948static void seq_stop(struct seq_file *s, void *v) 989static void seq_stop(struct seq_file *s, void *v)
@@ -971,8 +1012,8 @@ static const struct seq_operations nful_seq_ops = {
971 1012
972static int nful_open(struct inode *inode, struct file *file) 1013static int nful_open(struct inode *inode, struct file *file)
973{ 1014{
974 return seq_open_private(file, &nful_seq_ops, 1015 return seq_open_net(inode, file, &nful_seq_ops,
975 sizeof(struct iter_state)); 1016 sizeof(struct iter_state));
976} 1017}
977 1018
978static const struct file_operations nful_file_ops = { 1019static const struct file_operations nful_file_ops = {
@@ -980,17 +1021,43 @@ static const struct file_operations nful_file_ops = {
980 .open = nful_open, 1021 .open = nful_open,
981 .read = seq_read, 1022 .read = seq_read,
982 .llseek = seq_lseek, 1023 .llseek = seq_lseek,
983 .release = seq_release_private, 1024 .release = seq_release_net,
984}; 1025};
985 1026
986#endif /* PROC_FS */ 1027#endif /* PROC_FS */
987 1028
988static int __init nfnetlink_log_init(void) 1029static int __net_init nfnl_log_net_init(struct net *net)
989{ 1030{
990 int i, status = -ENOMEM; 1031 unsigned int i;
1032 struct nfnl_log_net *log = nfnl_log_pernet(net);
991 1033
992 for (i = 0; i < INSTANCE_BUCKETS; i++) 1034 for (i = 0; i < INSTANCE_BUCKETS; i++)
993 INIT_HLIST_HEAD(&instance_table[i]); 1035 INIT_HLIST_HEAD(&log->instance_table[i]);
1036 spin_lock_init(&log->instances_lock);
1037
1038#ifdef CONFIG_PROC_FS
1039 if (!proc_create("nfnetlink_log", 0440,
1040 net->nf.proc_netfilter, &nful_file_ops))
1041 return -ENOMEM;
1042#endif
1043 return 0;
1044}
1045
1046static void __net_exit nfnl_log_net_exit(struct net *net)
1047{
1048 remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter);
1049}
1050
1051static struct pernet_operations nfnl_log_net_ops = {
1052 .init = nfnl_log_net_init,
1053 .exit = nfnl_log_net_exit,
1054 .id = &nfnl_log_net_id,
1055 .size = sizeof(struct nfnl_log_net),
1056};
1057
1058static int __init nfnetlink_log_init(void)
1059{
1060 int status = -ENOMEM;
994 1061
995 /* it's not really all that important to have a random value, so 1062 /* it's not really all that important to have a random value, so
996 * we can do this from the init function, even if there hasn't 1063 * we can do this from the init function, even if there hasn't
@@ -1000,29 +1067,25 @@ static int __init nfnetlink_log_init(void)
1000 netlink_register_notifier(&nfulnl_rtnl_notifier); 1067 netlink_register_notifier(&nfulnl_rtnl_notifier);
1001 status = nfnetlink_subsys_register(&nfulnl_subsys); 1068 status = nfnetlink_subsys_register(&nfulnl_subsys);
1002 if (status < 0) { 1069 if (status < 0) {
1003 printk(KERN_ERR "log: failed to create netlink socket\n"); 1070 pr_err("log: failed to create netlink socket\n");
1004 goto cleanup_netlink_notifier; 1071 goto cleanup_netlink_notifier;
1005 } 1072 }
1006 1073
1007 status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger); 1074 status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
1008 if (status < 0) { 1075 if (status < 0) {
1009 printk(KERN_ERR "log: failed to register logger\n"); 1076 pr_err("log: failed to register logger\n");
1010 goto cleanup_subsys; 1077 goto cleanup_subsys;
1011 } 1078 }
1012 1079
1013#ifdef CONFIG_PROC_FS 1080 status = register_pernet_subsys(&nfnl_log_net_ops);
1014 if (!proc_create("nfnetlink_log", 0440, 1081 if (status < 0) {
1015 proc_net_netfilter, &nful_file_ops)) { 1082 pr_err("log: failed to register pernet ops\n");
1016 status = -ENOMEM;
1017 goto cleanup_logger; 1083 goto cleanup_logger;
1018 } 1084 }
1019#endif
1020 return status; 1085 return status;
1021 1086
1022#ifdef CONFIG_PROC_FS
1023cleanup_logger: 1087cleanup_logger:
1024 nf_log_unregister(&nfulnl_logger); 1088 nf_log_unregister(&nfulnl_logger);
1025#endif
1026cleanup_subsys: 1089cleanup_subsys:
1027 nfnetlink_subsys_unregister(&nfulnl_subsys); 1090 nfnetlink_subsys_unregister(&nfulnl_subsys);
1028cleanup_netlink_notifier: 1091cleanup_netlink_notifier:
@@ -1032,10 +1095,8 @@ cleanup_netlink_notifier:
1032 1095
1033static void __exit nfnetlink_log_fini(void) 1096static void __exit nfnetlink_log_fini(void)
1034{ 1097{
1098 unregister_pernet_subsys(&nfnl_log_net_ops);
1035 nf_log_unregister(&nfulnl_logger); 1099 nf_log_unregister(&nfulnl_logger);
1036#ifdef CONFIG_PROC_FS
1037 remove_proc_entry("nfnetlink_log", proc_net_netfilter);
1038#endif
1039 nfnetlink_subsys_unregister(&nfulnl_subsys); 1100 nfnetlink_subsys_unregister(&nfulnl_subsys);
1040 netlink_unregister_notifier(&nfulnl_rtnl_notifier); 1101 netlink_unregister_notifier(&nfulnl_rtnl_notifier);
1041} 1102}
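
Unlike nf_log, whose per-net state is embedded directly in struct net, nfnetlink_log keeps its instance table in dynamically allocated per-net storage: pernet_operations carries .id and .size, the core allocates one struct nfnl_log_net per namespace, and nfnl_log_pernet() fetches it through net_generic(). Each instance additionally takes get_net() so the namespace cannot disappear under a live logger, dropped again via put_net() in the RCU free path. A compressed sketch of the net_generic pattern (the demo_* names are illustrative):

#include <linux/spinlock.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static int demo_net_id __read_mostly;

struct demo_net {
	spinlock_t lock;		/* guards this namespace's state */
};

static struct demo_net *demo_pernet(struct net *net)
{
	/* returns the .size bytes the core allocated for this net */
	return net_generic(net, demo_net_id);
}

static int __net_init demo_net_init(struct net *net)
{
	spin_lock_init(&demo_pernet(net)->lock);
	return 0;
}

static struct pernet_operations demo_net_ops = {
	.init = demo_net_init,
	.id   = &demo_net_id,		 /* core assigns the id */
	.size = sizeof(struct demo_net), /* core allocates this per net */
};
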
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 42680b2baa11..2e0e835baf72 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -30,6 +30,7 @@
30#include <linux/list.h> 30#include <linux/list.h>
31#include <net/sock.h> 31#include <net/sock.h>
32#include <net/netfilter/nf_queue.h> 32#include <net/netfilter/nf_queue.h>
33#include <net/netns/generic.h>
33#include <net/netfilter/nfnetlink_queue.h> 34#include <net/netfilter/nfnetlink_queue.h>
34 35
35#include <linux/atomic.h> 36#include <linux/atomic.h>
@@ -66,23 +67,31 @@ struct nfqnl_instance {
66 67
67typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); 68typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
68 69
69static DEFINE_SPINLOCK(instances_lock); 70static int nfnl_queue_net_id __read_mostly;
70 71
71#define INSTANCE_BUCKETS 16 72#define INSTANCE_BUCKETS 16
72static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; 73struct nfnl_queue_net {
74 spinlock_t instances_lock;
75 struct hlist_head instance_table[INSTANCE_BUCKETS];
76};
77
78static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
79{
80 return net_generic(net, nfnl_queue_net_id);
81}
73 82
74static inline u_int8_t instance_hashfn(u_int16_t queue_num) 83static inline u_int8_t instance_hashfn(u_int16_t queue_num)
75{ 84{
76 return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; 85 return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
77} 86}
78 87
79static struct nfqnl_instance * 88static struct nfqnl_instance *
80instance_lookup(u_int16_t queue_num) 89instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
81{ 90{
82 struct hlist_head *head; 91 struct hlist_head *head;
83 struct nfqnl_instance *inst; 92 struct nfqnl_instance *inst;
84 93
85 head = &instance_table[instance_hashfn(queue_num)]; 94 head = &q->instance_table[instance_hashfn(queue_num)];
86 hlist_for_each_entry_rcu(inst, head, hlist) { 95 hlist_for_each_entry_rcu(inst, head, hlist) {
87 if (inst->queue_num == queue_num) 96 if (inst->queue_num == queue_num)
88 return inst; 97 return inst;
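
A small correctness fix hides in instance_hashfn() above: OR-folding the two bytes, ((queue_num >> 8) | queue_num), biases the low nibble toward set bits, so high-numbered buckets collect far more instances than bucket 0, while XOR-folding spreads queue numbers evenly over the 16 buckets. A quick userspace check of both distributions (illustrative only):

#include <stdio.h>
#include <stdint.h>

#define BUCKETS 16

int main(void)
{
	unsigned or_hist[BUCKETS] = {0}, xor_hist[BUCKETS] = {0};

	/* hash every possible 16-bit queue number both ways */
	for (uint32_t q = 0; q < 0x10000; q++) {
		or_hist[((q >> 8) | q) % BUCKETS]++;
		xor_hist[((q >> 8) ^ q) % BUCKETS]++;
	}
	for (int i = 0; i < BUCKETS; i++)
		printf("bucket %2d: or=%6u xor=%6u\n",
		       i, or_hist[i], xor_hist[i]);
	return 0;	/* xor column is flat at 4096; or column is badly skewed */
}
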
@@ -91,14 +100,15 @@ instance_lookup(u_int16_t queue_num)
91} 100}
92 101
93static struct nfqnl_instance * 102static struct nfqnl_instance *
94instance_create(u_int16_t queue_num, int portid) 103instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
104 int portid)
95{ 105{
96 struct nfqnl_instance *inst; 106 struct nfqnl_instance *inst;
97 unsigned int h; 107 unsigned int h;
98 int err; 108 int err;
99 109
100 spin_lock(&instances_lock); 110 spin_lock(&q->instances_lock);
101 if (instance_lookup(queue_num)) { 111 if (instance_lookup(q, queue_num)) {
102 err = -EEXIST; 112 err = -EEXIST;
103 goto out_unlock; 113 goto out_unlock;
104 } 114 }
@@ -123,16 +133,16 @@ instance_create(u_int16_t queue_num, int portid)
123 } 133 }
124 134
125 h = instance_hashfn(queue_num); 135 h = instance_hashfn(queue_num);
126 hlist_add_head_rcu(&inst->hlist, &instance_table[h]); 136 hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]);
127 137
128 spin_unlock(&instances_lock); 138 spin_unlock(&q->instances_lock);
129 139
130 return inst; 140 return inst;
131 141
132out_free: 142out_free:
133 kfree(inst); 143 kfree(inst);
134out_unlock: 144out_unlock:
135 spin_unlock(&instances_lock); 145 spin_unlock(&q->instances_lock);
136 return ERR_PTR(err); 146 return ERR_PTR(err);
137} 147}
138 148
@@ -158,11 +168,11 @@ __instance_destroy(struct nfqnl_instance *inst)
158} 168}
159 169
160static void 170static void
161instance_destroy(struct nfqnl_instance *inst) 171instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
162{ 172{
163 spin_lock(&instances_lock); 173 spin_lock(&q->instances_lock);
164 __instance_destroy(inst); 174 __instance_destroy(inst);
165 spin_unlock(&instances_lock); 175 spin_unlock(&q->instances_lock);
166} 176}
167 177
168static inline void 178static inline void
@@ -217,14 +227,71 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
217 spin_unlock_bh(&queue->lock); 227 spin_unlock_bh(&queue->lock);
218} 228}
219 229
230static void
231nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
232{
233 int i, j = 0;
234 int plen = 0; /* length of skb->head fragment */
235 struct page *page;
236 unsigned int offset;
237
238 /* don't bother with small payloads */
239 if (len <= skb_tailroom(to)) {
240 skb_copy_bits(from, 0, skb_put(to, len), len);
241 return;
242 }
243
244 if (hlen) {
245 skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
246 len -= hlen;
247 } else {
248 plen = min_t(int, skb_headlen(from), len);
249 if (plen) {
250 page = virt_to_head_page(from->head);
251 offset = from->data - (unsigned char *)page_address(page);
252 __skb_fill_page_desc(to, 0, page, offset, plen);
253 get_page(page);
254 j = 1;
255 len -= plen;
256 }
257 }
258
259 to->truesize += len + plen;
260 to->len += len + plen;
261 to->data_len += len + plen;
262
263 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
264 if (!len)
265 break;
266 skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
267 skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
268 len -= skb_shinfo(to)->frags[j].size;
269 skb_frag_ref(to, j);
270 j++;
271 }
272 skb_shinfo(to)->nr_frags = j;
273}
274
275static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet)
276{
277 __u32 flags = 0;
278
279 if (packet->ip_summed == CHECKSUM_PARTIAL)
280 flags = NFQA_SKB_CSUMNOTREADY;
281 if (skb_is_gso(packet))
282 flags |= NFQA_SKB_GSO;
283
284 return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
285}
286
220static struct sk_buff * 287static struct sk_buff *
221nfqnl_build_packet_message(struct nfqnl_instance *queue, 288nfqnl_build_packet_message(struct nfqnl_instance *queue,
222 struct nf_queue_entry *entry, 289 struct nf_queue_entry *entry,
223 __be32 **packet_id_ptr) 290 __be32 **packet_id_ptr)
224{ 291{
225 sk_buff_data_t old_tail;
226 size_t size; 292 size_t size;
227 size_t data_len = 0, cap_len = 0; 293 size_t data_len = 0, cap_len = 0;
294 int hlen = 0;
228 struct sk_buff *skb; 295 struct sk_buff *skb;
229 struct nlattr *nla; 296 struct nlattr *nla;
230 struct nfqnl_msg_packet_hdr *pmsg; 297 struct nfqnl_msg_packet_hdr *pmsg;
@@ -236,7 +303,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
236 struct nf_conn *ct = NULL; 303 struct nf_conn *ct = NULL;
237 enum ip_conntrack_info uninitialized_var(ctinfo); 304 enum ip_conntrack_info uninitialized_var(ctinfo);
238 305
239 size = NLMSG_SPACE(sizeof(struct nfgenmsg)) 306 size = nlmsg_total_size(sizeof(struct nfgenmsg))
240 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) 307 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
241 + nla_total_size(sizeof(u_int32_t)) /* ifindex */ 308 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
242 + nla_total_size(sizeof(u_int32_t)) /* ifindex */ 309 + nla_total_size(sizeof(u_int32_t)) /* ifindex */
@@ -246,8 +313,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
246#endif 313#endif
247 + nla_total_size(sizeof(u_int32_t)) /* mark */ 314 + nla_total_size(sizeof(u_int32_t)) /* mark */
248 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) 315 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
249 + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp) 316 + nla_total_size(sizeof(u_int32_t)) /* skbinfo */
250 + nla_total_size(sizeof(u_int32_t))); /* cap_len */ 317 + nla_total_size(sizeof(u_int32_t)); /* cap_len */
318
319 if (entskb->tstamp.tv64)
320 size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
251 321
252 outdev = entry->outdev; 322 outdev = entry->outdev;
253 323
@@ -257,7 +327,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
257 break; 327 break;
258 328
259 case NFQNL_COPY_PACKET: 329 case NFQNL_COPY_PACKET:
260 if (entskb->ip_summed == CHECKSUM_PARTIAL && 330 if (!(queue->flags & NFQA_CFG_F_GSO) &&
331 entskb->ip_summed == CHECKSUM_PARTIAL &&
261 skb_checksum_help(entskb)) 332 skb_checksum_help(entskb))
262 return NULL; 333 return NULL;
263 334
@@ -265,7 +336,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
265 if (data_len == 0 || data_len > entskb->len) 336 if (data_len == 0 || data_len > entskb->len)
266 data_len = entskb->len; 337 data_len = entskb->len;
267 338
268 size += nla_total_size(data_len); 339
340 if (!entskb->head_frag ||
341 skb_headlen(entskb) < L1_CACHE_BYTES ||
342 skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
343 hlen = skb_headlen(entskb);
344
345 if (skb_has_frag_list(entskb))
346 hlen = entskb->len;
347 hlen = min_t(int, data_len, hlen);
348 size += sizeof(struct nlattr) + hlen;
269 cap_len = entskb->len; 349 cap_len = entskb->len;
270 break; 350 break;
271 } 351 }
@@ -273,11 +353,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
273 if (queue->flags & NFQA_CFG_F_CONNTRACK) 353 if (queue->flags & NFQA_CFG_F_CONNTRACK)
274 ct = nfqnl_ct_get(entskb, &size, &ctinfo); 354 ct = nfqnl_ct_get(entskb, &size, &ctinfo);
275 355
276 skb = alloc_skb(size, GFP_ATOMIC); 356 skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid,
357 GFP_ATOMIC);
277 if (!skb) 358 if (!skb)
278 return NULL; 359 return NULL;
279 360
280 old_tail = skb->tail;
281 nlh = nlmsg_put(skb, 0, 0, 361 nlh = nlmsg_put(skb, 0, 0,
282 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, 362 NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
283 sizeof(struct nfgenmsg), 0); 363 sizeof(struct nfgenmsg), 0);
@@ -382,31 +462,29 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
382 goto nla_put_failure; 462 goto nla_put_failure;
383 } 463 }
384 464
465 if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
466 goto nla_put_failure;
467
468 if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
469 goto nla_put_failure;
470
471 if (nfqnl_put_packet_info(skb, entskb))
472 goto nla_put_failure;
473
385 if (data_len) { 474 if (data_len) {
386 struct nlattr *nla; 475 struct nlattr *nla;
387 int sz = nla_attr_size(data_len);
388 476
389 if (skb_tailroom(skb) < nla_total_size(data_len)) { 477 if (skb_tailroom(skb) < sizeof(*nla) + hlen)
390 printk(KERN_WARNING "nf_queue: no tailroom!\n"); 478 goto nla_put_failure;
391 kfree_skb(skb);
392 return NULL;
393 }
394 479
395 nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); 480 nla = (struct nlattr *)skb_put(skb, sizeof(*nla));
396 nla->nla_type = NFQA_PAYLOAD; 481 nla->nla_type = NFQA_PAYLOAD;
397 nla->nla_len = sz; 482 nla->nla_len = nla_attr_size(data_len);
398 483
399 if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) 484 nfqnl_zcopy(skb, entskb, data_len, hlen);
400 BUG();
401 } 485 }
402 486
403 if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) 487 nlh->nlmsg_len = skb->len;
404 goto nla_put_failure;
405
406 if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
407 goto nla_put_failure;
408
409 nlh->nlmsg_len = skb->tail - old_tail;
410 return skb; 488 return skb;
411 489
412nla_put_failure: 490nla_put_failure:
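
For orientation, a hedged userspace sketch of consuming the message built above, assuming libmnl is available (NFQA_* constants come from <linux/netfilter/nfnetlink_queue.h>). NFQA_CAP_LEN carries the original packet length in network byte order, so a daemon can tell a truncated NFQA_PAYLOAD from a complete one:

    #include <arpa/inet.h>
    #include <stdio.h>
    #include <libmnl/libmnl.h>
    #include <linux/netfilter/nfnetlink.h>
    #include <linux/netfilter/nfnetlink_queue.h>

    static int attr_cb(const struct nlattr *attr, void *data)
    {
        const struct nlattr **tb = data;

        /* skip attributes newer than this header knows about */
        if (mnl_attr_type_valid(attr, NFQA_MAX) < 0)
            return MNL_CB_OK;
        tb[mnl_attr_get_type(attr)] = attr;
        return MNL_CB_OK;
    }

    static void handle_packet(const struct nlmsghdr *nlh)
    {
        const struct nlattr *tb[NFQA_MAX + 1] = {};

        mnl_attr_parse(nlh, sizeof(struct nfgenmsg), attr_cb, tb);
        if (tb[NFQA_PAYLOAD]) {
            unsigned int len = mnl_attr_get_payload_len(tb[NFQA_PAYLOAD]);
            unsigned int cap = tb[NFQA_CAP_LEN]
                ? ntohl(mnl_attr_get_u32(tb[NFQA_CAP_LEN]))
                : len;
            printf("payload: %u of %u bytes\n", len, cap);
        }
    }
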
@@ -416,26 +494,14 @@ nla_put_failure:
416} 494}
417 495
418static int 496static int
419nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) 497__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
498 struct nf_queue_entry *entry)
420{ 499{
421 struct sk_buff *nskb; 500 struct sk_buff *nskb;
422 struct nfqnl_instance *queue;
423 int err = -ENOBUFS; 501 int err = -ENOBUFS;
424 __be32 *packet_id_ptr; 502 __be32 *packet_id_ptr;
425 int failopen = 0; 503 int failopen = 0;
426 504
427 /* rcu_read_lock()ed by nf_hook_slow() */
428 queue = instance_lookup(queuenum);
429 if (!queue) {
430 err = -ESRCH;
431 goto err_out;
432 }
433
434 if (queue->copy_mode == NFQNL_COPY_NONE) {
435 err = -EINVAL;
436 goto err_out;
437 }
438
439 nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr); 505 nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);
440 if (nskb == NULL) { 506 if (nskb == NULL) {
441 err = -ENOMEM; 507 err = -ENOMEM;
@@ -462,7 +528,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
462 *packet_id_ptr = htonl(entry->id); 528 *packet_id_ptr = htonl(entry->id);
463 529
464 /* nfnetlink_unicast will either free the nskb or add it to a socket */ 530 /* nfnetlink_unicast will either free the nskb or add it to a socket */
465 err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); 531 err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT);
466 if (err < 0) { 532 if (err < 0) {
467 queue->queue_user_dropped++; 533 queue->queue_user_dropped++;
468 goto err_out_unlock; 534 goto err_out_unlock;
@@ -483,6 +549,141 @@ err_out:
483 return err; 549 return err;
484} 550}
485 551
552static struct nf_queue_entry *
553nf_queue_entry_dup(struct nf_queue_entry *e)
554{
555 struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
556 if (entry) {
557 if (nf_queue_entry_get_refs(entry))
558 return entry;
559 kfree(entry);
560 }
561 return NULL;
562}
563
564#ifdef CONFIG_BRIDGE_NETFILTER
565/* When called from bridge netfilter, skb->data must point to MAC header
566 * before calling skb_gso_segment(). Else, original MAC header is lost
567 * and segmented skbs will be sent to wrong destination.
568 */
569static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
570{
571 if (skb->nf_bridge)
572 __skb_push(skb, skb->network_header - skb->mac_header);
573}
574
575static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
576{
577 if (skb->nf_bridge)
578 __skb_pull(skb, skb->network_header - skb->mac_header);
579}
580#else
581#define nf_bridge_adjust_skb_data(s) do {} while (0)
582#define nf_bridge_adjust_segmented_data(s) do {} while (0)
583#endif
584
585static void free_entry(struct nf_queue_entry *entry)
586{
587 nf_queue_entry_release_refs(entry);
588 kfree(entry);
589}
590
591static int
592__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
593 struct sk_buff *skb, struct nf_queue_entry *entry)
594{
595 int ret = -ENOMEM;
596 struct nf_queue_entry *entry_seg;
597
598 nf_bridge_adjust_segmented_data(skb);
599
600 if (skb->next == NULL) { /* last packet, no need to copy entry */
601 struct sk_buff *gso_skb = entry->skb;
602 entry->skb = skb;
603 ret = __nfqnl_enqueue_packet(net, queue, entry);
604 if (ret)
605 entry->skb = gso_skb;
606 return ret;
607 }
608
609 skb->next = NULL;
610
611 entry_seg = nf_queue_entry_dup(entry);
612 if (entry_seg) {
613 entry_seg->skb = skb;
614 ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
615 if (ret)
616 free_entry(entry_seg);
617 }
618 return ret;
619}
620
621static int
622nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
623{
624 unsigned int queued;
625 struct nfqnl_instance *queue;
626 struct sk_buff *skb, *segs;
627 int err = -ENOBUFS;
628 struct net *net = dev_net(entry->indev ?
629 entry->indev : entry->outdev);
630 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
631
632 /* rcu_read_lock()ed by nf_hook_slow() */
633 queue = instance_lookup(q, queuenum);
634 if (!queue)
635 return -ESRCH;
636
637 if (queue->copy_mode == NFQNL_COPY_NONE)
638 return -EINVAL;
639
640 if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(entry->skb))
641 return __nfqnl_enqueue_packet(net, queue, entry);
642
643 skb = entry->skb;
644
645 switch (entry->pf) {
646 case NFPROTO_IPV4:
647 skb->protocol = htons(ETH_P_IP);
648 break;
649 case NFPROTO_IPV6:
650 skb->protocol = htons(ETH_P_IPV6);
651 break;
652 }
653
654 nf_bridge_adjust_skb_data(skb);
655 segs = skb_gso_segment(skb, 0);
656 /* Does not use PTR_ERR to limit the number of error codes that can be
657 * returned by nf_queue. For instance, callers rely on -ECANCELED to
658 * mean 'ignore this hook'.
659 */
660 if (IS_ERR(segs))
661 goto out_err;
662 queued = 0;
663 err = 0;
664 do {
665 struct sk_buff *nskb = segs->next;
666 if (err == 0)
667 err = __nfqnl_enqueue_packet_gso(net, queue,
668 segs, entry);
669 if (err == 0)
670 queued++;
671 else
672 kfree_skb(segs);
673 segs = nskb;
674 } while (segs);
675
676 if (queued) {
677 if (err) /* some segments are already queued */
678 free_entry(entry);
679 kfree_skb(skb);
680 return 0;
681 }
682 out_err:
683 nf_bridge_adjust_segmented_data(skb);
684 return err;
685}
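
The segmentation walk above follows a fixed pattern: detach each segment, try to enqueue it, free it on failure, and treat the original GSO skb as consumed once at least one segment made it through. A standalone model of that control flow (illustrative only, no kernel types):

    #include <stdlib.h>

    struct seg { struct seg *next; };

    /* stand-in for __nfqnl_enqueue_packet_gso(); 0 means queued */
    static int enqueue_one(struct seg *s) { (void)s; return 0; }

    static int enqueue_all(struct seg *segs)
    {
        int err = 0, queued = 0;

        while (segs) {
            struct seg *next = segs->next;

            segs->next = NULL;          /* detach before handing off */
            if (err == 0)
                err = enqueue_one(segs);
            if (err == 0)
                queued++;
            else
                free(segs);             /* drop what we could not queue */
            segs = next;
        }
        /* partial success still consumes the original aggregate */
        return queued ? 0 : err;
    }

In the kernel path the last segment additionally reuses the original entry instead of duplicating it, which is what the skb->next == NULL branch in __nfqnl_enqueue_packet_gso() handles.
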
686
486static int 687static int
487nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) 688nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
488{ 689{
@@ -575,15 +776,16 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
575/* drop all packets with either indev or outdev == ifindex from all queue 776/* drop all packets with either indev or outdev == ifindex from all queue
576 * instances */ 777 * instances */
577static void 778static void
578nfqnl_dev_drop(int ifindex) 779nfqnl_dev_drop(struct net *net, int ifindex)
579{ 780{
580 int i; 781 int i;
782 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
581 783
582 rcu_read_lock(); 784 rcu_read_lock();
583 785
584 for (i = 0; i < INSTANCE_BUCKETS; i++) { 786 for (i = 0; i < INSTANCE_BUCKETS; i++) {
585 struct nfqnl_instance *inst; 787 struct nfqnl_instance *inst;
586 struct hlist_head *head = &instance_table[i]; 788 struct hlist_head *head = &q->instance_table[i];
587 789
588 hlist_for_each_entry_rcu(inst, head, hlist) 790 hlist_for_each_entry_rcu(inst, head, hlist)
589 nfqnl_flush(inst, dev_cmp, ifindex); 791 nfqnl_flush(inst, dev_cmp, ifindex);
@@ -600,12 +802,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this,
600{ 802{
601 struct net_device *dev = ptr; 803 struct net_device *dev = ptr;
602 804
603 if (!net_eq(dev_net(dev), &init_net))
604 return NOTIFY_DONE;
605
606 /* Drop any packets associated with the downed device */ 805 /* Drop any packets associated with the downed device */
607 if (event == NETDEV_DOWN) 806 if (event == NETDEV_DOWN)
608 nfqnl_dev_drop(dev->ifindex); 807 nfqnl_dev_drop(dev_net(dev), dev->ifindex);
609 return NOTIFY_DONE; 808 return NOTIFY_DONE;
610} 809}
611 810
@@ -618,24 +817,24 @@ nfqnl_rcv_nl_event(struct notifier_block *this,
618 unsigned long event, void *ptr) 817 unsigned long event, void *ptr)
619{ 818{
620 struct netlink_notify *n = ptr; 819 struct netlink_notify *n = ptr;
820 struct nfnl_queue_net *q = nfnl_queue_pernet(n->net);
621 821
622 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { 822 if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
623 int i; 823 int i;
624 824
625 /* destroy all instances for this portid */ 825 /* destroy all instances for this portid */
626 spin_lock(&instances_lock); 826 spin_lock(&q->instances_lock);
627 for (i = 0; i < INSTANCE_BUCKETS; i++) { 827 for (i = 0; i < INSTANCE_BUCKETS; i++) {
628 struct hlist_node *t2; 828 struct hlist_node *t2;
629 struct nfqnl_instance *inst; 829 struct nfqnl_instance *inst;
630 struct hlist_head *head = &instance_table[i]; 830 struct hlist_head *head = &q->instance_table[i];
631 831
632 hlist_for_each_entry_safe(inst, t2, head, hlist) { 832 hlist_for_each_entry_safe(inst, t2, head, hlist) {
633 if ((n->net == &init_net) && 833 if (n->portid == inst->peer_portid)
634 (n->portid == inst->peer_portid))
635 __instance_destroy(inst); 834 __instance_destroy(inst);
636 } 835 }
637 } 836 }
638 spin_unlock(&instances_lock); 837 spin_unlock(&q->instances_lock);
639 } 838 }
640 return NOTIFY_DONE; 839 return NOTIFY_DONE;
641} 840}
@@ -656,11 +855,12 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
656 [NFQA_MARK] = { .type = NLA_U32 }, 855 [NFQA_MARK] = { .type = NLA_U32 },
657}; 856};
658 857
659static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) 858static struct nfqnl_instance *
859verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)
660{ 860{
661 struct nfqnl_instance *queue; 861 struct nfqnl_instance *queue;
662 862
663 queue = instance_lookup(queue_num); 863 queue = instance_lookup(q, queue_num);
664 if (!queue) 864 if (!queue)
665 return ERR_PTR(-ENODEV); 865 return ERR_PTR(-ENODEV);
666 866
@@ -704,7 +904,11 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
704 LIST_HEAD(batch_list); 904 LIST_HEAD(batch_list);
705 u16 queue_num = ntohs(nfmsg->res_id); 905 u16 queue_num = ntohs(nfmsg->res_id);
706 906
707 queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); 907 struct net *net = sock_net(ctnl);
908 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
909
910 queue = verdict_instance_lookup(q, queue_num,
911 NETLINK_CB(skb).portid);
708 if (IS_ERR(queue)) 912 if (IS_ERR(queue))
709 return PTR_ERR(queue); 913 return PTR_ERR(queue);
710 914
@@ -752,10 +956,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
752 enum ip_conntrack_info uninitialized_var(ctinfo); 956 enum ip_conntrack_info uninitialized_var(ctinfo);
753 struct nf_conn *ct = NULL; 957 struct nf_conn *ct = NULL;
754 958
755 queue = instance_lookup(queue_num); 959 struct net *net = sock_net(ctnl);
756 if (!queue) 960 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
757 961
758 queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); 962 queue = instance_lookup(q, queue_num);
963 if (!queue)
964 queue = verdict_instance_lookup(q, queue_num,
965 NETLINK_CB(skb).portid);
759 if (IS_ERR(queue)) 966 if (IS_ERR(queue))
760 return PTR_ERR(queue); 967 return PTR_ERR(queue);
761 968
@@ -819,6 +1026,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
819 u_int16_t queue_num = ntohs(nfmsg->res_id); 1026 u_int16_t queue_num = ntohs(nfmsg->res_id);
820 struct nfqnl_instance *queue; 1027 struct nfqnl_instance *queue;
821 struct nfqnl_msg_config_cmd *cmd = NULL; 1028 struct nfqnl_msg_config_cmd *cmd = NULL;
1029 struct net *net = sock_net(ctnl);
1030 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
822 int ret = 0; 1031 int ret = 0;
823 1032
824 if (nfqa[NFQA_CFG_CMD]) { 1033 if (nfqa[NFQA_CFG_CMD]) {
@@ -832,7 +1041,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
832 } 1041 }
833 1042
834 rcu_read_lock(); 1043 rcu_read_lock();
835 queue = instance_lookup(queue_num); 1044 queue = instance_lookup(q, queue_num);
836 if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { 1045 if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
837 ret = -EPERM; 1046 ret = -EPERM;
838 goto err_out_unlock; 1047 goto err_out_unlock;
@@ -845,7 +1054,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
845 ret = -EBUSY; 1054 ret = -EBUSY;
846 goto err_out_unlock; 1055 goto err_out_unlock;
847 } 1056 }
848 queue = instance_create(queue_num, NETLINK_CB(skb).portid); 1057 queue = instance_create(q, queue_num,
1058 NETLINK_CB(skb).portid);
849 if (IS_ERR(queue)) { 1059 if (IS_ERR(queue)) {
850 ret = PTR_ERR(queue); 1060 ret = PTR_ERR(queue);
851 goto err_out_unlock; 1061 goto err_out_unlock;
@@ -856,7 +1066,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
856 ret = -ENODEV; 1066 ret = -ENODEV;
857 goto err_out_unlock; 1067 goto err_out_unlock;
858 } 1068 }
859 instance_destroy(queue); 1069 instance_destroy(q, queue);
860 break; 1070 break;
861 case NFQNL_CFG_CMD_PF_BIND: 1071 case NFQNL_CFG_CMD_PF_BIND:
862 case NFQNL_CFG_CMD_PF_UNBIND: 1072 case NFQNL_CFG_CMD_PF_UNBIND:
@@ -950,19 +1160,24 @@ static const struct nfnetlink_subsystem nfqnl_subsys = {
950 1160
951#ifdef CONFIG_PROC_FS 1161#ifdef CONFIG_PROC_FS
952struct iter_state { 1162struct iter_state {
1163 struct seq_net_private p;
953 unsigned int bucket; 1164 unsigned int bucket;
954}; 1165};
955 1166
956static struct hlist_node *get_first(struct seq_file *seq) 1167static struct hlist_node *get_first(struct seq_file *seq)
957{ 1168{
958 struct iter_state *st = seq->private; 1169 struct iter_state *st = seq->private;
1170 struct net *net;
1171 struct nfnl_queue_net *q;
959 1172
960 if (!st) 1173 if (!st)
961 return NULL; 1174 return NULL;
962 1175
1176 net = seq_file_net(seq);
1177 q = nfnl_queue_pernet(net);
963 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { 1178 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
964 if (!hlist_empty(&instance_table[st->bucket])) 1179 if (!hlist_empty(&q->instance_table[st->bucket]))
965 return instance_table[st->bucket].first; 1180 return q->instance_table[st->bucket].first;
966 } 1181 }
967 return NULL; 1182 return NULL;
968} 1183}
@@ -970,13 +1185,17 @@ static struct hlist_node *get_first(struct seq_file *seq)
970static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) 1185static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
971{ 1186{
972 struct iter_state *st = seq->private; 1187 struct iter_state *st = seq->private;
1188 struct net *net = seq_file_net(seq);
973 1189
974 h = h->next; 1190 h = h->next;
975 while (!h) { 1191 while (!h) {
1192 struct nfnl_queue_net *q;
1193
976 if (++st->bucket >= INSTANCE_BUCKETS) 1194 if (++st->bucket >= INSTANCE_BUCKETS)
977 return NULL; 1195 return NULL;
978 1196
979 h = instance_table[st->bucket].first; 1197 q = nfnl_queue_pernet(net);
1198 h = q->instance_table[st->bucket].first;
980 } 1199 }
981 return h; 1200 return h;
982} 1201}
@@ -992,11 +1211,11 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
992 return pos ? NULL : head; 1211 return pos ? NULL : head;
993} 1212}
994 1213
995static void *seq_start(struct seq_file *seq, loff_t *pos) 1214static void *seq_start(struct seq_file *s, loff_t *pos)
996 __acquires(instances_lock) 1215 __acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
997{ 1216{
998 spin_lock(&instances_lock); 1217 spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
999 return get_idx(seq, *pos); 1218 return get_idx(s, *pos);
1000} 1219}
1001 1220
1002static void *seq_next(struct seq_file *s, void *v, loff_t *pos) 1221static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
@@ -1006,9 +1225,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
1006} 1225}
1007 1226
1008static void seq_stop(struct seq_file *s, void *v) 1227static void seq_stop(struct seq_file *s, void *v)
1009 __releases(instances_lock) 1228 __releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
1010{ 1229{
1011 spin_unlock(&instances_lock); 1230 spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
1012} 1231}
1013 1232
1014static int seq_show(struct seq_file *s, void *v) 1233static int seq_show(struct seq_file *s, void *v)
@@ -1032,7 +1251,7 @@ static const struct seq_operations nfqnl_seq_ops = {
1032 1251
1033static int nfqnl_open(struct inode *inode, struct file *file) 1252static int nfqnl_open(struct inode *inode, struct file *file)
1034{ 1253{
1035 return seq_open_private(file, &nfqnl_seq_ops, 1254 return seq_open_net(inode, file, &nfqnl_seq_ops,
1036 sizeof(struct iter_state)); 1255 sizeof(struct iter_state));
1037} 1256}
1038 1257
@@ -1041,41 +1260,63 @@ static const struct file_operations nfqnl_file_ops = {
1041 .open = nfqnl_open, 1260 .open = nfqnl_open,
1042 .read = seq_read, 1261 .read = seq_read,
1043 .llseek = seq_lseek, 1262 .llseek = seq_lseek,
1044 .release = seq_release_private, 1263 .release = seq_release_net,
1045}; 1264};
1046 1265
1047#endif /* PROC_FS */ 1266#endif /* PROC_FS */
1048 1267
1049static int __init nfnetlink_queue_init(void) 1268static int __net_init nfnl_queue_net_init(struct net *net)
1050{ 1269{
1051 int i, status = -ENOMEM; 1270 unsigned int i;
1271 struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1052 1272
1053 for (i = 0; i < INSTANCE_BUCKETS; i++) 1273 for (i = 0; i < INSTANCE_BUCKETS; i++)
1054 INIT_HLIST_HEAD(&instance_table[i]); 1274 INIT_HLIST_HEAD(&q->instance_table[i]);
1275
1276 spin_lock_init(&q->instances_lock);
1277
1278#ifdef CONFIG_PROC_FS
1279 if (!proc_create("nfnetlink_queue", 0440,
1280 net->nf.proc_netfilter, &nfqnl_file_ops))
1281 return -ENOMEM;
1282#endif
1283 return 0;
1284}
1285
1286static void __net_exit nfnl_queue_net_exit(struct net *net)
1287{
1288 remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
1289}
1290
1291static struct pernet_operations nfnl_queue_net_ops = {
1292 .init = nfnl_queue_net_init,
1293 .exit = nfnl_queue_net_exit,
1294 .id = &nfnl_queue_net_id,
1295 .size = sizeof(struct nfnl_queue_net),
1296};
1297
1298static int __init nfnetlink_queue_init(void)
1299{
1300 int status = -ENOMEM;
1055 1301
1056 netlink_register_notifier(&nfqnl_rtnl_notifier); 1302 netlink_register_notifier(&nfqnl_rtnl_notifier);
1057 status = nfnetlink_subsys_register(&nfqnl_subsys); 1303 status = nfnetlink_subsys_register(&nfqnl_subsys);
1058 if (status < 0) { 1304 if (status < 0) {
1059 printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); 1305 pr_err("nf_queue: failed to create netlink socket\n");
1060 goto cleanup_netlink_notifier; 1306 goto cleanup_netlink_notifier;
1061 } 1307 }
1062 1308
1063#ifdef CONFIG_PROC_FS 1309 status = register_pernet_subsys(&nfnl_queue_net_ops);
1064 if (!proc_create("nfnetlink_queue", 0440, 1310 if (status < 0) {
1065 proc_net_netfilter, &nfqnl_file_ops)) { 1311 pr_err("nf_queue: failed to register pernet ops\n");
1066 status = -ENOMEM;
1067 goto cleanup_subsys; 1312 goto cleanup_subsys;
1068 } 1313 }
1069#endif
1070
1071 register_netdevice_notifier(&nfqnl_dev_notifier); 1314 register_netdevice_notifier(&nfqnl_dev_notifier);
1072 nf_register_queue_handler(&nfqh); 1315 nf_register_queue_handler(&nfqh);
1073 return status; 1316 return status;
1074 1317
1075#ifdef CONFIG_PROC_FS
1076cleanup_subsys: 1318cleanup_subsys:
1077 nfnetlink_subsys_unregister(&nfqnl_subsys); 1319 nfnetlink_subsys_unregister(&nfqnl_subsys);
1078#endif
1079cleanup_netlink_notifier: 1320cleanup_netlink_notifier:
1080 netlink_unregister_notifier(&nfqnl_rtnl_notifier); 1321 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1081 return status; 1322 return status;
@@ -1085,9 +1326,7 @@ static void __exit nfnetlink_queue_fini(void)
1085{ 1326{
1086 nf_unregister_queue_handler(); 1327 nf_unregister_queue_handler();
1087 unregister_netdevice_notifier(&nfqnl_dev_notifier); 1328 unregister_netdevice_notifier(&nfqnl_dev_notifier);
1088#ifdef CONFIG_PROC_FS 1329 unregister_pernet_subsys(&nfnl_queue_net_ops);
1089 remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
1090#endif
1091 nfnetlink_subsys_unregister(&nfqnl_subsys); 1330 nfnetlink_subsys_unregister(&nfqnl_subsys);
1092 netlink_unregister_notifier(&nfqnl_rtnl_notifier); 1331 netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1093 1332
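
All of the instance_lookup()/instance_create() call sites above now take the per-namespace state. The accessor is presumably the usual net_generic() one-liner defined near the top of the file (not shown in these hunks); sketched here for reference, requiring kernel context rather than standalone compilation:

    #include <net/netns/generic.h>

    static int nfnl_queue_net_id __read_mostly;

    static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
    {
        /* storage is allocated per netns by register_pernet_subsys(),
         * sized via nfnl_queue_net_ops.size and keyed by .id */
        return net_generic(net, nfnl_queue_net_id);
    }
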
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 686c7715d777..8b03028cca69 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -2,6 +2,7 @@
2 * x_tables core - Backend for {ip,ip6,arp}_tables 2 * x_tables core - Backend for {ip,ip6,arp}_tables
3 * 3 *
4 * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org> 4 * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org>
5 * Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * Based on existing ip_tables code which is 7 * Based on existing ip_tables code which is
7 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling 8 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
@@ -999,7 +1000,7 @@ static int xt_table_open(struct inode *inode, struct file *file)
999 sizeof(struct xt_names_priv)); 1000 sizeof(struct xt_names_priv));
1000 if (!ret) { 1001 if (!ret) {
1001 priv = ((struct seq_file *)file->private_data)->private; 1002 priv = ((struct seq_file *)file->private_data)->private;
1002 priv->af = (unsigned long)PDE(inode)->data; 1003 priv->af = (unsigned long)PDE_DATA(inode);
1003 } 1004 }
1004 return ret; 1005 return ret;
1005} 1006}
@@ -1147,7 +1148,7 @@ static int xt_match_open(struct inode *inode, struct file *file)
1147 1148
1148 seq = file->private_data; 1149 seq = file->private_data;
1149 seq->private = trav; 1150 seq->private = trav;
1150 trav->nfproto = (unsigned long)PDE(inode)->data; 1151 trav->nfproto = (unsigned long)PDE_DATA(inode);
1151 return 0; 1152 return 0;
1152} 1153}
1153 1154
@@ -1211,7 +1212,7 @@ static int xt_target_open(struct inode *inode, struct file *file)
1211 1212
1212 seq = file->private_data; 1213 seq = file->private_data;
1213 seq->private = trav; 1214 seq->private = trav;
1214 trav->nfproto = (unsigned long)PDE(inode)->data; 1215 trav->nfproto = (unsigned long)PDE_DATA(inode);
1215 return 0; 1216 return 0;
1216} 1217}
1217 1218
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index fa40096940a1..fe573f6c9e91 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -474,7 +474,14 @@ ipt_log_packet(u_int8_t pf,
474 const struct nf_loginfo *loginfo, 474 const struct nf_loginfo *loginfo,
475 const char *prefix) 475 const char *prefix)
476{ 476{
477 struct sbuff *m = sb_open(); 477 struct sbuff *m;
478 struct net *net = dev_net(in ? in : out);
479
480 /* FIXME: Disabled from containers until syslog ns is supported */
481 if (!net_eq(net, &init_net))
482 return;
483
484 m = sb_open();
478 485
479 if (!loginfo) 486 if (!loginfo)
480 loginfo = &default_loginfo; 487 loginfo = &default_loginfo;
@@ -798,7 +805,14 @@ ip6t_log_packet(u_int8_t pf,
798 const struct nf_loginfo *loginfo, 805 const struct nf_loginfo *loginfo,
799 const char *prefix) 806 const char *prefix)
800{ 807{
801 struct sbuff *m = sb_open(); 808 struct sbuff *m;
809 struct net *net = dev_net(in ? in : out);
810
811 /* FIXME: Disabled from containers until syslog ns is supported */
812 if (!net_eq(net, &init_net))
813 return;
814
815 m = sb_open();
802 816
803 if (!loginfo) 817 if (!loginfo)
804 loginfo = &default_loginfo; 818 loginfo = &default_loginfo;
@@ -893,23 +907,55 @@ static struct nf_logger ip6t_log_logger __read_mostly = {
893}; 907};
894#endif 908#endif
895 909
910static int __net_init log_net_init(struct net *net)
911{
912 nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger);
913#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
914 nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger);
915#endif
916 return 0;
917}
918
919static void __net_exit log_net_exit(struct net *net)
920{
921 nf_log_unset(net, &ipt_log_logger);
922#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
923 nf_log_unset(net, &ip6t_log_logger);
924#endif
925}
926
927static struct pernet_operations log_net_ops = {
928 .init = log_net_init,
929 .exit = log_net_exit,
930};
931
896static int __init log_tg_init(void) 932static int __init log_tg_init(void)
897{ 933{
898 int ret; 934 int ret;
899 935
936 ret = register_pernet_subsys(&log_net_ops);
937 if (ret < 0)
938 goto err_pernet;
939
900 ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs)); 940 ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));
901 if (ret < 0) 941 if (ret < 0)
902 return ret; 942 goto err_target;
903 943
904 nf_log_register(NFPROTO_IPV4, &ipt_log_logger); 944 nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
905#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 945#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
906 nf_log_register(NFPROTO_IPV6, &ip6t_log_logger); 946 nf_log_register(NFPROTO_IPV6, &ip6t_log_logger);
907#endif 947#endif
908 return 0; 948 return 0;
949
950err_target:
951 unregister_pernet_subsys(&log_net_ops);
952err_pernet:
953 return ret;
909} 954}
910 955
911static void __exit log_tg_exit(void) 956static void __exit log_tg_exit(void)
912{ 957{
958 unregister_pernet_subsys(&log_net_ops);
913 nf_log_unregister(&ipt_log_logger); 959 nf_log_unregister(&ipt_log_logger);
914#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 960#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
915 nf_log_unregister(&ip6t_log_logger); 961 nf_log_unregister(&ip6t_log_logger);
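
The reworked log_tg_init() above uses the standard kernel unwind idiom: perform registrations in order and, on failure, jump to a label that undoes only the steps that already succeeded, in reverse. A minimal standalone model (illustrative names):

    #include <stdio.h>

    static int  register_a(void)   { return 0; }
    static void unregister_a(void) { puts("undo a"); }
    static int  register_b(void)   { return -1; }   /* pretend this fails */

    static int init(void)
    {
        int ret;

        ret = register_a();
        if (ret < 0)
            goto err_a;

        ret = register_b();
        if (ret < 0)
            goto err_b;

        return 0;

    err_b:
        unregister_a();   /* unwind in reverse order of setup */
    err_a:
        return ret;
    }

    int main(void) { return init() ? 1 : 0; }
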
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 817f9e9f2b16..1e2fae32f81b 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -76,22 +76,31 @@ static u32 hash_v6(const struct sk_buff *skb)
76} 76}
77#endif 77#endif
78 78
79static unsigned int 79static u32
80nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) 80nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par)
81{ 81{
82 const struct xt_NFQ_info_v1 *info = par->targinfo; 82 const struct xt_NFQ_info_v1 *info = par->targinfo;
83 u32 queue = info->queuenum; 83 u32 queue = info->queuenum;
84 84
85 if (info->queues_total > 1) { 85 if (par->family == NFPROTO_IPV4)
86 if (par->family == NFPROTO_IPV4) 86 queue += ((u64) hash_v4(skb) * info->queues_total) >> 32;
87 queue = (((u64) hash_v4(skb) * info->queues_total) >>
88 32) + queue;
89#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 87#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
90 else if (par->family == NFPROTO_IPV6) 88 else if (par->family == NFPROTO_IPV6)
91 queue = (((u64) hash_v6(skb) * info->queues_total) >> 89 queue += ((u64) hash_v6(skb) * info->queues_total) >> 32;
92 32) + queue;
93#endif 90#endif
94 } 91
92 return queue;
93}
94
95static unsigned int
96nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
97{
98 const struct xt_NFQ_info_v1 *info = par->targinfo;
99 u32 queue = info->queuenum;
100
101 if (info->queues_total > 1)
102 queue = nfqueue_hash(skb, par);
103
95 return NF_QUEUE_NR(queue); 104 return NF_QUEUE_NR(queue);
96} 105}
97 106
@@ -108,7 +117,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
108 117
109static int nfqueue_tg_check(const struct xt_tgchk_param *par) 118static int nfqueue_tg_check(const struct xt_tgchk_param *par)
110{ 119{
111 const struct xt_NFQ_info_v2 *info = par->targinfo; 120 const struct xt_NFQ_info_v3 *info = par->targinfo;
112 u32 maxid; 121 u32 maxid;
113 122
114 if (unlikely(!rnd_inited)) { 123 if (unlikely(!rnd_inited)) {
@@ -125,11 +134,32 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
125 info->queues_total, maxid); 134 info->queues_total, maxid);
126 return -ERANGE; 135 return -ERANGE;
127 } 136 }
128 if (par->target->revision == 2 && info->bypass > 1) 137 if (par->target->revision == 2 && info->flags > 1)
129 return -EINVAL; 138 return -EINVAL;
139 if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK)
140 return -EINVAL;
141
130 return 0; 142 return 0;
131} 143}
132 144
145static unsigned int
146nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
147{
148 const struct xt_NFQ_info_v3 *info = par->targinfo;
149 u32 queue = info->queuenum;
150
151 if (info->queues_total > 1) {
152 if (info->flags & NFQ_FLAG_CPU_FANOUT) {
153 int cpu = smp_processor_id();
154
155 queue = info->queuenum + cpu % info->queues_total;
156 } else
157 queue = nfqueue_hash(skb, par);
158 }
159
160 return NF_QUEUE_NR(queue);
161}
162
133static struct xt_target nfqueue_tg_reg[] __read_mostly = { 163static struct xt_target nfqueue_tg_reg[] __read_mostly = {
134 { 164 {
135 .name = "NFQUEUE", 165 .name = "NFQUEUE",
@@ -156,6 +186,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
156 .targetsize = sizeof(struct xt_NFQ_info_v2), 186 .targetsize = sizeof(struct xt_NFQ_info_v2),
157 .me = THIS_MODULE, 187 .me = THIS_MODULE,
158 }, 188 },
189 {
190 .name = "NFQUEUE",
191 .revision = 3,
192 .family = NFPROTO_UNSPEC,
193 .checkentry = nfqueue_tg_check,
194 .target = nfqueue_tg_v3,
195 .targetsize = sizeof(struct xt_NFQ_info_v3),
196 .me = THIS_MODULE,
197 },
159}; 198};
160 199
161static int __init nfqueue_tg_init(void) 200static int __init nfqueue_tg_init(void)
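
The shared nfqueue_hash() helper keeps the multiply-shift balance trick: ((u64)hash * n) >> 32 maps a uniform 32-bit hash onto [0, n) without a division. Standalone demonstration:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t pick_queue(uint32_t base, uint32_t hash, uint32_t n)
    {
        /* the full 32-bit hash range is scaled into n buckets */
        return base + (uint32_t)(((uint64_t)hash * n) >> 32);
    }

    int main(void)
    {
        /* low hashes land in the first queue, high hashes in the last */
        printf("%u %u %u\n",
               pick_queue(0, 0x00000000u, 4),    /* -> 0 */
               pick_queue(0, 0x80000000u, 4),    /* -> 2 */
               pick_queue(0, 0xffffffffu, 4));   /* -> 3 */
        return 0;
    }

The revision-3 target added above bypasses the hash entirely when NFQ_FLAG_CPU_FANOUT is set and derives the queue from smp_processor_id() instead, so each receiving CPU feeds its own queue.
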
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 71a266de5fb4..a75240f0d42b 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -2,6 +2,7 @@
2 * This is a module which is used for setting the MSS option in TCP packets. 2 * This is a module which is used for setting the MSS option in TCP packets.
3 * 3 *
4 * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca> 4 * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
5 * Copyright (C) 2007 Patrick McHardy <kaber@trash.net>
5 * 6 *
6 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 61805d7b38aa..188404b9b002 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -3,6 +3,7 @@
3 * information. (Superset of Rusty's minimalistic state match.) 3 * information. (Superset of Rusty's minimalistic state match.)
4 * 4 *
5 * (C) 2001 Marc Boucher (marc@mbsi.ca). 5 * (C) 2001 Marc Boucher (marc@mbsi.ca).
6 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008 7 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * 8 *
8 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index f330e8beaf69..9ff035c71403 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -3,6 +3,7 @@
3 * separately for each hashbucket (sourceip/sourceport/dstip/dstport) 3 * separately for each hashbucket (sourceip/sourceport/dstip/dstport)
4 * 4 *
5 * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> 5 * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
6 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008 7 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * 8 *
8 * Development of this code was funded by Astaro AG, http://www.astaro.com/ 9 * Development of this code was funded by Astaro AG, http://www.astaro.com/
@@ -107,6 +108,7 @@ struct xt_hashlimit_htable {
107 108
108 /* seq_file stuff */ 109 /* seq_file stuff */
109 struct proc_dir_entry *pde; 110 struct proc_dir_entry *pde;
111 const char *name;
110 struct net *net; 112 struct net *net;
111 113
112 struct hlist_head hash[0]; /* hashtable itself */ 114 struct hlist_head hash[0]; /* hashtable itself */
@@ -253,6 +255,11 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
253 hinfo->count = 0; 255 hinfo->count = 0;
254 hinfo->family = family; 256 hinfo->family = family;
255 hinfo->rnd_initialized = false; 257 hinfo->rnd_initialized = false;
258 hinfo->name = kstrdup(minfo->name, GFP_KERNEL);
259 if (!hinfo->name) {
260 vfree(hinfo);
261 return -ENOMEM;
262 }
256 spin_lock_init(&hinfo->lock); 263 spin_lock_init(&hinfo->lock);
257 264
258 hinfo->pde = proc_create_data(minfo->name, 0, 265 hinfo->pde = proc_create_data(minfo->name, 0,
@@ -260,6 +267,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
260 hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, 267 hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
261 &dl_file_ops, hinfo); 268 &dl_file_ops, hinfo);
262 if (hinfo->pde == NULL) { 269 if (hinfo->pde == NULL) {
270 kfree(hinfo->name);
263 vfree(hinfo); 271 vfree(hinfo);
264 return -ENOMEM; 272 return -ENOMEM;
265 } 273 }
@@ -330,9 +338,10 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo)
330 parent = hashlimit_net->ip6t_hashlimit; 338 parent = hashlimit_net->ip6t_hashlimit;
331 339
332 if(parent != NULL) 340 if(parent != NULL)
333 remove_proc_entry(hinfo->pde->name, parent); 341 remove_proc_entry(hinfo->name, parent);
334 342
335 htable_selective_cleanup(hinfo, select_all); 343 htable_selective_cleanup(hinfo, select_all);
344 kfree(hinfo->name);
336 vfree(hinfo); 345 vfree(hinfo);
337} 346}
338 347
@@ -344,7 +353,7 @@ static struct xt_hashlimit_htable *htable_find_get(struct net *net,
344 struct xt_hashlimit_htable *hinfo; 353 struct xt_hashlimit_htable *hinfo;
345 354
346 hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) { 355 hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) {
347 if (!strcmp(name, hinfo->pde->name) && 356 if (!strcmp(name, hinfo->name) &&
348 hinfo->family == family) { 357 hinfo->family == family) {
349 hinfo->use++; 358 hinfo->use++;
350 return hinfo; 359 return hinfo;
@@ -841,7 +850,7 @@ static int dl_proc_open(struct inode *inode, struct file *file)
841 850
842 if (!ret) { 851 if (!ret) {
843 struct seq_file *sf = file->private_data; 852 struct seq_file *sf = file->private_data;
844 sf->private = PDE(inode)->data; 853 sf->private = PDE_DATA(inode);
845 } 854 }
846 return ret; 855 return ret;
847} 856}
@@ -887,7 +896,7 @@ static void __net_exit hashlimit_proc_net_exit(struct net *net)
887 pde = hashlimit_net->ip6t_hashlimit; 896 pde = hashlimit_net->ip6t_hashlimit;
888 897
889 hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) 898 hlist_for_each_entry(hinfo, &hashlimit_net->htables, node)
890 remove_proc_entry(hinfo->pde->name, pde); 899 remove_proc_entry(hinfo->name, pde);
891 900
892 hashlimit_net->ipt_hashlimit = NULL; 901 hashlimit_net->ipt_hashlimit = NULL;
893 hashlimit_net->ip6t_hashlimit = NULL; 902 hashlimit_net->ip6t_hashlimit = NULL;
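
Because the 3.10 procfs rework makes struct proc_dir_entry opaque, the hashtable now keeps its own kstrdup() copy of the proc name, freed on both the error path and in htable_destroy(). Standalone model of that ownership rule (illustrative):

    #include <stdlib.h>
    #include <string.h>

    struct htable { char *name; };

    static struct htable *htable_create(const char *name)
    {
        struct htable *h = calloc(1, sizeof(*h));

        if (!h)
            return NULL;
        h->name = strdup(name);     /* kstrdup(..., GFP_KERNEL) in-kernel */
        if (!h->name) {
            free(h);                /* unwind: drop the half-built table */
            return NULL;
        }
        return h;
    }

    static void htable_destroy(struct htable *h)
    {
        free(h->name);              /* every create path owns one copy */
        free(h);
    }
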
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index a4c1e4528cac..bef850596558 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -1,5 +1,6 @@
1/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> 1/* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
2 * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> 2 * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
3 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index a5e673d32bda..647d989a01e6 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
201 unsigned char opts[MAX_IPOPTLEN]; 201 unsigned char opts[MAX_IPOPTLEN];
202 const struct xt_osf_finger *kf; 202 const struct xt_osf_finger *kf;
203 const struct xt_osf_user_finger *f; 203 const struct xt_osf_user_finger *f;
204 struct net *net = dev_net(p->in ? p->in : p->out);
204 205
205 if (!info) 206 if (!info)
206 return false; 207 return false;
@@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
325 fcount++; 326 fcount++;
326 327
327 if (info->flags & XT_OSF_LOG) 328 if (info->flags & XT_OSF_LOG)
328 nf_log_packet(p->family, p->hooknum, skb, 329 nf_log_packet(net, p->family, p->hooknum, skb,
329 p->in, p->out, NULL, 330 p->in, p->out, NULL,
330 "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", 331 "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
331 f->genre, f->version, f->subtype, 332 f->genre, f->version, f->subtype,
@@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
341 rcu_read_unlock(); 342 rcu_read_unlock();
342 343
343 if (!fcount && (info->flags & XT_OSF_LOG)) 344 if (!fcount && (info->flags & XT_OSF_LOG))
344 nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, 345 nf_log_packet(net, p->family, p->hooknum, skb, p->in,
346 p->out, NULL,
345 "Remote OS is not known: %pI4:%u -> %pI4:%u\n", 347 "Remote OS is not known: %pI4:%u -> %pI4:%u\n",
346 &ip->saddr, ntohs(tcp->source), 348 &ip->saddr, ntohs(tcp->source),
347 &ip->daddr, ntohs(tcp->dest)); 349 &ip->daddr, ntohs(tcp->dest));
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index d9cad315229d..1e657cf715c4 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -401,8 +401,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
401 ret = -ENOMEM; 401 ret = -ENOMEM;
402 goto out; 402 goto out;
403 } 403 }
404 pde->uid = uid; 404 proc_set_user(pde, uid, gid);
405 pde->gid = gid;
406#endif 405#endif
407 spin_lock_bh(&recent_lock); 406 spin_lock_bh(&recent_lock);
408 list_add_tail(&t->list, &recent_net->tables); 407 list_add_tail(&t->list, &recent_net->tables);
@@ -525,14 +524,13 @@ static const struct seq_operations recent_seq_ops = {
525 524
526static int recent_seq_open(struct inode *inode, struct file *file) 525static int recent_seq_open(struct inode *inode, struct file *file)
527{ 526{
528 struct proc_dir_entry *pde = PDE(inode);
529 struct recent_iter_state *st; 527 struct recent_iter_state *st;
530 528
531 st = __seq_open_private(file, &recent_seq_ops, sizeof(*st)); 529 st = __seq_open_private(file, &recent_seq_ops, sizeof(*st));
532 if (st == NULL) 530 if (st == NULL)
533 return -ENOMEM; 531 return -ENOMEM;
534 532
535 st->table = pde->data; 533 st->table = PDE_DATA(inode);
536 return 0; 534 return 0;
537} 535}
538 536
@@ -540,8 +538,7 @@ static ssize_t
540recent_mt_proc_write(struct file *file, const char __user *input, 538recent_mt_proc_write(struct file *file, const char __user *input,
541 size_t size, loff_t *loff) 539 size_t size, loff_t *loff)
542{ 540{
543 const struct proc_dir_entry *pde = PDE(file_inode(file)); 541 struct recent_table *t = PDE_DATA(file_inode(file));
544 struct recent_table *t = pde->data;
545 struct recent_entry *e; 542 struct recent_entry *e;
546 char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; 543 char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
547 const char *c = buf; 544 const char *c = buf;
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 865a9e54f3ad..31790e789e22 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -1,7 +1,7 @@
1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> 1/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2 * Patrick Schaaf <bof@bof.de> 2 * Patrick Schaaf <bof@bof.de>
3 * Martin Josefsson <gandalf@wlug.westbo.se> 3 * Martin Josefsson <gandalf@wlug.westbo.se>
4 * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> 4 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_SET");
30static inline int 30static inline int
31match_set(ip_set_id_t index, const struct sk_buff *skb, 31match_set(ip_set_id_t index, const struct sk_buff *skb,
32 const struct xt_action_param *par, 32 const struct xt_action_param *par,
33 const struct ip_set_adt_opt *opt, int inv) 33 struct ip_set_adt_opt *opt, int inv)
34{ 34{
35 if (ip_set_test(index, skb, par, opt)) 35 if (ip_set_test(index, skb, par, opt))
36 inv = !inv; 36 inv = !inv;
@@ -38,20 +38,12 @@ match_set(ip_set_id_t index, const struct sk_buff *skb,
38} 38}
39 39
40#define ADT_OPT(n, f, d, fs, cfs, t) \ 40#define ADT_OPT(n, f, d, fs, cfs, t) \
41const struct ip_set_adt_opt n = { \
42 .family = f, \
43 .dim = d, \
44 .flags = fs, \
45 .cmdflags = cfs, \
46 .timeout = t, \
47}
48#define ADT_MOPT(n, f, d, fs, cfs, t) \
49struct ip_set_adt_opt n = { \ 41struct ip_set_adt_opt n = { \
50 .family = f, \ 42 .family = f, \
51 .dim = d, \ 43 .dim = d, \
52 .flags = fs, \ 44 .flags = fs, \
53 .cmdflags = cfs, \ 45 .cmdflags = cfs, \
54 .timeout = t, \ 46 .ext.timeout = t, \
55} 47}
56 48
57/* Revision 0 interface: backward compatible with netfilter/iptables */ 49/* Revision 0 interface: backward compatible with netfilter/iptables */
@@ -197,6 +189,9 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
197 ADT_OPT(opt, par->family, info->match_set.dim, 189 ADT_OPT(opt, par->family, info->match_set.dim,
198 info->match_set.flags, 0, UINT_MAX); 190 info->match_set.flags, 0, UINT_MAX);
199 191
192 if (opt.flags & IPSET_RETURN_NOMATCH)
193 opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
194
200 return match_set(info->match_set.index, skb, par, &opt, 195 return match_set(info->match_set.index, skb, par, &opt,
201 info->match_set.flags & IPSET_INV_MATCH); 196 info->match_set.flags & IPSET_INV_MATCH);
202} 197}
@@ -305,15 +300,15 @@ static unsigned int
305set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) 300set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
306{ 301{
307 const struct xt_set_info_target_v2 *info = par->targinfo; 302 const struct xt_set_info_target_v2 *info = par->targinfo;
308 ADT_MOPT(add_opt, par->family, info->add_set.dim, 303 ADT_OPT(add_opt, par->family, info->add_set.dim,
309 info->add_set.flags, info->flags, info->timeout); 304 info->add_set.flags, info->flags, info->timeout);
310 ADT_OPT(del_opt, par->family, info->del_set.dim, 305 ADT_OPT(del_opt, par->family, info->del_set.dim,
311 info->del_set.flags, 0, UINT_MAX); 306 info->del_set.flags, 0, UINT_MAX);
312 307
313 /* Normalize to fit into jiffies */ 308 /* Normalize to fit into jiffies */
314 if (add_opt.timeout != IPSET_NO_TIMEOUT && 309 if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
315 add_opt.timeout > UINT_MAX/MSEC_PER_SEC) 310 add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
316 add_opt.timeout = UINT_MAX/MSEC_PER_SEC; 311 add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
317 if (info->add_set.index != IPSET_INVALID_ID) 312 if (info->add_set.index != IPSET_INVALID_ID)
318 ip_set_add(info->add_set.index, skb, par, &add_opt); 313 ip_set_add(info->add_set.index, skb, par, &add_opt);
319 if (info->del_set.index != IPSET_INVALID_ID) 314 if (info->del_set.index != IPSET_INVALID_ID)
@@ -325,6 +320,52 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
325#define set_target_v2_checkentry set_target_v1_checkentry 320#define set_target_v2_checkentry set_target_v1_checkentry
326#define set_target_v2_destroy set_target_v1_destroy 321#define set_target_v2_destroy set_target_v1_destroy
327 322
323/* Revision 3 match */
324
325static bool
326match_counter(u64 counter, const struct ip_set_counter_match *info)
327{
328 switch (info->op) {
329 case IPSET_COUNTER_NONE:
330 return true;
331 case IPSET_COUNTER_EQ:
332 return counter == info->value;
333 case IPSET_COUNTER_NE:
334 return counter != info->value;
335 case IPSET_COUNTER_LT:
336 return counter < info->value;
337 case IPSET_COUNTER_GT:
338 return counter > info->value;
339 }
340 return false;
341}
342
343static bool
344set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
345{
346 const struct xt_set_info_match_v3 *info = par->matchinfo;
347 ADT_OPT(opt, par->family, info->match_set.dim,
348 info->match_set.flags, info->flags, UINT_MAX);
349 int ret;
350
351 if (info->packets.op != IPSET_COUNTER_NONE ||
352 info->bytes.op != IPSET_COUNTER_NONE)
353 opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
354
355 ret = match_set(info->match_set.index, skb, par, &opt,
356 info->match_set.flags & IPSET_INV_MATCH);
357
358 if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
359 return ret;
360
361 if (!match_counter(opt.ext.packets, &info->packets))
362 return 0;
363 return match_counter(opt.ext.bytes, &info->bytes);
364}
365
366#define set_match_v3_checkentry set_match_v1_checkentry
367#define set_match_v3_destroy set_match_v1_destroy
368
328static struct xt_match set_matches[] __read_mostly = { 369static struct xt_match set_matches[] __read_mostly = {
329 { 370 {
330 .name = "set", 371 .name = "set",
@@ -377,6 +418,27 @@ static struct xt_match set_matches[] __read_mostly = {
377 .destroy = set_match_v1_destroy, 418 .destroy = set_match_v1_destroy,
378 .me = THIS_MODULE 419 .me = THIS_MODULE
379 }, 420 },
421 /* counters support: update, match */
422 {
423 .name = "set",
424 .family = NFPROTO_IPV4,
425 .revision = 3,
426 .match = set_match_v3,
427 .matchsize = sizeof(struct xt_set_info_match_v3),
428 .checkentry = set_match_v3_checkentry,
429 .destroy = set_match_v3_destroy,
430 .me = THIS_MODULE
431 },
432 {
433 .name = "set",
434 .family = NFPROTO_IPV6,
435 .revision = 3,
436 .match = set_match_v3,
437 .matchsize = sizeof(struct xt_set_info_match_v3),
438 .checkentry = set_match_v3_checkentry,
439 .destroy = set_match_v3_destroy,
440 .me = THIS_MODULE
441 },
380}; 442};
381 443
382static struct xt_target set_targets[] __read_mostly = { 444static struct xt_target set_targets[] __read_mostly = {
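
The revision-3 match only fetches per-entry packet/byte counters when a constraint is configured (IPSET_FLAG_MATCH_COUNTERS), then filters on them with match_counter(). A standalone harness for the comparison logic above (enum names and values here are illustrative; the real ones live in the xt_set uapi header):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    enum counter_op { OP_NONE, OP_EQ, OP_NE, OP_LT, OP_GT };

    struct counter_match { enum counter_op op; uint64_t value; };

    static bool match_counter(uint64_t counter, const struct counter_match *info)
    {
        switch (info->op) {
        case OP_NONE: return true;                   /* no constraint */
        case OP_EQ:   return counter == info->value;
        case OP_NE:   return counter != info->value;
        case OP_LT:   return counter <  info->value;
        case OP_GT:   return counter >  info->value;
        }
        return false;
    }

    int main(void)
    {
        struct counter_match gt10 = { OP_GT, 10 };

        printf("%d %d\n", match_counter(11, &gt10),   /* 1 */
                          match_counter(10, &gt10));  /* 0 */
        return 0;
    }
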
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
new file mode 100644
index 000000000000..2c5e95e9bfbd
--- /dev/null
+++ b/net/netlink/Kconfig
@@ -0,0 +1,19 @@
1#
2# Netlink Sockets
3#
4
5config NETLINK_MMAP
6	bool "NETLINK: mmapped I/O"

7 ---help---
8	  This option enables support for memory-mapped netlink I/O. This
9	  reduces overhead by avoiding copies between kernel and
10	  user space.
11
12 If unsure, say N.
13
14config NETLINK_DIAG
15 tristate "NETLINK: socket monitoring interface"
16 default n
17 ---help---
18	  Support for the NETLINK socket monitoring interface used by the ss tool.
19 If unsure, say Y.
diff --git a/net/netlink/Makefile b/net/netlink/Makefile
index bdd6ddf4e95b..e837917f6c03 100644
--- a/net/netlink/Makefile
+++ b/net/netlink/Makefile
@@ -3,3 +3,6 @@
3# 3#
4 4
5obj-y := af_netlink.o genetlink.o 5obj-y := af_netlink.o genetlink.o
6
7obj-$(CONFIG_NETLINK_DIAG) += netlink_diag.o
8netlink_diag-y := diag.o
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1e3fd5bfcd86..12ac6b47a35c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> 4 * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>
5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> 5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6 * Patrick McHardy <kaber@trash.net>
6 * 7 *
7 * This program is free software; you can redistribute it and/or 8 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License 9 * modify it under the terms of the GNU General Public License
@@ -55,87 +56,45 @@
55#include <linux/types.h> 56#include <linux/types.h>
56#include <linux/audit.h> 57#include <linux/audit.h>
57#include <linux/mutex.h> 58#include <linux/mutex.h>
59#include <linux/vmalloc.h>
60#include <asm/cacheflush.h>
58 61
59#include <net/net_namespace.h> 62#include <net/net_namespace.h>
60#include <net/sock.h> 63#include <net/sock.h>
61#include <net/scm.h> 64#include <net/scm.h>
62#include <net/netlink.h> 65#include <net/netlink.h>
63 66
64#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) 67#include "af_netlink.h"
65#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
66
67struct netlink_sock {
68 /* struct sock has to be the first member of netlink_sock */
69 struct sock sk;
70 u32 portid;
71 u32 dst_portid;
72 u32 dst_group;
73 u32 flags;
74 u32 subscriptions;
75 u32 ngroups;
76 unsigned long *groups;
77 unsigned long state;
78 wait_queue_head_t wait;
79 struct netlink_callback *cb;
80 struct mutex *cb_mutex;
81 struct mutex cb_def_mutex;
82 void (*netlink_rcv)(struct sk_buff *skb);
83 void (*netlink_bind)(int group);
84 struct module *module;
85};
86 68
87struct listeners { 69struct listeners {
88 struct rcu_head rcu; 70 struct rcu_head rcu;
89 unsigned long masks[0]; 71 unsigned long masks[0];
90}; 72};
91 73
74/* state bits */
75#define NETLINK_CONGESTED 0x0
76
77/* flags */
92#define NETLINK_KERNEL_SOCKET 0x1 78#define NETLINK_KERNEL_SOCKET 0x1
93#define NETLINK_RECV_PKTINFO 0x2 79#define NETLINK_RECV_PKTINFO 0x2
94#define NETLINK_BROADCAST_SEND_ERROR 0x4 80#define NETLINK_BROADCAST_SEND_ERROR 0x4
95#define NETLINK_RECV_NO_ENOBUFS 0x8 81#define NETLINK_RECV_NO_ENOBUFS 0x8
96 82
97static inline struct netlink_sock *nlk_sk(struct sock *sk)
98{
99 return container_of(sk, struct netlink_sock, sk);
100}
101
102static inline int netlink_is_kernel(struct sock *sk) 83static inline int netlink_is_kernel(struct sock *sk)
103{ 84{
104 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; 85 return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
105} 86}
106 87
107struct nl_portid_hash { 88struct netlink_table *nl_table;
108 struct hlist_head *table; 89EXPORT_SYMBOL_GPL(nl_table);
109 unsigned long rehash_time;
110
111 unsigned int mask;
112 unsigned int shift;
113
114 unsigned int entries;
115 unsigned int max_shift;
116
117 u32 rnd;
118};
119
120struct netlink_table {
121 struct nl_portid_hash hash;
122 struct hlist_head mc_list;
123 struct listeners __rcu *listeners;
124 unsigned int flags;
125 unsigned int groups;
126 struct mutex *cb_mutex;
127 struct module *module;
128 void (*bind)(int group);
129 int registered;
130};
131
132static struct netlink_table *nl_table;
133 90
134static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); 91static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
135 92
136static int netlink_dump(struct sock *sk); 93static int netlink_dump(struct sock *sk);
94static void netlink_skb_destructor(struct sk_buff *skb);
137 95
138static DEFINE_RWLOCK(nl_table_lock); 96DEFINE_RWLOCK(nl_table_lock);
97EXPORT_SYMBOL_GPL(nl_table_lock);
139static atomic_t nl_table_users = ATOMIC_INIT(0); 98static atomic_t nl_table_users = ATOMIC_INIT(0);
140 99
141#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); 100#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock));
@@ -152,6 +111,599 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u
152 return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; 111 return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
153} 112}
154 113
114static void netlink_overrun(struct sock *sk)
115{
116 struct netlink_sock *nlk = nlk_sk(sk);
117
118 if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
119 if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
120 sk->sk_err = ENOBUFS;
121 sk->sk_error_report(sk);
122 }
123 }
124 atomic_inc(&sk->sk_drops);
125}
126
127static void netlink_rcv_wake(struct sock *sk)
128{
129 struct netlink_sock *nlk = nlk_sk(sk);
130
131 if (skb_queue_empty(&sk->sk_receive_queue))
132 clear_bit(NETLINK_CONGESTED, &nlk->state);
133 if (!test_bit(NETLINK_CONGESTED, &nlk->state))
134 wake_up_interruptible(&nlk->wait);
135}
136
137#ifdef CONFIG_NETLINK_MMAP
138static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
139{
140 return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
141}
142
143static bool netlink_rx_is_mmaped(struct sock *sk)
144{
145 return nlk_sk(sk)->rx_ring.pg_vec != NULL;
146}
147
148static bool netlink_tx_is_mmaped(struct sock *sk)
149{
150 return nlk_sk(sk)->tx_ring.pg_vec != NULL;
151}
152
153static __pure struct page *pgvec_to_page(const void *addr)
154{
155 if (is_vmalloc_addr(addr))
156 return vmalloc_to_page(addr);
157 else
158 return virt_to_page(addr);
159}
160
161static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
162{
163 unsigned int i;
164
165 for (i = 0; i < len; i++) {
166 if (pg_vec[i] != NULL) {
167 if (is_vmalloc_addr(pg_vec[i]))
168 vfree(pg_vec[i]);
169 else
170 free_pages((unsigned long)pg_vec[i], order);
171 }
172 }
173 kfree(pg_vec);
174}
175
176static void *alloc_one_pg_vec_page(unsigned long order)
177{
178 void *buffer;
179 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
180 __GFP_NOWARN | __GFP_NORETRY;
181
182 buffer = (void *)__get_free_pages(gfp_flags, order);
183 if (buffer != NULL)
184 return buffer;
185
186 buffer = vzalloc((1 << order) * PAGE_SIZE);
187 if (buffer != NULL)
188 return buffer;
189
190 gfp_flags &= ~__GFP_NORETRY;
191 return (void *)__get_free_pages(gfp_flags, order);
192}
193
194static void **alloc_pg_vec(struct netlink_sock *nlk,
195 struct nl_mmap_req *req, unsigned int order)
196{
197 unsigned int block_nr = req->nm_block_nr;
198 unsigned int i;
199 void **pg_vec, *ptr;
200
201 pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
202 if (pg_vec == NULL)
203 return NULL;
204
205 for (i = 0; i < block_nr; i++) {
206 pg_vec[i] = ptr = alloc_one_pg_vec_page(order);
207 if (pg_vec[i] == NULL)
208 goto err1;
209 }
210
211 return pg_vec;
212err1:
213 free_pg_vec(pg_vec, order, block_nr);
214 return NULL;
215}
216
217static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
218 bool closing, bool tx_ring)
219{
220 struct netlink_sock *nlk = nlk_sk(sk);
221 struct netlink_ring *ring;
222 struct sk_buff_head *queue;
223 void **pg_vec = NULL;
224 unsigned int order = 0;
225 int err;
226
227 ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
228 queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
229
230 if (!closing) {
231 if (atomic_read(&nlk->mapped))
232 return -EBUSY;
233 if (atomic_read(&ring->pending))
234 return -EBUSY;
235 }
236
237 if (req->nm_block_nr) {
238 if (ring->pg_vec != NULL)
239 return -EBUSY;
240
241 if ((int)req->nm_block_size <= 0)
242 return -EINVAL;
243 if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
244 return -EINVAL;
245 if (req->nm_frame_size < NL_MMAP_HDRLEN)
246 return -EINVAL;
247 if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
248 return -EINVAL;
249
250 ring->frames_per_block = req->nm_block_size /
251 req->nm_frame_size;
252 if (ring->frames_per_block == 0)
253 return -EINVAL;
254 if (ring->frames_per_block * req->nm_block_nr !=
255 req->nm_frame_nr)
256 return -EINVAL;
257
258 order = get_order(req->nm_block_size);
259 pg_vec = alloc_pg_vec(nlk, req, order);
260 if (pg_vec == NULL)
261 return -ENOMEM;
262 } else {
263 if (req->nm_frame_nr)
264 return -EINVAL;
265 }
266
267 err = -EBUSY;
268 mutex_lock(&nlk->pg_vec_lock);
269 if (closing || atomic_read(&nlk->mapped) == 0) {
270 err = 0;
271 spin_lock_bh(&queue->lock);
272
273 ring->frame_max = req->nm_frame_nr - 1;
274 ring->head = 0;
275 ring->frame_size = req->nm_frame_size;
276 ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
277
278 swap(ring->pg_vec_len, req->nm_block_nr);
279 swap(ring->pg_vec_order, order);
280 swap(ring->pg_vec, pg_vec);
281
282 __skb_queue_purge(queue);
283 spin_unlock_bh(&queue->lock);
284
285 WARN_ON(atomic_read(&nlk->mapped));
286 }
287 mutex_unlock(&nlk->pg_vec_lock);
288
289 if (pg_vec)
290 free_pg_vec(pg_vec, order, req->nm_block_nr);
291 return err;
292}
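
From userspace this pairs with the uapi added by the same merge (assumed: the NETLINK_RX_RING/NETLINK_TX_RING socket options and struct nl_mmap_req from <linux/netlink.h>); a hedged sketch of a setup that satisfies the checks above:

    #include <sys/mman.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    static void *map_rings(int fd)
    {
        struct nl_mmap_req req = {
            .nm_block_size = 4096,                /* page-aligned, > 0 */
            .nm_block_nr   = 64,
            .nm_frame_size = 2048,                /* NL_MMAP_MSG_ALIGNMENT multiple */
            .nm_frame_nr   = 64 * (4096 / 2048),  /* blocks * frames-per-block */
        };

        if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0 ||
            setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
            return MAP_FAILED;

        /* both rings map in one contiguous call starting at offset 0,
         * which is exactly what the "expected" size check enforces */
        return mmap(NULL, 2UL * req.nm_block_size * req.nm_block_nr,
                    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    }
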
293
294static void netlink_mm_open(struct vm_area_struct *vma)
295{
296 struct file *file = vma->vm_file;
297 struct socket *sock = file->private_data;
298 struct sock *sk = sock->sk;
299
300 if (sk)
301 atomic_inc(&nlk_sk(sk)->mapped);
302}
303
304static void netlink_mm_close(struct vm_area_struct *vma)
305{
306 struct file *file = vma->vm_file;
307 struct socket *sock = file->private_data;
308 struct sock *sk = sock->sk;
309
310 if (sk)
311 atomic_dec(&nlk_sk(sk)->mapped);
312}
313
314static const struct vm_operations_struct netlink_mmap_ops = {
315 .open = netlink_mm_open,
316 .close = netlink_mm_close,
317};
318
319static int netlink_mmap(struct file *file, struct socket *sock,
320 struct vm_area_struct *vma)
321{
322 struct sock *sk = sock->sk;
323 struct netlink_sock *nlk = nlk_sk(sk);
324 struct netlink_ring *ring;
325 unsigned long start, size, expected;
326 unsigned int i;
327 int err = -EINVAL;
328
329 if (vma->vm_pgoff)
330 return -EINVAL;
331
332 mutex_lock(&nlk->pg_vec_lock);
333
334 expected = 0;
335 for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
336 if (ring->pg_vec == NULL)
337 continue;
338 expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
339 }
340
341 if (expected == 0)
342 goto out;
343
344 size = vma->vm_end - vma->vm_start;
345 if (size != expected)
346 goto out;
347
348 start = vma->vm_start;
349 for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
350 if (ring->pg_vec == NULL)
351 continue;
352
353 for (i = 0; i < ring->pg_vec_len; i++) {
354 struct page *page;
355 void *kaddr = ring->pg_vec[i];
356 unsigned int pg_num;
357
358 for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
359 page = pgvec_to_page(kaddr);
360 err = vm_insert_page(vma, start, page);
361 if (err < 0)
362 goto out;
363 start += PAGE_SIZE;
364 kaddr += PAGE_SIZE;
365 }
366 }
367 }
368
369 atomic_inc(&nlk->mapped);
370 vma->vm_ops = &netlink_mmap_ops;
371 err = 0;
372out:
373 mutex_unlock(&nlk->pg_vec_lock);
374	return err;
375}
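
Taken together, netlink_set_ring() and netlink_mmap() are driven from userspace in two steps: a NETLINK_RX_RING/NETLINK_TX_RING setsockopt() describing the ring geometry, then a single mmap() covering all configured rings. The sketch below is illustrative rather than part of the patch; the geometry values are arbitrary, SOL_NETLINK is defined as a fallback, and error handling is trimmed. The constraints mirror the checks in netlink_set_ring() above: page-aligned block size, NL_MMAP_MSG_ALIGNMENT-aligned frame size of at least NL_MMAP_HDRLEN, and nm_frame_nr equal to frames_per_block * nm_block_nr.

    #include <sys/socket.h>
    #include <sys/mman.h>
    #include <linux/netlink.h>

    #ifndef SOL_NETLINK
    #define SOL_NETLINK 270                 /* from linux/socket.h */
    #endif

    /* Sketch: configure a 64-block RX ring and map it. Requires
     * CAP_NET_ADMIN, per the NETLINK_RX_RING handler further down. */
    static void *map_rx_ring(int fd, struct nl_mmap_req *req)
    {
            req->nm_block_size = 4096;      /* one page per block */
            req->nm_block_nr   = 64;
            req->nm_frame_size = 1024;      /* four frames per block */
            req->nm_frame_nr   = 64 * (4096 / 1024);

            if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING,
                           req, sizeof(*req)) < 0)
                    return MAP_FAILED;

            /* netlink_mmap() insists the length equal the summed size of
             * all configured rings and the offset be zero. */
            return mmap(NULL, (size_t)req->nm_block_size * req->nm_block_nr,
                        PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    }
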
376
377static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
378{
379#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
380 struct page *p_start, *p_end;
381
382 /* First page is flushed through netlink_{get,set}_status */
383 p_start = pgvec_to_page(hdr + PAGE_SIZE);
384 p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
385 while (p_start <= p_end) {
386 flush_dcache_page(p_start);
387 p_start++;
388 }
389#endif
390}
391
392static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
393{
394 smp_rmb();
395 flush_dcache_page(pgvec_to_page(hdr));
396 return hdr->nm_status;
397}
398
399static void netlink_set_status(struct nl_mmap_hdr *hdr,
400 enum nl_mmap_status status)
401{
402 hdr->nm_status = status;
403 flush_dcache_page(pgvec_to_page(hdr));
404 smp_wmb();
405}
406
407static struct nl_mmap_hdr *
408__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
409{
410 unsigned int pg_vec_pos, frame_off;
411
412 pg_vec_pos = pos / ring->frames_per_block;
413 frame_off = pos % ring->frames_per_block;
414
415 return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
416}
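
As a worked example of this indexing: with nm_block_size 4096 and nm_frame_size 1024, frames_per_block is 4, so pos 10 gives pg_vec_pos 2 and frame_off 2 -- the frame lives in the third block, 2048 bytes in.
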
417
418static struct nl_mmap_hdr *
419netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
420 enum nl_mmap_status status)
421{
422 struct nl_mmap_hdr *hdr;
423
424 hdr = __netlink_lookup_frame(ring, pos);
425 if (netlink_get_status(hdr) != status)
426 return NULL;
427
428 return hdr;
429}
430
431static struct nl_mmap_hdr *
432netlink_current_frame(const struct netlink_ring *ring,
433 enum nl_mmap_status status)
434{
435 return netlink_lookup_frame(ring, ring->head, status);
436}
437
438static struct nl_mmap_hdr *
439netlink_previous_frame(const struct netlink_ring *ring,
440 enum nl_mmap_status status)
441{
442 unsigned int prev;
443
444 prev = ring->head ? ring->head - 1 : ring->frame_max;
445 return netlink_lookup_frame(ring, prev, status);
446}
447
448static void netlink_increment_head(struct netlink_ring *ring)
449{
450 ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
451}
452
453static void netlink_forward_ring(struct netlink_ring *ring)
454{
455 unsigned int head = ring->head, pos = head;
456 const struct nl_mmap_hdr *hdr;
457
458 do {
459 hdr = __netlink_lookup_frame(ring, pos);
460 if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
461 break;
462 if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
463 break;
464 netlink_increment_head(ring);
465 } while (ring->head != head);
466}
467
468static bool netlink_dump_space(struct netlink_sock *nlk)
469{
470 struct netlink_ring *ring = &nlk->rx_ring;
471 struct nl_mmap_hdr *hdr;
472 unsigned int n;
473
474 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
475 if (hdr == NULL)
476 return false;
477
478 n = ring->head + ring->frame_max / 2;
479 if (n > ring->frame_max)
480 n -= ring->frame_max;
481
482 hdr = __netlink_lookup_frame(ring, n);
483
484 return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
485}
486
487static unsigned int netlink_poll(struct file *file, struct socket *sock,
488 poll_table *wait)
489{
490 struct sock *sk = sock->sk;
491 struct netlink_sock *nlk = nlk_sk(sk);
492 unsigned int mask;
493 int err;
494
495 if (nlk->rx_ring.pg_vec != NULL) {
496 /* Memory mapped sockets don't call recvmsg(), so flow control
497 * for dumps is performed here. A dump is allowed to continue
498 * if at least half the ring is unused.
499 */
500 while (nlk->cb != NULL && netlink_dump_space(nlk)) {
501 err = netlink_dump(sk);
502 if (err < 0) {
503 sk->sk_err = err;
504 sk->sk_error_report(sk);
505 break;
506 }
507 }
508 netlink_rcv_wake(sk);
509 }
510
511 mask = datagram_poll(file, sock, wait);
512
513 spin_lock_bh(&sk->sk_receive_queue.lock);
514 if (nlk->rx_ring.pg_vec) {
515 netlink_forward_ring(&nlk->rx_ring);
516 if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
517 mask |= POLLIN | POLLRDNORM;
518 }
519 spin_unlock_bh(&sk->sk_receive_queue.lock);
520
521 spin_lock_bh(&sk->sk_write_queue.lock);
522 if (nlk->tx_ring.pg_vec) {
523 if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
524 mask |= POLLOUT | POLLWRNORM;
525 }
526 spin_unlock_bh(&sk->sk_write_queue.lock);
527
528 return mask;
529}
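
The poll-driven receive loop this enables would look roughly like the sketch below. It is illustrative only: handle() is a hypothetical consumer, and the linear frame indexing assumes the frame size divides the block size exactly so frames are laid out contiguously. A frame consumed in place is handed back by resetting nm_status to NL_MMAP_STATUS_UNUSED; NL_MMAP_STATUS_COPY signals that the message was delivered through the regular receive queue and must be fetched with recvmsg() instead.

    #include <poll.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    extern void handle(const void *buf, unsigned int len);  /* hypothetical */

    static void rx_loop(int fd, char *ring, const struct nl_mmap_req *req)
    {
            unsigned int frame = 0;
            char fallback[16384];

            for (;;) {
                    struct pollfd pfd = { .fd = fd, .events = POLLIN };
                    struct nl_mmap_hdr *hdr;

                    poll(&pfd, 1, -1);

                    hdr = (struct nl_mmap_hdr *)
                            (ring + frame * req->nm_frame_size);
                    switch (hdr->nm_status) {
                    case NL_MMAP_STATUS_VALID:      /* read in place */
                            handle((char *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
                            break;
                    case NL_MMAP_STATUS_COPY:       /* queued the old way */
                            recv(fd, fallback, sizeof(fallback), 0);
                            break;
                    default:                        /* UNUSED: nothing yet */
                            continue;
                    }
                    hdr->nm_status = NL_MMAP_STATUS_UNUSED; /* return frame */
                    frame = (frame + 1) % req->nm_frame_nr;
            }
    }
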
530
531static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
532{
533 return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
534}
535
536static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
537 struct netlink_ring *ring,
538 struct nl_mmap_hdr *hdr)
539{
540 unsigned int size;
541 void *data;
542
543 size = ring->frame_size - NL_MMAP_HDRLEN;
544 data = (void *)hdr + NL_MMAP_HDRLEN;
545
546 skb->head = data;
547 skb->data = data;
548 skb_reset_tail_pointer(skb);
549 skb->end = skb->tail + size;
550 skb->len = 0;
551
552 skb->destructor = netlink_skb_destructor;
553 NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
554 NETLINK_CB(skb).sk = sk;
555}
556
557static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
558 u32 dst_portid, u32 dst_group,
559 struct sock_iocb *siocb)
560{
561 struct netlink_sock *nlk = nlk_sk(sk);
562 struct netlink_ring *ring;
563 struct nl_mmap_hdr *hdr;
564 struct sk_buff *skb;
565 unsigned int maxlen;
566 bool excl = true;
567 int err = 0, len = 0;
568
569 /* Netlink messages are validated by the receiver before processing.
570 * In order to avoid userspace changing the contents of the message
571 * after validation, the socket and the ring may only be used by a
572 * single process, otherwise we fall back to copying.
573 */
574 if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
575 atomic_read(&nlk->mapped) > 1)
576 excl = false;
577
578 mutex_lock(&nlk->pg_vec_lock);
579
580 ring = &nlk->tx_ring;
581 maxlen = ring->frame_size - NL_MMAP_HDRLEN;
582
583 do {
584 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
585 if (hdr == NULL) {
586 if (!(msg->msg_flags & MSG_DONTWAIT) &&
587 atomic_read(&nlk->tx_ring.pending))
588 schedule();
589 continue;
590 }
591 if (hdr->nm_len > maxlen) {
592 err = -EINVAL;
593 goto out;
594 }
595
596 netlink_frame_flush_dcache(hdr);
597
598 if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
599 skb = alloc_skb_head(GFP_KERNEL);
600 if (skb == NULL) {
601 err = -ENOBUFS;
602 goto out;
603 }
604 sock_hold(sk);
605 netlink_ring_setup_skb(skb, sk, ring, hdr);
606 NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
607 __skb_put(skb, hdr->nm_len);
608 netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
609 atomic_inc(&ring->pending);
610 } else {
611 skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
612 if (skb == NULL) {
613 err = -ENOBUFS;
614 goto out;
615 }
616 __skb_put(skb, hdr->nm_len);
617 memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
618 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
619 }
620
621 netlink_increment_head(ring);
622
623 NETLINK_CB(skb).portid = nlk->portid;
624 NETLINK_CB(skb).dst_group = dst_group;
625 NETLINK_CB(skb).creds = siocb->scm->creds;
626
627 err = security_netlink_send(sk, skb);
628 if (err) {
629 kfree_skb(skb);
630 goto out;
631 }
632
633 if (unlikely(dst_group)) {
634 atomic_inc(&skb->users);
635 netlink_broadcast(sk, skb, dst_portid, dst_group,
636 GFP_KERNEL);
637 }
638 err = netlink_unicast(sk, skb, dst_portid,
639 msg->msg_flags & MSG_DONTWAIT);
640 if (err < 0)
641 goto out;
642 len += err;
643
644 } while (hdr != NULL ||
645 (!(msg->msg_flags & MSG_DONTWAIT) &&
646 atomic_read(&nlk->tx_ring.pending)));
647
648 if (len > 0)
649 err = len;
650out:
651 mutex_unlock(&nlk->pg_vec_lock);
652 return err;
653}
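
From userspace, this transmit path is driven by filling the current TX frame, flagging it valid and issuing a zero-length send -- a fragment-level sketch, with hdr, nlmsg and nlmsg_len assumed to be prepared by the caller. The empty sendto() arrives in netlink_sendmsg() with msg_iov->iov_base == NULL, which is exactly the condition it tests before branching into netlink_mmap_sendmsg():

    /* hdr points at the current TX-ring frame (status UNUSED). */
    hdr->nm_len = nlmsg_len;
    memcpy((char *)hdr + NL_MMAP_HDRLEN, nlmsg, nlmsg_len);
    hdr->nm_status = NL_MMAP_STATUS_VALID;

    /* Zero-length send: ask the kernel to consume frames from the ring. */
    sendto(fd, NULL, 0, 0, NULL, 0);
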
654
655static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
656{
657 struct nl_mmap_hdr *hdr;
658
659 hdr = netlink_mmap_hdr(skb);
660 hdr->nm_len = skb->len;
661 hdr->nm_group = NETLINK_CB(skb).dst_group;
662 hdr->nm_pid = NETLINK_CB(skb).creds.pid;
663 hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
664 hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
665 netlink_frame_flush_dcache(hdr);
666 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
667
668 NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
669 kfree_skb(skb);
670}
671
672static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
673{
674 struct netlink_sock *nlk = nlk_sk(sk);
675 struct netlink_ring *ring = &nlk->rx_ring;
676 struct nl_mmap_hdr *hdr;
677
678 spin_lock_bh(&sk->sk_receive_queue.lock);
679 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
680 if (hdr == NULL) {
681 spin_unlock_bh(&sk->sk_receive_queue.lock);
682 kfree_skb(skb);
683 netlink_overrun(sk);
684 return;
685 }
686 netlink_increment_head(ring);
687 __skb_queue_tail(&sk->sk_receive_queue, skb);
688 spin_unlock_bh(&sk->sk_receive_queue.lock);
689
690 hdr->nm_len = skb->len;
691 hdr->nm_group = NETLINK_CB(skb).dst_group;
692 hdr->nm_pid = NETLINK_CB(skb).creds.pid;
693 hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
694 hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
695 netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
696}
697
698#else /* CONFIG_NETLINK_MMAP */
699#define netlink_skb_is_mmaped(skb) false
700#define netlink_rx_is_mmaped(sk) false
701#define netlink_tx_is_mmaped(sk) false
702#define netlink_mmap sock_no_mmap
703#define netlink_poll datagram_poll
704#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0
705#endif /* CONFIG_NETLINK_MMAP */
706
707static void netlink_destroy_callback(struct netlink_callback *cb)
708{
709	kfree_skb(cb->skb);
@@ -164,6 +716,53 @@ static void netlink_consume_callback(struct netlink_callback *cb)
716	kfree(cb);
717}
718
719static void netlink_skb_destructor(struct sk_buff *skb)
720{
721#ifdef CONFIG_NETLINK_MMAP
722 struct nl_mmap_hdr *hdr;
723 struct netlink_ring *ring;
724 struct sock *sk;
725
726 /* If a packet from the kernel to userspace was freed because of an
727 * error without being delivered to userspace, the kernel must reset
728 * the status. In the direction userspace to kernel, the status is
729 * always reset here after the packet was processed and freed.
730 */
731 if (netlink_skb_is_mmaped(skb)) {
732 hdr = netlink_mmap_hdr(skb);
733 sk = NETLINK_CB(skb).sk;
734
735 if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
736 netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
737 ring = &nlk_sk(sk)->tx_ring;
738 } else {
739 if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
740 hdr->nm_len = 0;
741 netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
742 }
743 ring = &nlk_sk(sk)->rx_ring;
744 }
745
746 WARN_ON(atomic_read(&ring->pending) == 0);
747 atomic_dec(&ring->pending);
748 sock_put(sk);
749
750 skb->data = NULL;
751 }
752#endif
753 if (skb->sk != NULL)
754 sock_rfree(skb);
755}
756
757static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
758{
759 WARN_ON(skb->sk != NULL);
760 skb->sk = sk;
761 skb->destructor = netlink_skb_destructor;
762 atomic_add(skb->truesize, &sk->sk_rmem_alloc);
763 sk_mem_charge(sk, skb->truesize);
764}
765
766static void netlink_sock_destruct(struct sock *sk)
767{
768	struct netlink_sock *nlk = nlk_sk(sk);
@@ -177,6 +776,18 @@ static void netlink_sock_destruct(struct sock *sk)
776	}
777
778	skb_queue_purge(&sk->sk_receive_queue);
779#ifdef CONFIG_NETLINK_MMAP
780 if (1) {
781 struct nl_mmap_req req;
782
783 memset(&req, 0, sizeof(req));
784 if (nlk->rx_ring.pg_vec)
785 netlink_set_ring(sk, &req, true, false);
786 memset(&req, 0, sizeof(req));
787 if (nlk->tx_ring.pg_vec)
788 netlink_set_ring(sk, &req, true, true);
789 }
790#endif /* CONFIG_NETLINK_MMAP */
791
792	if (!sock_flag(sk, SOCK_DEAD)) {
793	printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
@@ -440,6 +1051,9 @@ static int __netlink_create(struct net *net, struct socket *sock,
1051	mutex_init(nlk->cb_mutex);
1052	}
1053	init_waitqueue_head(&nlk->wait);
1054#ifdef CONFIG_NETLINK_MMAP
1055 mutex_init(&nlk->pg_vec_lock);
1056#endif
1057
1058	sk->sk_destruct = netlink_sock_destruct;
1059	sk->sk_protocol = protocol;
@@ -771,19 +1385,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
1385	return 0;
1386}
1387
774static void netlink_overrun(struct sock *sk)
775{
776 struct netlink_sock *nlk = nlk_sk(sk);
777
778 if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
779 if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
780 sk->sk_err = ENOBUFS;
781 sk->sk_error_report(sk);
782 }
783 }
784 atomic_inc(&sk->sk_drops);
785}
786
1388static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
1389{
1390	struct sock *sock;
@@ -836,8 +1437,9 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
1437
1438	nlk = nlk_sk(sk);
1439
839	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
840	    test_bit(0, &nlk->state)) {
1440	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
1441	     test_bit(NETLINK_CONGESTED, &nlk->state)) &&
1442 !netlink_skb_is_mmaped(skb)) {
1443	DECLARE_WAITQUEUE(wait, current);
1444	if (!*timeo) {
1445	if (!ssk || netlink_is_kernel(ssk))
@@ -851,7 +1453,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
1453	add_wait_queue(&nlk->wait, &wait);
1454
1455	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
854	     test_bit(0, &nlk->state)) &&
1456	     test_bit(NETLINK_CONGESTED, &nlk->state)) &&
1457	    !sock_flag(sk, SOCK_DEAD))
1458	*timeo = schedule_timeout(*timeo);
1459
@@ -865,7 +1467,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
1467	}
1468	return 1;
1469	}
868	skb_set_owner_r(skb, sk);
1470	netlink_skb_set_owner_r(skb, sk);
1471	return 0;
1472}
1473
@@ -873,7 +1475,14 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
1475{
1476	int len = skb->len;
1477
876	skb_queue_tail(&sk->sk_receive_queue, skb);
1478#ifdef CONFIG_NETLINK_MMAP
1479 if (netlink_skb_is_mmaped(skb))
1480 netlink_queue_mmaped_skb(sk, skb);
1481 else if (netlink_rx_is_mmaped(sk))
1482 netlink_ring_set_copied(sk, skb);
1483 else
1484#endif /* CONFIG_NETLINK_MMAP */
1485 skb_queue_tail(&sk->sk_receive_queue, skb);
1486	sk->sk_data_ready(sk, len);
1487	return len;
1488}
@@ -896,7 +1505,9 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
1505{
1506	int delta;
1507
899	skb_orphan(skb);
1508	WARN_ON(skb->sk != NULL);
1509 if (netlink_skb_is_mmaped(skb))
1510 return skb;
1511
1512	delta = skb->end - skb->tail;
1513	if (delta * 2 < skb->truesize)
@@ -916,16 +1527,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
1527	return skb;
1528}
1529
919static void netlink_rcv_wake(struct sock *sk)
920{
921 struct netlink_sock *nlk = nlk_sk(sk);
922
923 if (skb_queue_empty(&sk->sk_receive_queue))
924 clear_bit(0, &nlk->state);
925 if (!test_bit(0, &nlk->state))
926 wake_up_interruptible(&nlk->wait);
927}
928
1530static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
1531				  struct sock *ssk)
1532{
@@ -935,8 +1536,8 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
1536	ret = -ECONNREFUSED;
1537	if (nlk->netlink_rcv != NULL) {
1538	ret = skb->len;
938	skb_set_owner_r(skb, sk);
939	NETLINK_CB(skb).ssk = ssk;
1539	netlink_skb_set_owner_r(skb, sk);
1540	NETLINK_CB(skb).sk = ssk;
1541	nlk->netlink_rcv(skb);
1542	consume_skb(skb);
1543	} else {
@@ -982,6 +1583,69 @@ retry:
1583}
1584EXPORT_SYMBOL(netlink_unicast);
1585
1586struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
1587 u32 dst_portid, gfp_t gfp_mask)
1588{
1589#ifdef CONFIG_NETLINK_MMAP
1590 struct sock *sk = NULL;
1591 struct sk_buff *skb;
1592 struct netlink_ring *ring;
1593 struct nl_mmap_hdr *hdr;
1594 unsigned int maxlen;
1595
1596 sk = netlink_getsockbyportid(ssk, dst_portid);
1597 if (IS_ERR(sk))
1598 goto out;
1599
1600 ring = &nlk_sk(sk)->rx_ring;
1601 /* fast-path without atomic ops for common case: non-mmaped receiver */
1602 if (ring->pg_vec == NULL)
1603 goto out_put;
1604
1605 skb = alloc_skb_head(gfp_mask);
1606 if (skb == NULL)
1607 goto err1;
1608
1609 spin_lock_bh(&sk->sk_receive_queue.lock);
1610 /* check again under lock */
1611 if (ring->pg_vec == NULL)
1612 goto out_free;
1613
1614 maxlen = ring->frame_size - NL_MMAP_HDRLEN;
1615 if (maxlen < size)
1616 goto out_free;
1617
1618 netlink_forward_ring(ring);
1619 hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
1620 if (hdr == NULL)
1621 goto err2;
1622 netlink_ring_setup_skb(skb, sk, ring, hdr);
1623 netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
1624 atomic_inc(&ring->pending);
1625 netlink_increment_head(ring);
1626
1627 spin_unlock_bh(&sk->sk_receive_queue.lock);
1628 return skb;
1629
1630err2:
1631 kfree_skb(skb);
1632 spin_unlock_bh(&sk->sk_receive_queue.lock);
1633 netlink_overrun(sk);
1634err1:
1635 sock_put(sk);
1636 return NULL;
1637
1638out_free:
1639 kfree_skb(skb);
1640 spin_unlock_bh(&sk->sk_receive_queue.lock);
1641out_put:
1642 sock_put(sk);
1643out:
1644#endif
1645 return alloc_skb(size, gfp_mask);
1646}
1647EXPORT_SYMBOL_GPL(netlink_alloc_skb);
1648
1649int netlink_has_listeners(struct sock *sk, unsigned int group)
1650{
1651	int res = 0;
@@ -1006,8 +1670,8 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
1670	struct netlink_sock *nlk = nlk_sk(sk);
1671
1672	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
1009	    !test_bit(0, &nlk->state)) {
1010	skb_set_owner_r(skb, sk);
1673	    !test_bit(NETLINK_CONGESTED, &nlk->state)) {
1674	netlink_skb_set_owner_r(skb, sk);
1675	__netlink_sendskb(sk, skb);
1676	return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
1677	}
@@ -1242,7 +1906,8 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
1906	if (level != SOL_NETLINK)
1907	return -ENOPROTOOPT;
1908
1245	if (optlen >= sizeof(int) &&
1909	if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
1910 optlen >= sizeof(int) &&
1911	    get_user(val, (unsigned int __user *)optval))
1912	return -EFAULT;
1913
@@ -1284,13 +1949,32 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
1949	case NETLINK_NO_ENOBUFS:
1950	if (val) {
1951	nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
1287	clear_bit(0, &nlk->state);
1952	clear_bit(NETLINK_CONGESTED, &nlk->state);
1953	wake_up_interruptible(&nlk->wait);
1954	} else {
1955	nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
1956	}
1957	err = 0;
1958	break;
1959#ifdef CONFIG_NETLINK_MMAP
1960 case NETLINK_RX_RING:
1961 case NETLINK_TX_RING: {
1962 struct nl_mmap_req req;
1963
1964 /* Rings might consume more memory than queue limits, require
1965 * CAP_NET_ADMIN.
1966 */
1967 if (!capable(CAP_NET_ADMIN))
1968 return -EPERM;
1969 if (optlen < sizeof(req))
1970 return -EINVAL;
1971 if (copy_from_user(&req, optval, sizeof(req)))
1972 return -EFAULT;
1973 err = netlink_set_ring(sk, &req, false,
1974 optname == NETLINK_TX_RING);
1975 break;
1976 }
1977#endif /* CONFIG_NETLINK_MMAP */
1978	default:
1979	err = -ENOPROTOOPT;
1980	}
@@ -1401,6 +2085,13 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
2085	goto out;
2086	}
2087
2088 if (netlink_tx_is_mmaped(sk) &&
2089 msg->msg_iov->iov_base == NULL) {
2090 err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
2091 siocb);
2092 goto out;
2093 }
2094
2095	err = -EMSGSIZE;
2096	if (len > sk->sk_sndbuf - 32)
2097	goto out;
@@ -1695,7 +2386,7 @@ struct nlmsghdr *
2386__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
2387{
2388	struct nlmsghdr *nlh;
1698	int size = NLMSG_LENGTH(len);
2389	int size = nlmsg_msg_size(len);
2390
2391	nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
2392	nlh->nlmsg_type = type;
@@ -1704,7 +2395,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla
2395	nlh->nlmsg_pid = portid;
2396	nlh->nlmsg_seq = seq;
2397	if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
1707	memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
2398	memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);
2399	return nlh;
2400}
2401EXPORT_SYMBOL(__nlmsg_put);
@@ -1733,9 +2424,13 @@ static int netlink_dump(struct sock *sk)
2424
2425	alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
2426
1736	skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
2427	if (!netlink_rx_is_mmaped(sk) &&
2428 atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2429 goto errout_skb;
2430 skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
2431	if (!skb)
2432	goto errout_skb;
2433 netlink_skb_set_owner_r(skb, sk);
2434
2435	len = cb->dump(skb, cb);
2436
@@ -1790,13 +2485,25 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
2485	if (cb == NULL)
2486	return -ENOBUFS;
2487
2488 /* Memory mapped dump requests need to be copied to avoid looping
2489	 * on the pending state in netlink_mmap_sendmsg() while the CB holds
2490 * a reference to the skb.
2491 */
2492 if (netlink_skb_is_mmaped(skb)) {
2493 skb = skb_copy(skb, GFP_KERNEL);
2494 if (skb == NULL) {
2495 kfree(cb);
2496 return -ENOBUFS;
2497 }
2498 } else
2499 atomic_inc(&skb->users);
2500
2501	cb->dump = control->dump;
2502	cb->done = control->done;
2503	cb->nlh = nlh;
2504	cb->data = control->data;
2505	cb->module = control->module;
2506	cb->min_dump_alloc = control->min_dump_alloc;
1799	atomic_inc(&skb->users);
2507	cb->skb = skb;
2508
2509	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
@@ -1850,7 +2557,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
2557	if (err)
2558	payload += nlmsg_len(nlh);
2559
1853	skb = nlmsg_new(payload, GFP_KERNEL);
2560	skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
2561 NETLINK_CB(in_skb).portid, GFP_KERNEL);
2562	if (!skb) {
2563	struct sock *sk;
2564
@@ -2116,7 +2824,7 @@ static const struct proto_ops netlink_ops = {
2824	.socketpair =	sock_no_socketpair,
2825	.accept =	sock_no_accept,
2826	.getname =	netlink_getname,
2119	.poll =		datagram_poll,
2827	.poll =		netlink_poll,
2828	.ioctl =	sock_no_ioctl,
2829	.listen =	sock_no_listen,
2830	.shutdown =	sock_no_shutdown,
@@ -2124,7 +2832,7 @@ static const struct proto_ops netlink_ops = {
2832	.getsockopt =	netlink_getsockopt,
2833	.sendmsg =	netlink_sendmsg,
2834	.recvmsg =	netlink_recvmsg,
2127	.mmap =		sock_no_mmap,
2835	.mmap =		netlink_mmap,
2836	.sendpage =	sock_no_sendpage,
2837};
2838
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
new file mode 100644
index 000000000000..ed8522265f4e
--- /dev/null
+++ b/net/netlink/af_netlink.h
@@ -0,0 +1,82 @@
1#ifndef _AF_NETLINK_H
2#define _AF_NETLINK_H
3
4#include <net/sock.h>
5
6#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
7#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long))
8
9struct netlink_ring {
10 void **pg_vec;
11 unsigned int head;
12 unsigned int frames_per_block;
13 unsigned int frame_size;
14 unsigned int frame_max;
15
16 unsigned int pg_vec_order;
17 unsigned int pg_vec_pages;
18 unsigned int pg_vec_len;
19
20 atomic_t pending;
21};
22
23struct netlink_sock {
24 /* struct sock has to be the first member of netlink_sock */
25 struct sock sk;
26 u32 portid;
27 u32 dst_portid;
28 u32 dst_group;
29 u32 flags;
30 u32 subscriptions;
31 u32 ngroups;
32 unsigned long *groups;
33 unsigned long state;
34 wait_queue_head_t wait;
35 struct netlink_callback *cb;
36 struct mutex *cb_mutex;
37 struct mutex cb_def_mutex;
38 void (*netlink_rcv)(struct sk_buff *skb);
39 void (*netlink_bind)(int group);
40 struct module *module;
41#ifdef CONFIG_NETLINK_MMAP
42 struct mutex pg_vec_lock;
43 struct netlink_ring rx_ring;
44 struct netlink_ring tx_ring;
45 atomic_t mapped;
46#endif /* CONFIG_NETLINK_MMAP */
47};
48
49static inline struct netlink_sock *nlk_sk(struct sock *sk)
50{
51 return container_of(sk, struct netlink_sock, sk);
52}
53
54struct nl_portid_hash {
55 struct hlist_head *table;
56 unsigned long rehash_time;
57
58 unsigned int mask;
59 unsigned int shift;
60
61 unsigned int entries;
62 unsigned int max_shift;
63
64 u32 rnd;
65};
66
67struct netlink_table {
68 struct nl_portid_hash hash;
69 struct hlist_head mc_list;
70 struct listeners __rcu *listeners;
71 unsigned int flags;
72 unsigned int groups;
73 struct mutex *cb_mutex;
74 struct module *module;
75 void (*bind)(int group);
76 int registered;
77};
78
79extern struct netlink_table *nl_table;
80extern rwlock_t nl_table_lock;
81
82#endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
new file mode 100644
index 000000000000..1af29624b92f
--- /dev/null
+++ b/net/netlink/diag.c
@@ -0,0 +1,227 @@
1#include <linux/module.h>
2
3#include <net/sock.h>
4#include <linux/netlink.h>
5#include <linux/sock_diag.h>
6#include <linux/netlink_diag.h>
7
8#include "af_netlink.h"
9
10#ifdef CONFIG_NETLINK_MMAP
11static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
12 struct sk_buff *nlskb)
13{
14 struct netlink_diag_ring ndr;
15
16 ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
17 ndr.ndr_block_nr = ring->pg_vec_len;
18 ndr.ndr_frame_size = ring->frame_size;
19 ndr.ndr_frame_nr = ring->frame_max + 1;
20
21 return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
22}
23
24static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
25{
26 struct netlink_sock *nlk = nlk_sk(sk);
27 int ret;
28
29 mutex_lock(&nlk->pg_vec_lock);
30 ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
31 if (!ret)
32 ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
33 nlskb);
34 mutex_unlock(&nlk->pg_vec_lock);
35
36 return ret;
37}
38#else
39static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
40{
41 return 0;
42}
43#endif
44
45static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
46{
47 struct netlink_sock *nlk = nlk_sk(sk);
48
49 if (nlk->groups == NULL)
50 return 0;
51
52 return nla_put(nlskb, NETLINK_DIAG_GROUPS, NLGRPSZ(nlk->ngroups),
53 nlk->groups);
54}
55
56static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
57 struct netlink_diag_req *req,
58 u32 portid, u32 seq, u32 flags, int sk_ino)
59{
60 struct nlmsghdr *nlh;
61 struct netlink_diag_msg *rep;
62 struct netlink_sock *nlk = nlk_sk(sk);
63
64 nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
65 flags);
66 if (!nlh)
67 return -EMSGSIZE;
68
69 rep = nlmsg_data(nlh);
70 rep->ndiag_family = AF_NETLINK;
71 rep->ndiag_type = sk->sk_type;
72 rep->ndiag_protocol = sk->sk_protocol;
73 rep->ndiag_state = sk->sk_state;
74
75 rep->ndiag_ino = sk_ino;
76 rep->ndiag_portid = nlk->portid;
77 rep->ndiag_dst_portid = nlk->dst_portid;
78 rep->ndiag_dst_group = nlk->dst_group;
79 sock_diag_save_cookie(sk, rep->ndiag_cookie);
80
81 if ((req->ndiag_show & NDIAG_SHOW_GROUPS) &&
82 sk_diag_dump_groups(sk, skb))
83 goto out_nlmsg_trim;
84
85 if ((req->ndiag_show & NDIAG_SHOW_MEMINFO) &&
86 sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
87 goto out_nlmsg_trim;
88
89 if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
90 sk_diag_put_rings_cfg(sk, skb))
91 goto out_nlmsg_trim;
92
93 return nlmsg_end(skb, nlh);
94
95out_nlmsg_trim:
96 nlmsg_cancel(skb, nlh);
97 return -EMSGSIZE;
98}
99
100static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
101 int protocol, int s_num)
102{
103 struct netlink_table *tbl = &nl_table[protocol];
104 struct nl_portid_hash *hash = &tbl->hash;
105 struct net *net = sock_net(skb->sk);
106 struct netlink_diag_req *req;
107 struct sock *sk;
108 int ret = 0, num = 0, i;
109
110 req = nlmsg_data(cb->nlh);
111
112 for (i = 0; i <= hash->mask; i++) {
113 sk_for_each(sk, &hash->table[i]) {
114 if (!net_eq(sock_net(sk), net))
115 continue;
116 if (num < s_num) {
117 num++;
118 continue;
119 }
120
121 if (sk_diag_fill(sk, skb, req,
122 NETLINK_CB(cb->skb).portid,
123 cb->nlh->nlmsg_seq,
124 NLM_F_MULTI,
125 sock_i_ino(sk)) < 0) {
126 ret = 1;
127 goto done;
128 }
129
130 num++;
131 }
132 }
133
134 sk_for_each_bound(sk, &tbl->mc_list) {
135 if (sk_hashed(sk))
136 continue;
137 if (!net_eq(sock_net(sk), net))
138 continue;
139 if (num < s_num) {
140 num++;
141 continue;
142 }
143
144 if (sk_diag_fill(sk, skb, req,
145 NETLINK_CB(cb->skb).portid,
146 cb->nlh->nlmsg_seq,
147 NLM_F_MULTI,
148 sock_i_ino(sk)) < 0) {
149 ret = 1;
150 goto done;
151 }
152 num++;
153 }
154done:
155 cb->args[0] = num;
156 cb->args[1] = protocol;
157
158 return ret;
159}
160
161static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
162{
163 struct netlink_diag_req *req;
164 int s_num = cb->args[0];
165
166 req = nlmsg_data(cb->nlh);
167
168 read_lock(&nl_table_lock);
169
170 if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
171 int i;
172
173 for (i = cb->args[1]; i < MAX_LINKS; i++) {
174 if (__netlink_diag_dump(skb, cb, i, s_num))
175 break;
176 s_num = 0;
177 }
178 } else {
179 if (req->sdiag_protocol >= MAX_LINKS) {
180 read_unlock(&nl_table_lock);
181 return -ENOENT;
182 }
183
184 __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
185 }
186
187 read_unlock(&nl_table_lock);
188
189 return skb->len;
190}
191
192static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
193{
194 int hdrlen = sizeof(struct netlink_diag_req);
195 struct net *net = sock_net(skb->sk);
196
197 if (nlmsg_len(h) < hdrlen)
198 return -EINVAL;
199
200 if (h->nlmsg_flags & NLM_F_DUMP) {
201 struct netlink_dump_control c = {
202 .dump = netlink_diag_dump,
203 };
204 return netlink_dump_start(net->diag_nlsk, skb, h, &c);
205 } else
206 return -EOPNOTSUPP;
207}
208
209static const struct sock_diag_handler netlink_diag_handler = {
210 .family = AF_NETLINK,
211 .dump = netlink_diag_handler_dump,
212};
213
214static int __init netlink_diag_init(void)
215{
216 return sock_diag_register(&netlink_diag_handler);
217}
218
219static void __exit netlink_diag_exit(void)
220{
221 sock_diag_unregister(&netlink_diag_handler);
222}
223
224module_init(netlink_diag_init);
225module_exit(netlink_diag_exit);
226MODULE_LICENSE("GPL");
227MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */);
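
A consumer of this module issues an ordinary sock_diag dump request. The sketch below is illustrative (built against the linux/netlink_diag.h uapi header that accompanies this code; error handling omitted): NDIAG_PROTO_ALL walks every netlink protocol, and the ndiag_show bits select which optional attributes sk_diag_fill() includes.

    #include <sys/socket.h>
    #include <linux/netlink.h>
    #include <linux/sock_diag.h>
    #include <linux/netlink_diag.h>

    int query_netlink_diag(void)
    {
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
            struct {
                    struct nlmsghdr nlh;
                    struct netlink_diag_req req;
            } q = {
                    .nlh = {
                            .nlmsg_len   = sizeof(q),
                            .nlmsg_type  = SOCK_DIAG_BY_FAMILY,
                            .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
                    },
                    .req = {
                            .sdiag_family   = AF_NETLINK,
                            .sdiag_protocol = NDIAG_PROTO_ALL,
                            .ndiag_show     = NDIAG_SHOW_MEMINFO |
                                              NDIAG_SHOW_RING_CFG,
                    },
            };

            /* Replies arrive as NLM_F_MULTI parts carrying a
             * struct netlink_diag_msg plus the requested attributes. */
            return send(fd, &q, sizeof(q), 0);
    }
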
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 5a55be3f17a5..2fd6dbea327a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -16,10 +16,12 @@
16#include <linux/skbuff.h>
17#include <linux/mutex.h>
18#include <linux/bitmap.h>
19#include <linux/rwsem.h>
20#include <net/sock.h>
21#include <net/genetlink.h>
22
23static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
24static DECLARE_RWSEM(cb_lock);
25
26void genl_lock(void)
27{
@@ -41,6 +43,18 @@ int lockdep_genl_is_held(void)
43EXPORT_SYMBOL(lockdep_genl_is_held);
44#endif
45
46static void genl_lock_all(void)
47{
48 down_write(&cb_lock);
49 genl_lock();
50}
51
52static void genl_unlock_all(void)
53{
54 genl_unlock();
55 up_write(&cb_lock);
56}
57
58#define GENL_FAM_TAB_SIZE	16
59#define GENL_FAM_TAB_MASK	(GENL_FAM_TAB_SIZE - 1)
60
@@ -144,7 +158,7 @@ int genl_register_mc_group(struct genl_family *family,
158	BUG_ON(grp->name[0] == '\0');
159	BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL);
160
147	genl_lock();
161	genl_lock_all();
162
163	/* special-case our own group */
164	if (grp == &notify_grp)
@@ -213,7 +227,7 @@ int genl_register_mc_group(struct genl_family *family,
227
228	genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp);
229 out:
216	genl_unlock();
230	genl_unlock_all();
231	return err;
232}
233EXPORT_SYMBOL(genl_register_mc_group);
@@ -255,9 +269,9 @@ static void __genl_unregister_mc_group(struct genl_family *family,
269void genl_unregister_mc_group(struct genl_family *family,
270			      struct genl_multicast_group *grp)
271{
258	genl_lock();
272	genl_lock_all();
273	__genl_unregister_mc_group(family, grp);
260	genl_unlock();
274	genl_unlock_all();
275}
276EXPORT_SYMBOL(genl_unregister_mc_group);
277
@@ -303,9 +317,9 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops)
317	if (ops->policy)
318	ops->flags |= GENL_CMD_CAP_HASPOL;
319
306	genl_lock();
320	genl_lock_all();
321	list_add_tail(&ops->ops_list, &family->ops_list);
308	genl_unlock();
322	genl_unlock_all();
323
324	genl_ctrl_event(CTRL_CMD_NEWOPS, ops);
325	err = 0;
@@ -334,16 +348,16 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops)
348{
349	struct genl_ops *rc;
350
337	genl_lock();
351	genl_lock_all();
352	list_for_each_entry(rc, &family->ops_list, ops_list) {
353	if (rc == ops) {
354	list_del(&ops->ops_list);
341	genl_unlock();
355	genl_unlock_all();
356	genl_ctrl_event(CTRL_CMD_DELOPS, ops);
357	return 0;
358	}
359	}
346	genl_unlock();
360	genl_unlock_all();
361
362	return -ENOENT;
363}
@@ -373,7 +387,7 @@ int genl_register_family(struct genl_family *family)
387	INIT_LIST_HEAD(&family->ops_list);
388	INIT_LIST_HEAD(&family->mcast_groups);
389
376	genl_lock();
390	genl_lock_all();
391
392	if (genl_family_find_byname(family->name)) {
393	err = -EEXIST;
@@ -394,7 +408,7 @@ int genl_register_family(struct genl_family *family)
408	goto errout_locked;
409	}
410
397	if (family->maxattr) {
411	if (family->maxattr && !family->parallel_ops) {
412	family->attrbuf = kmalloc((family->maxattr+1) *
413				  sizeof(struct nlattr *), GFP_KERNEL);
414	if (family->attrbuf == NULL) {
@@ -405,14 +419,14 @@ int genl_register_family(struct genl_family *family)
419	family->attrbuf = NULL;
420
421	list_add_tail(&family->family_list, genl_family_chain(family->id));
408	genl_unlock();
422	genl_unlock_all();
423
424	genl_ctrl_event(CTRL_CMD_NEWFAMILY, family);
425
426	return 0;
427
428errout_locked:
415	genl_unlock();
429	genl_unlock_all();
430errout:
431	return err;
432}
@@ -476,7 +490,7 @@ int genl_unregister_family(struct genl_family *family)
490{
491	struct genl_family *rc;
492
479	genl_lock();
493	genl_lock_all();
494
495	genl_unregister_mc_groups(family);
496
@@ -486,14 +500,14 @@ int genl_unregister_family(struct genl_family *family)
500
501	list_del(&rc->family_list);
502	INIT_LIST_HEAD(&family->ops_list);
489	genl_unlock();
503	genl_unlock_all();
504
505	kfree(family->attrbuf);
506	genl_ctrl_event(CTRL_CMD_DELFAMILY, family);
507	return 0;
508	}
509
496	genl_unlock();
510	genl_unlock_all();
511
512	return -ENOENT;
513}
@@ -530,19 +544,17 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
544}
545EXPORT_SYMBOL(genlmsg_put);
546
533static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
547static int genl_family_rcv_msg(struct genl_family *family,
548 struct sk_buff *skb,
549 struct nlmsghdr *nlh)
550{
551	struct genl_ops *ops;
536	struct genl_family *family;
552	struct net *net = sock_net(skb->sk);
553	struct genl_info info;
554	struct genlmsghdr *hdr = nlmsg_data(nlh);
555	struct nlattr **attrbuf;
556	int hdrlen, err;
557
542	family = genl_family_find_byid(nlh->nlmsg_type);
543	if (family == NULL)
544	return -ENOENT;
545
558	/* this family doesn't exist in this netns */
559	if (!family->netnsok && !net_eq(net, &init_net))
560	return -ENOENT;
@@ -560,29 +572,33 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
572	return -EPERM;
573
574	if (nlh->nlmsg_flags & NLM_F_DUMP) {
575 struct netlink_dump_control c = {
576 .dump = ops->dumpit,
577 .done = ops->done,
578 };
579
580	if (ops->dumpit == NULL)
581	return -EOPNOTSUPP;
582
566	genl_unlock();
583	return netlink_dump_start(net->genl_sock, skb, nlh, &c);
567 {
568 struct netlink_dump_control c = {
569 .dump = ops->dumpit,
570 .done = ops->done,
571 };
572 err = netlink_dump_start(net->genl_sock, skb, nlh, &c);
573 }
574 genl_lock();
575 return err;
584	}
585
586	if (ops->doit == NULL)
587	return -EOPNOTSUPP;
588
581	if (family->attrbuf) {
582	err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr,
589	if (family->maxattr && family->parallel_ops) {
590	attrbuf = kmalloc((family->maxattr+1) *
591 sizeof(struct nlattr *), GFP_KERNEL);
592 if (attrbuf == NULL)
593 return -ENOMEM;
594 } else
595 attrbuf = family->attrbuf;
596
597 if (attrbuf) {
598 err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
599			  ops->policy);
600	if (err < 0)
585	return err;
601	goto out;
602	}
603
604	info.snd_seq = nlh->nlmsg_seq;
@@ -590,14 +606,14 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
606	info.nlhdr = nlh;
607	info.genlhdr = nlmsg_data(nlh);
608	info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
593	info.attrs = family->attrbuf;
609	info.attrs = attrbuf;
610	genl_info_net_set(&info, net);
611	memset(&info.user_ptr, 0, sizeof(info.user_ptr));
612
613	if (family->pre_doit) {
614	err = family->pre_doit(ops, skb, &info);
615	if (err)
600	return err;
616	goto out;
617	}
618
603 err = ops->doit(skb, &info); 619 err = ops->doit(skb, &info);
@@ -605,14 +621,38 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
605 if (family->post_doit) 621 if (family->post_doit)
606 family->post_doit(ops, skb, &info); 622 family->post_doit(ops, skb, &info);
607 623
624out:
625 if (family->parallel_ops)
626 kfree(attrbuf);
627
628 return err;
629}
630
631static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
632{
633 struct genl_family *family;
634 int err;
635
636 family = genl_family_find_byid(nlh->nlmsg_type);
637 if (family == NULL)
638 return -ENOENT;
639
640 if (!family->parallel_ops)
641 genl_lock();
642
643 err = genl_family_rcv_msg(family, skb, nlh);
644
645 if (!family->parallel_ops)
646 genl_unlock();
647
648	return err;
649}
650
651static void genl_rcv(struct sk_buff *skb)
652{
613	genl_lock();
653	down_read(&cb_lock);
654	netlink_rcv_skb(skb, &genl_rcv_msg);
615	genl_unlock();
655	up_read(&cb_lock);
656}
657
658/**************************************************************************
@@ -918,7 +958,6 @@ static int __net_init genl_pernet_init(struct net *net)
958{
959	struct netlink_kernel_cfg cfg = {
960	.input = genl_rcv,
921	.cb_mutex = &genl_mutex,
961	.flags = NL_CFG_F_NONROOT_RECV,
962	};
963
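
A family opting into the new concurrent path would register roughly as in this sketch (the "demo" names are invented for illustration). With parallel_ops set, genl_rcv_msg() skips the global genl mutex and genl_family_rcv_msg() allocates a per-request attribute buffer, so the family's doit()/dumpit() handlers must supply their own locking:

    static struct genl_family demo_family = {
            .id           = GENL_ID_GENERATE,
            .name         = "demo",
            .version      = 1,
            .maxattr      = DEMO_ATTR_MAX,  /* hypothetical attribute count */
            .netnsok      = true,
            .parallel_ops = true,           /* handlers do their own locking */
    };
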
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index d1fa1d9ffd2e..ec0c80fde69f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -834,6 +834,8 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr,
834	struct sock *sk = sock->sk;
835	struct nr_sock *nr = nr_sk(sk);
836
837	memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25));
838
839	lock_sock(sk);
840	if (peer != 0) {
841	if (sk->sk_state != TCP_ESTABLISHED) {
@@ -1173,6 +1175,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
1175	}
1176
1177	if (sax != NULL) {
1178	memset(sax, 0, sizeof(*sax));
1179	sax->sax25_family = AF_NETROM;
1180	skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call,
1181		      AX25_ADDR_LEN);
diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig
index 60c3bbb63e8e..5948b2fc72f6 100644
--- a/net/nfc/Kconfig
+++ b/net/nfc/Kconfig
@@ -4,6 +4,7 @@
4
5menuconfig NFC
6	depends on NET
7	depends on RFKILL || !RFKILL
8	tristate "NFC subsystem support"
9	default n
10	help
@@ -15,6 +16,5 @@ menuconfig NFC
16
17source "net/nfc/nci/Kconfig"
18source "net/nfc/hci/Kconfig"
18source "net/nfc/llcp/Kconfig"
19
20source "drivers/nfc/Kconfig"
diff --git a/net/nfc/Makefile b/net/nfc/Makefile
index d1a117c2c401..fb799deaed4f 100644
--- a/net/nfc/Makefile
+++ b/net/nfc/Makefile
@@ -5,6 +5,8 @@
5obj-$(CONFIG_NFC) += nfc.o
6obj-$(CONFIG_NFC_NCI) += nci/
7obj-$(CONFIG_NFC_HCI) += hci/
8#obj-$(CONFIG_NFC_LLCP) += llcp/
9
10nfc-objs := core.o netlink.o af_nfc.o rawsock.o llcp_core.o llcp_commands.o \
11		llcp_sock.o
12
9nfc-objs := core.o netlink.o af_nfc.o rawsock.o
10nfc-$(CONFIG_NFC_LLCP) += llcp/llcp.o llcp/commands.o llcp/sock.o
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 6ceee8e181ca..40d2527693da 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -27,6 +27,7 @@
27#include <linux/kernel.h>
28#include <linux/module.h>
29#include <linux/slab.h>
30#include <linux/rfkill.h>
31#include <linux/nfc.h>
32
33#include <net/genetlink.h>
@@ -58,6 +59,11 @@ int nfc_dev_up(struct nfc_dev *dev)
59
60	device_lock(&dev->dev);
61
62 if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
63 rc = -ERFKILL;
64 goto error;
65 }
66
67	if (!device_is_registered(&dev->dev)) {
68	rc = -ENODEV;
69	goto error;
@@ -117,6 +123,24 @@ error:
123	return rc;
124}
125
126static int nfc_rfkill_set_block(void *data, bool blocked)
127{
128 struct nfc_dev *dev = data;
129
130 pr_debug("%s blocked %d", dev_name(&dev->dev), blocked);
131
132 if (!blocked)
133 return 0;
134
135 nfc_dev_down(dev);
136
137 return 0;
138}
139
140static const struct rfkill_ops nfc_rfkill_ops = {
141 .set_block = nfc_rfkill_set_block,
142};
143
144/**
145 * nfc_start_poll - start polling for nfc targets
146 *
@@ -143,6 +167,11 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols)
167	goto error;
168	}
169
170 if (!dev->dev_up) {
171 rc = -ENODEV;
172 goto error;
173 }
174
175	if (dev->polling) {
176	rc = -EBUSY;
177	goto error;
@@ -835,6 +864,15 @@ int nfc_register_device(struct nfc_dev *dev)
864	pr_debug("The userspace won't be notified that the device %s was added\n",
865		 dev_name(&dev->dev));
866
867 dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev,
868 RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev);
869 if (dev->rfkill) {
870 if (rfkill_register(dev->rfkill) < 0) {
871 rfkill_destroy(dev->rfkill);
872 dev->rfkill = NULL;
873 }
874 }
875
876	return 0;
877}
878EXPORT_SYMBOL(nfc_register_device);
@@ -852,6 +890,11 @@ void nfc_unregister_device(struct nfc_dev *dev)
890
891	id = dev->idx;
892
893 if (dev->rfkill) {
894 rfkill_unregister(dev->rfkill);
895 rfkill_destroy(dev->rfkill);
896 }
897
898	if (dev->ops->check_presence) {
899	device_lock(&dev->dev);
900	dev->shutting_down = true;
diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp.h
index 0eae5c509504..ff8c434f7df8 100644
--- a/net/nfc/llcp/llcp.h
+++ b/net/nfc/llcp.h
@@ -31,6 +31,7 @@ enum llcp_state {
31#define LLCP_MAX_LTO  0xff
32#define LLCP_MAX_RW   15
33#define LLCP_MAX_MIUX 0x7ff
34#define LLCP_MAX_MIU (LLCP_MAX_MIUX + 128)
35
36#define LLCP_WKS_NUM_SAP   16
37#define LLCP_SDP_NUM_SAP   16
@@ -46,6 +47,19 @@ struct llcp_sock_list {
47	rwlock_t	lock;
48};
49
50struct nfc_llcp_sdp_tlv {
51 u8 *tlv;
52 u8 tlv_len;
53
54 char *uri;
55 u8 tid;
56 u8 sap;
57
58 unsigned long time;
59
60 struct hlist_node node;
61};
62
63struct nfc_llcp_local {
64	struct list_head list;
65	struct nfc_dev *dev;
@@ -86,6 +100,12 @@ struct nfc_llcp_local {
100	u8  remote_opt;
101	u16 remote_wks;
102
103 struct mutex sdreq_lock;
104 struct hlist_head pending_sdreqs;
105 struct timer_list sdreq_timer;
106 struct work_struct sdreq_timeout_work;
107 u8 sdreq_next_tid;
108
109	/* sockets array */
110	struct llcp_sock_list sockets;
111	struct llcp_sock_list connecting_sockets;
@@ -105,7 +125,12 @@ struct nfc_llcp_sock {
125	char *service_name;
126	size_t service_name_len;
127	u8 rw;
108	u16 miu;
128	__be16 miux;
129
130
131 /* Remote link parameters */
132 u8 remote_rw;
133 u16 remote_miu;
134
135	/* Link variables */
136	u8 send_n;
@@ -138,6 +163,7 @@ struct nfc_llcp_ui_cb {
163
164#define LLCP_HEADER_SIZE   2
165#define LLCP_SEQUENCE_SIZE 1
166#define LLCP_AGF_PDU_HEADER_SIZE 2
167
168/* LLCP versions: 1.1 is 1.0 plus SDP */
169#define LLCP_VERSION_10 0x10
@@ -186,6 +212,7 @@ struct nfc_llcp_ui_cb {
212
213void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *s);
214void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *s);
215void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock);
216struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
217struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local);
218int nfc_llcp_local_put(struct nfc_llcp_local *local);
@@ -213,12 +240,20 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
240/* Commands API */
241void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
242u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length);
243struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap);
244struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
245 size_t uri_len);
246void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
247void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head);
248void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
249int nfc_llcp_disconnect(struct nfc_llcp_sock *sock);
250int nfc_llcp_send_symm(struct nfc_dev *dev);
251int nfc_llcp_send_connect(struct nfc_llcp_sock *sock);
252int nfc_llcp_send_cc(struct nfc_llcp_sock *sock);
221int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap);
253int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local,
254 struct hlist_head *tlv_list, size_t tlvs_len);
255int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local,
256 struct hlist_head *tlv_list, size_t tlvs_len);
257int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason);
258int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock);
259int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
diff --git a/net/nfc/llcp/Kconfig b/net/nfc/llcp/Kconfig
deleted file mode 100644
index a1a41cd68255..000000000000
--- a/net/nfc/llcp/Kconfig
+++ /dev/null
@@ -1,7 +0,0 @@
1config NFC_LLCP
2 depends on NFC
3 bool "NFC LLCP support"
4 default n
5 help
6 Say Y here if you want to build support for a kernel NFC LLCP
7 implementation. \ No newline at end of file
diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp_commands.c
index c6bc3bd95052..c1b23eef83ca 100644
--- a/net/nfc/llcp/commands.c
+++ b/net/nfc/llcp_commands.c
@@ -26,7 +26,7 @@
26
27#include <net/nfc/nfc.h>
28
29#include "../nfc.h"
29#include "nfc.h"
30#include "llcp.h"
31
32static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
@@ -117,6 +117,88 @@ u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length)
117	return tlv;
118}
119
120struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap)
121{
122 struct nfc_llcp_sdp_tlv *sdres;
123 u8 value[2];
124
125 sdres = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
126 if (sdres == NULL)
127 return NULL;
128
129 value[0] = tid;
130 value[1] = sap;
131
132 sdres->tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, value, 2,
133 &sdres->tlv_len);
134 if (sdres->tlv == NULL) {
135 kfree(sdres);
136 return NULL;
137 }
138
139 sdres->tid = tid;
140 sdres->sap = sap;
141
142 INIT_HLIST_NODE(&sdres->node);
143
144 return sdres;
145}
146
147struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
148 size_t uri_len)
149{
150 struct nfc_llcp_sdp_tlv *sdreq;
151
152 pr_debug("uri: %s, len: %zu\n", uri, uri_len);
153
154 sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
155 if (sdreq == NULL)
156 return NULL;
157
158 sdreq->tlv_len = uri_len + 3;
159
160 if (uri[uri_len - 1] == 0)
161 sdreq->tlv_len--;
162
163 sdreq->tlv = kzalloc(sdreq->tlv_len + 1, GFP_KERNEL);
164 if (sdreq->tlv == NULL) {
165 kfree(sdreq);
166 return NULL;
167 }
168
169 sdreq->tlv[0] = LLCP_TLV_SDREQ;
170 sdreq->tlv[1] = sdreq->tlv_len - 2;
171 sdreq->tlv[2] = tid;
172
173 sdreq->tid = tid;
174 sdreq->uri = sdreq->tlv + 3;
175 memcpy(sdreq->uri, uri, uri_len);
176
177 sdreq->time = jiffies;
178
179 INIT_HLIST_NODE(&sdreq->node);
180
181 return sdreq;
182}
183
184void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp)
185{
186 kfree(sdp->tlv);
187 kfree(sdp);
188}
189
190void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head)
191{
192 struct nfc_llcp_sdp_tlv *sdp;
193 struct hlist_node *n;
194
195 hlist_for_each_entry_safe(sdp, n, head, node) {
196 hlist_del(&sdp->node);
197
198 nfc_llcp_free_sdp_tlv(sdp);
199 }
200}
201
120int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, 202int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
121 u8 *tlv_array, u16 tlv_array_len) 203 u8 *tlv_array, u16 tlv_array_len)
122{ 204{
@@ -184,10 +266,10 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
184 266
185 switch (type) { 267 switch (type) {
186 case LLCP_TLV_MIUX: 268 case LLCP_TLV_MIUX:
187 sock->miu = llcp_tlv_miux(tlv) + 128; 269 sock->remote_miu = llcp_tlv_miux(tlv) + 128;
188 break; 270 break;
189 case LLCP_TLV_RW: 271 case LLCP_TLV_RW:
190 sock->rw = llcp_tlv_rw(tlv); 272 sock->remote_rw = llcp_tlv_rw(tlv);
191 break; 273 break;
192 case LLCP_TLV_SN: 274 case LLCP_TLV_SN:
193 break; 275 break;
@@ -200,7 +282,8 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
200 tlv += length + 2; 282 tlv += length + 2;
201 } 283 }
202 284
203 pr_debug("sock %p rw %d miu %d\n", sock, sock->rw, sock->miu); 285 pr_debug("sock %p rw %d miu %d\n", sock,
286 sock->remote_rw, sock->remote_miu);
204 287
205 return 0; 288 return 0;
206} 289}
@@ -318,9 +401,9 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
318 struct sk_buff *skb; 401 struct sk_buff *skb;
319 u8 *service_name_tlv = NULL, service_name_tlv_length; 402 u8 *service_name_tlv = NULL, service_name_tlv_length;
320 u8 *miux_tlv = NULL, miux_tlv_length; 403 u8 *miux_tlv = NULL, miux_tlv_length;
321 u8 *rw_tlv = NULL, rw_tlv_length; 404 u8 *rw_tlv = NULL, rw_tlv_length, rw;
322 int err; 405 int err;
323 u16 size = 0; 406 u16 size = 0, miux;
324 407
325 pr_debug("Sending CONNECT\n"); 408 pr_debug("Sending CONNECT\n");
326 409
@@ -336,11 +419,16 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
336 size += service_name_tlv_length; 419 size += service_name_tlv_length;
337 } 420 }
338 421
339 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, 422 /* If the socket parameters are not set, use the local ones */
423 miux = be16_to_cpu(sock->miux) > LLCP_MAX_MIUX ?
424 local->miux : sock->miux;
425 rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw;
426
427 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
340 &miux_tlv_length); 428 &miux_tlv_length);
341 size += miux_tlv_length; 429 size += miux_tlv_length;
342 430
343 rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); 431 rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
344 size += rw_tlv_length; 432 size += rw_tlv_length;
345 433
346 pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); 434 pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len);
@@ -377,9 +465,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
377 struct nfc_llcp_local *local; 465 struct nfc_llcp_local *local;
378 struct sk_buff *skb; 466 struct sk_buff *skb;
379 u8 *miux_tlv = NULL, miux_tlv_length; 467 u8 *miux_tlv = NULL, miux_tlv_length;
380 u8 *rw_tlv = NULL, rw_tlv_length; 468 u8 *rw_tlv = NULL, rw_tlv_length, rw;
381 int err; 469 int err;
382 u16 size = 0; 470 u16 size = 0, miux;
383 471
384 pr_debug("Sending CC\n"); 472 pr_debug("Sending CC\n");
385 473
@@ -387,11 +475,16 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
387 if (local == NULL) 475 if (local == NULL)
388 return -ENODEV; 476 return -ENODEV;
389 477
390 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, 478 /* If the socket parameters are not set, use the local ones */
479 miux = be16_to_cpu(sock->miux) > LLCP_MAX_MIUX ?
480 local->miux : sock->miux;
481 rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw;
482
483 miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,
391 &miux_tlv_length); 484 &miux_tlv_length);
392 size += miux_tlv_length; 485 size += miux_tlv_length;
393 486
394 rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); 487 rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);
395 size += rw_tlv_length; 488 size += rw_tlv_length;
396 489
397 skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); 490 skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size);
@@ -416,48 +509,90 @@ error_tlv:
416 return err; 509 return err;
417} 510}
418 511
419int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap) 512static struct sk_buff *nfc_llcp_allocate_snl(struct nfc_llcp_local *local,
513 size_t tlv_length)
420{ 514{
421 struct sk_buff *skb; 515 struct sk_buff *skb;
422 struct nfc_dev *dev; 516 struct nfc_dev *dev;
423 u8 *sdres_tlv = NULL, sdres_tlv_length, sdres[2];
424 u16 size = 0; 517 u16 size = 0;
425 518
426 pr_debug("Sending SNL tid 0x%x sap 0x%x\n", tid, sap);
427
428 if (local == NULL) 519 if (local == NULL)
429 return -ENODEV; 520 return ERR_PTR(-ENODEV);
430 521
431 dev = local->dev; 522 dev = local->dev;
432 if (dev == NULL) 523 if (dev == NULL)
433 return -ENODEV; 524 return ERR_PTR(-ENODEV);
434
435 sdres[0] = tid;
436 sdres[1] = sap;
437 sdres_tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, sdres, 0,
438 &sdres_tlv_length);
439 if (sdres_tlv == NULL)
440 return -ENOMEM;
441 525
442 size += LLCP_HEADER_SIZE; 526 size += LLCP_HEADER_SIZE;
443 size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; 527 size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE;
444 size += sdres_tlv_length; 528 size += tlv_length;
445 529
446 skb = alloc_skb(size, GFP_KERNEL); 530 skb = alloc_skb(size, GFP_KERNEL);
447 if (skb == NULL) { 531 if (skb == NULL)
448 kfree(sdres_tlv); 532 return ERR_PTR(-ENOMEM);
449 return -ENOMEM;
450 }
451 533
452 skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); 534 skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);
453 535
454 skb = llcp_add_header(skb, LLCP_SAP_SDP, LLCP_SAP_SDP, LLCP_PDU_SNL); 536 skb = llcp_add_header(skb, LLCP_SAP_SDP, LLCP_SAP_SDP, LLCP_PDU_SNL);
455 537
456 memcpy(skb_put(skb, sdres_tlv_length), sdres_tlv, sdres_tlv_length); 538 return skb;
539}
540
541int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local,
542 struct hlist_head *tlv_list, size_t tlvs_len)
543{
544 struct nfc_llcp_sdp_tlv *sdp;
545 struct hlist_node *n;
546 struct sk_buff *skb;
547
548 skb = nfc_llcp_allocate_snl(local, tlvs_len);
549 if (IS_ERR(skb))
550 return PTR_ERR(skb);
551
552 hlist_for_each_entry_safe(sdp, n, tlv_list, node) {
553 memcpy(skb_put(skb, sdp->tlv_len), sdp->tlv, sdp->tlv_len);
554
555 hlist_del(&sdp->node);
556
557 nfc_llcp_free_sdp_tlv(sdp);
558 }
457 559
458 skb_queue_tail(&local->tx_queue, skb); 560 skb_queue_tail(&local->tx_queue, skb);
459 561
460 kfree(sdres_tlv); 562 return 0;
563}
564
565int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local,
566 struct hlist_head *tlv_list, size_t tlvs_len)
567{
568 struct nfc_llcp_sdp_tlv *sdreq;
569 struct hlist_node *n;
570 struct sk_buff *skb;
571
572 skb = nfc_llcp_allocate_snl(local, tlvs_len);
573 if (IS_ERR(skb))
574 return PTR_ERR(skb);
575
576 mutex_lock(&local->sdreq_lock);
577
578 if (hlist_empty(&local->pending_sdreqs))
579 mod_timer(&local->sdreq_timer,
580 jiffies + msecs_to_jiffies(3 * local->remote_lto));
581
582 hlist_for_each_entry_safe(sdreq, n, tlv_list, node) {
583 pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri);
584
585 memcpy(skb_put(skb, sdreq->tlv_len), sdreq->tlv,
586 sdreq->tlv_len);
587
588 hlist_del(&sdreq->node);
589
590 hlist_add_head(&sdreq->node, &local->pending_sdreqs);
591 }
592
593 mutex_unlock(&local->sdreq_lock);
594
595 skb_queue_tail(&local->tx_queue, skb);
461 596
462 return 0; 597 return 0;
463} 598}
@@ -523,6 +658,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
523 struct nfc_llcp_local *local; 658 struct nfc_llcp_local *local;
524 size_t frag_len = 0, remaining_len; 659 size_t frag_len = 0, remaining_len;
525 u8 *msg_data, *msg_ptr; 660 u8 *msg_data, *msg_ptr;
661 u16 remote_miu;
526 662
527 pr_debug("Send I frame len %zd\n", len); 663 pr_debug("Send I frame len %zd\n", len);
528 664
@@ -532,8 +668,8 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
532 668
533 /* Remote is ready but has not acknowledged our frames */ 669 /* Remote is ready but has not acknowledged our frames */
534 if((sock->remote_ready && 670 if((sock->remote_ready &&
535 skb_queue_len(&sock->tx_pending_queue) >= sock->rw && 671 skb_queue_len(&sock->tx_pending_queue) >= sock->remote_rw &&
536 skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) { 672 skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) {
537 pr_err("Pending queue is full %d frames\n", 673 pr_err("Pending queue is full %d frames\n",
538 skb_queue_len(&sock->tx_pending_queue)); 674 skb_queue_len(&sock->tx_pending_queue));
539 return -ENOBUFS; 675 return -ENOBUFS;
@@ -541,7 +677,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
541 677
542 /* Remote is not ready and we've been queueing enough frames */ 678 /* Remote is not ready and we've been queueing enough frames */
543 if ((!sock->remote_ready && 679 if ((!sock->remote_ready &&
544 skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) { 680 skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) {
545 pr_err("Tx queue is full %d frames\n", 681 pr_err("Tx queue is full %d frames\n",
546 skb_queue_len(&sock->tx_queue)); 682 skb_queue_len(&sock->tx_queue));
547 return -ENOBUFS; 683 return -ENOBUFS;
@@ -559,9 +695,11 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
559 remaining_len = len; 695 remaining_len = len;
560 msg_ptr = msg_data; 696 msg_ptr = msg_data;
561 697
562 while (remaining_len > 0) { 698 do {
699 remote_miu = sock->remote_miu > LLCP_MAX_MIU ?
700 local->remote_miu : sock->remote_miu;
563 701
564 frag_len = min_t(size_t, sock->miu, remaining_len); 702 frag_len = min_t(size_t, remote_miu, remaining_len);
565 703
566 pr_debug("Fragment %zd bytes remaining %zd", 704 pr_debug("Fragment %zd bytes remaining %zd",
567 frag_len, remaining_len); 705 frag_len, remaining_len);
@@ -573,7 +711,8 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
573 711
574 skb_put(pdu, LLCP_SEQUENCE_SIZE); 712 skb_put(pdu, LLCP_SEQUENCE_SIZE);
575 713
576 memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len); 714 if (likely(frag_len > 0))
715 memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len);
577 716
578 skb_queue_tail(&sock->tx_queue, pdu); 717 skb_queue_tail(&sock->tx_queue, pdu);
579 718
@@ -585,7 +724,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
585 724
586 remaining_len -= frag_len; 725 remaining_len -= frag_len;
587 msg_ptr += frag_len; 726 msg_ptr += frag_len;
588 } 727 } while (remaining_len > 0);
589 728
590 kfree(msg_data); 729 kfree(msg_data);
591 730
@@ -599,6 +738,7 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
599 struct nfc_llcp_local *local; 738 struct nfc_llcp_local *local;
600 size_t frag_len = 0, remaining_len; 739 size_t frag_len = 0, remaining_len;
601 u8 *msg_ptr, *msg_data; 740 u8 *msg_ptr, *msg_data;
741 u16 remote_miu;
602 int err; 742 int err;
603 743
604 pr_debug("Send UI frame len %zd\n", len); 744 pr_debug("Send UI frame len %zd\n", len);
@@ -619,9 +759,11 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
619 remaining_len = len; 759 remaining_len = len;
620 msg_ptr = msg_data; 760 msg_ptr = msg_data;
621 761
622 while (remaining_len > 0) { 762 do {
763 remote_miu = sock->remote_miu > LLCP_MAX_MIU ?
764 local->remote_miu : sock->remote_miu;
623 765
624 frag_len = min_t(size_t, sock->miu, remaining_len); 766 frag_len = min_t(size_t, remote_miu, remaining_len);
625 767
626 pr_debug("Fragment %zd bytes remaining %zd", 768 pr_debug("Fragment %zd bytes remaining %zd",
627 frag_len, remaining_len); 769 frag_len, remaining_len);
@@ -635,14 +777,15 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
635 777
636 pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI); 778 pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI);
637 779
638 memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len); 780 if (likely(frag_len > 0))
781 memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len);
639 782
640 /* No need to check for the peer RW for UI frames */ 783 /* No need to check for the peer RW for UI frames */
641 skb_queue_tail(&local->tx_queue, pdu); 784 skb_queue_tail(&local->tx_queue, pdu);
642 785
643 remaining_len -= frag_len; 786 remaining_len -= frag_len;
644 msg_ptr += frag_len; 787 msg_ptr += frag_len;
645 } 788 } while (remaining_len > 0);
646 789
647 kfree(msg_data); 790 kfree(msg_data);
648 791
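Editor's note on the hunks above: nfc_llcp_build_sdreq_tlv() fixes the wire layout of a service-discovery request TLV — one type byte, one length byte, the transaction id, then the URI, with a trailing NUL dropped from the encoded length. A minimal userspace sketch of the same encoding follows; it is illustrative only, and the 0x08 type value is assumed from the LLCP 1.1 TLV numbering (LLCP_TLV_SDREQ is defined in llcp.h, which is not part of this diff).

/* Hypothetical userspace sketch, not part of the patch: reproduces the
 * byte layout nfc_llcp_build_sdreq_tlv() emits, with LLCP_TLV_SDREQ
 * assumed to be 0x08. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static uint8_t *build_sdreq_tlv(uint8_t tid, const char *uri, size_t uri_len,
				size_t *tlv_len)
{
	uint8_t *tlv;

	*tlv_len = uri_len + 3;		/* type + length + tid + uri */
	if (uri[uri_len - 1] == '\0')	/* don't encode a trailing NUL */
		(*tlv_len)--;

	tlv = calloc(*tlv_len + 1, 1);
	if (tlv == NULL)
		return NULL;

	tlv[0] = 0x08;			/* assumed LLCP_TLV_SDREQ */
	tlv[1] = (uint8_t)(*tlv_len - 2);	/* value length: tid + uri */
	tlv[2] = tid;
	memcpy(tlv + 3, uri, *tlv_len - 3);

	return tlv;
}

int main(void)
{
	size_t len, i;
	uint8_t *tlv = build_sdreq_tlv(1, "urn:nfc:sn:snep", 15, &len);

	for (i = 0; tlv && i < len; i++)
		printf("%02x ", tlv[i]);
	printf("\n");
	free(tlv);
	return 0;
}

For tid 1 and "urn:nfc:sn:snep" this prints 08 10 01 followed by the URI bytes, matching what the kernel builder would queue in an SNL PDU.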
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp_core.c
index ee25f25f0cd6..158bdbf668cc 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp_core.c
@@ -24,13 +24,15 @@
 #include <linux/list.h>
 #include <linux/nfc.h>
 
-#include "../nfc.h"
+#include "nfc.h"
 #include "llcp.h"
 
 static u8 llcp_magic[3] = {0x46, 0x66, 0x6d};
 
 static struct list_head llcp_devices;
 
+static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb);
+
 void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *sk)
 {
 	write_lock(&l->lock);
@@ -45,6 +47,12 @@ void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *sk)
 	write_unlock(&l->lock);
 }
 
+void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock)
+{
+	sock->remote_rw = LLCP_DEFAULT_RW;
+	sock->remote_miu = LLCP_MAX_MIU + 1;
+}
+
 static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock)
 {
 	struct nfc_llcp_local *local = sock->local;
@@ -68,7 +76,7 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock)
 	}
 }
 
-static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,
+static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device,
 				    int err)
 {
 	struct sock *sk;
@@ -108,21 +116,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,
 
 			bh_unlock_sock(accept_sk);
 		}
-
-			if (listen == true) {
-				bh_unlock_sock(sk);
-				continue;
-			}
-		}
-
-		/*
-		 * If we have a connection less socket bound, we keep it alive
-		 * if the device is still present.
-		 */
-		if (sk->sk_state == LLCP_BOUND && sk->sk_type == SOCK_DGRAM &&
-		    listen == true) {
-			bh_unlock_sock(sk);
-			continue;
 		}
 
 		if (err)
@@ -137,11 +130,8 @@
 
 	write_unlock(&local->sockets.lock);
 
-	/*
-	 * If we want to keep the listening sockets alive,
-	 * we don't touch the RAW ones.
-	 */
-	if (listen == true)
+	/* If we still have a device, we keep the RAW sockets alive */
+	if (device == true)
 		return;
 
 	write_lock(&local->raw_sockets.lock);
@@ -173,15 +163,18 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)
 	return local;
 }
 
-static void local_cleanup(struct nfc_llcp_local *local, bool listen)
+static void local_cleanup(struct nfc_llcp_local *local)
 {
-	nfc_llcp_socket_release(local, listen, ENXIO);
+	nfc_llcp_socket_release(local, false, ENXIO);
 	del_timer_sync(&local->link_timer);
 	skb_queue_purge(&local->tx_queue);
 	cancel_work_sync(&local->tx_work);
 	cancel_work_sync(&local->rx_work);
 	cancel_work_sync(&local->timeout_work);
 	kfree_skb(local->rx_pending);
+	del_timer_sync(&local->sdreq_timer);
+	cancel_work_sync(&local->sdreq_timeout_work);
+	nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs);
 }
 
 static void local_release(struct kref *ref)
@@ -191,7 +184,7 @@ static void local_release(struct kref *ref)
 	local = container_of(ref, struct nfc_llcp_local, ref);
 
 	list_del(&local->list);
-	local_cleanup(local, false);
+	local_cleanup(local);
 	kfree(local);
 }
 
@@ -259,6 +252,47 @@ static void nfc_llcp_symm_timer(unsigned long data)
 	schedule_work(&local->timeout_work);
 }
 
+static void nfc_llcp_sdreq_timeout_work(struct work_struct *work)
+{
+	unsigned long time;
+	HLIST_HEAD(nl_sdres_list);
+	struct hlist_node *n;
+	struct nfc_llcp_sdp_tlv *sdp;
+	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
+						    sdreq_timeout_work);
+
+	mutex_lock(&local->sdreq_lock);
+
+	time = jiffies - msecs_to_jiffies(3 * local->remote_lto);
+
+	hlist_for_each_entry_safe(sdp, n, &local->pending_sdreqs, node) {
+		if (time_after(sdp->time, time))
+			continue;
+
+		sdp->sap = LLCP_SDP_UNBOUND;
+
+		hlist_del(&sdp->node);
+
+		hlist_add_head(&sdp->node, &nl_sdres_list);
+	}
+
+	if (!hlist_empty(&local->pending_sdreqs))
+		mod_timer(&local->sdreq_timer,
+			  jiffies + msecs_to_jiffies(3 * local->remote_lto));
+
+	mutex_unlock(&local->sdreq_lock);
+
+	if (!hlist_empty(&nl_sdres_list))
+		nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
+}
+
+static void nfc_llcp_sdreq_timer(unsigned long data)
+{
+	struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+
+	schedule_work(&local->sdreq_timeout_work);
+}
+
 struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
 {
 	struct nfc_llcp_local *local, *n;
@@ -802,8 +836,6 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local,
 	ui_cb->dsap = dsap;
 	ui_cb->ssap = ssap;
 
-	printk("%s %d %d\n", __func__, dsap, ssap);
-
 	pr_debug("%d %d\n", dsap, ssap);
 
 	/* We're looking for a bound socket, not a client one */
@@ -900,7 +932,9 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
 	new_sock = nfc_llcp_sock(new_sk);
 	new_sock->dev = local->dev;
 	new_sock->local = nfc_llcp_local_get(local);
-	new_sock->miu = local->remote_miu;
+	new_sock->rw = sock->rw;
+	new_sock->miux = sock->miux;
+	new_sock->remote_miu = local->remote_miu;
 	new_sock->nfc_protocol = sock->nfc_protocol;
 	new_sock->dsap = ssap;
 	new_sock->target_idx = local->target_idx;
@@ -954,11 +988,11 @@ int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock)
 
 	pr_debug("Remote ready %d tx queue len %d remote rw %d",
 		 sock->remote_ready, skb_queue_len(&sock->tx_pending_queue),
-		 sock->rw);
+		 sock->remote_rw);
 
 	/* Try to queue some I frames for transmission */
 	while (sock->remote_ready &&
-	       skb_queue_len(&sock->tx_pending_queue) < sock->rw) {
+	       skb_queue_len(&sock->tx_pending_queue) < sock->remote_rw) {
 		struct sk_buff *pdu;
 
 		pdu = skb_dequeue(&sock->tx_queue);
@@ -1072,6 +1106,12 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
 	dsap = nfc_llcp_dsap(skb);
 	ssap = nfc_llcp_ssap(skb);
 
+	if ((dsap == 0) && (ssap == 0)) {
+		pr_debug("Connection termination");
+		nfc_dep_link_down(local->dev);
+		return;
+	}
+
 	llcp_sock = nfc_llcp_sock_get(local, dsap, ssap);
 	if (llcp_sock == NULL) {
 		nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN);
@@ -1178,6 +1218,10 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
 	u16 tlv_len, offset;
 	char *service_name;
 	size_t service_name_len;
+	struct nfc_llcp_sdp_tlv *sdp;
+	HLIST_HEAD(llc_sdres_list);
+	size_t sdres_tlvs_len;
+	HLIST_HEAD(nl_sdres_list);
 
 	dsap = nfc_llcp_dsap(skb);
 	ssap = nfc_llcp_ssap(skb);
@@ -1192,6 +1236,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
 	tlv = &skb->data[LLCP_HEADER_SIZE];
 	tlv_len = skb->len - LLCP_HEADER_SIZE;
 	offset = 0;
+	sdres_tlvs_len = 0;
 
 	while (offset < tlv_len) {
 		type = tlv[0];
@@ -1209,14 +1254,14 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
 			    !strncmp(service_name, "urn:nfc:sn:sdp",
 				     service_name_len)) {
 				sap = 1;
-				goto send_snl;
+				goto add_snl;
 			}
 
 			llcp_sock = nfc_llcp_sock_from_sn(local, service_name,
 							  service_name_len);
 			if (!llcp_sock) {
 				sap = 0;
-				goto send_snl;
+				goto add_snl;
 			}
 
 			/*
@@ -1233,7 +1278,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
 
 			if (sap == LLCP_SAP_MAX) {
 				sap = 0;
-				goto send_snl;
+				goto add_snl;
 			}
 
 			client_count =
@@ -1250,8 +1295,37 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
 
 			pr_debug("%p %d\n", llcp_sock, sap);
 
-send_snl:
-			nfc_llcp_send_snl(local, tid, sap);
+add_snl:
+			sdp = nfc_llcp_build_sdres_tlv(tid, sap);
+			if (sdp == NULL)
+				goto exit;
+
+			sdres_tlvs_len += sdp->tlv_len;
+			hlist_add_head(&sdp->node, &llc_sdres_list);
+			break;
+
+		case LLCP_TLV_SDRES:
+			mutex_lock(&local->sdreq_lock);
+
+			pr_debug("LLCP_TLV_SDRES: searching tid %d\n", tlv[2]);
+
+			hlist_for_each_entry(sdp, &local->pending_sdreqs, node) {
+				if (sdp->tid != tlv[2])
+					continue;
+
+				sdp->sap = tlv[3];
+
+				pr_debug("Found: uri=%s, sap=%d\n",
+					 sdp->uri, sdp->sap);
+
+				hlist_del(&sdp->node);
+
+				hlist_add_head(&sdp->node, &nl_sdres_list);
+
+				break;
+			}
+
+			mutex_unlock(&local->sdreq_lock);
 			break;
 
 		default:
@@ -1262,21 +1336,63 @@ send_snl:
 		offset += length + 2;
 		tlv += length + 2;
 	}
+
+exit:
+	if (!hlist_empty(&nl_sdres_list))
+		nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
+
+	if (!hlist_empty(&llc_sdres_list))
+		nfc_llcp_send_snl_sdres(local, &llc_sdres_list, sdres_tlvs_len);
 }
 
-static void nfc_llcp_rx_work(struct work_struct *work)
+static void nfc_llcp_recv_agf(struct nfc_llcp_local *local, struct sk_buff *skb)
 {
-	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
-						    rx_work);
-	u8 dsap, ssap, ptype;
-	struct sk_buff *skb;
+	u8 ptype;
+	u16 pdu_len;
+	struct sk_buff *new_skb;
 
-	skb = local->rx_pending;
-	if (skb == NULL) {
-		pr_debug("No pending SKB\n");
+	if (skb->len <= LLCP_HEADER_SIZE) {
+		pr_err("Malformed AGF PDU\n");
 		return;
 	}
 
+	skb_pull(skb, LLCP_HEADER_SIZE);
+
+	while (skb->len > LLCP_AGF_PDU_HEADER_SIZE) {
+		pdu_len = skb->data[0] << 8 | skb->data[1];
+
+		skb_pull(skb, LLCP_AGF_PDU_HEADER_SIZE);
+
+		if (pdu_len < LLCP_HEADER_SIZE || pdu_len > skb->len) {
+			pr_err("Malformed AGF PDU\n");
+			return;
+		}
+
+		ptype = nfc_llcp_ptype(skb);
+
+		if (ptype == LLCP_PDU_SYMM || ptype == LLCP_PDU_AGF)
+			goto next;
+
+		new_skb = nfc_alloc_recv_skb(pdu_len, GFP_KERNEL);
+		if (new_skb == NULL) {
+			pr_err("Could not allocate PDU\n");
+			return;
+		}
+
+		memcpy(skb_put(new_skb, pdu_len), skb->data, pdu_len);
+
+		nfc_llcp_rx_skb(local, new_skb);
+
+		kfree_skb(new_skb);
+next:
+		skb_pull(skb, pdu_len);
+	}
+}
+
+static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb)
+{
+	u8 dsap, ssap, ptype;
+
 	ptype = nfc_llcp_ptype(skb);
 	dsap = nfc_llcp_dsap(skb);
 	ssap = nfc_llcp_ssap(skb);
@@ -1287,10 +1403,6 @@ static void nfc_llcp_rx_work(struct work_struct *work)
 	print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET,
 		       16, 1, skb->data, skb->len, true);
 
-	__net_timestamp(skb);
-
-	nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX);
-
 	switch (ptype) {
 	case LLCP_PDU_SYMM:
 		pr_debug("SYMM\n");
@@ -1333,8 +1445,31 @@ static void nfc_llcp_rx_work(struct work_struct *work)
 		nfc_llcp_recv_hdlc(local, skb);
 		break;
 
+	case LLCP_PDU_AGF:
+		pr_debug("AGF frame\n");
+		nfc_llcp_recv_agf(local, skb);
+		break;
+	}
+}
+
+static void nfc_llcp_rx_work(struct work_struct *work)
+{
+	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
+						    rx_work);
+	struct sk_buff *skb;
+
+	skb = local->rx_pending;
+	if (skb == NULL) {
+		pr_debug("No pending SKB\n");
+		return;
 	}
 
+	__net_timestamp(skb);
+
+	nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX);
+
+	nfc_llcp_rx_skb(local, skb);
+
 	schedule_work(&local->tx_work);
 	kfree_skb(local->rx_pending);
 	local->rx_pending = NULL;
@@ -1381,6 +1516,9 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev)
 	if (local == NULL)
 		return;
 
+	local->remote_miu = LLCP_DEFAULT_MIU;
+	local->remote_lto = LLCP_DEFAULT_LTO;
+
 	/* Close and purge all existing sockets */
 	nfc_llcp_socket_release(local, true, 0);
 }
@@ -1447,6 +1585,13 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 	local->remote_miu = LLCP_DEFAULT_MIU;
 	local->remote_lto = LLCP_DEFAULT_LTO;
 
+	mutex_init(&local->sdreq_lock);
+	INIT_HLIST_HEAD(&local->pending_sdreqs);
+	init_timer(&local->sdreq_timer);
+	local->sdreq_timer.data = (unsigned long) local;
+	local->sdreq_timer.function = nfc_llcp_sdreq_timer;
+	INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
+
 	list_add(&local->list, &llcp_devices);
 
 	return 0;
@@ -1461,7 +1606,7 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev)
 		return;
 	}
 
-	local_cleanup(local, false);
+	local_cleanup(local);
 
 	nfc_llcp_local_put(local);
 }
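Editor's note: the new nfc_llcp_recv_agf() above deaggregates an AGF frame whose payload is a series of sub-PDUs, each prefixed by a two-byte big-endian length, re-injecting everything except nested SYMM and AGF PDUs. A small userspace sketch of the same walk, with the two header sizes hard-coded to the values the LLCP code uses (an assumption, since llcp.h is not shown in this diff):

/* Hypothetical sketch, not part of the patch: walks an AGF payload the
 * way nfc_llcp_recv_agf() does, assuming 2-byte LLCP and AGF headers. */
#include <stdint.h>
#include <stdio.h>

#define LLCP_HEADER_SIZE	2
#define AGF_PDU_HEADER_SIZE	2

static void walk_agf(const uint8_t *data, size_t len)
{
	while (len > AGF_PDU_HEADER_SIZE) {
		size_t pdu_len = (size_t)data[0] << 8 | data[1];

		data += AGF_PDU_HEADER_SIZE;
		len -= AGF_PDU_HEADER_SIZE;

		/* Same sanity check as the kernel: a sub-PDU must at least
		 * hold an LLCP header and must fit in what is left. */
		if (pdu_len < LLCP_HEADER_SIZE || pdu_len > len) {
			fprintf(stderr, "malformed AGF PDU\n");
			return;
		}

		printf("sub-PDU of %zu bytes\n", pdu_len);

		data += pdu_len;
		len -= pdu_len;
	}
}

int main(void)
{
	/* One aggregated 3-byte sub-PDU: length 0x0003, then its bytes. */
	const uint8_t agf[] = { 0x00, 0x03, 0x81, 0x20, 0x00 };

	walk_agf(agf, sizeof(agf));
	return 0;
}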
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp_sock.c
index 8f025746f337..380253eccb74 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp_sock.c
@@ -24,7 +24,7 @@
 #include <linux/module.h>
 #include <linux/nfc.h>
 
-#include "../nfc.h"
+#include "nfc.h"
 #include "llcp.h"
 
 static int sock_wait_state(struct sock *sk, int state, unsigned long timeo)
@@ -223,6 +223,156 @@ error:
 	return ret;
 }
 
+static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	u32 opt;
+	int err = 0;
+
+	pr_debug("%p optname %d\n", sk, optname);
+
+	if (level != SOL_NFC)
+		return -ENOPROTOOPT;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case NFC_LLCP_RW:
+		if (sk->sk_state == LLCP_CONNECTED ||
+		    sk->sk_state == LLCP_BOUND ||
+		    sk->sk_state == LLCP_LISTEN) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (get_user(opt, (u32 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (opt > LLCP_MAX_RW) {
+			err = -EINVAL;
+			break;
+		}
+
+		llcp_sock->rw = (u8) opt;
+
+		break;
+
+	case NFC_LLCP_MIUX:
+		if (sk->sk_state == LLCP_CONNECTED ||
+		    sk->sk_state == LLCP_BOUND ||
+		    sk->sk_state == LLCP_LISTEN) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (get_user(opt, (u32 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (opt > LLCP_MAX_MIUX) {
+			err = -EINVAL;
+			break;
+		}
+
+		llcp_sock->miux = cpu_to_be16((u16) opt);
+
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+
+	pr_debug("%p rw %d miux %d\n", llcp_sock,
+		 llcp_sock->rw, llcp_sock->miux);
+
+	return err;
+}
+
+static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, int __user *optlen)
+{
+	struct nfc_llcp_local *local;
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	int len, err = 0;
+	u16 miux, remote_miu;
+	u8 rw;
+
+	pr_debug("%p optname %d\n", sk, optname);
+
+	if (level != SOL_NFC)
+		return -ENOPROTOOPT;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	local = llcp_sock->local;
+	if (!local)
+		return -ENODEV;
+
+	len = min_t(u32, len, sizeof(u32));
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case NFC_LLCP_RW:
+		rw = llcp_sock->rw > LLCP_MAX_RW ? local->rw : llcp_sock->rw;
+		if (put_user(rw, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	case NFC_LLCP_MIUX:
+		miux = be16_to_cpu(llcp_sock->miux) > LLCP_MAX_MIUX ?
+			be16_to_cpu(local->miux) : be16_to_cpu(llcp_sock->miux);
+
+		if (put_user(miux, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	case NFC_LLCP_REMOTE_MIU:
+		remote_miu = llcp_sock->remote_miu > LLCP_MAX_MIU ?
+				local->remote_miu : llcp_sock->remote_miu;
+
+		if (put_user(remote_miu, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	case NFC_LLCP_REMOTE_LTO:
+		if (put_user(local->remote_lto / 10, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	case NFC_LLCP_REMOTE_RW:
+		if (put_user(llcp_sock->remote_rw, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	return err;
+}
+
 void nfc_llcp_accept_unlink(struct sock *sk)
 {
 	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -358,12 +508,13 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,
 	pr_debug("%p %d %d %d\n", sk, llcp_sock->target_idx,
 		 llcp_sock->dsap, llcp_sock->ssap);
 
-	uaddr->sa_family = AF_NFC;
-
+	memset(llcp_addr, 0, sizeof(*llcp_addr));
 	*len = sizeof(struct sockaddr_nfc_llcp);
 
+	llcp_addr->sa_family = AF_NFC;
 	llcp_addr->dev_idx = llcp_sock->dev->idx;
 	llcp_addr->target_idx = llcp_sock->target_idx;
+	llcp_addr->nfc_protocol = llcp_sock->nfc_protocol;
 	llcp_addr->dsap = llcp_sock->dsap;
 	llcp_addr->ssap = llcp_sock->ssap;
 	llcp_addr->service_name_len = llcp_sock->service_name_len;
@@ -405,7 +556,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
 		return llcp_accept_poll(sk);
 
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
 
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
@@ -543,7 +695,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
 
 	llcp_sock->dev = dev;
 	llcp_sock->local = nfc_llcp_local_get(local);
-	llcp_sock->miu = llcp_sock->local->remote_miu;
+	llcp_sock->remote_miu = llcp_sock->local->remote_miu;
 	llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
 	if (llcp_sock->ssap == LLCP_SAP_MAX) {
 		ret = -ENOMEM;
@@ -646,6 +798,8 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	pr_debug("%p %zu\n", sk, len);
 
+	msg->msg_namelen = 0;
+
 	lock_sock(sk);
 
 	if (sk->sk_state == LLCP_CLOSED &&
@@ -691,6 +845,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		pr_debug("Datagram socket %d %d\n", ui_cb->dsap, ui_cb->ssap);
 
+		memset(sockaddr, 0, sizeof(*sockaddr));
 		sockaddr->sa_family = AF_NFC;
 		sockaddr->nfc_protocol = NFC_PROTO_NFC_DEP;
 		sockaddr->dsap = ui_cb->dsap;
@@ -737,8 +892,8 @@ static const struct proto_ops llcp_sock_ops = {
 	.ioctl          = sock_no_ioctl,
 	.listen         = llcp_sock_listen,
 	.shutdown       = sock_no_shutdown,
-	.setsockopt     = sock_no_setsockopt,
-	.getsockopt     = sock_no_getsockopt,
+	.setsockopt     = nfc_llcp_setsockopt,
+	.getsockopt     = nfc_llcp_getsockopt,
 	.sendmsg        = llcp_sock_sendmsg,
 	.recvmsg        = llcp_sock_recvmsg,
 	.mmap           = sock_no_mmap,
@@ -802,12 +957,13 @@ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp)
 
 	llcp_sock->ssap = 0;
 	llcp_sock->dsap = LLCP_SAP_SDP;
-	llcp_sock->rw = LLCP_DEFAULT_RW;
-	llcp_sock->miu = LLCP_DEFAULT_MIU;
+	llcp_sock->rw = LLCP_MAX_RW + 1;
+	llcp_sock->miux = cpu_to_be16(LLCP_MAX_MIUX + 1);
 	llcp_sock->send_n = llcp_sock->send_ack_n = 0;
 	llcp_sock->recv_n = llcp_sock->recv_ack_n = 0;
 	llcp_sock->remote_ready = 1;
 	llcp_sock->reserved_ssap = LLCP_SAP_MAX;
+	nfc_llcp_socket_remote_param_init(llcp_sock);
 	skb_queue_head_init(&llcp_sock->tx_queue);
 	skb_queue_head_init(&llcp_sock->tx_pending_queue);
 	INIT_LIST_HEAD(&llcp_sock->accept_queue);
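Editor's note: with nfc_llcp_setsockopt()/nfc_llcp_getsockopt() wired into llcp_sock_ops above, applications can tune the local receive window and MIUX before connecting, and read back the remote parameters negotiated at CONNECT/CC time. A hedged userspace sketch — it assumes a libc/kernel header set where AF_NFC, SOL_NFC, NFC_SOCKPROTO_LLCP and the NFC_LLCP_* option names from this patch are all visible:

/* Hypothetical sketch, not part of the patch. */
#include <stdio.h>
#include <sys/socket.h>
#include <linux/nfc.h>

int main(void)
{
	int fd = socket(AF_NFC, SOCK_STREAM, NFC_SOCKPROTO_LLCP);
	unsigned int rw = 2, miux = 480, remote_miu = 0;
	socklen_t len = sizeof(remote_miu);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	/* Both options only work on unconnected, unbound sockets and are
	 * range-checked by the kernel (rw <= LLCP_MAX_RW,
	 * miux <= LLCP_MAX_MIUX). */
	if (setsockopt(fd, SOL_NFC, NFC_LLCP_RW, &rw, sizeof(rw)) ||
	    setsockopt(fd, SOL_NFC, NFC_LLCP_MIUX, &miux, sizeof(miux)))
		perror("setsockopt");

	/* ... bind/connect as usual, then read the peer's MIU back: */
	if (!getsockopt(fd, SOL_NFC, NFC_LLCP_REMOTE_MIU, &remote_miu, &len))
		printf("remote MIU: %u\n", remote_miu);

	return 0;
}

Note the design choice visible in nfc_llcp_sock_alloc(): rw and miux are initialized to out-of-range sentinels (LLCP_MAX_RW + 1, LLCP_MAX_MIUX + 1), so the send paths fall back to the per-device defaults unless the application explicitly set the option.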
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 504b883439f1..f0c4d61f37c0 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -28,8 +28,7 @@
 #include <linux/slab.h>
 
 #include "nfc.h"
-
-#include "llcp/llcp.h"
+#include "llcp.h"
 
 static struct genl_multicast_group nfc_genl_event_mcgrp = {
 	.name = NFC_GENL_MCAST_EVENT_NAME,
@@ -53,6 +52,15 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
 	[NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 },
 	[NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 },
 	[NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 },
+	[NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 },
+	[NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 },
+	[NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 },
+	[NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
+	[NFC_SDP_ATTR_URI] = { .type = NLA_STRING },
+	[NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },
 };
 
 static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
@@ -348,6 +356,74 @@ free_msg:
 	return -EMSGSIZE;
 }
 
+int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list)
+{
+	struct sk_buff *msg;
+	struct nlattr *sdp_attr, *uri_attr;
+	struct nfc_llcp_sdp_tlv *sdres;
+	struct hlist_node *n;
+	void *hdr;
+	int rc = -EMSGSIZE;
+	int i;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
+			  NFC_EVENT_LLC_SDRES);
+	if (!hdr)
+		goto free_msg;
+
+	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
+		goto nla_put_failure;
+
+	sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP);
+	if (sdp_attr == NULL) {
+		rc = -ENOMEM;
+		goto nla_put_failure;
+	}
+
+	i = 1;
+	hlist_for_each_entry_safe(sdres, n, sdres_list, node) {
+		pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap);
+
+		uri_attr = nla_nest_start(msg, i++);
+		if (uri_attr == NULL) {
+			rc = -ENOMEM;
+			goto nla_put_failure;
+		}
+
+		if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap))
+			goto nla_put_failure;
+
+		if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri))
+			goto nla_put_failure;
+
+		nla_nest_end(msg, uri_attr);
+
+		hlist_del(&sdres->node);
+
+		nfc_llcp_free_sdp_tlv(sdres);
+	}
+
+	nla_nest_end(msg, sdp_attr);
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+
+free_msg:
+	nlmsg_free(msg);
+
+	nfc_llcp_free_sdp_tlv_list(sdres_list);
+
+	return rc;
+}
+
 static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
 				u32 portid, u32 seq,
 				struct netlink_callback *cb,
@@ -859,6 +935,96 @@ exit:
 	return rc;
 }
 
+static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nfc_dev *dev;
+	struct nfc_llcp_local *local;
+	struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1];
+	u32 idx;
+	u8 tid;
+	char *uri;
+	int rc = 0, rem;
+	size_t uri_len, tlvs_len;
+	struct hlist_head sdreq_list;
+	struct nfc_llcp_sdp_tlv *sdreq;
+
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+	    !info->attrs[NFC_ATTR_LLC_SDP])
+		return -EINVAL;
+
+	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+
+	dev = nfc_get_device(idx);
+	if (!dev) {
+		rc = -ENODEV;
+		goto exit;
+	}
+
+	device_lock(&dev->dev);
+
+	if (dev->dep_link_up == false) {
+		rc = -ENOLINK;
+		goto exit;
+	}
+
+	local = nfc_llcp_find_local(dev);
+	if (!local) {
+		nfc_put_device(dev);
+		rc = -ENODEV;
+		goto exit;
+	}
+
+	INIT_HLIST_HEAD(&sdreq_list);
+
+	tlvs_len = 0;
+
+	nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) {
+		rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr,
+				      nfc_sdp_genl_policy);
+
+		if (rc != 0) {
+			rc = -EINVAL;
+			goto exit;
+		}
+
+		if (!sdp_attrs[NFC_SDP_ATTR_URI])
+			continue;
+
+		uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]);
+		if (uri_len == 0)
+			continue;
+
+		uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]);
+		if (uri == NULL || *uri == 0)
+			continue;
+
+		tid = local->sdreq_next_tid++;
+
+		sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len);
+		if (sdreq == NULL) {
+			rc = -ENOMEM;
+			goto exit;
+		}
+
+		tlvs_len += sdreq->tlv_len;
+
+		hlist_add_head(&sdreq->node, &sdreq_list);
+	}
+
+	if (hlist_empty(&sdreq_list)) {
+		rc = -EINVAL;
+		goto exit;
+	}
+
+	rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len);
+exit:
+	device_unlock(&dev->dev);
+
+	nfc_put_device(dev);
+
+	return rc;
+}
+
 static struct genl_ops nfc_genl_ops[] = {
 	{
 		.cmd = NFC_CMD_GET_DEVICE,
@@ -913,6 +1079,11 @@ static struct genl_ops nfc_genl_ops[] = {
 		.doit = nfc_genl_llc_set_params,
 		.policy = nfc_genl_policy,
 	},
+	{
+		.cmd = NFC_CMD_LLC_SDREQ,
+		.doit = nfc_genl_llc_sdreq,
+		.policy = nfc_genl_policy,
+	},
 };
 
 
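Editor's note: nfc_genl_llc_sdreq() above expects NFC_ATTR_LLC_SDP to be a nested attribute whose sub-nests each carry an NFC_SDP_ATTR_URI; answers come back asynchronously in the NFC_EVENT_LLC_SDRES multicast built by nfc_genl_llc_send_sdres() (or with sap = LLCP_SDP_UNBOUND once the 3 * remote_lto timer fires). A sketch of a matching request using libnl-3's genl helpers — the libnl calls, the omitted family resolution, and the skipped error handling are illustrative assumptions, not part of the patch:

/* Hypothetical userspace sketch, not part of the patch. */
#include <netlink/genl/genl.h>
#include <linux/nfc.h>

static struct nl_msg *build_sdreq(int family_id, unsigned int dev_idx,
				  const char **uris, int n)
{
	struct nl_msg *msg = nlmsg_alloc();
	struct nlattr *sdp, *uri;
	int i;

	if (!msg)
		return NULL;

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family_id, 0, 0,
		    NFC_CMD_LLC_SDREQ, 0);
	nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev_idx);

	/* One sub-nest per URI; the kernel only looks at the contents,
	 * so the nest index is arbitrary. */
	sdp = nla_nest_start(msg, NFC_ATTR_LLC_SDP);
	for (i = 0; i < n; i++) {
		uri = nla_nest_start(msg, i + 1);
		nla_put_string(msg, NFC_SDP_ATTR_URI, uris[i]);
		nla_nest_end(msg, uri);
	}
	nla_nest_end(msg, sdp);

	return msg;
}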
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 87d914d2876a..afa1f84ba040 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -46,7 +46,7 @@ struct nfc_rawsock {
 #define to_rawsock_sk(_tx_work) \
 	((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work))
 
-#ifdef CONFIG_NFC_LLCP
+struct nfc_llcp_sdp_tlv;
 
 void nfc_llcp_mac_is_down(struct nfc_dev *dev);
 void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
@@ -59,60 +59,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb);
 struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
 int __init nfc_llcp_init(void);
 void nfc_llcp_exit(void);
-
-#else
-
-static inline void nfc_llcp_mac_is_down(struct nfc_dev *dev)
-{
-}
-
-static inline void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
-				      u8 comm_mode, u8 rf_mode)
-{
-}
-
-static inline int nfc_llcp_register_device(struct nfc_dev *dev)
-{
-	return 0;
-}
-
-static inline void nfc_llcp_unregister_device(struct nfc_dev *dev)
-{
-}
-
-static inline int nfc_llcp_set_remote_gb(struct nfc_dev *dev,
-					 u8 *gb, u8 gb_len)
-{
-	return 0;
-}
-
-static inline u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *gb_len)
-{
-	*gb_len = 0;
-	return NULL;
-}
-
-static inline int nfc_llcp_data_received(struct nfc_dev *dev,
-					 struct sk_buff *skb)
-{
-	return 0;
-}
-
-static inline struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
-{
-	return NULL;
-}
-
-static inline int nfc_llcp_init(void)
-{
-	return 0;
-}
-
-static inline void nfc_llcp_exit(void)
-{
-}
-
-#endif
+void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
+void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head);
 
 int __init rawsock_init(void);
 void rawsock_exit(void);
@@ -144,6 +92,8 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev);
 int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol);
 int nfc_genl_tm_deactivated(struct nfc_dev *dev);
 
+int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list);
+
 struct nfc_dev *nfc_get_device(unsigned int idx);
 
 static inline void nfc_put_device(struct nfc_dev *dev)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index d4d5363c7ba7..894b6cbdd929 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -98,7 +98,7 @@ static int pop_vlan(struct sk_buff *skb)
 	if (unlikely(err))
 		return err;
 
-	__vlan_hwaccel_put_tag(skb, ntohs(tci));
+	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));
 	return 0;
 }
 
@@ -110,7 +110,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
 		/* push down current VLAN tag */
 		current_tag = vlan_tx_tag_get(skb);
 
-		if (!__vlan_put_tag(skb, current_tag))
+		if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
 			return -ENOMEM;
 
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
@@ -118,7 +118,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
 					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
 
 	}
-	__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+	__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
 	return 0;
 }
 
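Editor's note: the actions.c hunks track a kernel-wide VLAN API change in this merge — __vlan_put_tag() and __vlan_hwaccel_put_tag() now take the tag protocol (TPID) explicitly instead of assuming 802.1Q, which is what lets openvswitch pass vlan->vlan_tpid through unchanged. For reference, a sketch of the 4-byte tag being pushed, with field values taken from IEEE 802.1Q/802.1ad rather than from this diff:

/* Hypothetical sketch, not part of the patch. */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

struct vlan_tag {
	uint16_t tpid;	/* htons(0x8100) for 802.1Q, htons(0x88a8) for 802.1ad */
	uint16_t tci;	/* PCP(3 bits) | DEI(1) | VLAN ID(12), network order */
};

static struct vlan_tag make_tag(uint16_t tpid_host, uint16_t vid, uint8_t pcp)
{
	struct vlan_tag t = {
		.tpid = htons(tpid_host),
		.tci  = htons((uint16_t)(pcp << 13) | (vid & 0x0fff)),
	};
	return t;
}

int main(void)
{
	struct vlan_tag t = make_tag(0x8100, 100, 0);

	printf("%04x %04x\n", ntohs(t.tpid), ntohs(t.tci));
	return 0;
}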
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a4b724708a1a..d12d6b8b5e8b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -44,6 +44,7 @@
 #include <linux/netfilter_ipv4.h>
 #include <linux/inetdevice.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
 #include <linux/openvswitch.h>
 #include <linux/rculist.h>
 #include <linux/dmi.h>
@@ -55,39 +56,61 @@
 #include "datapath.h"
 #include "flow.h"
 #include "vport-internal_dev.h"
+#include "vport-netdev.h"
 
-/**
- * struct ovs_net - Per net-namespace data for ovs.
- * @dps: List of datapaths to enable dumping them all out.
- * Protected by genl_mutex.
- */
-struct ovs_net {
-	struct list_head dps;
-};
-
-static int ovs_net_id __read_mostly;
 
 #define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
 static void rehash_flow_table(struct work_struct *work);
 static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
 
+int ovs_net_id __read_mostly;
+
+static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
+		       struct genl_multicast_group *grp)
+{
+	genl_notify(skb, genl_info_net(info), info->snd_portid,
+		    grp->id, info->nlhdr, GFP_KERNEL);
+}
+
 /**
  * DOC: Locking:
  *
- * Writes to device state (add/remove datapath, port, set operations on vports,
- * etc.) are protected by RTNL.
- *
- * Writes to other state (flow table modifications, set miscellaneous datapath
- * parameters, etc.) are protected by genl_mutex. The RTNL lock nests inside
- * genl_mutex.
+ * All writes e.g. Writes to device state (add/remove datapath, port, set
+ * operations on vports, etc.), Writes to other state (flow table
+ * modifications, set miscellaneous datapath parameters, etc.) are protected
+ * by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of above and don't interact with
 * each other.
+ *
+ * The RTNL lock nests inside ovs_mutex.
 */
 
+static DEFINE_MUTEX(ovs_mutex);
+
+void ovs_lock(void)
+{
+	mutex_lock(&ovs_mutex);
+}
+
+void ovs_unlock(void)
+{
+	mutex_unlock(&ovs_mutex);
+}
+
+#ifdef CONFIG_LOCKDEP
+int lockdep_ovsl_is_held(void)
+{
+	if (debug_locks)
+		return lockdep_is_held(&ovs_mutex);
+	else
+		return 1;
+}
+#endif
+
 static struct vport *new_vport(const struct vport_parms *);
 static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
 			     const struct dp_upcall_info *);
@@ -95,7 +118,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex,
 				  struct sk_buff *,
 				  const struct dp_upcall_info *);
 
-/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
+/* Must be called with rcu_read_lock or ovs_mutex. */
 static struct datapath *get_dp(struct net *net, int dp_ifindex)
 {
 	struct datapath *dp = NULL;
@@ -113,10 +136,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
 	return dp;
 }
 
-/* Must be called with rcu_read_lock or RTNL lock. */
+/* Must be called with rcu_read_lock or ovs_mutex. */
 const char *ovs_dp_name(const struct datapath *dp)
 {
-	struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
+	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
 	return vport->ops->get_name(vport);
 }
 
@@ -129,7 +152,7 @@ static int get_dpifindex(struct datapath *dp)
 
 	local = ovs_vport_rcu(dp, OVSP_LOCAL);
 	if (local)
-		ifindex = local->ops->get_ifindex(local);
+		ifindex = netdev_vport_priv(local)->dev->ifindex;
 	else
 		ifindex = 0;
 
@@ -168,7 +191,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
 	return NULL;
 }
 
-/* Called with RTNL lock and genl_lock. */
+/* Called with ovs_mutex. */
 static struct vport *new_vport(const struct vport_parms *parms)
 {
 	struct vport *vport;
@@ -180,14 +203,12 @@ static struct vport *new_vport(const struct vport_parms *parms)
 
 		hlist_add_head_rcu(&vport->dp_hash_node, head);
 	}
-
 	return vport;
 }
 
-/* Called with RTNL lock. */
 void ovs_dp_detach_port(struct vport *p)
 {
-	ASSERT_RTNL();
+	ASSERT_OVSL();
 
 	/* First drop references to device. */
 	hlist_del_rcu(&p->dp_hash_node);
@@ -250,7 +271,8 @@ static struct genl_family dp_packet_genl_family = {
 	.name = OVS_PACKET_FAMILY,
 	.version = OVS_PACKET_VERSION,
 	.maxattr = OVS_PACKET_ATTR_MAX,
-	.netnsok = true
+	.netnsok = true,
+	.parallel_ops = true,
 };
 
 int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
@@ -337,6 +359,35 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,
 	return err;
 }
 
+static size_t key_attr_size(void)
+{
+	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
+		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
+		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
+		+ nla_total_size(4)   /* OVS_KEY_ATTR_8021Q */
+		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
+		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
+		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
+		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
+		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
+}
+
+static size_t upcall_msg_size(const struct sk_buff *skb,
+			      const struct nlattr *userdata)
+{
+	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+		+ nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */
+		+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
+
+	/* OVS_PACKET_ATTR_USERDATA */
+	if (userdata)
+		size += NLA_ALIGN(userdata->nla_len);
+
+	return size;
+}
+
 static int queue_userspace_packet(struct net *net, int dp_ifindex,
 				  struct sk_buff *skb,
 				  const struct dp_upcall_info *upcall_info)
@@ -345,7 +396,6 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 	struct sk_buff *nskb = NULL;
 	struct sk_buff *user_skb; /* to be queued to userspace */
 	struct nlattr *nla;
-	unsigned int len;
 	int err;
 
 	if (vlan_tx_tag_present(skb)) {
@@ -353,7 +403,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 		if (!nskb)
 			return -ENOMEM;
 
-		nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb));
+		nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));
 		if (!nskb)
 			return -ENOMEM;
 
@@ -366,13 +416,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 		goto out;
 	}
 
-	len = sizeof(struct ovs_header);
-	len += nla_total_size(skb->len);
-	len += nla_total_size(FLOW_BUFSIZE);
-	if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
-		len += nla_total_size(8);
-
-	user_skb = genlmsg_new(len, GFP_ATOMIC);
+	user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);
 	if (!user_skb) {
 		err = -ENOMEM;
 		goto out;
@@ -387,8 +431,9 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
 	nla_nest_end(user_skb, nla);
 
 	if (upcall_info->userdata)
-		nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
-			    nla_get_u64(upcall_info->userdata));
+		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
+			  nla_len(upcall_info->userdata),
+			  nla_data(upcall_info->userdata));
 
 	nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
 
@@ -402,13 +447,13 @@ out:
 	return err;
 }
 
-/* Called with genl_mutex. */
+/* Called with ovs_mutex. */
 static int flush_flows(struct datapath *dp)
 {
 	struct flow_table *old_table;
 	struct flow_table *new_table;
 
-	old_table = genl_dereference(dp->table);
+	old_table = ovsl_dereference(dp->table);
 	new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
 	if (!new_table)
 		return -ENOMEM;
@@ -544,7 +589,7 @@ static int validate_userspace(const struct nlattr *attr)
 {
 	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
 		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
-		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
+		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
 	};
 	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
 	int error;
@@ -661,8 +706,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
 	err = -EINVAL;
 	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
-	    !a[OVS_PACKET_ATTR_ACTIONS] ||
-	    nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
+	    !a[OVS_PACKET_ATTR_ACTIONS])
		goto err;
 
 	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
@@ -672,7 +716,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 		goto err;
 	skb_reserve(packet, NET_IP_ALIGN);
 
675 memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); 719 nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
676 720
677 skb_reset_mac_header(packet); 721 skb_reset_mac_header(packet);
678 eth = eth_hdr(packet); 722 eth = eth_hdr(packet);
@@ -680,7 +724,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
680 /* Normally, setting the skb 'protocol' field would be handled by a 724 /* Normally, setting the skb 'protocol' field would be handled by a
681 * call to eth_type_trans(), but it assumes there's a sending 725 * call to eth_type_trans(), but it assumes there's a sending
682 * device, which we may not have. */ 726 * device, which we may not have. */
683 if (ntohs(eth->h_proto) >= 1536) 727 if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
684 packet->protocol = eth->h_proto; 728 packet->protocol = eth->h_proto;
685 else 729 else
686 packet->protocol = htons(ETH_P_802_2); 730 packet->protocol = htons(ETH_P_802_2);
@@ -743,7 +787,7 @@ err:
743} 787}
744 788
745static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { 789static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
746 [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, 790 [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
747 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, 791 [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
748 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, 792 [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
749}; 793};
@@ -759,7 +803,7 @@ static struct genl_ops dp_packet_genl_ops[] = {
759static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats) 803static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
760{ 804{
761 int i; 805 int i;
762 struct flow_table *table = genl_dereference(dp->table); 806 struct flow_table *table = ovsl_dereference(dp->table);
763 807
764 stats->n_flows = ovs_flow_tbl_count(table); 808 stats->n_flows = ovs_flow_tbl_count(table);
765 809
@@ -794,14 +838,25 @@ static struct genl_family dp_flow_genl_family = {
794 .name = OVS_FLOW_FAMILY, 838 .name = OVS_FLOW_FAMILY,
795 .version = OVS_FLOW_VERSION, 839 .version = OVS_FLOW_VERSION,
796 .maxattr = OVS_FLOW_ATTR_MAX, 840 .maxattr = OVS_FLOW_ATTR_MAX,
797 .netnsok = true 841 .netnsok = true,
842 .parallel_ops = true,
798}; 843};
799 844
800static struct genl_multicast_group ovs_dp_flow_multicast_group = { 845static struct genl_multicast_group ovs_dp_flow_multicast_group = {
801 .name = OVS_FLOW_MCGROUP 846 .name = OVS_FLOW_MCGROUP
802}; 847};
803 848
804/* Called with genl_lock. */ 849static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
850{
851 return NLMSG_ALIGN(sizeof(struct ovs_header))
852 + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
853 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
854 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
855 + nla_total_size(8) /* OVS_FLOW_ATTR_USED */
856 + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */
857}
858
859/* Called with ovs_mutex. */
805static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, 860static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
806 struct sk_buff *skb, u32 portid, 861 struct sk_buff *skb, u32 portid,
807 u32 seq, u32 flags, u8 cmd) 862 u32 seq, u32 flags, u8 cmd)
@@ -815,8 +870,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
815 u8 tcp_flags; 870 u8 tcp_flags;
816 int err; 871 int err;
817 872
818 sf_acts = rcu_dereference_protected(flow->sf_acts, 873 sf_acts = ovsl_dereference(flow->sf_acts);
819 lockdep_genl_is_held());
820 874
821 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); 875 ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
822 if (!ovs_header) 876 if (!ovs_header)
@@ -879,25 +933,10 @@ error:
879static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) 933static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
880{ 934{
881 const struct sw_flow_actions *sf_acts; 935 const struct sw_flow_actions *sf_acts;
882 int len;
883
884 sf_acts = rcu_dereference_protected(flow->sf_acts,
885 lockdep_genl_is_held());
886
887 /* OVS_FLOW_ATTR_KEY */
888 len = nla_total_size(FLOW_BUFSIZE);
889 /* OVS_FLOW_ATTR_ACTIONS */
890 len += nla_total_size(sf_acts->actions_len);
891 /* OVS_FLOW_ATTR_STATS */
892 len += nla_total_size(sizeof(struct ovs_flow_stats));
893 /* OVS_FLOW_ATTR_TCP_FLAGS */
894 len += nla_total_size(1);
895 /* OVS_FLOW_ATTR_USED */
896 len += nla_total_size(8);
897 936
898 len += NLMSG_ALIGN(sizeof(struct ovs_header)); 937 sf_acts = ovsl_dereference(flow->sf_acts);
899 938
900 return genlmsg_new(len, GFP_KERNEL); 939 return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);
901} 940}
902 941
903static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, 942static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
@@ -946,12 +985,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
946 goto error; 985 goto error;
947 } 986 }
948 987
988 ovs_lock();
949 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 989 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
950 error = -ENODEV; 990 error = -ENODEV;
951 if (!dp) 991 if (!dp)
952 goto error; 992 goto err_unlock_ovs;
953 993
954 table = genl_dereference(dp->table); 994 table = ovsl_dereference(dp->table);
955 flow = ovs_flow_tbl_lookup(table, &key, key_len); 995 flow = ovs_flow_tbl_lookup(table, &key, key_len);
956 if (!flow) { 996 if (!flow) {
957 struct sw_flow_actions *acts; 997 struct sw_flow_actions *acts;
@@ -959,7 +999,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
959 /* Bail out if we're not allowed to create a new flow. */ 999 /* Bail out if we're not allowed to create a new flow. */
960 error = -ENOENT; 1000 error = -ENOENT;
961 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) 1001 if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
962 goto error; 1002 goto err_unlock_ovs;
963 1003
964 /* Expand table, if necessary, to make room. */ 1004 /* Expand table, if necessary, to make room. */
965 if (ovs_flow_tbl_need_to_expand(table)) { 1005 if (ovs_flow_tbl_need_to_expand(table)) {
@@ -969,7 +1009,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
969 if (!IS_ERR(new_table)) { 1009 if (!IS_ERR(new_table)) {
970 rcu_assign_pointer(dp->table, new_table); 1010 rcu_assign_pointer(dp->table, new_table);
971 ovs_flow_tbl_deferred_destroy(table); 1011 ovs_flow_tbl_deferred_destroy(table);
972 table = genl_dereference(dp->table); 1012 table = ovsl_dereference(dp->table);
973 } 1013 }
974 } 1014 }
975 1015
@@ -977,7 +1017,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
977 flow = ovs_flow_alloc(); 1017 flow = ovs_flow_alloc();
978 if (IS_ERR(flow)) { 1018 if (IS_ERR(flow)) {
979 error = PTR_ERR(flow); 1019 error = PTR_ERR(flow);
980 goto error; 1020 goto err_unlock_ovs;
981 } 1021 }
982 flow->key = key; 1022 flow->key = key;
983 clear_stats(flow); 1023 clear_stats(flow);
@@ -1010,11 +1050,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1010 error = -EEXIST; 1050 error = -EEXIST;
1011 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW && 1051 if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
1012 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) 1052 info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
1013 goto error; 1053 goto err_unlock_ovs;
1014 1054
1015 /* Update actions. */ 1055 /* Update actions. */
1016 old_acts = rcu_dereference_protected(flow->sf_acts, 1056 old_acts = ovsl_dereference(flow->sf_acts);
1017 lockdep_genl_is_held());
1018 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; 1057 acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
1019 if (acts_attrs && 1058 if (acts_attrs &&
1020 (old_acts->actions_len != nla_len(acts_attrs) || 1059 (old_acts->actions_len != nla_len(acts_attrs) ||
@@ -1025,7 +1064,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1025 new_acts = ovs_flow_actions_alloc(acts_attrs); 1064 new_acts = ovs_flow_actions_alloc(acts_attrs);
1026 error = PTR_ERR(new_acts); 1065 error = PTR_ERR(new_acts);
1027 if (IS_ERR(new_acts)) 1066 if (IS_ERR(new_acts))
1028 goto error; 1067 goto err_unlock_ovs;
1029 1068
1030 rcu_assign_pointer(flow->sf_acts, new_acts); 1069 rcu_assign_pointer(flow->sf_acts, new_acts);
1031 ovs_flow_deferred_free_acts(old_acts); 1070 ovs_flow_deferred_free_acts(old_acts);
@@ -1041,11 +1080,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1041 spin_unlock_bh(&flow->lock); 1080 spin_unlock_bh(&flow->lock);
1042 } 1081 }
1043 } 1082 }
1083 ovs_unlock();
1044 1084
1045 if (!IS_ERR(reply)) 1085 if (!IS_ERR(reply))
1046 genl_notify(reply, genl_info_net(info), info->snd_portid, 1086 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1047 ovs_dp_flow_multicast_group.id, info->nlhdr,
1048 GFP_KERNEL);
1049 else 1087 else
1050 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1088 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1051 ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); 1089 ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
@@ -1053,6 +1091,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
1053 1091
1054error_free_flow: 1092error_free_flow:
1055 ovs_flow_free(flow); 1093 ovs_flow_free(flow);
1094err_unlock_ovs:
1095 ovs_unlock();
1056error: 1096error:
1057 return error; 1097 return error;
1058} 1098}
@@ -1075,21 +1115,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
1075 if (err) 1115 if (err)
1076 return err; 1116 return err;
1077 1117
1118 ovs_lock();
1078 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1119 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1079 if (!dp) 1120 if (!dp) {
1080 return -ENODEV; 1121 err = -ENODEV;
1122 goto unlock;
1123 }
1081 1124
1082 table = genl_dereference(dp->table); 1125 table = ovsl_dereference(dp->table);
1083 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1126 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1084 if (!flow) 1127 if (!flow) {
1085 return -ENOENT; 1128 err = -ENOENT;
1129 goto unlock;
1130 }
1086 1131
1087 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, 1132 reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
1088 info->snd_seq, OVS_FLOW_CMD_NEW); 1133 info->snd_seq, OVS_FLOW_CMD_NEW);
1089 if (IS_ERR(reply)) 1134 if (IS_ERR(reply)) {
1090 return PTR_ERR(reply); 1135 err = PTR_ERR(reply);
1136 goto unlock;
1137 }
1091 1138
1139 ovs_unlock();
1092 return genlmsg_reply(reply, info); 1140 return genlmsg_reply(reply, info);
1141unlock:
1142 ovs_unlock();
1143 return err;
1093} 1144}
1094 1145
1095static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) 1146static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
@@ -1104,25 +1155,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1104 int err; 1155 int err;
1105 int key_len; 1156 int key_len;
1106 1157
1158 ovs_lock();
1107 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1159 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1108 if (!dp) 1160 if (!dp) {
1109 return -ENODEV; 1161 err = -ENODEV;
1110 1162 goto unlock;
1111 if (!a[OVS_FLOW_ATTR_KEY]) 1163 }
1112 return flush_flows(dp);
1113 1164
1165 if (!a[OVS_FLOW_ATTR_KEY]) {
1166 err = flush_flows(dp);
1167 goto unlock;
1168 }
1114 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]); 1169 err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
1115 if (err) 1170 if (err)
1116 return err; 1171 goto unlock;
1117 1172
1118 table = genl_dereference(dp->table); 1173 table = ovsl_dereference(dp->table);
1119 flow = ovs_flow_tbl_lookup(table, &key, key_len); 1174 flow = ovs_flow_tbl_lookup(table, &key, key_len);
1120 if (!flow) 1175 if (!flow) {
1121 return -ENOENT; 1176 err = -ENOENT;
1177 goto unlock;
1178 }
1122 1179
1123 reply = ovs_flow_cmd_alloc_info(flow); 1180 reply = ovs_flow_cmd_alloc_info(flow);
1124 if (!reply) 1181 if (!reply) {
1125 return -ENOMEM; 1182 err = -ENOMEM;
1183 goto unlock;
1184 }
1126 1185
1127 ovs_flow_tbl_remove(table, flow); 1186 ovs_flow_tbl_remove(table, flow);
1128 1187
@@ -1131,10 +1190,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
1131 BUG_ON(err < 0); 1190 BUG_ON(err < 0);
1132 1191
1133 ovs_flow_deferred_free(flow); 1192 ovs_flow_deferred_free(flow);
1193 ovs_unlock();
1134 1194
1135 genl_notify(reply, genl_info_net(info), info->snd_portid, 1195 ovs_notify(reply, info, &ovs_dp_flow_multicast_group);
1136 ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
1137 return 0; 1196 return 0;
1197unlock:
1198 ovs_unlock();
1199 return err;
1138} 1200}
1139 1201
1140static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1202static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1143,11 +1205,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1143 struct datapath *dp; 1205 struct datapath *dp;
1144 struct flow_table *table; 1206 struct flow_table *table;
1145 1207
1208 ovs_lock();
1146 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1209 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1147 if (!dp) 1210 if (!dp) {
1211 ovs_unlock();
1148 return -ENODEV; 1212 return -ENODEV;
1213 }
1149 1214
1150 table = genl_dereference(dp->table); 1215 table = ovsl_dereference(dp->table);
1151 1216
1152 for (;;) { 1217 for (;;) {
1153 struct sw_flow *flow; 1218 struct sw_flow *flow;
@@ -1168,6 +1233,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1168 cb->args[0] = bucket; 1233 cb->args[0] = bucket;
1169 cb->args[1] = obj; 1234 cb->args[1] = obj;
1170 } 1235 }
1236 ovs_unlock();
1171 return skb->len; 1237 return skb->len;
1172} 1238}
1173 1239
@@ -1206,13 +1272,24 @@ static struct genl_family dp_datapath_genl_family = {
1206 .name = OVS_DATAPATH_FAMILY, 1272 .name = OVS_DATAPATH_FAMILY,
1207 .version = OVS_DATAPATH_VERSION, 1273 .version = OVS_DATAPATH_VERSION,
1208 .maxattr = OVS_DP_ATTR_MAX, 1274 .maxattr = OVS_DP_ATTR_MAX,
1209 .netnsok = true 1275 .netnsok = true,
1276 .parallel_ops = true,
1210}; 1277};
1211 1278
1212static struct genl_multicast_group ovs_dp_datapath_multicast_group = { 1279static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
1213 .name = OVS_DATAPATH_MCGROUP 1280 .name = OVS_DATAPATH_MCGROUP
1214}; 1281};
1215 1282
1283static size_t ovs_dp_cmd_msg_size(void)
1284{
1285 size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1286
1287 msgsize += nla_total_size(IFNAMSIZ);
1288 msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1289
1290 return msgsize;
1291}
1292
1216static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, 1293static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1217 u32 portid, u32 seq, u32 flags, u8 cmd) 1294 u32 portid, u32 seq, u32 flags, u8 cmd)
1218{ 1295{
@@ -1251,7 +1328,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1251 struct sk_buff *skb; 1328 struct sk_buff *skb;
1252 int retval; 1329 int retval;
1253 1330
1254 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1331 skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1255 if (!skb) 1332 if (!skb)
1256 return ERR_PTR(-ENOMEM); 1333 return ERR_PTR(-ENOMEM);
1257 1334
@@ -1263,7 +1340,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
1263 return skb; 1340 return skb;
1264} 1341}
1265 1342
1266/* Called with genl_mutex and optionally with RTNL lock also. */ 1343/* Called with ovs_mutex. */
1267static struct datapath *lookup_datapath(struct net *net, 1344static struct datapath *lookup_datapath(struct net *net,
1268 struct ovs_header *ovs_header, 1345 struct ovs_header *ovs_header,
1269 struct nlattr *a[OVS_DP_ATTR_MAX + 1]) 1346 struct nlattr *a[OVS_DP_ATTR_MAX + 1])
@@ -1297,12 +1374,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1297 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) 1374 if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1298 goto err; 1375 goto err;
1299 1376
1300 rtnl_lock(); 1377 ovs_lock();
1301 1378
1302 err = -ENOMEM; 1379 err = -ENOMEM;
1303 dp = kzalloc(sizeof(*dp), GFP_KERNEL); 1380 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1304 if (dp == NULL) 1381 if (dp == NULL)
1305 goto err_unlock_rtnl; 1382 goto err_unlock_ovs;
1306 1383
1307 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); 1384 ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1308 1385
@@ -1353,37 +1430,34 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1353 1430
1354 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id); 1431 ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1355 list_add_tail(&dp->list_node, &ovs_net->dps); 1432 list_add_tail(&dp->list_node, &ovs_net->dps);
1356 rtnl_unlock();
1357 1433
1358 genl_notify(reply, genl_info_net(info), info->snd_portid, 1434 ovs_unlock();
1359 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1435
1360 GFP_KERNEL); 1436 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1361 return 0; 1437 return 0;
1362 1438
1363err_destroy_local_port: 1439err_destroy_local_port:
1364 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); 1440 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1365err_destroy_ports_array: 1441err_destroy_ports_array:
1366 kfree(dp->ports); 1442 kfree(dp->ports);
1367err_destroy_percpu: 1443err_destroy_percpu:
1368 free_percpu(dp->stats_percpu); 1444 free_percpu(dp->stats_percpu);
1369err_destroy_table: 1445err_destroy_table:
1370 ovs_flow_tbl_destroy(genl_dereference(dp->table)); 1446 ovs_flow_tbl_destroy(ovsl_dereference(dp->table));
1371err_free_dp: 1447err_free_dp:
1372 release_net(ovs_dp_get_net(dp)); 1448 release_net(ovs_dp_get_net(dp));
1373 kfree(dp); 1449 kfree(dp);
1374err_unlock_rtnl: 1450err_unlock_ovs:
1375 rtnl_unlock(); 1451 ovs_unlock();
1376err: 1452err:
1377 return err; 1453 return err;
1378} 1454}
1379 1455
1380/* Called with genl_mutex. */ 1456/* Called with ovs_mutex. */
1381static void __dp_destroy(struct datapath *dp) 1457static void __dp_destroy(struct datapath *dp)
1382{ 1458{
1383 int i; 1459 int i;
1384 1460
1385 rtnl_lock();
1386
1387 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { 1461 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1388 struct vport *vport; 1462 struct vport *vport;
1389 struct hlist_node *n; 1463 struct hlist_node *n;
@@ -1394,14 +1468,11 @@ static void __dp_destroy(struct datapath *dp)
1394 } 1468 }
1395 1469
1396 list_del(&dp->list_node); 1470 list_del(&dp->list_node);
1397 ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
1398 1471
1399 /* rtnl_unlock() will wait until all the references to devices that 1472 /* OVSP_LOCAL is datapath internal port. We need to make sure that
1400 * are pending unregistration have been dropped. We do it here to 1473 * all port in datapath are destroyed first before freeing datapath.
1401 * ensure that any internal devices (which contain DP pointers) are
1402 * fully destroyed before freeing the datapath.
1403 */ 1474 */
1404 rtnl_unlock(); 1475 ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1405 1476
1406 call_rcu(&dp->rcu, destroy_dp_rcu); 1477 call_rcu(&dp->rcu, destroy_dp_rcu);
1407} 1478}
@@ -1412,24 +1483,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1412 struct datapath *dp; 1483 struct datapath *dp;
1413 int err; 1484 int err;
1414 1485
1486 ovs_lock();
1415 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1487 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1416 err = PTR_ERR(dp); 1488 err = PTR_ERR(dp);
1417 if (IS_ERR(dp)) 1489 if (IS_ERR(dp))
1418 return err; 1490 goto unlock;
1419 1491
1420 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1492 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1421 info->snd_seq, OVS_DP_CMD_DEL); 1493 info->snd_seq, OVS_DP_CMD_DEL);
1422 err = PTR_ERR(reply); 1494 err = PTR_ERR(reply);
1423 if (IS_ERR(reply)) 1495 if (IS_ERR(reply))
1424 return err; 1496 goto unlock;
1425 1497
1426 __dp_destroy(dp); 1498 __dp_destroy(dp);
1499 ovs_unlock();
1427 1500
1428 genl_notify(reply, genl_info_net(info), info->snd_portid, 1501 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1429 ovs_dp_datapath_multicast_group.id, info->nlhdr,
1430 GFP_KERNEL);
1431 1502
1432 return 0; 1503 return 0;
1504unlock:
1505 ovs_unlock();
1506 return err;
1433} 1507}
1434 1508
1435static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) 1509static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
@@ -1438,9 +1512,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1438 struct datapath *dp; 1512 struct datapath *dp;
1439 int err; 1513 int err;
1440 1514
1515 ovs_lock();
1441 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1516 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1517 err = PTR_ERR(dp);
1442 if (IS_ERR(dp)) 1518 if (IS_ERR(dp))
1443 return PTR_ERR(dp); 1519 goto unlock;
1444 1520
1445 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1521 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1446 info->snd_seq, OVS_DP_CMD_NEW); 1522 info->snd_seq, OVS_DP_CMD_NEW);
@@ -1448,31 +1524,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1448 err = PTR_ERR(reply); 1524 err = PTR_ERR(reply);
1449 netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 1525 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1450 ovs_dp_datapath_multicast_group.id, err); 1526 ovs_dp_datapath_multicast_group.id, err);
1451 return 0; 1527 err = 0;
1528 goto unlock;
1452 } 1529 }
1453 1530
1454 genl_notify(reply, genl_info_net(info), info->snd_portid, 1531 ovs_unlock();
1455 ovs_dp_datapath_multicast_group.id, info->nlhdr, 1532 ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);
1456 GFP_KERNEL);
1457 1533
1458 return 0; 1534 return 0;
1535unlock:
1536 ovs_unlock();
1537 return err;
1459} 1538}
1460 1539
1461static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) 1540static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1462{ 1541{
1463 struct sk_buff *reply; 1542 struct sk_buff *reply;
1464 struct datapath *dp; 1543 struct datapath *dp;
1544 int err;
1465 1545
1546 ovs_lock();
1466 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); 1547 dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1467 if (IS_ERR(dp)) 1548 if (IS_ERR(dp)) {
1468 return PTR_ERR(dp); 1549 err = PTR_ERR(dp);
1550 goto unlock;
1551 }
1469 1552
1470 reply = ovs_dp_cmd_build_info(dp, info->snd_portid, 1553 reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
1471 info->snd_seq, OVS_DP_CMD_NEW); 1554 info->snd_seq, OVS_DP_CMD_NEW);
1472 if (IS_ERR(reply)) 1555 if (IS_ERR(reply)) {
1473 return PTR_ERR(reply); 1556 err = PTR_ERR(reply);
1557 goto unlock;
1558 }
1474 1559
1560 ovs_unlock();
1475 return genlmsg_reply(reply, info); 1561 return genlmsg_reply(reply, info);
1562
1563unlock:
1564 ovs_unlock();
1565 return err;
1476} 1566}
1477 1567
1478static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 1568static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
@@ -1482,6 +1572,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1482 int skip = cb->args[0]; 1572 int skip = cb->args[0];
1483 int i = 0; 1573 int i = 0;
1484 1574
1575 ovs_lock();
1485 list_for_each_entry(dp, &ovs_net->dps, list_node) { 1576 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1486 if (i >= skip && 1577 if (i >= skip &&
1487 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, 1578 ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
@@ -1490,6 +1581,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1490 break; 1581 break;
1491 i++; 1582 i++;
1492 } 1583 }
1584 ovs_unlock();
1493 1585
1494 cb->args[0] = i; 1586 cb->args[0] = i;
1495 1587
@@ -1535,14 +1627,15 @@ static struct genl_family dp_vport_genl_family = {
1535 .name = OVS_VPORT_FAMILY, 1627 .name = OVS_VPORT_FAMILY,
1536 .version = OVS_VPORT_VERSION, 1628 .version = OVS_VPORT_VERSION,
1537 .maxattr = OVS_VPORT_ATTR_MAX, 1629 .maxattr = OVS_VPORT_ATTR_MAX,
1538 .netnsok = true 1630 .netnsok = true,
1631 .parallel_ops = true,
1539}; 1632};
1540 1633
1541struct genl_multicast_group ovs_dp_vport_multicast_group = { 1634struct genl_multicast_group ovs_dp_vport_multicast_group = {
1542 .name = OVS_VPORT_MCGROUP 1635 .name = OVS_VPORT_MCGROUP
1543}; 1636};
1544 1637
1545/* Called with RTNL lock or RCU read lock. */ 1638/* Called with ovs_mutex or RCU read lock. */
1546static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, 1639static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1547 u32 portid, u32 seq, u32 flags, u8 cmd) 1640 u32 portid, u32 seq, u32 flags, u8 cmd)
1548{ 1641{
@@ -1581,7 +1674,7 @@ error:
1581 return err; 1674 return err;
1582} 1675}
1583 1676
1584/* Called with RTNL lock or RCU read lock. */ 1677/* Called with ovs_mutex or RCU read lock. */
1585struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, 1678struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1586 u32 seq, u8 cmd) 1679 u32 seq, u8 cmd)
1587{ 1680{
@@ -1593,14 +1686,12 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1593 return ERR_PTR(-ENOMEM); 1686 return ERR_PTR(-ENOMEM);
1594 1687
1595 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); 1688 retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1596 if (retval < 0) { 1689 BUG_ON(retval < 0);
1597 kfree_skb(skb); 1690
1598 return ERR_PTR(retval);
1599 }
1600 return skb; 1691 return skb;
1601} 1692}
1602 1693
1603/* Called with RTNL lock or RCU read lock. */ 1694/* Called with ovs_mutex or RCU read lock. */
1604static struct vport *lookup_vport(struct net *net, 1695static struct vport *lookup_vport(struct net *net,
1605 struct ovs_header *ovs_header, 1696 struct ovs_header *ovs_header,
1606 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) 1697 struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
@@ -1626,9 +1717,9 @@ static struct vport *lookup_vport(struct net *net,
1626 if (!dp) 1717 if (!dp)
1627 return ERR_PTR(-ENODEV); 1718 return ERR_PTR(-ENODEV);
1628 1719
1629 vport = ovs_vport_rtnl_rcu(dp, port_no); 1720 vport = ovs_vport_ovsl_rcu(dp, port_no);
1630 if (!vport) 1721 if (!vport)
1631 return ERR_PTR(-ENOENT); 1722 return ERR_PTR(-ENODEV);
1632 return vport; 1723 return vport;
1633 } else 1724 } else
1634 return ERR_PTR(-EINVAL); 1725 return ERR_PTR(-EINVAL);
@@ -1650,7 +1741,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1650 !a[OVS_VPORT_ATTR_UPCALL_PID]) 1741 !a[OVS_VPORT_ATTR_UPCALL_PID])
1651 goto exit; 1742 goto exit;
1652 1743
1653 rtnl_lock(); 1744 ovs_lock();
1654 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); 1745 dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1655 err = -ENODEV; 1746 err = -ENODEV;
1656 if (!dp) 1747 if (!dp)
@@ -1663,7 +1754,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1663 if (port_no >= DP_MAX_PORTS) 1754 if (port_no >= DP_MAX_PORTS)
1664 goto exit_unlock; 1755 goto exit_unlock;
1665 1756
1666 vport = ovs_vport_rtnl_rcu(dp, port_no); 1757 vport = ovs_vport_ovsl(dp, port_no);
1667 err = -EBUSY; 1758 err = -EBUSY;
1668 if (vport) 1759 if (vport)
1669 goto exit_unlock; 1760 goto exit_unlock;
@@ -1673,7 +1764,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1673 err = -EFBIG; 1764 err = -EFBIG;
1674 goto exit_unlock; 1765 goto exit_unlock;
1675 } 1766 }
1676 vport = ovs_vport_rtnl(dp, port_no); 1767 vport = ovs_vport_ovsl(dp, port_no);
1677 if (!vport) 1768 if (!vport)
1678 break; 1769 break;
1679 } 1770 }
@@ -1699,11 +1790,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1699 ovs_dp_detach_port(vport); 1790 ovs_dp_detach_port(vport);
1700 goto exit_unlock; 1791 goto exit_unlock;
1701 } 1792 }
1702 genl_notify(reply, genl_info_net(info), info->snd_portid, 1793
1703 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1794 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1704 1795
1705exit_unlock: 1796exit_unlock:
1706 rtnl_unlock(); 1797 ovs_unlock();
1707exit: 1798exit:
1708 return err; 1799 return err;
1709} 1800}
@@ -1715,7 +1806,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1715 struct vport *vport; 1806 struct vport *vport;
1716 int err; 1807 int err;
1717 1808
1718 rtnl_lock(); 1809 ovs_lock();
1719 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1810 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1720 err = PTR_ERR(vport); 1811 err = PTR_ERR(vport);
1721 if (IS_ERR(vport)) 1812 if (IS_ERR(vport))
@@ -1726,26 +1817,35 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1726 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) 1817 nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
1727 err = -EINVAL; 1818 err = -EINVAL;
1728 1819
1820 reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1821 if (!reply) {
1822 err = -ENOMEM;
1823 goto exit_unlock;
1824 }
1825
1729 if (!err && a[OVS_VPORT_ATTR_OPTIONS]) 1826 if (!err && a[OVS_VPORT_ATTR_OPTIONS])
1730 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); 1827 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1731 if (err) 1828 if (err)
1732 goto exit_unlock; 1829 goto exit_free;
1830
1733 if (a[OVS_VPORT_ATTR_UPCALL_PID]) 1831 if (a[OVS_VPORT_ATTR_UPCALL_PID])
1734 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); 1832 vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1735 1833
1736 reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, 1834 err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1737 OVS_VPORT_CMD_NEW); 1835 info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1738 if (IS_ERR(reply)) { 1836 BUG_ON(err < 0);
1739 netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1740 ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
1741 goto exit_unlock;
1742 }
1743 1837
1744 genl_notify(reply, genl_info_net(info), info->snd_portid, 1838 ovs_unlock();
1745 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); 1839 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1840 return 0;
1746 1841
1747exit_unlock:
1748 rtnl_unlock(); 1842 rtnl_unlock();
1843 return 0;
1844
1845exit_free:
1846 kfree_skb(reply);
1847exit_unlock:
1848 ovs_unlock();
1749 return err; 1849 return err;
1750} 1850}
1751 1851
@@ -1756,7 +1856,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1756 struct vport *vport; 1856 struct vport *vport;
1757 int err; 1857 int err;
1758 1858
1759 rtnl_lock(); 1859 ovs_lock();
1760 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a); 1860 vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1761 err = PTR_ERR(vport); 1861 err = PTR_ERR(vport);
1762 if (IS_ERR(vport)) 1862 if (IS_ERR(vport))
@@ -1776,11 +1876,10 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1776 err = 0; 1876 err = 0;
1777 ovs_dp_detach_port(vport); 1877 ovs_dp_detach_port(vport);
1778 1878
1779 genl_notify(reply, genl_info_net(info), info->snd_portid, 1879 ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
1780 ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1781 1880
1782exit_unlock: 1881exit_unlock:
1783 rtnl_unlock(); 1882 ovs_unlock();
1784 return err; 1883 return err;
1785} 1884}
1786 1885
@@ -1940,13 +2039,13 @@ static void rehash_flow_table(struct work_struct *work)
1940 struct datapath *dp; 2039 struct datapath *dp;
1941 struct net *net; 2040 struct net *net;
1942 2041
1943 genl_lock(); 2042 ovs_lock();
1944 rtnl_lock(); 2043 rtnl_lock();
1945 for_each_net(net) { 2044 for_each_net(net) {
1946 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2045 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1947 2046
1948 list_for_each_entry(dp, &ovs_net->dps, list_node) { 2047 list_for_each_entry(dp, &ovs_net->dps, list_node) {
1949 struct flow_table *old_table = genl_dereference(dp->table); 2048 struct flow_table *old_table = ovsl_dereference(dp->table);
1950 struct flow_table *new_table; 2049 struct flow_table *new_table;
1951 2050
1952 new_table = ovs_flow_tbl_rehash(old_table); 2051 new_table = ovs_flow_tbl_rehash(old_table);
@@ -1957,8 +2056,7 @@ static void rehash_flow_table(struct work_struct *work)
1957 } 2056 }
1958 } 2057 }
1959 rtnl_unlock(); 2058 rtnl_unlock();
1960 genl_unlock(); 2059 ovs_unlock();
1961
1962 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL); 2060 schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
1963} 2061}
1964 2062
@@ -1967,18 +2065,21 @@ static int __net_init ovs_init_net(struct net *net)
1967 struct ovs_net *ovs_net = net_generic(net, ovs_net_id); 2065 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1968 2066
1969 INIT_LIST_HEAD(&ovs_net->dps); 2067 INIT_LIST_HEAD(&ovs_net->dps);
2068 INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
1970 return 0; 2069 return 0;
1971} 2070}
1972 2071
1973static void __net_exit ovs_exit_net(struct net *net) 2072static void __net_exit ovs_exit_net(struct net *net)
1974{ 2073{
1975 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1976 struct datapath *dp, *dp_next; 2074 struct datapath *dp, *dp_next;
2075 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1977 2076
1978 genl_lock(); 2077 ovs_lock();
1979 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) 2078 list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1980 __dp_destroy(dp); 2079 __dp_destroy(dp);
1981 genl_unlock(); 2080 ovs_unlock();
2081
2082 cancel_work_sync(&ovs_net->dp_notify_work);
1982} 2083}
1983 2084
1984static struct pernet_operations ovs_net_ops = { 2085static struct pernet_operations ovs_net_ops = {
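
The datapath.c conversion above follows one shape throughout: every Genetlink handler now takes the new ovs_mutex up front, resolves its datapath or vport, and funnels every failure through a single unlock label instead of returning with the lock held. A minimal sketch of that shape, with the handler name and body invented for illustration (ovs_lock(), ovs_unlock() and get_dp() are the helpers this series relies on):

static int ovs_example_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct datapath *dp;
	int err;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}

	/* Handler body runs under ovs_mutex; RCU-published state such as
	 * dp->table is read with ovsl_dereference(). */

	ovs_unlock();
	return 0;

unlock:
	ovs_unlock();
	return err;
}

Because .parallel_ops is now set on each genl_family, handlers can no longer count on genl_mutex serializing them, which is why the lookup itself must happen inside the ovs_lock()/ovs_unlock() critical section.
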
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 031dfbf37c93..16b840695216 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -57,10 +57,9 @@ struct dp_stats_percpu {
57 * struct datapath - datapath for flow-based packet switching 57 * struct datapath - datapath for flow-based packet switching
58 * @rcu: RCU callback head for deferred destruction. 58 * @rcu: RCU callback head for deferred destruction.
59 * @list_node: Element in global 'dps' list. 59 * @list_node: Element in global 'dps' list.
60 * @n_flows: Number of flows currently in flow table. 60 * @table: Current flow table. Protected by ovs_mutex and RCU.
61 * @table: Current flow table. Protected by genl_lock and RCU.
62 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by 61 * @ports: Hash table for ports. %OVSP_LOCAL port always exists. Protected by
63 * RTNL and RCU. 62 * ovs_mutex and RCU.
64 * @stats_percpu: Per-CPU datapath statistics. 63 * @stats_percpu: Per-CPU datapath statistics.
65 * @net: Reference to net namespace. 64 * @net: Reference to net namespace.
66 * 65 *
@@ -86,26 +85,6 @@ struct datapath {
86#endif 85#endif
87}; 86};
88 87
89struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
90
91static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
92{
93 WARN_ON_ONCE(!rcu_read_lock_held());
94 return ovs_lookup_vport(dp, port_no);
95}
96
97static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
98{
99 WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
100 return ovs_lookup_vport(dp, port_no);
101}
102
103static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
104{
105 ASSERT_RTNL();
106 return ovs_lookup_vport(dp, port_no);
107}
108
109/** 88/**
110 * struct ovs_skb_cb - OVS data in skb CB 89 * struct ovs_skb_cb - OVS data in skb CB
111 * @flow: The flow associated with this packet. May be %NULL if no flow. 90 * @flow: The flow associated with this packet. May be %NULL if no flow.
@@ -119,7 +98,7 @@ struct ovs_skb_cb {
119 * struct dp_upcall - metadata to include with a packet to send to userspace 98 * struct dp_upcall - metadata to include with a packet to send to userspace
120 * @cmd: One of %OVS_PACKET_CMD_*. 99 * @cmd: One of %OVS_PACKET_CMD_*.
121 * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull. 100 * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull.
122 * @userdata: If nonnull, its u64 value is extracted and passed to userspace as 101 * @userdata: If nonnull, its variable-length value is passed to userspace as
123 * %OVS_PACKET_ATTR_USERDATA. 102 * %OVS_PACKET_ATTR_USERDATA.
124 * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no 103 * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no
125 * packet is sent and the packet is accounted in the datapath's @n_lost 104 * packet is sent and the packet is accounted in the datapath's @n_lost
@@ -132,6 +111,30 @@ struct dp_upcall_info {
132 u32 portid; 111 u32 portid;
133}; 112};
134 113
114/**
115 * struct ovs_net - Per net-namespace data for ovs.
116 * @dps: List of datapaths to enable dumping them all out.
 117 * Protected by ovs_mutex.
118 */
119struct ovs_net {
120 struct list_head dps;
121 struct work_struct dp_notify_work;
122};
123
124extern int ovs_net_id;
125void ovs_lock(void);
126void ovs_unlock(void);
127
128#ifdef CONFIG_LOCKDEP
129int lockdep_ovsl_is_held(void);
130#else
131#define lockdep_ovsl_is_held() 1
132#endif
133
134#define ASSERT_OVSL() WARN_ON(unlikely(!lockdep_ovsl_is_held()))
135#define ovsl_dereference(p) \
136 rcu_dereference_protected(p, lockdep_ovsl_is_held())
137
135static inline struct net *ovs_dp_get_net(struct datapath *dp) 138static inline struct net *ovs_dp_get_net(struct datapath *dp)
136{ 139{
137 return read_pnet(&dp->net); 140 return read_pnet(&dp->net);
@@ -142,6 +145,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)
142 write_pnet(&dp->net, net); 145 write_pnet(&dp->net, net);
143} 146}
144 147
148struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no);
149
150static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no)
151{
152 WARN_ON_ONCE(!rcu_read_lock_held());
153 return ovs_lookup_vport(dp, port_no);
154}
155
156static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
157{
158 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
159 return ovs_lookup_vport(dp, port_no);
160}
161
162static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
163{
164 ASSERT_OVSL();
165 return ovs_lookup_vport(dp, port_no);
166}
167
145extern struct notifier_block ovs_dp_device_notifier; 168extern struct notifier_block ovs_dp_device_notifier;
146extern struct genl_multicast_group ovs_dp_vport_multicast_group; 169extern struct genl_multicast_group ovs_dp_vport_multicast_group;
147 170
@@ -155,4 +178,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
155 u8 cmd); 178 u8 cmd);
156 179
157int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); 180int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
181void ovs_dp_notify_wq(struct work_struct *work);
158#endif /* datapath.h */ 182#endif /* datapath.h */
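
With the helpers above in datapath.h, a lockdep build turns every ovsl_dereference() into rcu_dereference_protected(p, lockdep_ovsl_is_held()), so reading RCU-published state without ovs_mutex is flagged at runtime. A short usage sketch, assuming dp is a valid struct datapath pointer:

	struct flow_table *table;

	ovs_lock();
	table = ovsl_dereference(dp->table);	/* lockdep-checked read */
	/* ... update the table; readers continue under RCU ... */
	ovs_unlock();

The ovs_vport_ovsl() and ovs_vport_ovsl_rcu() inlines apply the same idea to port lookups, replacing the old ovs_vport_rtnl() variants one for one.
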
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 5558350e0d33..ef4feec6cd84 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -18,46 +18,78 @@
18 18
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <net/genetlink.h> 20#include <net/genetlink.h>
21#include <net/netns/generic.h>
21 22
22#include "datapath.h" 23#include "datapath.h"
23#include "vport-internal_dev.h" 24#include "vport-internal_dev.h"
24#include "vport-netdev.h" 25#include "vport-netdev.h"
25 26
27static void dp_detach_port_notify(struct vport *vport)
28{
29 struct sk_buff *notify;
30 struct datapath *dp;
31
32 dp = vport->dp;
33 notify = ovs_vport_cmd_build_info(vport, 0, 0,
34 OVS_VPORT_CMD_DEL);
35 ovs_dp_detach_port(vport);
36 if (IS_ERR(notify)) {
37 netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
38 ovs_dp_vport_multicast_group.id,
39 PTR_ERR(notify));
40 return;
41 }
42
43 genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
44 ovs_dp_vport_multicast_group.id,
45 GFP_KERNEL);
46}
47
48void ovs_dp_notify_wq(struct work_struct *work)
49{
50 struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work);
51 struct datapath *dp;
52
53 ovs_lock();
54 list_for_each_entry(dp, &ovs_net->dps, list_node) {
55 int i;
56
57 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
58 struct vport *vport;
59 struct hlist_node *n;
60
61 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
62 struct netdev_vport *netdev_vport;
63
64 if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
65 continue;
66
67 netdev_vport = netdev_vport_priv(vport);
68 if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED ||
69 netdev_vport->dev->reg_state == NETREG_UNREGISTERING)
70 dp_detach_port_notify(vport);
71 }
72 }
73 }
74 ovs_unlock();
75}
76
26static int dp_device_event(struct notifier_block *unused, unsigned long event, 77static int dp_device_event(struct notifier_block *unused, unsigned long event,
27 void *ptr) 78 void *ptr)
28{ 79{
80 struct ovs_net *ovs_net;
29 struct net_device *dev = ptr; 81 struct net_device *dev = ptr;
30 struct vport *vport; 82 struct vport *vport = NULL;
31 83
32 if (ovs_is_internal_dev(dev)) 84 if (!ovs_is_internal_dev(dev))
33 vport = ovs_internal_dev_get_vport(dev);
34 else
35 vport = ovs_netdev_get_vport(dev); 85 vport = ovs_netdev_get_vport(dev);
36 86
37 if (!vport) 87 if (!vport)
38 return NOTIFY_DONE; 88 return NOTIFY_DONE;
39 89
40 switch (event) { 90 if (event == NETDEV_UNREGISTER) {
41 case NETDEV_UNREGISTER: 91 ovs_net = net_generic(dev_net(dev), ovs_net_id);
42 if (!ovs_is_internal_dev(dev)) { 92 queue_work(system_wq, &ovs_net->dp_notify_work);
43 struct sk_buff *notify;
44 struct datapath *dp = vport->dp;
45
46 notify = ovs_vport_cmd_build_info(vport, 0, 0,
47 OVS_VPORT_CMD_DEL);
48 ovs_dp_detach_port(vport);
49 if (IS_ERR(notify)) {
50 netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0,
51 ovs_dp_vport_multicast_group.id,
52 PTR_ERR(notify));
53 break;
54 }
55
56 genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0,
57 ovs_dp_vport_multicast_group.id,
58 GFP_KERNEL);
59 }
60 break;
61 } 93 }
62 94
63 return NOTIFY_DONE; 95 return NOTIFY_DONE;
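
The rewritten notifier no longer detaches ports inline: dp_device_event() runs under RTNL, while the teardown now needs ovs_mutex, so NETDEV_UNREGISTER merely queues per-namespace work and ovs_dp_notify_wq() does the sweep later. The lifecycle, pulled together from the call sites in this series:

	/* ovs_init_net(): arm the work item once per namespace. */
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);

	/* dp_device_event(): defer out of notifier (RTNL) context. */
	queue_work(system_wq, &ovs_net->dp_notify_work);

	/* ovs_exit_net(): make sure no sweep outlives the namespace. */
	cancel_work_sync(&ovs_net->dp_notify_work);

Ordering matters in ovs_exit_net(): the datapaths are destroyed under ovs_mutex first, and cancel_work_sync() runs only after ovs_unlock(), so a queued sweep cannot deadlock against the exit path.
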
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index fe0e4215c73d..b15321a2228c 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -211,7 +211,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
211 return ERR_PTR(-ENOMEM); 211 return ERR_PTR(-ENOMEM);
212 212
213 sfa->actions_len = actions_len; 213 sfa->actions_len = actions_len;
214 memcpy(sfa->actions, nla_data(actions), actions_len); 214 nla_memcpy(sfa->actions, actions, actions_len);
215 return sfa; 215 return sfa;
216} 216}
217 217
@@ -466,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
466 proto = *(__be16 *) skb->data; 466 proto = *(__be16 *) skb->data;
467 __skb_pull(skb, sizeof(__be16)); 467 __skb_pull(skb, sizeof(__be16));
468 468
469 if (ntohs(proto) >= 1536) 469 if (ntohs(proto) >= ETH_P_802_3_MIN)
470 return proto; 470 return proto;
471 471
472 if (skb->len < sizeof(struct llc_snap_hdr)) 472 if (skb->len < sizeof(struct llc_snap_hdr))
@@ -483,7 +483,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
483 483
484 __skb_pull(skb, sizeof(struct llc_snap_hdr)); 484 __skb_pull(skb, sizeof(struct llc_snap_hdr));
485 485
486 if (ntohs(llc->ethertype) >= 1536) 486 if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
487 return llc->ethertype; 487 return llc->ethertype;
488 488
489 return htons(ETH_P_802_2); 489 return htons(ETH_P_802_2);
@@ -795,9 +795,9 @@ void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
795 795
796void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) 796void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
797{ 797{
798 BUG_ON(table->count == 0);
798 hlist_del_rcu(&flow->hash_node[table->node_ver]); 799 hlist_del_rcu(&flow->hash_node[table->node_ver]);
799 table->count--; 800 table->count--;
800 BUG_ON(table->count < 0);
801} 801}
802 802
803/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ 803/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
@@ -1038,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
1038 1038
1039 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { 1039 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1040 swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); 1040 swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1041 if (ntohs(swkey->eth.type) < 1536) 1041 if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)
1042 return -EINVAL; 1042 return -EINVAL;
1043 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); 1043 attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1044 } else { 1044 } else {
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a7bb60ff3b5b..0875fde65b9c 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -138,27 +138,6 @@ int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
138void ovs_flow_used(struct sw_flow *, struct sk_buff *); 138void ovs_flow_used(struct sw_flow *, struct sk_buff *);
139u64 ovs_flow_used_time(unsigned long flow_jiffies); 139u64 ovs_flow_used_time(unsigned long flow_jiffies);
140 140
141/* Upper bound on the length of a nlattr-formatted flow key. The longest
142 * nlattr-formatted flow key would be:
143 *
144 * struct pad nl hdr total
145 * ------ --- ------ -----
146 * OVS_KEY_ATTR_PRIORITY 4 -- 4 8
147 * OVS_KEY_ATTR_IN_PORT 4 -- 4 8
148 * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8
149 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16
150 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype)
151 * OVS_KEY_ATTR_8021Q 4 -- 4 8
152 * OVS_KEY_ATTR_ENCAP 0 -- 4 4 (VLAN encapsulation)
153 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (inner VLAN ethertype)
154 * OVS_KEY_ATTR_IPV6 40 -- 4 44
155 * OVS_KEY_ATTR_ICMPV6 2 2 4 8
156 * OVS_KEY_ATTR_ND 28 -- 4 32
157 * -------------------------------------------------
158 * total 152
159 */
160#define FLOW_BUFSIZE 152
161
162int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); 141int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
163int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, 142int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
164 const struct nlattr *); 143 const struct nlattr *);
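
Dropping FLOW_BUFSIZE means reply and upcall buffers are no longer sized against a hand-maintained worst-case table; datapath.c now builds each message size from nla_total_size() terms (see key_attr_size(), upcall_msg_size() and ovs_flow_cmd_msg_size() above). A sketch of how such a size is assembled, with the attribute mix invented for illustration:

#include <linux/openvswitch.h>
#include <net/netlink.h>

/* Each attribute costs its payload rounded to NLA_ALIGNTO plus a
 * struct nlattr header; the Genetlink/ovs header is added once. */
static size_t example_msg_size(const struct nlattr *userdata)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(4);		/* a u32 attribute */

	if (userdata)				/* optional, variable length */
		size += NLA_ALIGN(userdata->nla_len);

	return size;
}

Sizing exactly is also what lets ovs_dp_cmd_build_info() switch from nlmsg_new(NLMSG_DEFAULT_SIZE, ...) to genlmsg_new(ovs_dp_cmd_msg_size(), ...), so datapath replies allocate only what they fill.
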
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 0531de6c7a4a..84e0a0379186 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -63,16 +63,6 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde
63 return stats; 63 return stats;
64} 64}
65 65
66static int internal_dev_mac_addr(struct net_device *dev, void *p)
67{
68 struct sockaddr *addr = p;
69
70 if (!is_valid_ether_addr(addr->sa_data))
71 return -EADDRNOTAVAIL;
72 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
73 return 0;
74}
75
76/* Called with rcu_read_lock_bh. */ 66/* Called with rcu_read_lock_bh. */
77static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) 67static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
78{ 68{
@@ -126,7 +116,7 @@ static const struct net_device_ops internal_dev_netdev_ops = {
126 .ndo_open = internal_dev_open, 116 .ndo_open = internal_dev_open,
127 .ndo_stop = internal_dev_stop, 117 .ndo_stop = internal_dev_stop,
128 .ndo_start_xmit = internal_dev_xmit, 118 .ndo_start_xmit = internal_dev_xmit,
129 .ndo_set_mac_address = internal_dev_mac_addr, 119 .ndo_set_mac_address = eth_mac_addr,
130 .ndo_change_mtu = internal_dev_change_mtu, 120 .ndo_change_mtu = internal_dev_change_mtu,
131 .ndo_get_stats64 = internal_dev_get_stats, 121 .ndo_get_stats64 = internal_dev_get_stats,
132}; 122};
@@ -138,6 +128,7 @@ static void do_setup(struct net_device *netdev)
138 netdev->netdev_ops = &internal_dev_netdev_ops; 128 netdev->netdev_ops = &internal_dev_netdev_ops;
139 129
140 netdev->priv_flags &= ~IFF_TX_SKB_SHARING; 130 netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
131 netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
141 netdev->destructor = internal_dev_destructor; 132 netdev->destructor = internal_dev_destructor;
142 SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); 133 SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
143 netdev->tx_queue_len = 0; 134 netdev->tx_queue_len = 0;
@@ -146,7 +137,7 @@ static void do_setup(struct net_device *netdev)
146 NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO; 137 NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
147 138
148 netdev->vlan_features = netdev->features; 139 netdev->vlan_features = netdev->features;
149 netdev->features |= NETIF_F_HW_VLAN_TX; 140 netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;
150 netdev->hw_features = netdev->features & ~NETIF_F_LLTX; 141 netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
151 eth_hw_addr_random(netdev); 142 eth_hw_addr_random(netdev);
152} 143}
@@ -182,16 +173,19 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
182 if (vport->port_no == OVSP_LOCAL) 173 if (vport->port_no == OVSP_LOCAL)
183 netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL; 174 netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
184 175
176 rtnl_lock();
185 err = register_netdevice(netdev_vport->dev); 177 err = register_netdevice(netdev_vport->dev);
186 if (err) 178 if (err)
187 goto error_free_netdev; 179 goto error_free_netdev;
188 180
189 dev_set_promiscuity(netdev_vport->dev, 1); 181 dev_set_promiscuity(netdev_vport->dev, 1);
182 rtnl_unlock();
190 netif_start_queue(netdev_vport->dev); 183 netif_start_queue(netdev_vport->dev);
191 184
192 return vport; 185 return vport;
193 186
194error_free_netdev: 187error_free_netdev:
188 rtnl_unlock();
195 free_netdev(netdev_vport->dev); 189 free_netdev(netdev_vport->dev);
196error_free_vport: 190error_free_vport:
197 ovs_vport_free(vport); 191 ovs_vport_free(vport);
@@ -204,10 +198,13 @@ static void internal_dev_destroy(struct vport *vport)
204 struct netdev_vport *netdev_vport = netdev_vport_priv(vport); 198 struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
205 199
206 netif_stop_queue(netdev_vport->dev); 200 netif_stop_queue(netdev_vport->dev);
201 rtnl_lock();
207 dev_set_promiscuity(netdev_vport->dev, -1); 202 dev_set_promiscuity(netdev_vport->dev, -1);
208 203
209 /* unregister_netdevice() waits for an RCU grace period. */ 204 /* unregister_netdevice() waits for an RCU grace period. */
210 unregister_netdevice(netdev_vport->dev); 205 unregister_netdevice(netdev_vport->dev);
206
207 rtnl_unlock();
211} 208}
212 209
213static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) 210static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
@@ -235,7 +232,6 @@ const struct vport_ops ovs_internal_vport_ops = {
235 .create = internal_dev_create, 232 .create = internal_dev_create,
236 .destroy = internal_dev_destroy, 233 .destroy = internal_dev_destroy,
237 .get_name = ovs_netdev_get_name, 234 .get_name = ovs_netdev_get_name,
238 .get_ifindex = ovs_netdev_get_ifindex,
239 .send = internal_dev_recv, 235 .send = internal_dev_recv,
240}; 236};
241 237
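
With ovs_mutex replacing RTNL as the datapath lock, the vport create and destroy paths must now take RTNL themselves around the netdevice calls that require it, as the internal-dev hunks above (and the vport-netdev.c ones below) do. The bracketing on the create side, sketched from internal_dev_create():

	rtnl_lock();
	err = register_netdevice(netdev_vport->dev);
	if (err)
		goto error_free_netdev;	/* label drops RTNL before freeing */

	dev_set_promiscuity(netdev_vport->dev, 1);
	rtnl_unlock();

Keeping the RTNL section this narrow is the point of the series: port and flow operations elsewhere serialize on ovs_mutex alone and no longer contend with the rest of the networking stack.
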
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 2130d61c384a..4f01c6d2ffa4 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -100,16 +100,20 @@ static struct vport *netdev_create(const struct vport_parms *parms)
100 goto error_put; 100 goto error_put;
101 } 101 }
102 102
103 rtnl_lock();
103 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook, 104 err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
104 vport); 105 vport);
105 if (err) 106 if (err)
106 goto error_put; 107 goto error_unlock;
107 108
108 dev_set_promiscuity(netdev_vport->dev, 1); 109 dev_set_promiscuity(netdev_vport->dev, 1);
109 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; 110 netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
111 rtnl_unlock();
110 112
111 return vport; 113 return vport;
112 114
115error_unlock:
116 rtnl_unlock();
113error_put: 117error_put:
114 dev_put(netdev_vport->dev); 118 dev_put(netdev_vport->dev);
115error_free_vport: 119error_free_vport:
@@ -131,9 +135,11 @@ static void netdev_destroy(struct vport *vport)
131{ 135{
132 struct netdev_vport *netdev_vport = netdev_vport_priv(vport); 136 struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
133 137
138 rtnl_lock();
134 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; 139 netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
135 netdev_rx_handler_unregister(netdev_vport->dev); 140 netdev_rx_handler_unregister(netdev_vport->dev);
136 dev_set_promiscuity(netdev_vport->dev, -1); 141 dev_set_promiscuity(netdev_vport->dev, -1);
142 rtnl_unlock();
137 143
138 call_rcu(&netdev_vport->rcu, free_port_rcu); 144 call_rcu(&netdev_vport->rcu, free_port_rcu);
139} 145}
@@ -144,12 +150,6 @@ const char *ovs_netdev_get_name(const struct vport *vport)
144 return netdev_vport->dev->name; 150 return netdev_vport->dev->name;
145} 151}
146 152
147int ovs_netdev_get_ifindex(const struct vport *vport)
148{
149 const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
150 return netdev_vport->dev->ifindex;
151}
152
153static unsigned int packet_length(const struct sk_buff *skb) 153static unsigned int packet_length(const struct sk_buff *skb)
154{ 154{
155 unsigned int length = skb->len - ETH_HLEN; 155 unsigned int length = skb->len - ETH_HLEN;
@@ -200,6 +200,5 @@ const struct vport_ops ovs_netdev_vport_ops = {
200 .create = netdev_create, 200 .create = netdev_create,
201 .destroy = netdev_destroy, 201 .destroy = netdev_destroy,
202 .get_name = ovs_netdev_get_name, 202 .get_name = ovs_netdev_get_name,
203 .get_ifindex = ovs_netdev_get_ifindex,
204 .send = netdev_send, 203 .send = netdev_send,
205}; 204};
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index 6478079b3417..a3cb3a32cd77 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -40,6 +40,5 @@ netdev_vport_priv(const struct vport *vport)
40 40
41const char *ovs_netdev_get_name(const struct vport *); 41const char *ovs_netdev_get_name(const struct vport *);
42const char *ovs_netdev_get_config(const struct vport *); 42const char *ovs_netdev_get_config(const struct vport *);
43int ovs_netdev_get_ifindex(const struct vport *);
44 43
45#endif /* vport_netdev.h */ 44#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index f6b8132ce4cb..720623190eaa 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -40,7 +40,7 @@ static const struct vport_ops *vport_ops_list[] = {
40 &ovs_internal_vport_ops, 40 &ovs_internal_vport_ops,
41}; 41};
42 42
43/* Protected by RCU read lock for reading, RTNL lock for writing. */ 43/* Protected by RCU read lock for reading, ovs_mutex for writing. */
44static struct hlist_head *dev_table; 44static struct hlist_head *dev_table;
45#define VPORT_HASH_BUCKETS 1024 45#define VPORT_HASH_BUCKETS 1024
46 46
@@ -80,7 +80,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)
80 * 80 *
81 * @name: name of port to find 81 * @name: name of port to find
82 * 82 *
 83 * Must be called with RTNL or RCU read lock. 83 * Must be called with ovs_mutex or RCU read lock.
84 */ 84 */
85struct vport *ovs_vport_locate(struct net *net, const char *name) 85struct vport *ovs_vport_locate(struct net *net, const char *name)
86{ 86{
@@ -128,7 +128,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
128 vport->ops = ops; 128 vport->ops = ops;
129 INIT_HLIST_NODE(&vport->dp_hash_node); 129 INIT_HLIST_NODE(&vport->dp_hash_node);
130 130
131 vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); 131 vport->percpu_stats = alloc_percpu(struct pcpu_tstats);
132 if (!vport->percpu_stats) { 132 if (!vport->percpu_stats) {
133 kfree(vport); 133 kfree(vport);
134 return ERR_PTR(-ENOMEM); 134 return ERR_PTR(-ENOMEM);
@@ -161,7 +161,7 @@ void ovs_vport_free(struct vport *vport)
161 * @parms: Information about new vport. 161 * @parms: Information about new vport.
162 * 162 *
163 * Creates a new vport with the specified configuration (which is dependent on 163 * Creates a new vport with the specified configuration (which is dependent on
164 * device type). RTNL lock must be held. 164 * device type). ovs_mutex must be held.
165 */ 165 */
166struct vport *ovs_vport_add(const struct vport_parms *parms) 166struct vport *ovs_vport_add(const struct vport_parms *parms)
167{ 167{
@@ -169,8 +169,6 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
169 int err = 0; 169 int err = 0;
170 int i; 170 int i;
171 171
172 ASSERT_RTNL();
173
174 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) { 172 for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
175 if (vport_ops_list[i]->type == parms->type) { 173 if (vport_ops_list[i]->type == parms->type) {
176 struct hlist_head *bucket; 174 struct hlist_head *bucket;
@@ -201,12 +199,10 @@ out:
201 * @port: New configuration. 199 * @port: New configuration.
202 * 200 *
203 * Modifies an existing device with the specified configuration (which is 201 * Modifies an existing device with the specified configuration (which is
204 * dependent on device type). RTNL lock must be held. 202 * dependent on device type). ovs_mutex must be held.
205 */ 203 */
206int ovs_vport_set_options(struct vport *vport, struct nlattr *options) 204int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
207{ 205{
208 ASSERT_RTNL();
209
210 if (!vport->ops->set_options) 206 if (!vport->ops->set_options)
211 return -EOPNOTSUPP; 207 return -EOPNOTSUPP;
212 return vport->ops->set_options(vport, options); 208 return vport->ops->set_options(vport, options);
@@ -218,11 +214,11 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
218 * @vport: vport to delete. 214 * @vport: vport to delete.
219 * 215 *
220 * Detaches @vport from its datapath and destroys it. It is possible to fail 216 * Detaches @vport from its datapath and destroys it. It is possible to fail
221 * for reasons such as lack of memory. RTNL lock must be held. 217 * for reasons such as lack of memory. ovs_mutex must be held.
222 */ 218 */
223void ovs_vport_del(struct vport *vport) 219void ovs_vport_del(struct vport *vport)
224{ 220{
225 ASSERT_RTNL(); 221 ASSERT_OVSL();
226 222
227 hlist_del_rcu(&vport->hash_node); 223 hlist_del_rcu(&vport->hash_node);
228 224
@@ -237,7 +233,7 @@ void ovs_vport_del(struct vport *vport)
237 * 233 *
238 * Retrieves transmit, receive, and error stats for the given device. 234 * Retrieves transmit, receive, and error stats for the given device.
239 * 235 *
240 * Must be called with RTNL lock or rcu_read_lock. 236 * Must be called with ovs_mutex or rcu_read_lock.
241 */ 237 */
242void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) 238void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
243{ 239{
@@ -264,16 +260,16 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
264 spin_unlock_bh(&vport->stats_lock); 260 spin_unlock_bh(&vport->stats_lock);
265 261
266 for_each_possible_cpu(i) { 262 for_each_possible_cpu(i) {
267 const struct vport_percpu_stats *percpu_stats; 263 const struct pcpu_tstats *percpu_stats;
268 struct vport_percpu_stats local_stats; 264 struct pcpu_tstats local_stats;
269 unsigned int start; 265 unsigned int start;
270 266
271 percpu_stats = per_cpu_ptr(vport->percpu_stats, i); 267 percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
272 268
273 do { 269 do {
274 start = u64_stats_fetch_begin_bh(&percpu_stats->sync); 270 start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);
275 local_stats = *percpu_stats; 271 local_stats = *percpu_stats;
276 } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); 272 } while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));
277 273
278 stats->rx_bytes += local_stats.rx_bytes; 274 stats->rx_bytes += local_stats.rx_bytes;
279 stats->rx_packets += local_stats.rx_packets; 275 stats->rx_packets += local_stats.rx_packets;
@@ -296,22 +292,24 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
296 * negative error code if a real error occurred. If an error occurs, @skb is 292 * negative error code if a real error occurred. If an error occurs, @skb is
297 * left unmodified. 293 * left unmodified.
298 * 294 *
299 * Must be called with RTNL lock or rcu_read_lock. 295 * Must be called with ovs_mutex or rcu_read_lock.
300 */ 296 */
301int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) 297int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
302{ 298{
303 struct nlattr *nla; 299 struct nlattr *nla;
300 int err;
301
302 if (!vport->ops->get_options)
303 return 0;
304 304
305 nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS); 305 nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
306 if (!nla) 306 if (!nla)
307 return -EMSGSIZE; 307 return -EMSGSIZE;
308 308
309 if (vport->ops->get_options) { 309 err = vport->ops->get_options(vport, skb);
310 int err = vport->ops->get_options(vport, skb); 310 if (err) {
311 if (err) { 311 nla_nest_cancel(skb, nla);
312 nla_nest_cancel(skb, nla); 312 return err;
313 return err;
314 }
315 } 313 }
316 314
317 nla_nest_end(skb, nla); 315 nla_nest_end(skb, nla);
@@ -329,13 +327,13 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
329 */ 327 */
330void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) 328void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
331{ 329{
332 struct vport_percpu_stats *stats; 330 struct pcpu_tstats *stats;
333 331
334 stats = this_cpu_ptr(vport->percpu_stats); 332 stats = this_cpu_ptr(vport->percpu_stats);
335 u64_stats_update_begin(&stats->sync); 333 u64_stats_update_begin(&stats->syncp);
336 stats->rx_packets++; 334 stats->rx_packets++;
337 stats->rx_bytes += skb->len; 335 stats->rx_bytes += skb->len;
338 u64_stats_update_end(&stats->sync); 336 u64_stats_update_end(&stats->syncp);
339 337
340 ovs_dp_process_received_packet(vport, skb); 338 ovs_dp_process_received_packet(vport, skb);
341} 339}
@@ -346,7 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
346 * @vport: vport on which to send the packet 344 * @vport: vport on which to send the packet
347 * @skb: skb to send 345 * @skb: skb to send
348 * 346 *
349 * Sends the given packet and returns the length of data sent. Either RTNL 347 * Sends the given packet and returns the length of data sent. Either
350 * lock or rcu_read_lock must be held. 348 * ovs_mutex or rcu_read_lock must be held.
351 */ 349 */
352int ovs_vport_send(struct vport *vport, struct sk_buff *skb) 350int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
@@ -354,14 +352,14 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
354 int sent = vport->ops->send(vport, skb); 352 int sent = vport->ops->send(vport, skb);
355 353
356 if (likely(sent)) { 354 if (likely(sent)) {
357 struct vport_percpu_stats *stats; 355 struct pcpu_tstats *stats;
358 356
359 stats = this_cpu_ptr(vport->percpu_stats); 357 stats = this_cpu_ptr(vport->percpu_stats);
360 358
361 u64_stats_update_begin(&stats->sync); 359 u64_stats_update_begin(&stats->syncp);
362 stats->tx_packets++; 360 stats->tx_packets++;
363 stats->tx_bytes += sent; 361 stats->tx_bytes += sent;
364 u64_stats_update_end(&stats->sync); 362 u64_stats_update_end(&stats->syncp);
365 } 363 }
366 return sent; 364 return sent;
367} 365}
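
The stats conversion above drops the private vport_percpu_stats in favor of the generic struct pcpu_tstats, but the read protocol is unchanged: each CPU's counters sit behind a u64_stats_sync seqcount, and readers retry if they raced a writer. A reader sketch under that assumption; read_one_cpu() is a hypothetical helper mirroring the loop in ovs_vport_get_stats():

#include <linux/netdevice.h>
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

/* Snapshot one CPU's pcpu_tstats, retrying if a writer was active. */
static void read_one_cpu(struct pcpu_tstats __percpu *stats, int cpu,
			 u64 *rx_packets, u64 *rx_bytes)
{
	const struct pcpu_tstats *s = per_cpu_ptr(stats, cpu);
	struct pcpu_tstats snap;
	unsigned int start;

	do {
		start = u64_stats_fetch_begin_bh(&s->syncp);
		snap = *s;		/* copy under the seqcount */
	} while (u64_stats_fetch_retry_bh(&s->syncp, start));

	*rx_packets = snap.rx_packets;
	*rx_bytes = snap.rx_bytes;
}
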
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 3f7961ea3c56..68a377bc0841 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -19,6 +19,7 @@
19#ifndef VPORT_H 19#ifndef VPORT_H
20#define VPORT_H 1 20#define VPORT_H 1
21 21
22#include <linux/if_tunnel.h>
22#include <linux/list.h> 23#include <linux/list.h>
23#include <linux/netlink.h> 24#include <linux/netlink.h>
24#include <linux/openvswitch.h> 25#include <linux/openvswitch.h>
@@ -50,14 +51,6 @@ int ovs_vport_send(struct vport *, struct sk_buff *);
50 51
51/* The following definitions are for implementers of vport devices: */ 52/* The following definitions are for implementers of vport devices: */
52 53
53struct vport_percpu_stats {
54 u64 rx_bytes;
55 u64 rx_packets;
56 u64 tx_bytes;
57 u64 tx_packets;
58 struct u64_stats_sync sync;
59};
60
61struct vport_err_stats { 54struct vport_err_stats {
62 u64 rx_dropped; 55 u64 rx_dropped;
63 u64 rx_errors; 56 u64 rx_errors;
@@ -68,10 +61,10 @@ struct vport_err_stats {
68/** 61/**
69 * struct vport - one port within a datapath 62 * struct vport - one port within a datapath
70 * @rcu: RCU callback head for deferred destruction. 63 * @rcu: RCU callback head for deferred destruction.
71 * @port_no: Index into @dp's @ports array.
72 * @dp: Datapath to which this port belongs. 64 * @dp: Datapath to which this port belongs.
73 * @upcall_portid: The Netlink port to use for packets received on this port that 65 * @upcall_portid: The Netlink port to use for packets received on this port that
74 * miss the flow table. 66 * miss the flow table.
67 * @port_no: Index into @dp's @ports array.
75 * @hash_node: Element in @dev_table hash table in vport.c. 68 * @hash_node: Element in @dev_table hash table in vport.c.
76 * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. 69 * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.
77 * @ops: Class structure. 70 * @ops: Class structure.
@@ -81,15 +74,15 @@ struct vport_err_stats {
81 */ 74 */
82struct vport { 75struct vport {
83 struct rcu_head rcu; 76 struct rcu_head rcu;
84 u16 port_no;
85 struct datapath *dp; 77 struct datapath *dp;
86 u32 upcall_portid; 78 u32 upcall_portid;
79 u16 port_no;
87 80
88 struct hlist_node hash_node; 81 struct hlist_node hash_node;
89 struct hlist_node dp_hash_node; 82 struct hlist_node dp_hash_node;
90 const struct vport_ops *ops; 83 const struct vport_ops *ops;
91 84
92 struct vport_percpu_stats __percpu *percpu_stats; 85 struct pcpu_tstats __percpu *percpu_stats;
93 86
94 spinlock_t stats_lock; 87 spinlock_t stats_lock;
95 struct vport_err_stats err_stats; 88 struct vport_err_stats err_stats;
@@ -131,24 +124,22 @@ struct vport_parms {
131 * have any configuration. 124 * have any configuration.
132 * @get_name: Get the device's name. 125 * @get_name: Get the device's name.
133 * @get_config: Get the device's configuration. 126 * @get_config: Get the device's configuration.
134 * @get_ifindex: Get the system interface index associated with the device.
135 * May be null if the device does not have an ifindex.
136 * @send: Send a packet on the device. Returns the length of the packet sent. 128 * @send: Send a packet on the device. Returns the length of the packet sent.
137 */ 129 */
138struct vport_ops { 130struct vport_ops {
139 enum ovs_vport_type type; 131 enum ovs_vport_type type;
140 132
141 /* Called with RTNL lock. */ 133 /* Called with ovs_mutex. */
142 struct vport *(*create)(const struct vport_parms *); 134 struct vport *(*create)(const struct vport_parms *);
143 void (*destroy)(struct vport *); 135 void (*destroy)(struct vport *);
144 136
145 int (*set_options)(struct vport *, struct nlattr *); 137 int (*set_options)(struct vport *, struct nlattr *);
146 int (*get_options)(const struct vport *, struct sk_buff *); 138 int (*get_options)(const struct vport *, struct sk_buff *);
147 139
148 /* Called with rcu_read_lock or RTNL lock. */ 140 /* Called with rcu_read_lock or ovs_mutex. */
149 const char *(*get_name)(const struct vport *); 141 const char *(*get_name)(const struct vport *);
150 void (*get_config)(const struct vport *, void *); 142 void (*get_config)(const struct vport *, void *);
151 int (*get_ifindex)(const struct vport *);
152 143
153 int (*send)(struct vport *, struct sk_buff *); 144 int (*send)(struct vport *, struct sk_buff *);
154}; 145};
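
The field moves in struct vport above plausibly double as a packing fix: on an LP64 target a u16 placed before a pointer forces six bytes of padding, while placing it after the u32 upcall_portid reuses an existing hole. A userspace demonstration; the struct names and the 16-byte rcu placeholder are stand-ins, not the real layout:

#include <stdint.h>
#include <stdio.h>

struct before { char rcu[16]; uint16_t port_no; void *dp; uint32_t upcall_portid; };
struct after  { char rcu[16]; void *dp; uint32_t upcall_portid; uint16_t port_no; };

int main(void)
{
	printf("before: %zu bytes\n", sizeof(struct before));	/* 40 on LP64 */
	printf("after:  %zu bytes\n", sizeof(struct after));	/* 32 on LP64 */
	return 0;
}
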
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1d6793dbfbae..8ec1bca7f859 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -158,10 +158,16 @@ struct packet_mreq_max {
158 unsigned char mr_address[MAX_ADDR_LEN]; 158 unsigned char mr_address[MAX_ADDR_LEN];
159}; 159};
160 160
161union tpacket_uhdr {
162 struct tpacket_hdr *h1;
163 struct tpacket2_hdr *h2;
164 struct tpacket3_hdr *h3;
165 void *raw;
166};
167
161static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, 168static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
162 int closing, int tx_ring); 169 int closing, int tx_ring);
163 170
164
165#define V3_ALIGNMENT (8) 171#define V3_ALIGNMENT (8)
166 172
167#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) 173#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
@@ -181,6 +187,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
181 187
182struct packet_sock; 188struct packet_sock;
183static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); 189static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
190static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
191 struct packet_type *pt, struct net_device *orig_dev);
184 192
185static void *packet_previous_frame(struct packet_sock *po, 193static void *packet_previous_frame(struct packet_sock *po,
186 struct packet_ring_buffer *rb, 194 struct packet_ring_buffer *rb,
@@ -288,11 +296,7 @@ static inline __pure struct page *pgv_to_page(void *addr)
288 296
289static void __packet_set_status(struct packet_sock *po, void *frame, int status) 297static void __packet_set_status(struct packet_sock *po, void *frame, int status)
290{ 298{
291 union { 299 union tpacket_uhdr h;
292 struct tpacket_hdr *h1;
293 struct tpacket2_hdr *h2;
294 void *raw;
295 } h;
296 300
297 h.raw = frame; 301 h.raw = frame;
298 switch (po->tp_version) { 302 switch (po->tp_version) {
@@ -315,11 +319,7 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
315 319
316static int __packet_get_status(struct packet_sock *po, void *frame) 320static int __packet_get_status(struct packet_sock *po, void *frame)
317{ 321{
318 union { 322 union tpacket_uhdr h;
319 struct tpacket_hdr *h1;
320 struct tpacket2_hdr *h2;
321 void *raw;
322 } h;
323 323
324 smp_rmb(); 324 smp_rmb();
325 325
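
The anonymous unions that __packet_set_status() and __packet_get_status() used to repeat are now the single named union tpacket_uhdr. A compressed sketch of the access pattern it enables; set_len() is an invented name, and the union is restated locally so the snippet stands alone:

#include <linux/if_packet.h>

/* Cast the frame once, then pick the header view per ring version. */
union uhdr {
	struct tpacket_hdr *h1;
	struct tpacket2_hdr *h2;
	struct tpacket3_hdr *h3;
	void *raw;
};

static void set_len(void *frame, int version, unsigned int len)
{
	union uhdr h;

	h.raw = frame;
	switch (version) {
	case TPACKET_V1: h.h1->tp_len = len; break;
	case TPACKET_V2: h.h2->tp_len = len; break;
	case TPACKET_V3: h.h3->tp_len = len; break;
	}
}
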
@@ -339,17 +339,66 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
339 } 339 }
340} 340}
341 341
342static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
343 unsigned int flags)
344{
345 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
346
347 if (shhwtstamps) {
348 if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
349 ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
350 return TP_STATUS_TS_SYS_HARDWARE;
351 if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
352 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
353 return TP_STATUS_TS_RAW_HARDWARE;
354 }
355
356 if (ktime_to_timespec_cond(skb->tstamp, ts))
357 return TP_STATUS_TS_SOFTWARE;
358
359 return 0;
360}
361
362static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
363 struct sk_buff *skb)
364{
365 union tpacket_uhdr h;
366 struct timespec ts;
367 __u32 ts_status;
368
369 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
370 return 0;
371
372 h.raw = frame;
373 switch (po->tp_version) {
374 case TPACKET_V1:
375 h.h1->tp_sec = ts.tv_sec;
376 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
377 break;
378 case TPACKET_V2:
379 h.h2->tp_sec = ts.tv_sec;
380 h.h2->tp_nsec = ts.tv_nsec;
381 break;
382 case TPACKET_V3:
383 default:
384 WARN(1, "TPACKET version not supported.\n");
385 BUG();
386 }
387
388 /* one flush is safe, as both fields always lie on the same cacheline */
389 flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
390 smp_wmb();
391
392 return ts_status;
393}
394
342static void *packet_lookup_frame(struct packet_sock *po, 395static void *packet_lookup_frame(struct packet_sock *po,
343 struct packet_ring_buffer *rb, 396 struct packet_ring_buffer *rb,
344 unsigned int position, 397 unsigned int position,
345 int status) 398 int status)
346{ 399{
347 unsigned int pg_vec_pos, frame_offset; 400 unsigned int pg_vec_pos, frame_offset;
348 union { 401 union tpacket_uhdr h;
349 struct tpacket_hdr *h1;
350 struct tpacket2_hdr *h2;
351 void *raw;
352 } h;
353 402
354 pg_vec_pos = position / rb->frames_per_block; 403 pg_vec_pos = position / rb->frames_per_block;
355 frame_offset = position % rb->frames_per_block; 404 frame_offset = position % rb->frames_per_block;
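
tpacket_get_timestamp() above centralizes the three-way fallback that was previously duplicated per ring version: a system-clock hardware stamp if requested and present, else a raw hardware stamp, else the software stamp. The same selection logic in miniature; pick_ts() and the enum are invented for illustration, and a zero value stands in for "no timestamp":

#include <stdint.h>

enum ts_src { TS_NONE, TS_SYS_HARDWARE, TS_RAW_HARDWARE, TS_SOFTWARE };

static enum ts_src pick_ts(uint64_t sys_hw, uint64_t raw_hw, uint64_t sw,
			   int want_sys, int want_raw, uint64_t *out)
{
	if (want_sys && sys_hw) { *out = sys_hw; return TS_SYS_HARDWARE; }
	if (want_raw && raw_hw) { *out = raw_hw; return TS_RAW_HARDWARE; }
	if (sw)                 { *out = sw;     return TS_SOFTWARE; }
	return TS_NONE;
}
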
@@ -479,7 +528,7 @@ static void init_prb_bdqc(struct packet_sock *po,
479 p1->hdrlen = po->tp_hdrlen; 528 p1->hdrlen = po->tp_hdrlen;
480 p1->version = po->tp_version; 529 p1->version = po->tp_version;
481 p1->last_kactive_blk_num = 0; 530 p1->last_kactive_blk_num = 0;
482 po->stats_u.stats3.tp_freeze_q_cnt = 0; 531 po->stats.stats3.tp_freeze_q_cnt = 0;
483 if (req_u->req3.tp_retire_blk_tov) 532 if (req_u->req3.tp_retire_blk_tov)
484 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; 533 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
485 else 534 else
@@ -647,7 +696,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,
647 struct tpacket3_hdr *last_pkt; 696 struct tpacket3_hdr *last_pkt;
648 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; 697 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
649 698
650 if (po->stats.tp_drops) 699 if (po->stats.stats3.tp_drops)
651 status |= TP_STATUS_LOSING; 700 status |= TP_STATUS_LOSING;
652 701
653 last_pkt = (struct tpacket3_hdr *)pkc1->prev; 702 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
@@ -693,36 +742,33 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1,
693 742
694 smp_rmb(); 743 smp_rmb();
695 744
696 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { 745 /* We could have just memset this, but we would lose the
746 * flexibility of making the priv area sticky
747 */
697 748
698 /* We could have just memset this but we will lose the 749 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
699 * flexibility of making the priv area sticky 750 BLOCK_NUM_PKTS(pbd1) = 0;
700 */ 751 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
701 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
702 BLOCK_NUM_PKTS(pbd1) = 0;
703 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
704 getnstimeofday(&ts);
705 h1->ts_first_pkt.ts_sec = ts.tv_sec;
706 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
707 pkc1->pkblk_start = (char *)pbd1;
708 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
709 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
710 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
711 pbd1->version = pkc1->version;
712 pkc1->prev = pkc1->nxt_offset;
713 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
714 prb_thaw_queue(pkc1);
715 _prb_refresh_rx_retire_blk_timer(pkc1);
716 752
717 smp_wmb(); 753 getnstimeofday(&ts);
718 754
719 return; 755 h1->ts_first_pkt.ts_sec = ts.tv_sec;
720 } 756 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
757
758 pkc1->pkblk_start = (char *)pbd1;
759 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
760
761 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
762 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
763
764 pbd1->version = pkc1->version;
765 pkc1->prev = pkc1->nxt_offset;
766 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
767
768 prb_thaw_queue(pkc1);
769 _prb_refresh_rx_retire_blk_timer(pkc1);
721 770
722 WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", 771 smp_wmb();
723 pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num);
724 dump_stack();
725 BUG();
726} 772}
727 773
728/* 774/*
@@ -752,7 +798,7 @@ static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
752 struct packet_sock *po) 798 struct packet_sock *po)
753{ 799{
754 pkc->reset_pending_on_curr_blk = 1; 800 pkc->reset_pending_on_curr_blk = 1;
755 po->stats_u.stats3.tp_freeze_q_cnt++; 801 po->stats.stats3.tp_freeze_q_cnt++;
756} 802}
757 803
758#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) 804#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
@@ -813,10 +859,6 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
813 prb_close_block(pkc, pbd, po, status); 859 prb_close_block(pkc, pbd, po, status);
814 return; 860 return;
815 } 861 }
816
817 WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
818 dump_stack();
819 BUG();
820} 862}
821 863
822static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, 864static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
@@ -973,11 +1015,11 @@ static void *packet_current_rx_frame(struct packet_sock *po,
973 1015
974static void *prb_lookup_block(struct packet_sock *po, 1016static void *prb_lookup_block(struct packet_sock *po,
975 struct packet_ring_buffer *rb, 1017 struct packet_ring_buffer *rb,
976 unsigned int previous, 1018 unsigned int idx,
977 int status) 1019 int status)
978{ 1020{
979 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); 1021 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
980 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); 1022 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
981 1023
982 if (status != BLOCK_STATUS(pbd)) 1024 if (status != BLOCK_STATUS(pbd))
983 return NULL; 1025 return NULL;
@@ -1041,6 +1083,29 @@ static void packet_increment_head(struct packet_ring_buffer *buff)
1041 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; 1083 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
1042} 1084}
1043 1085
1086static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1087{
1088 struct sock *sk = &po->sk;
1089 bool has_room;
1090
1091 if (po->prot_hook.func != tpacket_rcv)
1092 return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize)
1093 <= sk->sk_rcvbuf;
1094
1095 spin_lock(&sk->sk_receive_queue.lock);
1096 if (po->tp_version == TPACKET_V3)
1097 has_room = prb_lookup_block(po, &po->rx_ring,
1098 po->rx_ring.prb_bdqc.kactive_blk_num,
1099 TP_STATUS_KERNEL);
1100 else
1101 has_room = packet_lookup_frame(po, &po->rx_ring,
1102 po->rx_ring.head,
1103 TP_STATUS_KERNEL);
1104 spin_unlock(&sk->sk_receive_queue.lock);
1105
1106 return has_room;
1107}
1108
1044static void packet_sock_destruct(struct sock *sk) 1109static void packet_sock_destruct(struct sock *sk)
1045{ 1110{
1046 skb_queue_purge(&sk->sk_error_queue); 1111 skb_queue_purge(&sk->sk_error_queue);
@@ -1066,16 +1131,16 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
1066 return x; 1131 return x;
1067} 1132}
1068 1133
1069static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) 1134static unsigned int fanout_demux_hash(struct packet_fanout *f,
1135 struct sk_buff *skb,
1136 unsigned int num)
1070{ 1137{
1071 u32 idx, hash = skb->rxhash; 1138 return (((u64)skb->rxhash) * num) >> 32;
1072
1073 idx = ((u64)hash * num) >> 32;
1074
1075 return f->arr[idx];
1076} 1139}
1077 1140
1078static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) 1141static unsigned int fanout_demux_lb(struct packet_fanout *f,
1142 struct sk_buff *skb,
1143 unsigned int num)
1079{ 1144{
1080 int cur, old; 1145 int cur, old;
1081 1146
@@ -1083,14 +1148,40 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb
1083 while ((old = atomic_cmpxchg(&f->rr_cur, cur, 1148 while ((old = atomic_cmpxchg(&f->rr_cur, cur,
1084 fanout_rr_next(f, num))) != cur) 1149 fanout_rr_next(f, num))) != cur)
1085 cur = old; 1150 cur = old;
1086 return f->arr[cur]; 1151 return cur;
1152}
1153
1154static unsigned int fanout_demux_cpu(struct packet_fanout *f,
1155 struct sk_buff *skb,
1156 unsigned int num)
1157{
1158 return smp_processor_id() % num;
1087} 1159}
1088 1160
1089static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) 1161static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1162 struct sk_buff *skb,
1163 unsigned int idx, unsigned int skip,
1164 unsigned int num)
1090{ 1165{
1091 unsigned int cpu = smp_processor_id(); 1166 unsigned int i, j;
1167
1168 i = j = min_t(int, f->next[idx], num - 1);
1169 do {
1170 if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) {
1171 if (i != j)
1172 f->next[idx] = i;
1173 return i;
1174 }
1175 if (++i == num)
1176 i = 0;
1177 } while (i != j);
1178
1179 return idx;
1180}
1092 1181
1093 return f->arr[cpu % num]; 1182static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
1183{
1184 return f->flags & (flag >> 8);
1094} 1185}
1095 1186
1096static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, 1187static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
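
fanout_demux_hash() now returns an index rather than a socket, and keeps the multiply-shift trick: for a hash h uniform over the 32-bit range, ((u64)h * num) >> 32 scales h/2^32, a fraction in [0,1), into [0, num) without a divide or modulo. A standalone demo:

#include <stdint.h>
#include <stdio.h>

/* Userspace demo of the bucketing used by fanout_demux_hash(). */
static unsigned int bucket(uint32_t hash, unsigned int num)
{
	return ((uint64_t)hash * num) >> 32;
}

int main(void)
{
	printf("%u\n", bucket(0x00000000u, 4));	/* 0 */
	printf("%u\n", bucket(0x80000000u, 4));	/* 2 */
	printf("%u\n", bucket(0xFFFFFFFFu, 4));	/* 3 */
	return 0;
}
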
@@ -1099,7 +1190,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1099 struct packet_fanout *f = pt->af_packet_priv; 1190 struct packet_fanout *f = pt->af_packet_priv;
1100 unsigned int num = f->num_members; 1191 unsigned int num = f->num_members;
1101 struct packet_sock *po; 1192 struct packet_sock *po;
1102 struct sock *sk; 1193 unsigned int idx;
1103 1194
1104 if (!net_eq(dev_net(dev), read_pnet(&f->net)) || 1195 if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
1105 !num) { 1196 !num) {
@@ -1110,23 +1201,31 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1110 switch (f->type) { 1201 switch (f->type) {
1111 case PACKET_FANOUT_HASH: 1202 case PACKET_FANOUT_HASH:
1112 default: 1203 default:
1113 if (f->defrag) { 1204 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
1114 skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); 1205 skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
1115 if (!skb) 1206 if (!skb)
1116 return 0; 1207 return 0;
1117 } 1208 }
1118 skb_get_rxhash(skb); 1209 skb_get_rxhash(skb);
1119 sk = fanout_demux_hash(f, skb, num); 1210 idx = fanout_demux_hash(f, skb, num);
1120 break; 1211 break;
1121 case PACKET_FANOUT_LB: 1212 case PACKET_FANOUT_LB:
1122 sk = fanout_demux_lb(f, skb, num); 1213 idx = fanout_demux_lb(f, skb, num);
1123 break; 1214 break;
1124 case PACKET_FANOUT_CPU: 1215 case PACKET_FANOUT_CPU:
1125 sk = fanout_demux_cpu(f, skb, num); 1216 idx = fanout_demux_cpu(f, skb, num);
1217 break;
1218 case PACKET_FANOUT_ROLLOVER:
1219 idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
1126 break; 1220 break;
1127 } 1221 }
1128 1222
1129 po = pkt_sk(sk); 1223 po = pkt_sk(f->arr[idx]);
1224 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) &&
1225 unlikely(!packet_rcv_has_room(po, skb))) {
1226 idx = fanout_demux_rollover(f, skb, idx, idx, num);
1227 po = pkt_sk(f->arr[idx]);
1228 }
1130 1229
1131 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); 1230 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1132} 1231}
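
The rewritten packet_rcv_fanout() above works in two stages: the configured policy picks an index, and, if PACKET_FANOUT_FLAG_ROLLOVER is set and the chosen socket fails packet_rcv_has_room(), fanout_demux_rollover() scans the other members for one with space. From userspace the new modes are requested through the existing PACKET_FANOUT option. A sketch assuming the uapi constants introduced by this series; fanout id 42 is arbitrary and error handling is omitted:

#include <linux/if_packet.h>
#include <sys/socket.h>

/* Hypothetical helper: pure rollover, or hash with rollover as the
 * overflow fallback.  The flag travels in the high bits of the
 * 16-bit type_flags half of the option word.
 */
static int enable_fanout(int fd, int pure_rollover)
{
	unsigned int id = 42;
	unsigned int type_flags;

	if (pure_rollover)
		type_flags = PACKET_FANOUT_ROLLOVER;
	else
		type_flags = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER;

	unsigned int arg = id | (type_flags << 16);
	return setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &arg, sizeof(arg));
}

Note that combining the pure rollover mode with its own flag is rejected with -EINVAL, as the fanout_add() hunk below enforces.
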
@@ -1175,10 +1274,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1175 struct packet_sock *po = pkt_sk(sk); 1274 struct packet_sock *po = pkt_sk(sk);
1176 struct packet_fanout *f, *match; 1275 struct packet_fanout *f, *match;
1177 u8 type = type_flags & 0xff; 1276 u8 type = type_flags & 0xff;
1178 u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; 1277 u8 flags = type_flags >> 8;
1179 int err; 1278 int err;
1180 1279
1181 switch (type) { 1280 switch (type) {
1281 case PACKET_FANOUT_ROLLOVER:
1282 if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
1283 return -EINVAL;
1182 case PACKET_FANOUT_HASH: 1284 case PACKET_FANOUT_HASH:
1183 case PACKET_FANOUT_LB: 1285 case PACKET_FANOUT_LB:
1184 case PACKET_FANOUT_CPU: 1286 case PACKET_FANOUT_CPU:
@@ -1203,7 +1305,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1203 } 1305 }
1204 } 1306 }
1205 err = -EINVAL; 1307 err = -EINVAL;
1206 if (match && match->defrag != defrag) 1308 if (match && match->flags != flags)
1207 goto out; 1309 goto out;
1208 if (!match) { 1310 if (!match) {
1209 err = -ENOMEM; 1311 err = -ENOMEM;
@@ -1213,7 +1315,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1213 write_pnet(&match->net, sock_net(sk)); 1315 write_pnet(&match->net, sock_net(sk));
1214 match->id = id; 1316 match->id = id;
1215 match->type = type; 1317 match->type = type;
1216 match->defrag = defrag; 1318 match->flags = flags;
1217 atomic_set(&match->rr_cur, 0); 1319 atomic_set(&match->rr_cur, 0);
1218 INIT_LIST_HEAD(&match->list); 1320 INIT_LIST_HEAD(&match->list);
1219 spin_lock_init(&match->lock); 1321 spin_lock_init(&match->lock);
@@ -1443,13 +1545,14 @@ retry:
1443 skb->dev = dev; 1545 skb->dev = dev;
1444 skb->priority = sk->sk_priority; 1546 skb->priority = sk->sk_priority;
1445 skb->mark = sk->sk_mark; 1547 skb->mark = sk->sk_mark;
1446 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 1548
1447 if (err < 0) 1549 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1448 goto out_unlock;
1449 1550
1450 if (unlikely(extra_len == 4)) 1551 if (unlikely(extra_len == 4))
1451 skb->no_fcs = 1; 1552 skb->no_fcs = 1;
1452 1553
1554 skb_probe_transport_header(skb, 0);
1555
1453 dev_queue_xmit(skb); 1556 dev_queue_xmit(skb);
1454 rcu_read_unlock(); 1557 rcu_read_unlock();
1455 return len; 1558 return len;
@@ -1577,7 +1680,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
1577 nf_reset(skb); 1680 nf_reset(skb);
1578 1681
1579 spin_lock(&sk->sk_receive_queue.lock); 1682 spin_lock(&sk->sk_receive_queue.lock);
1580 po->stats.tp_packets++; 1683 po->stats.stats1.tp_packets++;
1581 skb->dropcount = atomic_read(&sk->sk_drops); 1684 skb->dropcount = atomic_read(&sk->sk_drops);
1582 __skb_queue_tail(&sk->sk_receive_queue, skb); 1685 __skb_queue_tail(&sk->sk_receive_queue, skb);
1583 spin_unlock(&sk->sk_receive_queue.lock); 1686 spin_unlock(&sk->sk_receive_queue.lock);
@@ -1586,7 +1689,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
1586 1689
1587drop_n_acct: 1690drop_n_acct:
1588 spin_lock(&sk->sk_receive_queue.lock); 1691 spin_lock(&sk->sk_receive_queue.lock);
1589 po->stats.tp_drops++; 1692 po->stats.stats1.tp_drops++;
1590 atomic_inc(&sk->sk_drops); 1693 atomic_inc(&sk->sk_drops);
1591 spin_unlock(&sk->sk_receive_queue.lock); 1694 spin_unlock(&sk->sk_receive_queue.lock);
1592 1695
@@ -1606,21 +1709,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1606 struct sock *sk; 1709 struct sock *sk;
1607 struct packet_sock *po; 1710 struct packet_sock *po;
1608 struct sockaddr_ll *sll; 1711 struct sockaddr_ll *sll;
1609 union { 1712 union tpacket_uhdr h;
1610 struct tpacket_hdr *h1;
1611 struct tpacket2_hdr *h2;
1612 struct tpacket3_hdr *h3;
1613 void *raw;
1614 } h;
1615 u8 *skb_head = skb->data; 1713 u8 *skb_head = skb->data;
1616 int skb_len = skb->len; 1714 int skb_len = skb->len;
1617 unsigned int snaplen, res; 1715 unsigned int snaplen, res;
1618 unsigned long status = TP_STATUS_USER; 1716 unsigned long status = TP_STATUS_USER;
1619 unsigned short macoff, netoff, hdrlen; 1717 unsigned short macoff, netoff, hdrlen;
1620 struct sk_buff *copy_skb = NULL; 1718 struct sk_buff *copy_skb = NULL;
1621 struct timeval tv;
1622 struct timespec ts; 1719 struct timespec ts;
1623 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); 1720 __u32 ts_status;
1624 1721
1625 if (skb->pkt_type == PACKET_LOOPBACK) 1722 if (skb->pkt_type == PACKET_LOOPBACK)
1626 goto drop; 1723 goto drop;
@@ -1692,10 +1789,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1692 * Anyways, moving it for V1/V2 only as V3 doesn't need this 1789 * Anyways, moving it for V1/V2 only as V3 doesn't need this
1693 * at packet level. 1790 * at packet level.
1694 */ 1791 */
1695 if (po->stats.tp_drops) 1792 if (po->stats.stats1.tp_drops)
1696 status |= TP_STATUS_LOSING; 1793 status |= TP_STATUS_LOSING;
1697 } 1794 }
1698 po->stats.tp_packets++; 1795 po->stats.stats1.tp_packets++;
1699 if (copy_skb) { 1796 if (copy_skb) {
1700 status |= TP_STATUS_COPY; 1797 status |= TP_STATUS_COPY;
1701 __skb_queue_tail(&sk->sk_receive_queue, copy_skb); 1798 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
@@ -1704,24 +1801,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1704 1801
1705 skb_copy_bits(skb, 0, h.raw + macoff, snaplen); 1802 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
1706 1803
1804 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
1805 getnstimeofday(&ts);
1806
1807 status |= ts_status;
1808
1707 switch (po->tp_version) { 1809 switch (po->tp_version) {
1708 case TPACKET_V1: 1810 case TPACKET_V1:
1709 h.h1->tp_len = skb->len; 1811 h.h1->tp_len = skb->len;
1710 h.h1->tp_snaplen = snaplen; 1812 h.h1->tp_snaplen = snaplen;
1711 h.h1->tp_mac = macoff; 1813 h.h1->tp_mac = macoff;
1712 h.h1->tp_net = netoff; 1814 h.h1->tp_net = netoff;
1713 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) 1815 h.h1->tp_sec = ts.tv_sec;
1714 && shhwtstamps->syststamp.tv64) 1816 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
1715 tv = ktime_to_timeval(shhwtstamps->syststamp);
1716 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1717 && shhwtstamps->hwtstamp.tv64)
1718 tv = ktime_to_timeval(shhwtstamps->hwtstamp);
1719 else if (skb->tstamp.tv64)
1720 tv = ktime_to_timeval(skb->tstamp);
1721 else
1722 do_gettimeofday(&tv);
1723 h.h1->tp_sec = tv.tv_sec;
1724 h.h1->tp_usec = tv.tv_usec;
1725 hdrlen = sizeof(*h.h1); 1817 hdrlen = sizeof(*h.h1);
1726 break; 1818 break;
1727 case TPACKET_V2: 1819 case TPACKET_V2:
@@ -1729,16 +1821,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1729 h.h2->tp_snaplen = snaplen; 1821 h.h2->tp_snaplen = snaplen;
1730 h.h2->tp_mac = macoff; 1822 h.h2->tp_mac = macoff;
1731 h.h2->tp_net = netoff; 1823 h.h2->tp_net = netoff;
1732 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1733 && shhwtstamps->syststamp.tv64)
1734 ts = ktime_to_timespec(shhwtstamps->syststamp);
1735 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1736 && shhwtstamps->hwtstamp.tv64)
1737 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
1738 else if (skb->tstamp.tv64)
1739 ts = ktime_to_timespec(skb->tstamp);
1740 else
1741 getnstimeofday(&ts);
1742 h.h2->tp_sec = ts.tv_sec; 1824 h.h2->tp_sec = ts.tv_sec;
1743 h.h2->tp_nsec = ts.tv_nsec; 1825 h.h2->tp_nsec = ts.tv_nsec;
1744 if (vlan_tx_tag_present(skb)) { 1826 if (vlan_tx_tag_present(skb)) {
@@ -1759,16 +1841,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1759 h.h3->tp_snaplen = snaplen; 1841 h.h3->tp_snaplen = snaplen;
1760 h.h3->tp_mac = macoff; 1842 h.h3->tp_mac = macoff;
1761 h.h3->tp_net = netoff; 1843 h.h3->tp_net = netoff;
1762 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
1763 && shhwtstamps->syststamp.tv64)
1764 ts = ktime_to_timespec(shhwtstamps->syststamp);
1765 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
1766 && shhwtstamps->hwtstamp.tv64)
1767 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
1768 else if (skb->tstamp.tv64)
1769 ts = ktime_to_timespec(skb->tstamp);
1770 else
1771 getnstimeofday(&ts);
1772 h.h3->tp_sec = ts.tv_sec; 1844 h.h3->tp_sec = ts.tv_sec;
1773 h.h3->tp_nsec = ts.tv_nsec; 1845 h.h3->tp_nsec = ts.tv_nsec;
1774 hdrlen = sizeof(*h.h3); 1846 hdrlen = sizeof(*h.h3);
@@ -1819,7 +1891,7 @@ drop:
1819 return 0; 1891 return 0;
1820 1892
1821ring_is_full: 1893ring_is_full:
1822 po->stats.tp_drops++; 1894 po->stats.stats1.tp_drops++;
1823 spin_unlock(&sk->sk_receive_queue.lock); 1895 spin_unlock(&sk->sk_receive_queue.lock);
1824 1896
1825 sk->sk_data_ready(sk, 0); 1897 sk->sk_data_ready(sk, 0);
@@ -1833,10 +1905,14 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
1833 void *ph; 1905 void *ph;
1834 1906
1835 if (likely(po->tx_ring.pg_vec)) { 1907 if (likely(po->tx_ring.pg_vec)) {
1908 __u32 ts;
1909
1836 ph = skb_shinfo(skb)->destructor_arg; 1910 ph = skb_shinfo(skb)->destructor_arg;
1837 BUG_ON(atomic_read(&po->tx_ring.pending) == 0); 1911 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
1838 atomic_dec(&po->tx_ring.pending); 1912 atomic_dec(&po->tx_ring.pending);
1839 __packet_set_status(po, ph, TP_STATUS_AVAILABLE); 1913
1914 ts = __packet_set_timestamp(po, ph, skb);
1915 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
1840 } 1916 }
1841 1917
1842 sock_wfree(skb); 1918 sock_wfree(skb);
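
With the tpacket_destruct_skb() change above, a completed TX-ring frame's status word is no longer plain TP_STATUS_AVAILABLE (which is 0) but may also carry a TP_STATUS_TS_* bit naming the clock that filled tp_sec/tp_nsec. A userspace completion check under that assumption; frame_done() is invented, and "done" is the absence of the request/sending bits precisely because TP_STATUS_AVAILABLE is zero:

#include <linux/if_packet.h>

static int frame_done(const volatile struct tpacket2_hdr *hdr,
		      int *has_sw_ts)
{
	__u32 st = hdr->tp_status;

	if (st & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))
		return 0;	/* kernel still owns the frame */
	*has_sw_ts = !!(st & TP_STATUS_TS_SOFTWARE);
	return 1;
}
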
@@ -1846,11 +1922,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
1846 void *frame, struct net_device *dev, int size_max, 1922 void *frame, struct net_device *dev, int size_max,
1847 __be16 proto, unsigned char *addr, int hlen) 1923 __be16 proto, unsigned char *addr, int hlen)
1848{ 1924{
1849 union { 1925 union tpacket_uhdr ph;
1850 struct tpacket_hdr *h1;
1851 struct tpacket2_hdr *h2;
1852 void *raw;
1853 } ph;
1854 int to_write, offset, len, tp_len, nr_frags, len_max; 1926 int to_write, offset, len, tp_len, nr_frags, len_max;
1855 struct socket *sock = po->sk.sk_socket; 1927 struct socket *sock = po->sk.sk_socket;
1856 struct page *page; 1928 struct page *page;
@@ -1863,6 +1935,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
1863 skb->dev = dev; 1935 skb->dev = dev;
1864 skb->priority = po->sk.sk_priority; 1936 skb->priority = po->sk.sk_priority;
1865 skb->mark = po->sk.sk_mark; 1937 skb->mark = po->sk.sk_mark;
1938 sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
1866 skb_shinfo(skb)->destructor_arg = ph.raw; 1939 skb_shinfo(skb)->destructor_arg = ph.raw;
1867 1940
1868 switch (po->tp_version) { 1941 switch (po->tp_version) {
@@ -1880,6 +1953,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
1880 1953
1881 skb_reserve(skb, hlen); 1954 skb_reserve(skb, hlen);
1882 skb_reset_network_header(skb); 1955 skb_reset_network_header(skb);
1956 skb_probe_transport_header(skb, 0);
1883 1957
1884 if (po->tp_tx_has_off) { 1958 if (po->tp_tx_has_off) {
1885 int off_min, off_max, off; 1959 int off_min, off_max, off;
@@ -2247,9 +2321,8 @@ static int packet_snd(struct socket *sock,
2247 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); 2321 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
2248 if (err) 2322 if (err)
2249 goto out_free; 2323 goto out_free;
2250 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); 2324
2251 if (err < 0) 2325 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2252 goto out_free;
2253 2326
2254 if (!gso_type && (len > dev->mtu + reserve + extra_len)) { 2327 if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
2255 /* Earlier code assumed this would be a VLAN pkt, 2328 /* Earlier code assumed this would be a VLAN pkt,
@@ -2289,6 +2362,8 @@ static int packet_snd(struct socket *sock,
2289 len += vnet_hdr_len; 2362 len += vnet_hdr_len;
2290 } 2363 }
2291 2364
2365 skb_probe_transport_header(skb, reserve);
2366
2292 if (unlikely(extra_len == 4)) 2367 if (unlikely(extra_len == 4))
2293 skb->no_fcs = 1; 2368 skb->no_fcs = 1;
2294 2369
@@ -3165,8 +3240,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3165 struct sock *sk = sock->sk; 3240 struct sock *sk = sock->sk;
3166 struct packet_sock *po = pkt_sk(sk); 3241 struct packet_sock *po = pkt_sk(sk);
3167 void *data = &val; 3242 void *data = &val;
3168 struct tpacket_stats st; 3243 union tpacket_stats_u st;
3169 union tpacket_stats_u st_u;
3170 3244
3171 if (level != SOL_PACKET) 3245 if (level != SOL_PACKET)
3172 return -ENOPROTOOPT; 3246 return -ENOPROTOOPT;
@@ -3180,22 +3254,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3180 switch (optname) { 3254 switch (optname) {
3181 case PACKET_STATISTICS: 3255 case PACKET_STATISTICS:
3182 spin_lock_bh(&sk->sk_receive_queue.lock); 3256 spin_lock_bh(&sk->sk_receive_queue.lock);
3257 memcpy(&st, &po->stats, sizeof(st));
3258 memset(&po->stats, 0, sizeof(po->stats));
3259 spin_unlock_bh(&sk->sk_receive_queue.lock);
3260
3183 if (po->tp_version == TPACKET_V3) { 3261 if (po->tp_version == TPACKET_V3) {
3184 lv = sizeof(struct tpacket_stats_v3); 3262 lv = sizeof(struct tpacket_stats_v3);
3185 memcpy(&st_u.stats3, &po->stats, 3263 data = &st.stats3;
3186 sizeof(struct tpacket_stats));
3187 st_u.stats3.tp_freeze_q_cnt =
3188 po->stats_u.stats3.tp_freeze_q_cnt;
3189 st_u.stats3.tp_packets += po->stats.tp_drops;
3190 data = &st_u.stats3;
3191 } else { 3264 } else {
3192 lv = sizeof(struct tpacket_stats); 3265 lv = sizeof(struct tpacket_stats);
3193 st = po->stats; 3266 data = &st.stats1;
3194 st.tp_packets += st.tp_drops;
3195 data = &st;
3196 } 3267 }
3197 memset(&po->stats, 0, sizeof(st)); 3268
3198 spin_unlock_bh(&sk->sk_receive_queue.lock);
3199 break; 3269 break;
3200 case PACKET_AUXDATA: 3270 case PACKET_AUXDATA:
3201 val = po->auxdata; 3271 val = po->auxdata;
@@ -3240,7 +3310,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
3240 case PACKET_FANOUT: 3310 case PACKET_FANOUT:
3241 val = (po->fanout ? 3311 val = (po->fanout ?
3242 ((u32)po->fanout->id | 3312 ((u32)po->fanout->id |
3243 ((u32)po->fanout->type << 16)) : 3313 ((u32)po->fanout->type << 16) |
3314 ((u32)po->fanout->flags << 24)) :
3244 0); 3315 0);
3245 break; 3316 break;
3246 case PACKET_TX_HAS_OFF: 3317 case PACKET_TX_HAS_OFF:
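
The PACKET_STATISTICS rework above snapshots and zeroes the counters in one locked section, then copies out either the stats1 or stats3 member of the union, with the returned length telling userspace which layout it got. A minimal reader, assuming a TPACKET_V1/V2 socket:

#include <linux/if_packet.h>
#include <stdio.h>
#include <sys/socket.h>

/* Sketch: read-and-reset packet statistics from a non-V3 socket. */
static void print_stats(int fd)
{
	struct tpacket_stats st;
	socklen_t len = sizeof(st);

	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
		printf("packets=%u drops=%u\n", st.tp_packets, st.tp_drops);
}
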
diff --git a/net/packet/diag.c b/net/packet/diag.c
index d3fcd1ebef7e..a9584a2f6d69 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -125,8 +125,10 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
125 return ret; 125 return ret;
126} 126}
127 127
128static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, 128static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
129 u32 portid, u32 seq, u32 flags, int sk_ino) 129 struct packet_diag_req *req,
130 struct user_namespace *user_ns,
131 u32 portid, u32 seq, u32 flags, int sk_ino)
130{ 132{
131 struct nlmsghdr *nlh; 133 struct nlmsghdr *nlh;
132 struct packet_diag_msg *rp; 134 struct packet_diag_msg *rp;
@@ -147,6 +149,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag
147 pdiag_put_info(po, skb)) 149 pdiag_put_info(po, skb))
148 goto out_nlmsg_trim; 150 goto out_nlmsg_trim;
149 151
152 if ((req->pdiag_show & PACKET_SHOW_INFO) &&
153 nla_put_u32(skb, PACKET_DIAG_UID,
154 from_kuid_munged(user_ns, sock_i_uid(sk))))
155 goto out_nlmsg_trim;
156
150 if ((req->pdiag_show & PACKET_SHOW_MCLIST) && 157 if ((req->pdiag_show & PACKET_SHOW_MCLIST) &&
151 pdiag_put_mclist(po, skb)) 158 pdiag_put_mclist(po, skb))
152 goto out_nlmsg_trim; 159 goto out_nlmsg_trim;
@@ -159,6 +166,14 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag
159 pdiag_put_fanout(po, skb)) 166 pdiag_put_fanout(po, skb))
160 goto out_nlmsg_trim; 167 goto out_nlmsg_trim;
161 168
169 if ((req->pdiag_show & PACKET_SHOW_MEMINFO) &&
170 sock_diag_put_meminfo(sk, skb, PACKET_DIAG_MEMINFO))
171 goto out_nlmsg_trim;
172
173 if ((req->pdiag_show & PACKET_SHOW_FILTER) &&
174 sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER))
175 goto out_nlmsg_trim;
176
162 return nlmsg_end(skb, nlh); 177 return nlmsg_end(skb, nlh);
163 178
164out_nlmsg_trim: 179out_nlmsg_trim:
@@ -183,9 +198,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
183 if (num < s_num) 198 if (num < s_num)
184 goto next; 199 goto next;
185 200
186 if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid, 201 if (sk_diag_fill(sk, skb, req,
187 cb->nlh->nlmsg_seq, NLM_F_MULTI, 202 sk_user_ns(NETLINK_CB(cb->skb).sk),
188 sock_i_ino(sk)) < 0) 203 NETLINK_CB(cb->skb).portid,
204 cb->nlh->nlmsg_seq, NLM_F_MULTI,
205 sock_i_ino(sk)) < 0)
189 goto done; 206 goto done;
190next: 207next:
191 num++; 208 num++;
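
The diag additions above let a dumper request the socket's UID (under PACKET_SHOW_INFO), memory info and attached filter alongside the existing groups. A request sketch, assuming the PACKET_SHOW_* uapi constants from this series; fill_req() is an invented helper and the surrounding netlink plumbing is omitted:

#include <linux/packet_diag.h>
#include <string.h>
#include <sys/socket.h>

static void fill_req(struct packet_diag_req *req)
{
	memset(req, 0, sizeof(*req));
	req->sdiag_family = AF_PACKET;
	req->pdiag_show = PACKET_SHOW_INFO | PACKET_SHOW_MEMINFO |
			  PACKET_SHOW_FILTER;
}
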
diff --git a/net/packet/internal.h b/net/packet/internal.h
index e84cab8cb7a9..c4e4b4561207 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -54,6 +54,7 @@ struct pgv {
54 54
55struct packet_ring_buffer { 55struct packet_ring_buffer {
56 struct pgv *pg_vec; 56 struct pgv *pg_vec;
57
57 unsigned int head; 58 unsigned int head;
58 unsigned int frames_per_block; 59 unsigned int frames_per_block;
59 unsigned int frame_size; 60 unsigned int frame_size;
@@ -63,8 +64,9 @@ struct packet_ring_buffer {
63 unsigned int pg_vec_pages; 64 unsigned int pg_vec_pages;
64 unsigned int pg_vec_len; 65 unsigned int pg_vec_len;
65 66
66 struct tpacket_kbdq_core prb_bdqc;
67 atomic_t pending; 67 atomic_t pending;
68
69 struct tpacket_kbdq_core prb_bdqc;
68}; 70};
69 71
70extern struct mutex fanout_mutex; 72extern struct mutex fanout_mutex;
@@ -77,10 +79,11 @@ struct packet_fanout {
77 unsigned int num_members; 79 unsigned int num_members;
78 u16 id; 80 u16 id;
79 u8 type; 81 u8 type;
80 u8 defrag; 82 u8 flags;
81 atomic_t rr_cur; 83 atomic_t rr_cur;
82 struct list_head list; 84 struct list_head list;
83 struct sock *arr[PACKET_FANOUT_MAX]; 85 struct sock *arr[PACKET_FANOUT_MAX];
86 int next[PACKET_FANOUT_MAX];
84 spinlock_t lock; 87 spinlock_t lock;
85 atomic_t sk_ref; 88 atomic_t sk_ref;
86 struct packet_type prot_hook ____cacheline_aligned_in_smp; 89 struct packet_type prot_hook ____cacheline_aligned_in_smp;
@@ -90,8 +93,7 @@ struct packet_sock {
90 /* struct sock has to be the first member of packet_sock */ 93 /* struct sock has to be the first member of packet_sock */
91 struct sock sk; 94 struct sock sk;
92 struct packet_fanout *fanout; 95 struct packet_fanout *fanout;
93 struct tpacket_stats stats; 96 union tpacket_stats_u stats;
94 union tpacket_stats_u stats_u;
95 struct packet_ring_buffer rx_ring; 97 struct packet_ring_buffer rx_ring;
96 struct packet_ring_buffer tx_ring; 98 struct packet_ring_buffer tx_ring;
97 int copy_thresh; 99 int copy_thresh;
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 0193630d3061..dc15f4300808 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -61,7 +61,7 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = {
61 [IFA_LOCAL] = { .type = NLA_U8 }, 61 [IFA_LOCAL] = { .type = NLA_U8 },
62}; 62};
63 63
64static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) 64static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
65{ 65{
66 struct net *net = sock_net(skb->sk); 66 struct net *net = sock_net(skb->sk);
67 struct nlattr *tb[IFA_MAX+1]; 67 struct nlattr *tb[IFA_MAX+1];
@@ -224,7 +224,7 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = {
224 [RTA_OIF] = { .type = NLA_U32 }, 224 [RTA_OIF] = { .type = NLA_U32 },
225}; 225};
226 226
227static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) 227static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
228{ 228{
229 struct net *net = sock_net(skb->sk); 229 struct net *net = sock_net(skb->sk);
230 struct nlattr *tb[RTA_MAX+1]; 230 struct nlattr *tb[RTA_MAX+1];
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 9b9be5279f5d..1cec5e4f3a5e 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -587,7 +587,7 @@ static ssize_t rfkill_name_show(struct device *dev,
587 587
588static const char *rfkill_get_type_str(enum rfkill_type type) 588static const char *rfkill_get_type_str(enum rfkill_type type)
589{ 589{
590 BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_FM + 1); 590 BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_NFC + 1);
591 591
592 switch (type) { 592 switch (type) {
593 case RFKILL_TYPE_WLAN: 593 case RFKILL_TYPE_WLAN:
@@ -604,6 +604,8 @@ static const char *rfkill_get_type_str(enum rfkill_type type)
604 return "gps"; 604 return "gps";
605 case RFKILL_TYPE_FM: 605 case RFKILL_TYPE_FM:
606 return "fm"; 606 return "fm";
607 case RFKILL_TYPE_NFC:
608 return "nfc";
607 default: 609 default:
608 BUG(); 610 BUG();
609 } 611 }
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 78fc0937948d..fb076cd6f808 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -131,6 +131,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
131 rfkill->pwr_clk = clk_get(&pdev->dev, pdata->power_clk_name); 131 rfkill->pwr_clk = clk_get(&pdev->dev, pdata->power_clk_name);
132 if (IS_ERR(rfkill->pwr_clk)) { 132 if (IS_ERR(rfkill->pwr_clk)) {
133 pr_warn("%s: can't find pwr_clk.\n", __func__); 133 pr_warn("%s: can't find pwr_clk.\n", __func__);
134 ret = PTR_ERR(rfkill->pwr_clk);
134 goto fail_shutdown_name; 135 goto fail_shutdown_name;
135 } 136 }
136 } 137 }
@@ -152,9 +153,11 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
152 } 153 }
153 154
154 rfkill->rfkill_dev = rfkill_alloc(pdata->name, &pdev->dev, pdata->type, 155 rfkill->rfkill_dev = rfkill_alloc(pdata->name, &pdev->dev, pdata->type,
155 &rfkill_gpio_ops, rfkill); 156 &rfkill_gpio_ops, rfkill);
156 if (!rfkill->rfkill_dev) 157 if (!rfkill->rfkill_dev) {
158 ret = -ENOMEM;
157 goto fail_shutdown; 159 goto fail_shutdown;
160 }
158 161
159 ret = rfkill_register(rfkill->rfkill_dev); 162 ret = rfkill_register(rfkill->rfkill_dev);
160 if (ret < 0) 163 if (ret < 0)
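
Both rfkill-gpio fixes above close the same hole: a goto into the unwind ladder without first assigning ret lets probe return a stale zero and report success for a half-initialized device. The general shape as a sketch; example_probe(), the clock name and the allocation are all invented:

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/slab.h>

static int example_probe(struct device *dev)
{
	struct clk *clk;
	void *priv;
	int ret;

	clk = clk_get(dev, "pwr");
	if (IS_ERR(clk)) {
		ret = PTR_ERR(clk);	/* propagate the real error */
		goto fail;
	}

	priv = kzalloc(64, GFP_KERNEL);
	if (!priv) {
		ret = -ENOMEM;		/* pick an errno before the goto */
		goto fail_clk;
	}

	return 0;

fail_clk:
	clk_put(clk);
fail:
	return ret;
}
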
diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c
index 4b5ab21ecb24..d11ac79246e4 100644
--- a/net/rfkill/rfkill-regulator.c
+++ b/net/rfkill/rfkill-regulator.c
@@ -51,7 +51,7 @@ static int rfkill_regulator_set_block(void *data, bool blocked)
51 return 0; 51 return 0;
52} 52}
53 53
54struct rfkill_ops rfkill_regulator_ops = { 54static struct rfkill_ops rfkill_regulator_ops = {
55 .set_block = rfkill_regulator_set_block, 55 .set_block = rfkill_regulator_set_block,
56}; 56};
57 57
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index cf68e6e4054a..9c8347451597 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1253,6 +1253,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
1253 skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); 1253 skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1254 1254
1255 if (srose != NULL) { 1255 if (srose != NULL) {
1256 memset(srose, 0, msg->msg_namelen);
1256 srose->srose_family = AF_ROSE; 1257 srose->srose_family = AF_ROSE;
1257 srose->srose_addr = rose->dest_addr; 1258 srose->srose_addr = rose->dest_addr;
1258 srose->srose_call = rose->dest_call; 1259 srose->srose_call = rose->dest_call;
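
The one-line rose_recvmsg() fix above is an information-leak repair: sockaddr_rose contains padding and optional digipeater slots, and copying it to userspace without zeroing first can expose stale kernel stack bytes through msg_name. The defensive pattern in general; fill_name() is illustrative:

#include <string.h>
#include <sys/socket.h>

/* Zero the whole reply buffer before assigning named fields, so
 * struct holes and unused tail bytes never reach userspace.
 */
static void fill_name(struct sockaddr_storage *ss, socklen_t namelen,
		      sa_family_t family)
{
	memset(ss, 0, namelen);		/* clear padding first */
	ss->ss_family = family;		/* then fill real fields */
}
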
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8579c4bb20c9..fd7072827a40 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -982,7 +982,7 @@ done:
982 return ret; 982 return ret;
983} 983}
984 984
985static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 985static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
986{ 986{
987 struct net *net = sock_net(skb->sk); 987 struct net *net = sock_net(skb->sk);
988 struct nlattr *tca[TCA_ACT_MAX + 1]; 988 struct nlattr *tca[TCA_ACT_MAX + 1];
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 08fa1e8a4ca4..3a4c0caa1f7d 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -166,15 +166,17 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
166 return 1; 166 return 1;
167} 167}
168 168
169static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, 169static int tcf_csum_ipv6_icmp(struct sk_buff *skb,
170 unsigned int ihl, unsigned int ipl) 170 unsigned int ihl, unsigned int ipl)
171{ 171{
172 struct icmp6hdr *icmp6h; 172 struct icmp6hdr *icmp6h;
173 const struct ipv6hdr *ip6h;
173 174
174 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); 175 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
175 if (icmp6h == NULL) 176 if (icmp6h == NULL)
176 return 0; 177 return 0;
177 178
179 ip6h = ipv6_hdr(skb);
178 icmp6h->icmp6_cksum = 0; 180 icmp6h->icmp6_cksum = 0;
179 skb->csum = csum_partial(icmp6h, ipl - ihl, 0); 181 skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
180 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 182 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
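
The reason each tcf_csum_*() helper above now takes only the skb and re-derives its header pointer: tcf_csum_skb_nextlayer() may pull the skb, which can reallocate skb->head and leave any iph/ip6h pointer computed by the caller dangling. The safe ordering in isolation; checksum_after_pull() is an invented name:

#include <linux/ipv6.h>
#include <linux/skbuff.h>

/* Take header pointers only after any operation that can move
 * skb->head, never before.
 */
static int checksum_after_pull(struct sk_buff *skb, unsigned int hlen)
{
	const struct ipv6hdr *ip6h;

	if (!pskb_may_pull(skb, hlen))	/* may reallocate skb->head */
		return 0;

	ip6h = ipv6_hdr(skb);		/* so dereference afterwards */
	return ip6h->version == 6;
}
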
@@ -186,15 +188,17 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
186 return 1; 188 return 1;
187} 189}
188 190
189static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, 191static int tcf_csum_ipv4_tcp(struct sk_buff *skb,
190 unsigned int ihl, unsigned int ipl) 192 unsigned int ihl, unsigned int ipl)
191{ 193{
192 struct tcphdr *tcph; 194 struct tcphdr *tcph;
195 const struct iphdr *iph;
193 196
194 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); 197 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
195 if (tcph == NULL) 198 if (tcph == NULL)
196 return 0; 199 return 0;
197 200
201 iph = ip_hdr(skb);
198 tcph->check = 0; 202 tcph->check = 0;
199 skb->csum = csum_partial(tcph, ipl - ihl, 0); 203 skb->csum = csum_partial(tcph, ipl - ihl, 0);
200 tcph->check = tcp_v4_check(ipl - ihl, 204 tcph->check = tcp_v4_check(ipl - ihl,
@@ -205,15 +209,17 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
205 return 1; 209 return 1;
206} 210}
207 211
208static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, 212static int tcf_csum_ipv6_tcp(struct sk_buff *skb,
209 unsigned int ihl, unsigned int ipl) 213 unsigned int ihl, unsigned int ipl)
210{ 214{
211 struct tcphdr *tcph; 215 struct tcphdr *tcph;
216 const struct ipv6hdr *ip6h;
212 217
213 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); 218 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
214 if (tcph == NULL) 219 if (tcph == NULL)
215 return 0; 220 return 0;
216 221
222 ip6h = ipv6_hdr(skb);
217 tcph->check = 0; 223 tcph->check = 0;
218 skb->csum = csum_partial(tcph, ipl - ihl, 0); 224 skb->csum = csum_partial(tcph, ipl - ihl, 0);
219 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 225 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -225,10 +231,11 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
225 return 1; 231 return 1;
226} 232}
227 233
228static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, 234static int tcf_csum_ipv4_udp(struct sk_buff *skb,
229 unsigned int ihl, unsigned int ipl, int udplite) 235 unsigned int ihl, unsigned int ipl, int udplite)
230{ 236{
231 struct udphdr *udph; 237 struct udphdr *udph;
238 const struct iphdr *iph;
232 u16 ul; 239 u16 ul;
233 240
234 /* 241 /*
@@ -242,6 +249,7 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
242 if (udph == NULL) 249 if (udph == NULL)
243 return 0; 250 return 0;
244 251
252 iph = ip_hdr(skb);
245 ul = ntohs(udph->len); 253 ul = ntohs(udph->len);
246 254
247 if (udplite || udph->check) { 255 if (udplite || udph->check) {
@@ -276,10 +284,11 @@ ignore_obscure_skb:
276 return 1; 284 return 1;
277} 285}
278 286
279static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, 287static int tcf_csum_ipv6_udp(struct sk_buff *skb,
280 unsigned int ihl, unsigned int ipl, int udplite) 288 unsigned int ihl, unsigned int ipl, int udplite)
281{ 289{
282 struct udphdr *udph; 290 struct udphdr *udph;
291 const struct ipv6hdr *ip6h;
283 u16 ul; 292 u16 ul;
284 293
285 /* 294 /*
@@ -293,6 +302,7 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
293 if (udph == NULL) 302 if (udph == NULL)
294 return 0; 303 return 0;
295 304
305 ip6h = ipv6_hdr(skb);
296 ul = ntohs(udph->len); 306 ul = ntohs(udph->len);
297 307
298 udph->check = 0; 308 udph->check = 0;
@@ -328,7 +338,7 @@ ignore_obscure_skb:
328 338
329static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) 339static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
330{ 340{
331 struct iphdr *iph; 341 const struct iphdr *iph;
332 int ntkoff; 342 int ntkoff;
333 343
334 ntkoff = skb_network_offset(skb); 344 ntkoff = skb_network_offset(skb);
@@ -353,19 +363,19 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
353 break; 363 break;
354 case IPPROTO_TCP: 364 case IPPROTO_TCP:
355 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) 365 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
356 if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4, 366 if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4,
357 ntohs(iph->tot_len))) 367 ntohs(iph->tot_len)))
358 goto fail; 368 goto fail;
359 break; 369 break;
360 case IPPROTO_UDP: 370 case IPPROTO_UDP:
361 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) 371 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
362 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, 372 if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
363 ntohs(iph->tot_len), 0)) 373 ntohs(iph->tot_len), 0))
364 goto fail; 374 goto fail;
365 break; 375 break;
366 case IPPROTO_UDPLITE: 376 case IPPROTO_UDPLITE:
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) 377 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
368 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, 378 if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,
369 ntohs(iph->tot_len), 1)) 379 ntohs(iph->tot_len), 1))
370 goto fail; 380 goto fail;
371 break; 381 break;
@@ -377,7 +387,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
377 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 387 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
378 goto fail; 388 goto fail;
379 389
380 ip_send_check(iph); 390 ip_send_check(ip_hdr(skb));
381 } 391 }
382 392
383 return 1; 393 return 1;
@@ -456,6 +466,7 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
456 ixhl = ipv6_optlen(ip6xh); 466 ixhl = ipv6_optlen(ip6xh);
457 if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) 467 if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
458 goto fail; 468 goto fail;
469 ip6xh = (void *)(skb_network_header(skb) + hl);
459 if ((nexthdr == NEXTHDR_HOP) && 470 if ((nexthdr == NEXTHDR_HOP) &&
460 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) 471 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
461 goto fail; 472 goto fail;
@@ -464,25 +475,25 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
464 break; 475 break;
465 case IPPROTO_ICMPV6: 476 case IPPROTO_ICMPV6:
466 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) 477 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
467 if (!tcf_csum_ipv6_icmp(skb, ip6h, 478 if (!tcf_csum_ipv6_icmp(skb,
468 hl, pl + sizeof(*ip6h))) 479 hl, pl + sizeof(*ip6h)))
469 goto fail; 480 goto fail;
470 goto done; 481 goto done;
471 case IPPROTO_TCP: 482 case IPPROTO_TCP:
472 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) 483 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
473 if (!tcf_csum_ipv6_tcp(skb, ip6h, 484 if (!tcf_csum_ipv6_tcp(skb,
474 hl, pl + sizeof(*ip6h))) 485 hl, pl + sizeof(*ip6h)))
475 goto fail; 486 goto fail;
476 goto done; 487 goto done;
477 case IPPROTO_UDP: 488 case IPPROTO_UDP:
478 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) 489 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
479 if (!tcf_csum_ipv6_udp(skb, ip6h, hl, 490 if (!tcf_csum_ipv6_udp(skb, hl,
480 pl + sizeof(*ip6h), 0)) 491 pl + sizeof(*ip6h), 0))
481 goto fail; 492 goto fail;
482 goto done; 493 goto done;
483 case IPPROTO_UDPLITE: 494 case IPPROTO_UDPLITE:
484 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) 495 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
485 if (!tcf_csum_ipv6_udp(skb, ip6h, hl, 496 if (!tcf_csum_ipv6_udp(skb, hl,
486 pl + sizeof(*ip6h), 1)) 497 pl + sizeof(*ip6h), 1))
487 goto fail; 498 goto fail;
488 goto done; 499 goto done;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index e0f6de64afec..60d88b6b9560 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -8,7 +8,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
- * Copyright:	Jamal Hadi Salim (2002-4)
+ * Copyright:	Jamal Hadi Salim (2002-13)
  */
 
 #include <linux/types.h>
@@ -303,17 +303,44 @@ static struct tc_action_ops act_ipt_ops = {
 	.walk		=	tcf_generic_walker
 };
 
-MODULE_AUTHOR("Jamal Hadi Salim(2002-4)");
+static struct tc_action_ops act_xt_ops = {
+	.kind		=	"xt",
+	.hinfo		=	&ipt_hash_info,
+	.type		=	TCA_ACT_IPT,
+	.capab		=	TCA_CAP_NONE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_ipt,
+	.dump		=	tcf_ipt_dump,
+	.cleanup	=	tcf_ipt_cleanup,
+	.lookup		=	tcf_hash_search,
+	.init		=	tcf_ipt_init,
+	.walk		=	tcf_generic_walker
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
 MODULE_DESCRIPTION("Iptables target actions");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("act_xt");
 
 static int __init ipt_init_module(void)
 {
-	return tcf_register_action(&act_ipt_ops);
+	int ret1, ret2;
+	ret1 = tcf_register_action(&act_xt_ops);
+	if (ret1 < 0)
+		printk("Failed to load xt action\n");
+	ret2 = tcf_register_action(&act_ipt_ops);
+	if (ret2 < 0)
+		printk("Failed to load ipt action\n");
+
+	if (ret1 < 0 && ret2 < 0)
+		return ret1;
+	else
+		return 0;
 }
 
 static void __exit ipt_cleanup_module(void)
 {
+	tcf_unregister_action(&act_xt_ops);
 	tcf_unregister_action(&act_ipt_ops);
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 964f5e4f4b8a..8e118af90973 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -22,7 +22,6 @@
 #include <linux/skbuff.h>
 #include <linux/init.h>
 #include <linux/kmod.h>
-#include <linux/netlink.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <net/net_namespace.h>
@@ -118,7 +117,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
 
 /* Add/change/delete/get a filter node */
 
-static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
@@ -141,7 +140,12 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 	if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN))
 		return -EPERM;
+
 replay:
+	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
+	if (err < 0)
+		return err;
+
 	t = nlmsg_data(n);
 	protocol = TC_H_MIN(t->tcm_info);
 	prio = TC_H_MAJ(t->tcm_info);
@@ -164,10 +168,6 @@ replay:
 	if (dev == NULL)
 		return -ENODEV;
 
-	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL);
-	if (err < 0)
-		return err;
-
 	/* Find qdisc */
 	if (!parent) {
 		q = dev->qdisc;
@@ -427,7 +427,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	const struct Qdisc_class_ops *cops;
 	struct tcf_dump_args arg;
 
-	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
 		return skb->len;
 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
 	if (!dev)
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index aa36a8c8b33b..7881e2fccbc2 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 			return -EOPNOTSUPP;
 
 		if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) &&
-		    sk_user_ns(NETLINK_CB(in_skb).ssk) != &init_user_ns)
+		    sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns)
 			return -EOPNOTSUPP;
 	}
399 399
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 1135d8227f9b..9b97172db84a 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -204,7 +204,6 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
 	if (err < 0)
 		return err;
 
-	err = -EINVAL;
 	if (tb[TCA_FW_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
 		tcf_bind_filter(tp, &f->res, base);
@@ -218,6 +217,7 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
 	}
 #endif /* CONFIG_NET_CLS_IND */
 
+	err = -EINVAL;
 	if (tb[TCA_FW_MASK]) {
 		mask = nla_get_u32(tb[TCA_FW_MASK]);
 		if (mask != head->mask)
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 3130320997e2..938b7cbf5627 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -83,7 +83,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
 	opt.dim = set->dim;
 	opt.flags = set->flags;
 	opt.cmdflags = 0;
-	opt.timeout = ~0u;
+	opt.ext.timeout = ~0u;
 
 	network_offset = skb_network_offset(skb);
 	skb_pull(skb, network_offset);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c297e2a8e2a1..2b935e7cfe7b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -971,13 +971,13 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
  * Delete/get qdisc.
  */
 
-static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
 {
 	struct net *net = sock_net(skb->sk);
 	struct tcmsg *tcm = nlmsg_data(n);
 	struct nlattr *tca[TCA_MAX + 1];
 	struct net_device *dev;
-	u32 clid = tcm->tcm_parent;
+	u32 clid;
 	struct Qdisc *q = NULL;
 	struct Qdisc *p = NULL;
 	int err;
@@ -985,14 +985,15 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
-	if (!dev)
-		return -ENODEV;
-
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
 	if (err < 0)
 		return err;
 
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
+		return -ENODEV;
+
+	clid = tcm->tcm_parent;
 	if (clid) {
 		if (clid != TC_H_ROOT) {
 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
@@ -1038,7 +1039,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
  * Create/change qdisc.
  */
 
-static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
 {
 	struct net *net = sock_net(skb->sk);
 	struct tcmsg *tcm;
@@ -1053,6 +1054,10 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 replay:
 	/* Reinit, just in case something touches this. */
+	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
+	if (err < 0)
+		return err;
+
 	tcm = nlmsg_data(n);
 	clid = tcm->tcm_parent;
 	q = p = NULL;
@@ -1061,9 +1066,6 @@ replay:
 	if (!dev)
 		return -ENODEV;
 
-	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
-	if (err < 0)
-		return err;
 
 	if (clid) {
 		if (clid != TC_H_ROOT) {
@@ -1372,7 +1374,7 @@ done:
 
 
 
-static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
 {
 	struct net *net = sock_net(skb->sk);
 	struct tcmsg *tcm = nlmsg_data(n);
@@ -1382,22 +1384,22 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	const struct Qdisc_class_ops *cops;
 	unsigned long cl = 0;
 	unsigned long new_cl;
-	u32 portid = tcm->tcm_parent;
-	u32 clid = tcm->tcm_handle;
-	u32 qid = TC_H_MAJ(clid);
+	u32 portid;
+	u32 clid;
+	u32 qid;
 	int err;
 
 	if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
-	if (!dev)
-		return -ENODEV;
-
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
 	if (err < 0)
 		return err;
 
+	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
+	if (!dev)
+		return -ENODEV;
+
 	/*
 	   parent == TC_H_UNSPEC - unspecified parent.
 	   parent == TC_H_ROOT   - class is root, which has no parent.
@@ -1413,6 +1415,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 	/* Step 1. Determine qdisc handle X:0 */
 
+	portid = tcm->tcm_parent;
+	clid = tcm->tcm_handle;
+	qid = TC_H_MAJ(clid);
+
 	if (portid != TC_H_ROOT) {
 		u32 qid1 = TC_H_MAJ(portid);
 
@@ -1636,7 +1642,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net_device *dev;
 	int t, s_t;
 
-	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
 		return 0;
 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
 	if (!dev)
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index cc37dd52ecf9..ef53ab8d0aae 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -80,7 +80,7 @@ struct choke_sched_data {
 /* deliver a random number between 0 and N - 1 */
 static u32 random_N(unsigned int N)
 {
-	return reciprocal_divide(random32(), N);
+	return reciprocal_divide(prandom_u32(), N);
 }
 
 /* number of elements in queue including holes */
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 571f1d211f4d..79b1876b6cd2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -981,6 +981,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
 	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
 	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
 };
 
 static void htb_work_func(struct work_struct *work)
@@ -994,7 +995,7 @@ static void htb_work_func(struct work_struct *work)
 static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct nlattr *tb[TCA_HTB_INIT + 1];
+	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct tc_htb_glob *gopt;
 	int err;
 	int i;
@@ -1002,20 +1003,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
+	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_HTB_INIT] == NULL) {
-		pr_err("HTB: hey probably you have bad tc tool ?\n");
+	if (!tb[TCA_HTB_INIT])
 		return -EINVAL;
-	}
+
 	gopt = nla_data(tb[TCA_HTB_INIT]);
-	if (gopt->version != HTB_VER >> 16) {
-		pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
-		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
+	if (gopt->version != HTB_VER >> 16)
 		return -EINVAL;
-	}
 
 	err = qdisc_class_hash_init(&q->clhash);
 	if (err < 0)
@@ -1027,10 +1024,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	INIT_WORK(&q->work, htb_work_func);
 	skb_queue_head_init(&q->direct_queue);
 
-	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
-	if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
-		q->direct_qlen = 2;
-
+	if (tb[TCA_HTB_DIRECT_QLEN])
+		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
+	else {
+		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
+		if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
+			q->direct_qlen = 2;
+	}
 	if ((q->rate2quantum = gopt->rate2quantum) < 1)
 		q->rate2quantum = 1;
 	q->defcls = gopt->defcls;
@@ -1056,7 +1056,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
-	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt))
+	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
+	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
 
@@ -1311,7 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
 	struct nlattr *opt = tca[TCA_OPTIONS];
-	struct nlattr *tb[__TCA_HTB_MAX];
+	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct tc_htb_opt *hopt;
 
 	/* extract all subattrs from opt attr */
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index d2709e2b7be6..91cfd8f94a19 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -66,13 +66,6 @@ static void sctp_assoc_bh_rcv(struct work_struct *work);
 static void sctp_assoc_free_asconf_acks(struct sctp_association *asoc);
 static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc);
 
-/* Keep track of the new idr low so that we don't re-use association id
- * numbers too fast.  It is protected by they idr spin lock is in the
- * range of 1 - INT_MAX.
- */
-static u32 idr_low = 1;
-
-
 /* 1st Level Abstractions. */
 
 /* Initialize a new association from provided memory. */
@@ -104,8 +97,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 
 	/* Initialize the object handling fields. */
 	atomic_set(&asoc->base.refcnt, 1);
-	asoc->base.dead = 0;
-	asoc->base.malloced = 0;
+	asoc->base.dead = false;
 
 	/* Initialize the bind addr area. */
 	sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port);
@@ -371,7 +363,6 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
 	if (!sctp_association_init(asoc, ep, sk, scope, gfp))
 		goto fail_init;
 
-	asoc->base.malloced = 1;
 	SCTP_DBG_OBJCNT_INC(assoc);
 	SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc);
 
@@ -409,7 +400,7 @@ void sctp_association_free(struct sctp_association *asoc)
 	/* Mark as dead, so other users can know this structure is
 	 * going away.
 	 */
-	asoc->base.dead = 1;
+	asoc->base.dead = true;
 
 	/* Dispose of any data lying around in the outqueue. */
 	sctp_outq_free(&asoc->outqueue);
@@ -484,10 +475,8 @@ static void sctp_association_destroy(struct sctp_association *asoc)
 
 	WARN_ON(atomic_read(&asoc->rmem_alloc));
 
-	if (asoc->base.malloced) {
-		kfree(asoc);
-		SCTP_DBG_OBJCNT_DEC(assoc);
-	}
+	kfree(asoc);
+	SCTP_DBG_OBJCNT_DEC(assoc);
 }
 
 /* Change the primary destination address for the peer. */
@@ -1601,13 +1590,8 @@ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
 	if (preload)
 		idr_preload(gfp);
 	spin_lock_bh(&sctp_assocs_id_lock);
-	/* 0 is not a valid id, idr_low is always >= 1 */
-	ret = idr_alloc(&sctp_assocs_id, asoc, idr_low, 0, GFP_NOWAIT);
-	if (ret >= 0) {
-		idr_low = ret + 1;
-		if (idr_low == INT_MAX)
-			idr_low = 1;
-	}
+	/* 0 is not a valid assoc_id, must be >= 1 */
+	ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, 1, 0, GFP_NOWAIT);
 	spin_unlock_bh(&sctp_assocs_id_lock);
 	if (preload)
 		idr_preload_end();
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index d886b3bf84f5..41145fe31813 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -131,8 +131,6 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
  */
 void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port)
 {
-	bp->malloced = 0;
-
 	INIT_LIST_HEAD(&bp->address_list);
 	bp->port = port;
 }
@@ -155,11 +153,6 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
 {
 	/* Empty the bind address list. */
 	sctp_bind_addr_clean(bp);
-
-	if (bp->malloced) {
-		kfree(bp);
-		SCTP_DBG_OBJCNT_DEC(bind_addr);
-	}
 }
 
 /* Add an address to the bind address list in the SCTP_bind_addr structure. */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 12ed45dbe75d..5fbd7bc6bb11 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -121,8 +121,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 
 	/* Initialize the basic object fields. */
 	atomic_set(&ep->base.refcnt, 1);
-	ep->base.dead = 0;
-	ep->base.malloced = 1;
+	ep->base.dead = false;
 
 	/* Create an input queue. */
 	sctp_inq_init(&ep->base.inqueue);
@@ -198,7 +197,7 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp)
 		goto fail;
 	if (!sctp_endpoint_init(ep, sk, gfp))
 		goto fail_init;
-	ep->base.malloced = 1;
+
 	SCTP_DBG_OBJCNT_INC(ep);
 	return ep;
 
@@ -234,7 +233,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,
  */
 void sctp_endpoint_free(struct sctp_endpoint *ep)
 {
-	ep->base.dead = 1;
+	ep->base.dead = true;
 
 	ep->base.sk->sk_state = SCTP_SS_CLOSED;
 
@@ -279,11 +278,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 	if (ep->base.sk)
 		sock_put(ep->base.sk);
 
-	/* Finally, free up our memory. */
-	if (ep->base.malloced) {
-		kfree(ep);
-		SCTP_DBG_OBJCNT_DEC(ep);
-	}
+	kfree(ep);
+	SCTP_DBG_OBJCNT_DEC(ep);
 }
 
 /* Hold a reference to an endpoint. */
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 2d5ad280de38..3221d073448c 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -58,8 +58,6 @@ void sctp_inq_init(struct sctp_inq *queue)
 
 	/* Create a task for delivering data. */
 	INIT_WORK(&queue->immediate, NULL);
-
-	queue->malloced = 0;
 }
 
 /* Release the memory associated with an SCTP inqueue. */
@@ -80,11 +78,6 @@ void sctp_inq_free(struct sctp_inq *queue)
 		sctp_chunk_free(queue->in_progress);
 		queue->in_progress = NULL;
 	}
-
-	if (queue->malloced) {
-		/* Dump the master memory segment. */
-		kfree(queue);
-	}
 }
 
 /* Put a new packet in an SCTP inqueue.
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f5200a2ad852..bbef4a7a9b56 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -136,7 +136,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,
 	packet->overhead = overhead;
 	sctp_packet_reset(packet);
 	packet->vtag = 0;
-	packet->malloced = 0;
+
 	return packet;
 }
 
@@ -151,9 +151,6 @@ void sctp_packet_free(struct sctp_packet *packet)
 		list_del_init(&chunk->list);
 		sctp_chunk_free(chunk);
 	}
-
-	if (packet->malloced)
-		kfree(packet);
 }
 
 /* This routine tries to append the chunk to the offered packet. If adding
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 01dca753db16..32a4625fef77 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -217,8 +217,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
 	q->outstanding_bytes = 0;
 	q->empty = 1;
 	q->cork  = 0;
-
-	q->malloced = 0;
 	q->out_qlen = 0;
 }
 
@@ -295,10 +293,6 @@ void sctp_outq_free(struct sctp_outq *q)
 {
 	/* Throw away leftover chunks. */
 	__sctp_outq_teardown(q);
-
-	/* If we were kmalloc()'d, free the memory. */
-	if (q->malloced)
-		kfree(q);
 }
 
 /* Put a new chunk in an sctp_outq. */
@@ -707,11 +701,10 @@ redo:
 /* Cork the outqueue so queued chunks are really queued. */
 int sctp_outq_uncork(struct sctp_outq *q)
 {
-	int error = 0;
 	if (q->cork)
 		q->cork = 0;
-	error = sctp_outq_flush(q, 0);
-	return error;
+
+	return sctp_outq_flush(q, 0);
 }
 
 
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index ad0dba870341..e62c22535be4 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -63,7 +63,7 @@ static struct {
 	struct timespec tstart;
 } sctpw;
 
-static void printl(const char *fmt, ...)
+static __printf(1, 2) void printl(const char *fmt, ...)
 {
 	va_list args;
 	int len;
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ab3bba8cb0a8..4e45ee35d0db 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -295,7 +295,8 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
 	seq_printf(seq, " ASSOC SOCK STY SST ST HBKT "
 			"ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "
 			"RPORT LADDRS <-> RADDRS "
-			"HBINT INS OUTS MAXRT T1X T2X RTXC\n");
+			"HBINT INS OUTS MAXRT T1X T2X RTXC "
+			"wmema wmemq sndbuf rcvbuf\n");
 
 	return (void *)pos;
 }
@@ -349,11 +350,16 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 		sctp_seq_dump_local_addrs(seq, epb);
 		seq_printf(seq, "<-> ");
 		sctp_seq_dump_remote_addrs(seq, assoc);
-		seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d ",
+		seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d "
+			"%8d %8d %8d %8d",
 			assoc->hbinterval, assoc->c.sinit_max_instreams,
 			assoc->c.sinit_num_ostreams, assoc->max_retrans,
 			assoc->init_retries, assoc->shutdown_retries,
-			assoc->rtx_data_chunks);
+			assoc->rtx_data_chunks,
+			atomic_read(&sk->sk_wmem_alloc),
+			sk->sk_wmem_queued,
+			sk->sk_sndbuf,
+			sk->sk_rcvbuf);
 		seq_printf(seq, "\n");
 	}
 	read_unlock(&head->lock);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 1c2e46cb9191..eaee00c61139 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1403,7 +1403,7 @@ SCTP_STATIC __init int sctp_init(void)
 
 	/* Allocate and initialize the endpoint hash table. */
 	sctp_ep_hashsize = 64;
-	sctp_ep_hashtable = (struct sctp_hashbucket *)
+	sctp_ep_hashtable =
 		kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL);
 	if (!sctp_ep_hashtable) {
 		pr_err("Failed endpoint_hash alloc\n");
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b9070736b8d9..f631c5ff4dbf 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1119,9 +1119,10 @@ static int __sctp_connect(struct sock* sk,
 	/* Make sure the destination port is correctly set
 	 * in all addresses.
 	 */
-	if (asoc && asoc->peer.port && asoc->peer.port != port)
+	if (asoc && asoc->peer.port && asoc->peer.port != port) {
+		err = -EINVAL;
 		goto out_free;
-
+	}
 
 	/* Check if there already is a matching association on the
 	 * endpoint (other than the one created here).
@@ -6185,7 +6186,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 	/* Is there any exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c
index 825ea94415b3..da8603523808 100644
--- a/net/sctp/ssnmap.c
+++ b/net/sctp/ssnmap.c
@@ -74,7 +74,6 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out,
 	if (!sctp_ssnmap_init(retval, in, out))
 		goto fail_map;
 
-	retval->malloced = 1;
 	SCTP_DBG_OBJCNT_INC(ssnmap);
 
 	return retval;
@@ -118,14 +117,16 @@ void sctp_ssnmap_clear(struct sctp_ssnmap *map)
 /* Dispose of a ssnmap. */
 void sctp_ssnmap_free(struct sctp_ssnmap *map)
 {
-	if (map && map->malloced) {
-		int size;
-
-		size = sctp_ssnmap_size(map->in.len, map->out.len);
-		if (size <= KMALLOC_MAX_SIZE)
-			kfree(map);
-		else
-			free_pages((unsigned long)map, get_order(size));
-		SCTP_DBG_OBJCNT_DEC(ssnmap);
-	}
+	int size;
+
+	if (unlikely(!map))
+		return;
+
+	size = sctp_ssnmap_size(map->in.len, map->out.len);
+	if (size <= KMALLOC_MAX_SIZE)
+		kfree(map);
+	else
+		free_pages((unsigned long)map, get_order(size));
+
+	SCTP_DBG_OBJCNT_DEC(ssnmap);
 }
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index fafd2a461ba0..098f1d5f769e 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -123,7 +123,6 @@ struct sctp_transport *sctp_transport_new(struct net *net,
 	if (!sctp_transport_init(net, transport, addr, gfp))
 		goto fail_init;
 
-	transport->malloced = 1;
 	SCTP_DBG_OBJCNT_INC(transport);
 
 	return transport;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 0fd5b3d2df03..04e3d470f877 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -68,7 +68,6 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq,
 	skb_queue_head_init(&ulpq->reasm);
 	skb_queue_head_init(&ulpq->lobby);
 	ulpq->pd_mode  = 0;
-	ulpq->malloced = 0;
 
 	return ulpq;
 }
@@ -96,8 +95,6 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq)
 void sctp_ulpq_free(struct sctp_ulpq *ulpq)
 {
 	sctp_ulpq_flush(ulpq);
-	if (ulpq->malloced)
-		kfree(ulpq);
 }
 
 /* Process an incoming DATA chunk. */
diff --git a/net/socket.c b/net/socket.c
index 88f759adf3af..6b94633ca61d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -600,7 +600,7 @@ void sock_release(struct socket *sock)
 }
 EXPORT_SYMBOL(sock_release);
 
-int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
+void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
 {
 	*tx_flags = 0;
 	if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
@@ -609,7 +609,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
 		*tx_flags |= SKBTX_SW_TSTAMP;
 	if (sock_flag(sk, SOCK_WIFI_STATUS))
 		*tx_flags |= SKBTX_WIFI_STATUS;
-	return 0;
 }
 EXPORT_SYMBOL(sock_tx_timestamp);
 
@@ -682,16 +681,6 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(kernel_sendmsg);
 
-static int ktime2ts(ktime_t kt, struct timespec *ts)
-{
-	if (kt.tv64) {
-		*ts = ktime_to_timespec(kt);
-		return 1;
-	} else {
-		return 0;
-	}
-}
-
 /*
  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
  */
@@ -724,17 +713,15 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 
 
 	memset(ts, 0, sizeof(ts));
-	if (skb->tstamp.tv64 &&
-	    sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
-		skb_get_timestampns(skb, ts + 0);
+	if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
+	    ktime_to_timespec_cond(skb->tstamp, ts + 0))
 		empty = 0;
-	}
 	if (shhwtstamps) {
 		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
-		    ktime2ts(shhwtstamps->syststamp, ts + 1))
+		    ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
 			empty = 0;
 		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
-		    ktime2ts(shhwtstamps->hwtstamp, ts + 2))
+		    ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
 			empty = 0;
 	}
 	if (!empty)
@@ -1173,15 +1160,6 @@ static int sock_mmap(struct file *file, struct vm_area_struct *vma)
 
 static int sock_close(struct inode *inode, struct file *filp)
 {
-	/*
-	 * It was possible the inode is NULL we were
-	 * closing an unfinished socket.
-	 */
-
-	if (!inode) {
-		printk(KERN_DEBUG "sock_close: NULL inode\n");
-		return 0;
-	}
 	sock_release(SOCKET_I(inode));
 	return 0;
 }
@@ -2434,7 +2412,7 @@ static const unsigned char nargs[21] = {
 
 SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 {
-	unsigned long a[6];
+	unsigned long a[AUDITSC_ARGS];
 	unsigned long a0, a1;
 	int err;
 	unsigned int len;
@@ -2450,7 +2428,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 	if (copy_from_user(a, args, len))
 		return -EFAULT;
 
-	audit_socketcall(nargs[call] / sizeof(unsigned long), a);
+	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
+	if (err)
+		return err;
 
 	a0 = a[0];
 	a1 = a[1];
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 516fe2caac2c..241b54f30204 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -3,6 +3,7 @@ config SUNRPC
 
 config SUNRPC_GSS
 	tristate
+	select OID_REGISTRY
 
 config SUNRPC_BACKCHANNEL
 	bool
@@ -24,7 +25,6 @@ config SUNRPC_XPRT_RDMA
 config SUNRPC_SWAP
 	bool
 	depends on SUNRPC
-	select NETVM
 
 config RPCSEC_GSS_KRB5
 	tristate "Secure RPC: Kerberos V mechanism"
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index f5294047df77..ed2fdd210c0b 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -82,7 +82,7 @@ MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size");
 
 static u32
 pseudoflavor_to_flavor(u32 flavor) {
-	if (flavor >= RPC_AUTH_MAXFLAVOR)
+	if (flavor > RPC_AUTH_MAXFLAVOR)
 		return RPC_AUTH_GSS;
 	return flavor;
 }
@@ -124,6 +124,79 @@ rpcauth_unregister(const struct rpc_authops *ops)
 EXPORT_SYMBOL_GPL(rpcauth_unregister);
 
 /**
+ * rpcauth_get_pseudoflavor - check if security flavor is supported
+ * @flavor: a security flavor
+ * @info: a GSS mech OID, quality of protection, and service value
+ *
+ * Verifies that an appropriate kernel module is available or already loaded.
+ * Returns an equivalent pseudoflavor, or RPC_AUTH_MAXFLAVOR if "flavor" is
+ * not supported locally.
+ */
+rpc_authflavor_t
+rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info)
+{
+	const struct rpc_authops *ops;
+	rpc_authflavor_t pseudoflavor;
+
+	ops = auth_flavors[flavor];
+	if (ops == NULL)
+		request_module("rpc-auth-%u", flavor);
+	spin_lock(&rpc_authflavor_lock);
+	ops = auth_flavors[flavor];
+	if (ops == NULL || !try_module_get(ops->owner)) {
+		spin_unlock(&rpc_authflavor_lock);
+		return RPC_AUTH_MAXFLAVOR;
+	}
+	spin_unlock(&rpc_authflavor_lock);
+
+	pseudoflavor = flavor;
+	if (ops->info2flavor != NULL)
+		pseudoflavor = ops->info2flavor(info);
+
+	module_put(ops->owner);
+	return pseudoflavor;
+}
+EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor);
+
+/**
+ * rpcauth_get_gssinfo - find GSS tuple matching a GSS pseudoflavor
+ * @pseudoflavor: GSS pseudoflavor to match
+ * @info: rpcsec_gss_info structure to fill in
+ *
+ * Returns zero and fills in "info" if pseudoflavor matches a
+ * supported mechanism.
+ */
+int
+rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info)
+{
+	rpc_authflavor_t flavor = pseudoflavor_to_flavor(pseudoflavor);
+	const struct rpc_authops *ops;
+	int result;
+
+	if (flavor >= RPC_AUTH_MAXFLAVOR)
+		return -EINVAL;
+
+	ops = auth_flavors[flavor];
+	if (ops == NULL)
+		request_module("rpc-auth-%u", flavor);
+	spin_lock(&rpc_authflavor_lock);
+	ops = auth_flavors[flavor];
+	if (ops == NULL || !try_module_get(ops->owner)) {
+		spin_unlock(&rpc_authflavor_lock);
+		return -ENOENT;
+	}
+	spin_unlock(&rpc_authflavor_lock);
+
+	result = -ENOENT;
+	if (ops->flavor2info != NULL)
+		result = ops->flavor2info(pseudoflavor, info);
+
+	module_put(ops->owner);
+	return result;
+}
+EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
+
+/**
  * rpcauth_list_flavors - discover registered flavors and pseudoflavors
  * @array: array to fill in
  * @size: size of "array"
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 9e4cb59ef9f0..14e9e53e63d5 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -5,7 +5,8 @@
 obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
 
 auth_rpcgss-y := auth_gss.o gss_generic_token.o \
-	gss_mech_switch.o svcauth_gss.o
+	gss_mech_switch.o svcauth_gss.o \
+	gss_rpc_upcall.o gss_rpc_xdr.o
 
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 5257d2982ba5..7da6b457f66a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -238,7 +238,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
 		p = ERR_PTR(-EFAULT);
 		goto err;
 	}
-	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS);
+	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
 	if (ret < 0) {
 		p = ERR_PTR(ret);
 		goto err;
@@ -867,8 +867,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
 	err = -EINVAL;
 	gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
 	if (!gss_auth->mech) {
-		printk(KERN_WARNING "%s: Pseudoflavor %d not found!\n",
-				__func__, flavor);
+		dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
 		goto err_free;
 	}
 	gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
@@ -1641,6 +1640,8 @@ static const struct rpc_authops authgss_ops = {
 	.pipes_create	= gss_pipes_dentries_create,
 	.pipes_destroy	= gss_pipes_dentries_destroy,
 	.list_pseudoflavors = gss_mech_list_pseudoflavors,
+	.info2flavor	= gss_mech_info2flavor,
+	.flavor2info	= gss_mech_flavor2info,
 };
 
 static const struct rpc_credops gss_credops = {
@@ -1733,6 +1734,7 @@ static void __exit exit_rpcsec_gss(void)
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 }
 
+MODULE_ALIAS("rpc-auth-6");
 MODULE_LICENSE("GPL");
 module_param_named(expired_cred_retry_delay,
 		   gss_expired_cred_retry_delay,
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index d3611f11a8df..0d3c158ef8fa 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -679,6 +679,7 @@ out_err:
 static int
 gss_import_sec_context_kerberos(const void *p, size_t len,
 				struct gss_ctx *ctx_id,
+				time_t *endtime,
 				gfp_t gfp_mask)
 {
 	const void *end = (const void *)((const char *)p + len);
@@ -694,9 +695,11 @@ gss_import_sec_context_kerberos(const void *p, size_t len,
 	else
 		ret = gss_import_v2_context(p, end, ctx, gfp_mask);
 
-	if (ret == 0)
+	if (ret == 0) {
 		ctx_id->internal_ctx_id = ctx;
-	else
+		if (endtime)
+			*endtime = ctx->endtime;
+	} else
 		kfree(ctx);
 
 	dprintk("RPC: %s: returning %d\n", __func__, ret);
@@ -729,16 +732,19 @@ static const struct gss_api_ops gss_kerberos_ops = {
 static struct pf_desc gss_kerberos_pfs[] = {
 	[0] = {
 		.pseudoflavor = RPC_AUTH_GSS_KRB5,
+		.qop = GSS_C_QOP_DEFAULT,
 		.service = RPC_GSS_SVC_NONE,
 		.name = "krb5",
 	},
 	[1] = {
 		.pseudoflavor = RPC_AUTH_GSS_KRB5I,
+		.qop = GSS_C_QOP_DEFAULT,
 		.service = RPC_GSS_SVC_INTEGRITY,
 		.name = "krb5i",
 	},
 	[2] = {
 		.pseudoflavor = RPC_AUTH_GSS_KRB5P,
+		.qop = GSS_C_QOP_DEFAULT,
 		.service = RPC_GSS_SVC_PRIVACY,
 		.name = "krb5p",
 	},
@@ -750,11 +756,12 @@ MODULE_ALIAS("rpc-auth-gss-krb5p");
 MODULE_ALIAS("rpc-auth-gss-390003");
 MODULE_ALIAS("rpc-auth-gss-390004");
 MODULE_ALIAS("rpc-auth-gss-390005");
+MODULE_ALIAS("rpc-auth-gss-1.2.840.113554.1.2.2");
 
 static struct gss_api_mech gss_kerberos_mech = {
 	.gm_name	= "krb5",
 	.gm_owner	= THIS_MODULE,
-	.gm_oid		= {9, (void *)"\x2a\x86\x48\x86\xf7\x12\x01\x02\x02"},
+	.gm_oid		= { 9, "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02" },
 	.gm_ops		= &gss_kerberos_ops,
 	.gm_pf_num	= ARRAY_SIZE(gss_kerberos_pfs),
 	.gm_pfs		= gss_kerberos_pfs,
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 88edec929d73..1da52d1406fc 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -130,8 +130,8 @@ gss_krb5_make_confounder(char *p, u32 conflen)
 
 	/* initialize to random value */
 	if (i == 0) {
-		i = random32();
-		i = (i << 32) | random32();
+		i = prandom_u32();
+		i = (i << 32) | prandom_u32();
 	}
 
 	switch (conflen) {
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index f0f4eee63a35..defa9d33925c 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -36,6 +36,7 @@
36#include <linux/types.h> 36#include <linux/types.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/module.h> 38#include <linux/module.h>
39#include <linux/oid_registry.h>
39#include <linux/sunrpc/msg_prot.h> 40#include <linux/sunrpc/msg_prot.h>
40#include <linux/sunrpc/gss_asn1.h> 41#include <linux/sunrpc/gss_asn1.h>
41#include <linux/sunrpc/auth_gss.h> 42#include <linux/sunrpc/auth_gss.h>
@@ -102,8 +103,13 @@ out:
102 return status; 103 return status;
103} 104}
104 105
105int 106/**
106gss_mech_register(struct gss_api_mech *gm) 107 * gss_mech_register - register a GSS mechanism
108 * @gm: GSS mechanism handle
109 *
110 * Returns zero if successful, or a negative errno.
111 */
112int gss_mech_register(struct gss_api_mech *gm)
107{ 113{
108 int status; 114 int status;
109 115
@@ -116,11 +122,14 @@ gss_mech_register(struct gss_api_mech *gm)
116 dprintk("RPC: registered gss mechanism %s\n", gm->gm_name); 122 dprintk("RPC: registered gss mechanism %s\n", gm->gm_name);
117 return 0; 123 return 0;
118} 124}
119
120EXPORT_SYMBOL_GPL(gss_mech_register); 125EXPORT_SYMBOL_GPL(gss_mech_register);
121 126
122void 127/**
123gss_mech_unregister(struct gss_api_mech *gm) 128 * gss_mech_unregister - release a GSS mechanism
129 * @gm: GSS mechanism handle
130 *
131 */
132void gss_mech_unregister(struct gss_api_mech *gm)
124{ 133{
125 spin_lock(&registered_mechs_lock); 134 spin_lock(&registered_mechs_lock);
126 list_del(&gm->gm_list); 135 list_del(&gm->gm_list);
@@ -128,18 +137,14 @@ gss_mech_unregister(struct gss_api_mech *gm)
128 dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name); 137 dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name);
129 gss_mech_free(gm); 138 gss_mech_free(gm);
130} 139}
131
132EXPORT_SYMBOL_GPL(gss_mech_unregister); 140EXPORT_SYMBOL_GPL(gss_mech_unregister);
133 141
134struct gss_api_mech * 142static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm)
135gss_mech_get(struct gss_api_mech *gm)
136{ 143{
137 __module_get(gm->gm_owner); 144 __module_get(gm->gm_owner);
138 return gm; 145 return gm;
139} 146}
140 147
141EXPORT_SYMBOL_GPL(gss_mech_get);
142
143static struct gss_api_mech * 148static struct gss_api_mech *
144_gss_mech_get_by_name(const char *name) 149_gss_mech_get_by_name(const char *name)
145{ 150{
@@ -169,12 +174,16 @@ struct gss_api_mech * gss_mech_get_by_name(const char *name)
 	}
 	return gm;
 }
-EXPORT_SYMBOL_GPL(gss_mech_get_by_name);
 
-struct gss_api_mech *
-gss_mech_get_by_OID(struct xdr_netobj *obj)
+struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
 {
 	struct gss_api_mech *pos, *gm = NULL;
+	char buf[32];
+
+	if (sprint_oid(obj->data, obj->len, buf, sizeof(buf)) < 0)
+		return NULL;
+	dprintk("RPC: %s(%s)\n", __func__, buf);
+	request_module("rpc-auth-gss-%s", buf);
 
 	spin_lock(&registered_mechs_lock);
 	list_for_each_entry(pos, &registered_mechs, gm_list) {
@@ -188,11 +197,8 @@ gss_mech_get_by_OID(struct xdr_netobj *obj)
 	}
 	spin_unlock(&registered_mechs_lock);
 	return gm;
-
 }
 
-EXPORT_SYMBOL_GPL(gss_mech_get_by_OID);
-
 static inline int
 mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
 {
@@ -237,8 +243,6 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
 	return gm;
 }
 
-EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
-
 /**
  * gss_mech_list_pseudoflavors - Discover registered GSS pseudoflavors
  * @array: array to fill in
@@ -268,19 +272,82 @@ int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size)
 	return i;
 }
 
-u32
-gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
+/**
+ * gss_svc_to_pseudoflavor - map a GSS service number to a pseudoflavor
+ * @gm: GSS mechanism handle
+ * @qop: GSS quality-of-protection value
+ * @service: GSS service value
+ *
+ * Returns a matching security flavor, or RPC_AUTH_MAXFLAVOR if none is found.
+ */
+rpc_authflavor_t gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 qop,
+					 u32 service)
 {
 	int i;
 
 	for (i = 0; i < gm->gm_pf_num; i++) {
-		if (gm->gm_pfs[i].service == service) {
+		if (gm->gm_pfs[i].qop == qop &&
+		    gm->gm_pfs[i].service == service) {
 			return gm->gm_pfs[i].pseudoflavor;
 		}
 	}
-	return RPC_AUTH_MAXFLAVOR; /* illegal value */
+	return RPC_AUTH_MAXFLAVOR;
+}
+
+/**
+ * gss_mech_info2flavor - look up a pseudoflavor given a GSS tuple
+ * @info: a GSS mech OID, quality of protection, and service value
+ *
+ * Returns a matching pseudoflavor, or RPC_AUTH_MAXFLAVOR if the tuple is
+ * not supported.
+ */
+rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *info)
+{
+	rpc_authflavor_t pseudoflavor;
+	struct gss_api_mech *gm;
+
+	gm = gss_mech_get_by_OID(&info->oid);
+	if (gm == NULL)
+		return RPC_AUTH_MAXFLAVOR;
+
+	pseudoflavor = gss_svc_to_pseudoflavor(gm, info->qop, info->service);
+
+	gss_mech_put(gm);
+	return pseudoflavor;
+}
+
+/**
+ * gss_mech_flavor2info - look up a GSS tuple for a given pseudoflavor
+ * @pseudoflavor: GSS pseudoflavor to match
+ * @info: rpcsec_gss_info structure to fill in
+ *
+ * Returns zero and fills in "info" if pseudoflavor matches a
+ * supported mechanism.  Otherwise a negative errno is returned.
+ */
+int gss_mech_flavor2info(rpc_authflavor_t pseudoflavor,
+			 struct rpcsec_gss_info *info)
+{
+	struct gss_api_mech *gm;
+	int i;
+
+	gm = gss_mech_get_by_pseudoflavor(pseudoflavor);
+	if (gm == NULL)
+		return -ENOENT;
+
+	for (i = 0; i < gm->gm_pf_num; i++) {
+		if (gm->gm_pfs[i].pseudoflavor == pseudoflavor) {
+			memcpy(info->oid.data, gm->gm_oid.data, gm->gm_oid.len);
+			info->oid.len = gm->gm_oid.len;
+			info->qop = gm->gm_pfs[i].qop;
+			info->service = gm->gm_pfs[i].service;
+			gss_mech_put(gm);
+			return 0;
+		}
+	}
+
+	gss_mech_put(gm);
+	return -ENOENT;
 }
-EXPORT_SYMBOL_GPL(gss_svc_to_pseudoflavor);
 
 u32
 gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
@@ -294,8 +361,6 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
 	return 0;
 }
 
-EXPORT_SYMBOL_GPL(gss_pseudoflavor_to_service);
-
 char *
 gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
 {
@@ -308,8 +373,6 @@ gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
 	return NULL;
 }
 
-EXPORT_SYMBOL_GPL(gss_service_to_auth_domain_name);
-
 void
 gss_mech_put(struct gss_api_mech * gm)
 {
@@ -317,22 +380,21 @@ gss_mech_put(struct gss_api_mech * gm)
 	module_put(gm->gm_owner);
 }
 
-EXPORT_SYMBOL_GPL(gss_mech_put);
-
 /* The mech could probably be determined from the token instead, but it's just
  * as easy for now to pass it in. */
 int
 gss_import_sec_context(const void *input_token, size_t bufsize,
 		       struct gss_api_mech *mech,
 		       struct gss_ctx **ctx_id,
+		       time_t *endtime,
 		       gfp_t gfp_mask)
 {
 	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
 		return -ENOMEM;
 	(*ctx_id)->mech_type = gss_mech_get(mech);
 
-	return mech->gm_ops
-		->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask);
+	return mech->gm_ops->gss_import_sec_context(input_token, bufsize,
+						    *ctx_id, endtime, gfp_mask);
 }
 
 /* gss_get_mic: compute a mic over message and return mic_token. */
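With gss_mech_info2flavor() and gss_mech_flavor2info() both in place, a (OID, qop, service) tuple and an RPC pseudoflavor can be converted in either direction. A minimal kernel-side sketch of a round trip (error handling trimmed; assumes RPC_AUTH_GSS_KRB5I is registered, e.g. by the Kerberos mechanism above):

static rpc_authflavor_t krb5i_flavor_roundtrip(void)
{
	struct rpcsec_gss_info info;

	/* pseudoflavor -> (OID, qop, service) */
	if (gss_mech_flavor2info(RPC_AUTH_GSS_KRB5I, &info) != 0)
		return RPC_AUTH_MAXFLAVOR;

	/* (OID, qop, service) -> pseudoflavor; yields RPC_AUTH_GSS_KRB5I */
	return gss_mech_info2flavor(&info);
}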
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
new file mode 100644
index 000000000000..d304f41260f2
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -0,0 +1,358 @@
1/*
2 * linux/net/sunrpc/gss_rpc_upcall.c
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#include <linux/types.h>
22#include <linux/un.h>
23
24#include <linux/sunrpc/svcauth.h>
25#include "gss_rpc_upcall.h"
26
27#define GSSPROXY_SOCK_PATHNAME "/var/run/gssproxy.sock"
28
29#define GSSPROXY_PROGRAM (400112u)
30#define GSSPROXY_VERS_1 (1u)
31
32/*
33 * Encoding/Decoding functions
34 */
35
36enum {
37 GSSX_NULL = 0, /* Unused */
38 GSSX_INDICATE_MECHS = 1,
39 GSSX_GET_CALL_CONTEXT = 2,
40 GSSX_IMPORT_AND_CANON_NAME = 3,
41 GSSX_EXPORT_CRED = 4,
42 GSSX_IMPORT_CRED = 5,
43 GSSX_ACQUIRE_CRED = 6,
44 GSSX_STORE_CRED = 7,
45 GSSX_INIT_SEC_CONTEXT = 8,
46 GSSX_ACCEPT_SEC_CONTEXT = 9,
47 GSSX_RELEASE_HANDLE = 10,
48 GSSX_GET_MIC = 11,
49 GSSX_VERIFY = 12,
50 GSSX_WRAP = 13,
51 GSSX_UNWRAP = 14,
52 GSSX_WRAP_SIZE_LIMIT = 15,
53};
54
55#define PROC(proc, name) \
56[GSSX_##proc] = { \
57 .p_proc = GSSX_##proc, \
58 .p_encode = (kxdreproc_t)gssx_enc_##name, \
59 .p_decode = (kxdrdproc_t)gssx_dec_##name, \
60 .p_arglen = GSSX_ARG_##name##_sz, \
61 .p_replen = GSSX_RES_##name##_sz, \
62 .p_statidx = GSSX_##proc, \
63 .p_name = #proc, \
64}
65
66static struct rpc_procinfo gssp_procedures[] = {
67 PROC(INDICATE_MECHS, indicate_mechs),
68 PROC(GET_CALL_CONTEXT, get_call_context),
69 PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
70 PROC(EXPORT_CRED, export_cred),
71 PROC(IMPORT_CRED, import_cred),
72 PROC(ACQUIRE_CRED, acquire_cred),
73 PROC(STORE_CRED, store_cred),
74 PROC(INIT_SEC_CONTEXT, init_sec_context),
75 PROC(ACCEPT_SEC_CONTEXT, accept_sec_context),
76 PROC(RELEASE_HANDLE, release_handle),
77 PROC(GET_MIC, get_mic),
78 PROC(VERIFY, verify),
79 PROC(WRAP, wrap),
80 PROC(UNWRAP, unwrap),
81 PROC(WRAP_SIZE_LIMIT, wrap_size_limit),
82};
83
84
85
86/*
87 * Common transport functions
88 */
89
90static const struct rpc_program gssp_program;
91
92static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
93{
94 static const struct sockaddr_un gssp_localaddr = {
95 .sun_family = AF_LOCAL,
96 .sun_path = GSSPROXY_SOCK_PATHNAME,
97 };
98 struct rpc_create_args args = {
99 .net = net,
100 .protocol = XPRT_TRANSPORT_LOCAL,
101 .address = (struct sockaddr *)&gssp_localaddr,
102 .addrsize = sizeof(gssp_localaddr),
103 .servername = "localhost",
104 .program = &gssp_program,
105 .version = GSSPROXY_VERS_1,
106 .authflavor = RPC_AUTH_NULL,
107 /*
108 * Note we want the connection to be done in the caller's
109 * filesystem namespace. We therefore turn off the idle
110 * timeout, which would result in reconnections being
111 * done without the correct namespace:
112 */
113 .flags = RPC_CLNT_CREATE_NOPING |
114 RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
115 };
116 struct rpc_clnt *clnt;
117 int result = 0;
118
119 clnt = rpc_create(&args);
120 if (IS_ERR(clnt)) {
121 dprintk("RPC: failed to create AF_LOCAL gssproxy "
122 "client (errno %ld).\n", PTR_ERR(clnt));
123 result = -PTR_ERR(clnt);
124 *_clnt = NULL;
125 goto out;
126 }
127
128 dprintk("RPC: created new gssp local client (gssp_local_clnt: "
129 "%p)\n", clnt);
130 *_clnt = clnt;
131
132out:
133 return result;
134}
135
136void init_gssp_clnt(struct sunrpc_net *sn)
137{
138 mutex_init(&sn->gssp_lock);
139 sn->gssp_clnt = NULL;
140 init_waitqueue_head(&sn->gssp_wq);
141}
142
143int set_gssp_clnt(struct net *net)
144{
145 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
146 struct rpc_clnt *clnt;
147 int ret;
148
149 mutex_lock(&sn->gssp_lock);
150 ret = gssp_rpc_create(net, &clnt);
151 if (!ret) {
152 if (sn->gssp_clnt)
153 rpc_shutdown_client(sn->gssp_clnt);
154 sn->gssp_clnt = clnt;
155 }
156 mutex_unlock(&sn->gssp_lock);
157 wake_up(&sn->gssp_wq);
158 return ret;
159}
160
161void clear_gssp_clnt(struct sunrpc_net *sn)
162{
163 mutex_lock(&sn->gssp_lock);
164 if (sn->gssp_clnt) {
165 rpc_shutdown_client(sn->gssp_clnt);
166 sn->gssp_clnt = NULL;
167 }
168 mutex_unlock(&sn->gssp_lock);
169}
170
171static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn)
172{
173 struct rpc_clnt *clnt;
174
175 mutex_lock(&sn->gssp_lock);
176 clnt = sn->gssp_clnt;
177 if (clnt)
178 atomic_inc(&clnt->cl_count);
179 mutex_unlock(&sn->gssp_lock);
180 return clnt;
181}
182
183static int gssp_call(struct net *net, struct rpc_message *msg)
184{
185 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
186 struct rpc_clnt *clnt;
187 int status;
188
189 clnt = get_gssp_clnt(sn);
190 if (!clnt)
191 return -EIO;
192 status = rpc_call_sync(clnt, msg, 0);
193 if (status < 0) {
194 dprintk("gssp: rpc_call returned error %d\n", -status);
195 switch (status) {
196 case -EPROTONOSUPPORT:
197 status = -EINVAL;
198 break;
199 case -ECONNREFUSED:
200 case -ETIMEDOUT:
201 case -ENOTCONN:
202 status = -EAGAIN;
203 break;
204 case -ERESTARTSYS:
205 if (signalled())
206 status = -EINTR;
207 break;
208 default:
209 break;
210 }
211 }
212 rpc_release_client(clnt);
213 return status;
214}
215
216
217/*
218 * Public functions
219 */
220
221/* numbers somewhat arbitrary but large enough for current needs */
222#define GSSX_MAX_OUT_HANDLE 128
223#define GSSX_MAX_SRC_PRINC 256
224#define GSSX_KMEMBUF (GSSX_max_output_handle_sz + \
225 GSSX_max_oid_sz + \
226 GSSX_max_princ_sz + \
227 sizeof(struct svc_cred))
228
229int gssp_accept_sec_context_upcall(struct net *net,
230 struct gssp_upcall_data *data)
231{
232 struct gssx_ctx ctxh = {
233 .state = data->in_handle
234 };
235 struct gssx_arg_accept_sec_context arg = {
236 .input_token = data->in_token,
237 };
238 struct gssx_ctx rctxh = {
239 /*
240 * pass in the max length we expect for each of these
241 * buffers but let the xdr code kmalloc them:
242 */
243 .exported_context_token.len = GSSX_max_output_handle_sz,
244 .mech.len = GSS_OID_MAX_LEN,
245 .src_name.display_name.len = GSSX_max_princ_sz
246 };
247 struct gssx_res_accept_sec_context res = {
248 .context_handle = &rctxh,
249 .output_token = &data->out_token
250 };
251 struct rpc_message msg = {
252 .rpc_proc = &gssp_procedures[GSSX_ACCEPT_SEC_CONTEXT],
253 .rpc_argp = &arg,
254 .rpc_resp = &res,
255 .rpc_cred = NULL, /* FIXME ? */
256 };
257 struct xdr_netobj client_name = { 0, NULL };
258 int ret;
259
260 if (data->in_handle.len != 0)
261 arg.context_handle = &ctxh;
262 res.output_token->len = GSSX_max_output_token_sz;
263
264 /* use nfs/ for targ_name ? */
265
266 ret = gssp_call(net, &msg);
267
268 /* we need to fetch all data even in case of error so
269 * that we can free special structures if they have been allocated */
270 data->major_status = res.status.major_status;
271 data->minor_status = res.status.minor_status;
272 if (res.context_handle) {
273 data->out_handle = rctxh.exported_context_token;
274 data->mech_oid.len = rctxh.mech.len;
275 memcpy(data->mech_oid.data, rctxh.mech.data,
276 data->mech_oid.len);
277 client_name = rctxh.src_name.display_name;
278 }
279
280 if (res.options.count == 1) {
281 gssx_buffer *value = &res.options.data[0].value;
282 /* Currently we only decode CREDS_VALUE, if we add
283 * anything else we'll have to loop and match on the
284 * option name */
285 if (value->len == 1) {
286 /* steal group info from struct svc_cred */
287 data->creds = *(struct svc_cred *)value->data;
288 data->found_creds = 1;
289 }
290 /* whether we use it or not, free data */
291 kfree(value->data);
292 }
293
294 if (res.options.count != 0) {
295 kfree(res.options.data);
296 }
297
298 /* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */
299 if (data->found_creds && client_name.data != NULL) {
300 char *c;
301
302 data->creds.cr_principal = kstrndup(client_name.data,
303 client_name.len, GFP_KERNEL);
304 if (data->creds.cr_principal) {
305 /* terminate and remove realm part */
306 c = strchr(data->creds.cr_principal, '@');
307 if (c) {
308 *c = '\0';
309
310 /* change service-hostname delimiter */
311 c = strchr(data->creds.cr_principal, '/');
312 if (c) *c = '@';
313 }
314 if (!c) {
315 /* not a service principal */
316 kfree(data->creds.cr_principal);
317 data->creds.cr_principal = NULL;
318 }
319 }
320 }
321 kfree(client_name.data);
322
323 return ret;
324}
325
326void gssp_free_upcall_data(struct gssp_upcall_data *data)
327{
328 kfree(data->in_handle.data);
329 kfree(data->out_handle.data);
330 kfree(data->out_token.data);
331 kfree(data->mech_oid.data);
332 free_svc_cred(&data->creds);
333}
334
335/*
336 * Initialization stuff
337 */
338
339static const struct rpc_version gssp_version1 = {
340 .number = GSSPROXY_VERS_1,
341 .nrprocs = ARRAY_SIZE(gssp_procedures),
342 .procs = gssp_procedures,
343};
344
345static const struct rpc_version *gssp_version[] = {
346 NULL,
347 &gssp_version1,
348};
349
350static struct rpc_stat gssp_stats;
351
352static const struct rpc_program gssp_program = {
353 .name = "gssproxy",
354 .number = GSSPROXY_PROGRAM,
355 .nrvers = ARRAY_SIZE(gssp_version),
356 .version = gssp_version,
357 .stats = &gssp_stats,
358};
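For reference, the PROC() macro above expands each table slot mechanically; PROC(GET_MIC, get_mic), for example, becomes the following (the get_mic encode/decode symbols are the NULL placeholders from gss_rpc_xdr.h, so the slot is effectively unimplemented):

[GSSX_GET_MIC] = {
	.p_proc    = GSSX_GET_MIC,
	.p_encode  = (kxdreproc_t)gssx_enc_get_mic,	/* NULL placeholder */
	.p_decode  = (kxdrdproc_t)gssx_dec_get_mic,	/* NULL placeholder */
	.p_arglen  = GSSX_ARG_get_mic_sz,		/* 0 */
	.p_replen  = GSSX_RES_get_mic_sz,		/* 0 */
	.p_statidx = GSSX_GET_MIC,
	.p_name    = "GET_MIC",
},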
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h
new file mode 100644
index 000000000000..1e542aded90a
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.h
@@ -0,0 +1,48 @@
1/*
2 * linux/net/sunrpc/gss_rpc_upcall.h
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#ifndef _GSS_RPC_UPCALL_H
22#define _GSS_RPC_UPCALL_H
23
24#include <linux/sunrpc/gss_api.h>
25#include <linux/sunrpc/auth_gss.h>
26#include "gss_rpc_xdr.h"
27#include "../netns.h"
28
29struct gssp_upcall_data {
30 struct xdr_netobj in_handle;
31 struct gssp_in_token in_token;
32 struct xdr_netobj out_handle;
33 struct xdr_netobj out_token;
34 struct rpcsec_gss_oid mech_oid;
35 struct svc_cred creds;
36 int found_creds;
37 int major_status;
38 int minor_status;
39};
40
41int gssp_accept_sec_context_upcall(struct net *net,
42 struct gssp_upcall_data *data);
43void gssp_free_upcall_data(struct gssp_upcall_data *data);
44
45void init_gssp_clnt(struct sunrpc_net *);
46int set_gssp_clnt(struct net *);
47void clear_gssp_clnt(struct sunrpc_net *);
48#endif /* _GSS_RPC_UPCALL_H */
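The intended call pattern for this header is the one svcauth_gss.c adopts later in this diff: zero a gssp_upcall_data, hand over the incoming handle and token, make the synchronous upcall, and always free afterwards. A condensed sketch (hypothetical caller, not part of the patch; the error mapping is illustrative only):

static int example_gssp_upcall(struct net *net,
			       struct xdr_netobj *in_handle,
			       struct gssp_in_token *in_token)
{
	struct gssp_upcall_data ud;
	int status;

	memset(&ud, 0, sizeof(ud));
	ud.in_handle = *in_handle;	/* ownership moves to ud */
	ud.in_token = *in_token;

	/* synchronous AF_LOCAL RPC to the gss-proxy daemon */
	status = gssp_accept_sec_context_upcall(net, &ud);
	if (status == 0 && ud.major_status != GSS_S_COMPLETE)
		status = -EACCES;	/* illustrative error mapping */

	gssp_free_upcall_data(&ud);	/* frees handles, token, OID, creds */
	return status;
}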
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
new file mode 100644
index 000000000000..357f613df7ff
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -0,0 +1,840 @@
1/*
2 * GSS Proxy upcall module
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#include <linux/sunrpc/svcauth.h>
22#include "gss_rpc_xdr.h"
23
24static int gssx_enc_bool(struct xdr_stream *xdr, int v)
25{
26 __be32 *p;
27
28 p = xdr_reserve_space(xdr, 4);
29 if (unlikely(p == NULL))
30 return -ENOSPC;
31 *p = v ? xdr_one : xdr_zero;
32 return 0;
33}
34
35static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v)
36{
37 __be32 *p;
38
39 p = xdr_inline_decode(xdr, 4);
40 if (unlikely(p == NULL))
41 return -ENOSPC;
42 *v = be32_to_cpu(*p);
43 return 0;
44}
45
46static int gssx_enc_buffer(struct xdr_stream *xdr,
47 gssx_buffer *buf)
48{
49 __be32 *p;
50
51 p = xdr_reserve_space(xdr, sizeof(u32) + buf->len);
52 if (!p)
53 return -ENOSPC;
54 xdr_encode_opaque(p, buf->data, buf->len);
55 return 0;
56}
57
58static int gssx_enc_in_token(struct xdr_stream *xdr,
59 struct gssp_in_token *in)
60{
61 __be32 *p;
62
63 p = xdr_reserve_space(xdr, 4);
64 if (!p)
65 return -ENOSPC;
66 *p = cpu_to_be32(in->page_len);
67
68 /* all we need to do is to write pages */
69 xdr_write_pages(xdr, in->pages, in->page_base, in->page_len);
70
71 return 0;
72}
73
74
75static int gssx_dec_buffer(struct xdr_stream *xdr,
76 gssx_buffer *buf)
77{
78 u32 length;
79 __be32 *p;
80
81 p = xdr_inline_decode(xdr, 4);
82 if (unlikely(p == NULL))
83 return -ENOSPC;
84
85 length = be32_to_cpup(p);
86 p = xdr_inline_decode(xdr, length);
87 if (unlikely(p == NULL))
88 return -ENOSPC;
89
90 if (buf->len == 0) {
91 /* we intentionally are not interested in this buffer */
92 return 0;
93 }
94 if (length > buf->len)
95 return -ENOSPC;
96
97 if (!buf->data) {
98 buf->data = kmemdup(p, length, GFP_KERNEL);
99 if (!buf->data)
100 return -ENOMEM;
101 } else {
102 memcpy(buf->data, p, length);
103 }
104 buf->len = length;
105 return 0;
106}
107
108static int gssx_enc_option(struct xdr_stream *xdr,
109 struct gssx_option *opt)
110{
111 int err;
112
113 err = gssx_enc_buffer(xdr, &opt->option);
114 if (err)
115 return err;
116 err = gssx_enc_buffer(xdr, &opt->value);
117 return err;
118}
119
120static int gssx_dec_option(struct xdr_stream *xdr,
121 struct gssx_option *opt)
122{
123 int err;
124
125 err = gssx_dec_buffer(xdr, &opt->option);
126 if (err)
127 return err;
128 err = gssx_dec_buffer(xdr, &opt->value);
129 return err;
130}
131
132static int dummy_enc_opt_array(struct xdr_stream *xdr,
133 struct gssx_option_array *oa)
134{
135 __be32 *p;
136
137 if (oa->count != 0)
138 return -EINVAL;
139
140 p = xdr_reserve_space(xdr, 4);
141 if (!p)
142 return -ENOSPC;
143 *p = 0;
144
145 return 0;
146}
147
148static int dummy_dec_opt_array(struct xdr_stream *xdr,
149 struct gssx_option_array *oa)
150{
151 struct gssx_option dummy;
152 u32 count, i;
153 __be32 *p;
154
155 p = xdr_inline_decode(xdr, 4);
156 if (unlikely(p == NULL))
157 return -ENOSPC;
158 count = be32_to_cpup(p++);
159 memset(&dummy, 0, sizeof(dummy));
160 for (i = 0; i < count; i++) {
161 gssx_dec_option(xdr, &dummy);
162 }
163
164 oa->count = 0;
165 oa->data = NULL;
166 return 0;
167}
168
169static int get_s32(void **p, void *max, s32 *res)
170{
171 void *base = *p;
172 void *next = (void *)((char *)base + sizeof(s32));
173 if (unlikely(next > max || next < base))
174 return -EINVAL;
175 memcpy(res, base, sizeof(s32));
176 *p = next;
177 return 0;
178}
179
180static int gssx_dec_linux_creds(struct xdr_stream *xdr,
181 struct svc_cred *creds)
182{
183 u32 length;
184 __be32 *p;
185 void *q, *end;
186 s32 tmp;
187 int N, i, err;
188
189 p = xdr_inline_decode(xdr, 4);
190 if (unlikely(p == NULL))
191 return -ENOSPC;
192
193 length = be32_to_cpup(p);
194
195 /* FIXME: we do not want to use the scratch buffer for this one;
196 * may need to use functions that allow us to access an io vector
197 * directly */
198 p = xdr_inline_decode(xdr, length);
199 if (unlikely(p == NULL))
200 return -ENOSPC;
201
202 q = p;
203 end = q + length;
204
205 /* uid */
206 err = get_s32(&q, end, &tmp);
207 if (err)
208 return err;
209 creds->cr_uid = make_kuid(&init_user_ns, tmp);
210
211 /* gid */
212 err = get_s32(&q, end, &tmp);
213 if (err)
214 return err;
215 creds->cr_gid = make_kgid(&init_user_ns, tmp);
216
217 /* number of additional gid's */
218 err = get_s32(&q, end, &tmp);
219 if (err)
220 return err;
221 N = tmp;
222 creds->cr_group_info = groups_alloc(N);
223 if (creds->cr_group_info == NULL)
224 return -ENOMEM;
225
226 /* gid's */
227 for (i = 0; i < N; i++) {
228 kgid_t kgid;
229 err = get_s32(&q, end, &tmp);
230 if (err)
231 goto out_free_groups;
232 err = -EINVAL;
233 kgid = make_kgid(&init_user_ns, tmp);
234 if (!gid_valid(kgid))
235 goto out_free_groups;
236 GROUP_AT(creds->cr_group_info, i) = kgid;
237 }
238
239 return 0;
240out_free_groups:
241 groups_free(creds->cr_group_info);
242 return err;
243}
244
245static int gssx_dec_option_array(struct xdr_stream *xdr,
246 struct gssx_option_array *oa)
247{
248 struct svc_cred *creds;
249 u32 count, i;
250 __be32 *p;
251 int err;
252
253 p = xdr_inline_decode(xdr, 4);
254 if (unlikely(p == NULL))
255 return -ENOSPC;
256 count = be32_to_cpup(p++);
257 if (!count)
258 return 0;
259
260 /* we recognize only 1 currently: CREDS_VALUE */
261 oa->count = 1;
262
263 oa->data = kmalloc(sizeof(struct gssx_option), GFP_KERNEL);
264 if (!oa->data)
265 return -ENOMEM;
266
267 creds = kmalloc(sizeof(struct svc_cred), GFP_KERNEL);
268 if (!creds) {
269 kfree(oa->data);
270 return -ENOMEM;
271 }
272
273 oa->data[0].option.data = CREDS_VALUE;
274 oa->data[0].option.len = sizeof(CREDS_VALUE);
275 oa->data[0].value.data = (void *)creds;
276 oa->data[0].value.len = 0;
277
278 for (i = 0; i < count; i++) {
279 gssx_buffer dummy = { 0, NULL };
280 u32 length;
281
282 /* option buffer */
283 p = xdr_inline_decode(xdr, 4);
284 if (unlikely(p == NULL))
285 return -ENOSPC;
286
287 length = be32_to_cpup(p);
288 p = xdr_inline_decode(xdr, length);
289 if (unlikely(p == NULL))
290 return -ENOSPC;
291
292 if (length == sizeof(CREDS_VALUE) &&
293 memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
294 /* We have creds here. Parse them. */
295 err = gssx_dec_linux_creds(xdr, creds);
296 if (err)
297 return err;
298 oa->data[0].value.len = 1; /* presence */
299 } else {
300 /* consume uninteresting buffer */
301 err = gssx_dec_buffer(xdr, &dummy);
302 if (err)
303 return err;
304 }
305 }
306 return 0;
307}
308
309static int gssx_dec_status(struct xdr_stream *xdr,
310 struct gssx_status *status)
311{
312 __be32 *p;
313 int err;
314
315 /* status->major_status */
316 p = xdr_inline_decode(xdr, 8);
317 if (unlikely(p == NULL))
318 return -ENOSPC;
319 p = xdr_decode_hyper(p, &status->major_status);
320
321 /* status->mech */
322 err = gssx_dec_buffer(xdr, &status->mech);
323 if (err)
324 return err;
325
326 /* status->minor_status */
327 p = xdr_inline_decode(xdr, 8);
328 if (unlikely(p == NULL))
329 return -ENOSPC;
330 p = xdr_decode_hyper(p, &status->minor_status);
331
332 /* status->major_status_string */
333 err = gssx_dec_buffer(xdr, &status->major_status_string);
334 if (err)
335 return err;
336
337 /* status->minor_status_string */
338 err = gssx_dec_buffer(xdr, &status->minor_status_string);
339 if (err)
340 return err;
341
342 /* status->server_ctx */
343 err = gssx_dec_buffer(xdr, &status->server_ctx);
344 if (err)
345 return err;
346
347 /* we assume we have no options for now, so simply consume them */
348 /* status->options */
349 err = dummy_dec_opt_array(xdr, &status->options);
350
351 return err;
352}
353
354static int gssx_enc_call_ctx(struct xdr_stream *xdr,
355 struct gssx_call_ctx *ctx)
356{
357 struct gssx_option opt;
358 __be32 *p;
359 int err;
360
361 /* ctx->locale */
362 err = gssx_enc_buffer(xdr, &ctx->locale);
363 if (err)
364 return err;
365
366 /* ctx->server_ctx */
367 err = gssx_enc_buffer(xdr, &ctx->server_ctx);
368 if (err)
369 return err;
370
371 /* we always want to ask for lucid contexts */
372 /* ctx->options */
373 p = xdr_reserve_space(xdr, 4);
374 *p = cpu_to_be32(2);
375
376 /* we want a lucid_v1 context */
377 opt.option.data = LUCID_OPTION;
378 opt.option.len = sizeof(LUCID_OPTION);
379 opt.value.data = LUCID_VALUE;
380 opt.value.len = sizeof(LUCID_VALUE);
381 err = gssx_enc_option(xdr, &opt);
382
383 /* ..and user creds */
384 opt.option.data = CREDS_OPTION;
385 opt.option.len = sizeof(CREDS_OPTION);
386 opt.value.data = CREDS_VALUE;
387 opt.value.len = sizeof(CREDS_VALUE);
388 err = gssx_enc_option(xdr, &opt);
389
390 return err;
391}
392
393static int gssx_dec_name_attr(struct xdr_stream *xdr,
394 struct gssx_name_attr *attr)
395{
396 int err;
397
398 /* attr->attr */
399 err = gssx_dec_buffer(xdr, &attr->attr);
400 if (err)
401 return err;
402
403 /* attr->value */
404 err = gssx_dec_buffer(xdr, &attr->value);
405 if (err)
406 return err;
407
408 /* attr->extensions */
409 err = dummy_dec_opt_array(xdr, &attr->extensions);
410
411 return err;
412}
413
414static int dummy_enc_nameattr_array(struct xdr_stream *xdr,
415 struct gssx_name_attr_array *naa)
416{
417 __be32 *p;
418
419 if (naa->count != 0)
420 return -EINVAL;
421
422 p = xdr_reserve_space(xdr, 4);
423 if (!p)
424 return -ENOSPC;
425 *p = 0;
426
427 return 0;
428}
429
430static int dummy_dec_nameattr_array(struct xdr_stream *xdr,
431 struct gssx_name_attr_array *naa)
432{
433 struct gssx_name_attr dummy;
434 u32 count, i;
435 __be32 *p;
436
437 p = xdr_inline_decode(xdr, 4);
438 if (unlikely(p == NULL))
439 return -ENOSPC;
440 count = be32_to_cpup(p++);
441 for (i = 0; i < count; i++) {
442 gssx_dec_name_attr(xdr, &dummy);
443 }
444
445 naa->count = 0;
446 naa->data = NULL;
447 return 0;
448}
449
450static struct xdr_netobj zero_netobj = {};
451
452static struct gssx_name_attr_array zero_name_attr_array = {};
453
454static struct gssx_option_array zero_option_array = {};
455
456static int gssx_enc_name(struct xdr_stream *xdr,
457 struct gssx_name *name)
458{
459 int err;
460
461 /* name->display_name */
462 err = gssx_enc_buffer(xdr, &name->display_name);
463 if (err)
464 return err;
465
466 /* name->name_type */
467 err = gssx_enc_buffer(xdr, &zero_netobj);
468 if (err)
469 return err;
470
471 /* name->exported_name */
472 err = gssx_enc_buffer(xdr, &zero_netobj);
473 if (err)
474 return err;
475
476 /* name->exported_composite_name */
477 err = gssx_enc_buffer(xdr, &zero_netobj);
478 if (err)
479 return err;
480
481 /* leave name_attributes empty for now, will add once we have any
482 * to pass up at all */
483 /* name->name_attributes */
484 err = dummy_enc_nameattr_array(xdr, &zero_name_attr_array);
485 if (err)
486 return err;
487
488 /* leave options empty for now, will add once we have any options
489 * to pass up at all */
490 /* name->extensions */
491 err = dummy_enc_opt_array(xdr, &zero_option_array);
492
493 return err;
494}
495
496static int gssx_dec_name(struct xdr_stream *xdr,
497 struct gssx_name *name)
498{
499 struct xdr_netobj dummy_netobj;
500 struct gssx_name_attr_array dummy_name_attr_array;
501 struct gssx_option_array dummy_option_array;
502 int err;
503
504 /* name->display_name */
505 err = gssx_dec_buffer(xdr, &name->display_name);
506 if (err)
507 return err;
508
509 /* name->name_type */
510 err = gssx_dec_buffer(xdr, &dummy_netobj);
511 if (err)
512 return err;
513
514 /* name->exported_name */
515 err = gssx_dec_buffer(xdr, &dummy_netobj);
516 if (err)
517 return err;
518
519 /* name->exported_composite_name */
520 err = gssx_dec_buffer(xdr, &dummy_netobj);
521 if (err)
522 return err;
523
524 /* we assume we have no attributes for now, so simply consume them */
525 /* name->name_attributes */
526 err = dummy_dec_nameattr_array(xdr, &dummy_name_attr_array);
527 if (err)
528 return err;
529
530 /* we assume we have no options for now, so simply consume them */
531 /* name->extensions */
532 err = dummy_dec_opt_array(xdr, &dummy_option_array);
533
534 return err;
535}
536
537static int dummy_enc_credel_array(struct xdr_stream *xdr,
538 struct gssx_cred_element_array *cea)
539{
540 __be32 *p;
541
542 if (cea->count != 0)
543 return -EINVAL;
544
545 p = xdr_reserve_space(xdr, 4);
546 if (!p)
547 return -ENOSPC;
548 *p = 0;
549
550 return 0;
551}
552
553static int gssx_enc_cred(struct xdr_stream *xdr,
554 struct gssx_cred *cred)
555{
556 int err;
557
558 /* cred->desired_name */
559 err = gssx_enc_name(xdr, &cred->desired_name);
560 if (err)
561 return err;
562
563 /* cred->elements */
564 err = dummy_enc_credel_array(xdr, &cred->elements);
565
566 /* cred->cred_handle_reference */
567 err = gssx_enc_buffer(xdr, &cred->cred_handle_reference);
568 if (err)
569 return err;
570
571 /* cred->needs_release */
572 err = gssx_enc_bool(xdr, cred->needs_release);
573
574 return err;
575}
576
577static int gssx_enc_ctx(struct xdr_stream *xdr,
578 struct gssx_ctx *ctx)
579{
580 __be32 *p;
581 int err;
582
583 /* ctx->exported_context_token */
584 err = gssx_enc_buffer(xdr, &ctx->exported_context_token);
585 if (err)
586 return err;
587
588 /* ctx->state */
589 err = gssx_enc_buffer(xdr, &ctx->state);
590 if (err)
591 return err;
592
593 /* ctx->need_release */
594 err = gssx_enc_bool(xdr, ctx->need_release);
595 if (err)
596 return err;
597
598 /* ctx->mech */
599 err = gssx_enc_buffer(xdr, &ctx->mech);
600 if (err)
601 return err;
602
603 /* ctx->src_name */
604 err = gssx_enc_name(xdr, &ctx->src_name);
605 if (err)
606 return err;
607
608 /* ctx->targ_name */
609 err = gssx_enc_name(xdr, &ctx->targ_name);
610 if (err)
611 return err;
612
613 /* ctx->lifetime */
614 p = xdr_reserve_space(xdr, 8+8);
615 if (!p)
616 return -ENOSPC;
617 p = xdr_encode_hyper(p, ctx->lifetime);
618
619 /* ctx->ctx_flags */
620 p = xdr_encode_hyper(p, ctx->ctx_flags);
621
622 /* ctx->locally_initiated */
623 err = gssx_enc_bool(xdr, ctx->locally_initiated);
624 if (err)
625 return err;
626
627 /* ctx->open */
628 err = gssx_enc_bool(xdr, ctx->open);
629 if (err)
630 return err;
631
632 /* leave options empty for now, will add once we have any options
633 * to pass up at all */
634 /* ctx->options */
635 err = dummy_enc_opt_array(xdr, &ctx->options);
636
637 return err;
638}
639
640static int gssx_dec_ctx(struct xdr_stream *xdr,
641 struct gssx_ctx *ctx)
642{
643 __be32 *p;
644 int err;
645
646 /* ctx->exported_context_token */
647 err = gssx_dec_buffer(xdr, &ctx->exported_context_token);
648 if (err)
649 return err;
650
651 /* ctx->state */
652 err = gssx_dec_buffer(xdr, &ctx->state);
653 if (err)
654 return err;
655
656 /* ctx->need_release */
657 err = gssx_dec_bool(xdr, &ctx->need_release);
658 if (err)
659 return err;
660
661 /* ctx->mech */
662 err = gssx_dec_buffer(xdr, &ctx->mech);
663 if (err)
664 return err;
665
666 /* ctx->src_name */
667 err = gssx_dec_name(xdr, &ctx->src_name);
668 if (err)
669 return err;
670
671 /* ctx->targ_name */
672 err = gssx_dec_name(xdr, &ctx->targ_name);
673 if (err)
674 return err;
675
676 /* ctx->lifetime */
677 p = xdr_inline_decode(xdr, 8+8);
678 if (unlikely(p == NULL))
679 return -ENOSPC;
680 p = xdr_decode_hyper(p, &ctx->lifetime);
681
682 /* ctx->ctx_flags */
683 p = xdr_decode_hyper(p, &ctx->ctx_flags);
684
685 /* ctx->locally_initiated */
686 err = gssx_dec_bool(xdr, &ctx->locally_initiated);
687 if (err)
688 return err;
689
690 /* ctx->open */
691 err = gssx_dec_bool(xdr, &ctx->open);
692 if (err)
693 return err;
694
695 /* we assume we have no options for now, so simply consume them */
696 /* ctx->options */
697 err = dummy_dec_opt_array(xdr, &ctx->options);
698
699 return err;
700}
701
702static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb)
703{
704 __be32 *p;
705 int err;
706
707 /* cb->initiator_addrtype */
708 p = xdr_reserve_space(xdr, 8);
709 if (!p)
710 return -ENOSPC;
711 p = xdr_encode_hyper(p, cb->initiator_addrtype);
712
713 /* cb->initiator_address */
714 err = gssx_enc_buffer(xdr, &cb->initiator_address);
715 if (err)
716 return err;
717
718 /* cb->acceptor_addrtype */
719 p = xdr_reserve_space(xdr, 8);
720 if (!p)
721 return -ENOSPC;
722 p = xdr_encode_hyper(p, cb->acceptor_addrtype);
723
724 /* cb->acceptor_address */
725 err = gssx_enc_buffer(xdr, &cb->acceptor_address);
726 if (err)
727 return err;
728
729 /* cb->application_data */
730 err = gssx_enc_buffer(xdr, &cb->application_data);
731
732 return err;
733}
734
735void gssx_enc_accept_sec_context(struct rpc_rqst *req,
736 struct xdr_stream *xdr,
737 struct gssx_arg_accept_sec_context *arg)
738{
739 int err;
740
741 err = gssx_enc_call_ctx(xdr, &arg->call_ctx);
742 if (err)
743 goto done;
744
745 /* arg->context_handle */
746 if (arg->context_handle) {
747 err = gssx_enc_ctx(xdr, arg->context_handle);
748 if (err)
749 goto done;
750 } else {
751 err = gssx_enc_bool(xdr, 0);
752 }
753
754 /* arg->cred_handle */
755 if (arg->cred_handle) {
756 err = gssx_enc_cred(xdr, arg->cred_handle);
757 if (err)
758 goto done;
759 } else {
760 err = gssx_enc_bool(xdr, 0);
761 }
762
763 /* arg->input_token */
764 err = gssx_enc_in_token(xdr, &arg->input_token);
765 if (err)
766 goto done;
767
768 /* arg->input_cb */
769 if (arg->input_cb) {
770 err = gssx_enc_cb(xdr, arg->input_cb);
771 if (err)
772 goto done;
773 } else {
774 err = gssx_enc_bool(xdr, 0);
775 }
776
777 err = gssx_enc_bool(xdr, arg->ret_deleg_cred);
778 if (err)
779 goto done;
780
781 /* leave options empty for now, will add once we have any options
782 * to pass up at all */
783 /* arg->options */
784 err = dummy_enc_opt_array(xdr, &arg->options);
785
786done:
787 if (err)
788 dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
789}
790
791int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
792 struct xdr_stream *xdr,
793 struct gssx_res_accept_sec_context *res)
794{
795 u32 value_follows;
796 int err;
797
798 /* res->status */
799 err = gssx_dec_status(xdr, &res->status);
800 if (err)
801 return err;
802
803 /* res->context_handle */
804 err = gssx_dec_bool(xdr, &value_follows);
805 if (err)
806 return err;
807 if (value_follows) {
808 err = gssx_dec_ctx(xdr, res->context_handle);
809 if (err)
810 return err;
811 } else {
812 res->context_handle = NULL;
813 }
814
815 /* res->output_token */
816 err = gssx_dec_bool(xdr, &value_follows);
817 if (err)
818 return err;
819 if (value_follows) {
820 err = gssx_dec_buffer(xdr, res->output_token);
821 if (err)
822 return err;
823 } else {
824 res->output_token = NULL;
825 }
826
827 /* res->delegated_cred_handle */
828 err = gssx_dec_bool(xdr, &value_follows);
829 if (err)
830 return err;
831 if (value_follows) {
832 /* we do not support upcall servers sending this data. */
833 return -EINVAL;
834 }
835
836 /* res->options */
837 err = gssx_dec_option_array(xdr, &res->options);
838
839 return err;
840}
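All of the buffer encoders above bottom out in standard XDR opaque<> encoding: a 4-byte big-endian length word followed by the bytes, padded to a 4-byte boundary (xdr_encode_opaque does the padding). A sketch of the resulting wire layout and size arithmetic:

#include <stdint.h>

/* wire bytes for the 5-byte buffer "hello":
 *
 *   00 00 00 05  68 65 6c 6c 6f  00 00 00
 *   |-- len=5 -| |--- "hello" -| |- pad -|
 */

/* total wire size of an XDR opaque holding len data bytes */
static uint32_t xdr_opaque_size(uint32_t len)
{
	return 4 + ((len + 3) & ~3u);	/* length word + padded data */
}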
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
new file mode 100644
index 000000000000..1c98b27d870c
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -0,0 +1,264 @@
1/*
2 * GSS Proxy upcall module
3 *
4 * Copyright (C) 2012 Simo Sorce <simo@redhat.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20
21#ifndef _LINUX_GSS_RPC_XDR_H
22#define _LINUX_GSS_RPC_XDR_H
23
24#include <linux/sunrpc/xdr.h>
25#include <linux/sunrpc/clnt.h>
26#include <linux/sunrpc/xprtsock.h>
27
28#ifdef RPC_DEBUG
29# define RPCDBG_FACILITY RPCDBG_AUTH
30#endif
31
32#define LUCID_OPTION "exported_context_type"
33#define LUCID_VALUE "linux_lucid_v1"
34#define CREDS_OPTION "exported_creds_type"
35#define CREDS_VALUE "linux_creds_v1"
36
37typedef struct xdr_netobj gssx_buffer;
38typedef struct xdr_netobj utf8string;
39typedef struct xdr_netobj gssx_OID;
40
41enum gssx_cred_usage {
42 GSSX_C_INITIATE = 1,
43 GSSX_C_ACCEPT = 2,
44 GSSX_C_BOTH = 3,
45};
46
47struct gssx_option {
48 gssx_buffer option;
49 gssx_buffer value;
50};
51
52struct gssx_option_array {
53 u32 count;
54 struct gssx_option *data;
55};
56
57struct gssx_status {
58 u64 major_status;
59 gssx_OID mech;
60 u64 minor_status;
61 utf8string major_status_string;
62 utf8string minor_status_string;
63 gssx_buffer server_ctx;
64 struct gssx_option_array options;
65};
66
67struct gssx_call_ctx {
68 utf8string locale;
69 gssx_buffer server_ctx;
70 struct gssx_option_array options;
71};
72
73struct gssx_name_attr {
74 gssx_buffer attr;
75 gssx_buffer value;
76 struct gssx_option_array extensions;
77};
78
79struct gssx_name_attr_array {
80 u32 count;
81 struct gssx_name_attr *data;
82};
83
84struct gssx_name {
85 gssx_buffer display_name;
86};
87typedef struct gssx_name gssx_name;
88
89struct gssx_cred_element {
90 gssx_name MN;
91 gssx_OID mech;
92 u32 cred_usage;
93 u64 initiator_time_rec;
94 u64 acceptor_time_rec;
95 struct gssx_option_array options;
96};
97
98struct gssx_cred_element_array {
99 u32 count;
100 struct gssx_cred_element *data;
101};
102
103struct gssx_cred {
104 gssx_name desired_name;
105 struct gssx_cred_element_array elements;
106 gssx_buffer cred_handle_reference;
107 u32 needs_release;
108};
109
110struct gssx_ctx {
111 gssx_buffer exported_context_token;
112 gssx_buffer state;
113 u32 need_release;
114 gssx_OID mech;
115 gssx_name src_name;
116 gssx_name targ_name;
117 u64 lifetime;
118 u64 ctx_flags;
119 u32 locally_initiated;
120 u32 open;
121 struct gssx_option_array options;
122};
123
124struct gssx_cb {
125 u64 initiator_addrtype;
126 gssx_buffer initiator_address;
127 u64 acceptor_addrtype;
128 gssx_buffer acceptor_address;
129 gssx_buffer application_data;
130};
131
132
133/* This structure is not defined in the protocol.
134 * It is used in the kernel to carry around a big buffer
135 * as a set of pages */
136struct gssp_in_token {
137 struct page **pages; /* Array of contiguous pages */
138 unsigned int page_base; /* Start of page data */
139 unsigned int page_len; /* Length of page data */
140};
141
142struct gssx_arg_accept_sec_context {
143 struct gssx_call_ctx call_ctx;
144 struct gssx_ctx *context_handle;
145 struct gssx_cred *cred_handle;
146 struct gssp_in_token input_token;
147 struct gssx_cb *input_cb;
148 u32 ret_deleg_cred;
149 struct gssx_option_array options;
150};
151
152struct gssx_res_accept_sec_context {
153 struct gssx_status status;
154 struct gssx_ctx *context_handle;
155 gssx_buffer *output_token;
156 /* struct gssx_cred *delegated_cred_handle; not used in kernel */
157 struct gssx_option_array options;
158};
159
160
161
162#define gssx_enc_indicate_mechs NULL
163#define gssx_dec_indicate_mechs NULL
164#define gssx_enc_get_call_context NULL
165#define gssx_dec_get_call_context NULL
166#define gssx_enc_import_and_canon_name NULL
167#define gssx_dec_import_and_canon_name NULL
168#define gssx_enc_export_cred NULL
169#define gssx_dec_export_cred NULL
170#define gssx_enc_import_cred NULL
171#define gssx_dec_import_cred NULL
172#define gssx_enc_acquire_cred NULL
173#define gssx_dec_acquire_cred NULL
174#define gssx_enc_store_cred NULL
175#define gssx_dec_store_cred NULL
176#define gssx_enc_init_sec_context NULL
177#define gssx_dec_init_sec_context NULL
178void gssx_enc_accept_sec_context(struct rpc_rqst *req,
179 struct xdr_stream *xdr,
180 struct gssx_arg_accept_sec_context *args);
181int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
182 struct xdr_stream *xdr,
183 struct gssx_res_accept_sec_context *res);
184#define gssx_enc_release_handle NULL
185#define gssx_dec_release_handle NULL
186#define gssx_enc_get_mic NULL
187#define gssx_dec_get_mic NULL
188#define gssx_enc_verify NULL
189#define gssx_dec_verify NULL
190#define gssx_enc_wrap NULL
191#define gssx_dec_wrap NULL
192#define gssx_enc_unwrap NULL
193#define gssx_dec_unwrap NULL
194#define gssx_enc_wrap_size_limit NULL
195#define gssx_dec_wrap_size_limit NULL
196
197/* non-implemented calls are set to 0 size */
198#define GSSX_ARG_indicate_mechs_sz 0
199#define GSSX_RES_indicate_mechs_sz 0
200#define GSSX_ARG_get_call_context_sz 0
201#define GSSX_RES_get_call_context_sz 0
202#define GSSX_ARG_import_and_canon_name_sz 0
203#define GSSX_RES_import_and_canon_name_sz 0
204#define GSSX_ARG_export_cred_sz 0
205#define GSSX_RES_export_cred_sz 0
206#define GSSX_ARG_import_cred_sz 0
207#define GSSX_RES_import_cred_sz 0
208#define GSSX_ARG_acquire_cred_sz 0
209#define GSSX_RES_acquire_cred_sz 0
210#define GSSX_ARG_store_cred_sz 0
211#define GSSX_RES_store_cred_sz 0
212#define GSSX_ARG_init_sec_context_sz 0
213#define GSSX_RES_init_sec_context_sz 0
214
215#define GSSX_default_in_call_ctx_sz (4 + 4 + 4 + \
216 8 + sizeof(LUCID_OPTION) + sizeof(LUCID_VALUE) + \
217 8 + sizeof(CREDS_OPTION) + sizeof(CREDS_VALUE))
218#define GSSX_default_in_ctx_hndl_sz (4 + 4+8 + 4 + 4 + 6*4 + 6*4 + 8 + 8 + \
219 4 + 4 + 4)
220#define GSSX_default_in_cred_sz 4 /* we send in no cred_handle */
221#define GSSX_default_in_token_sz 4 /* does *not* include token data */
222#define GSSX_default_in_cb_sz 4 /* we do not use channel bindings */
223#define GSSX_ARG_accept_sec_context_sz (GSSX_default_in_call_ctx_sz + \
224 GSSX_default_in_ctx_hndl_sz + \
225 GSSX_default_in_cred_sz + \
226 GSSX_default_in_token_sz + \
227 GSSX_default_in_cb_sz + \
228 4 /* no deleg creds boolean */ + \
229 4) /* empty options */
230
231/* somewhat arbitrary numbers but large enough (we ignore some of the data
232 * sent down, but it is part of the protocol so we need enough space to take
233 * it in) */
234#define GSSX_default_status_sz 8 + 24 + 8 + 256 + 256 + 16 + 4
235#define GSSX_max_output_handle_sz 128
236#define GSSX_max_oid_sz 16
237#define GSSX_max_princ_sz 256
238#define GSSX_default_ctx_sz (GSSX_max_output_handle_sz + \
239 16 + 4 + GSSX_max_oid_sz + \
240 2 * GSSX_max_princ_sz + \
241 8 + 8 + 4 + 4 + 4)
242#define GSSX_max_output_token_sz 1024
243#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4)
244#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
245 GSSX_default_ctx_sz + \
246 GSSX_max_output_token_sz + \
247 4 + GSSX_max_creds_sz)
248
249#define GSSX_ARG_release_handle_sz 0
250#define GSSX_RES_release_handle_sz 0
251#define GSSX_ARG_get_mic_sz 0
252#define GSSX_RES_get_mic_sz 0
253#define GSSX_ARG_verify_sz 0
254#define GSSX_RES_verify_sz 0
255#define GSSX_ARG_wrap_sz 0
256#define GSSX_RES_wrap_sz 0
257#define GSSX_ARG_unwrap_sz 0
258#define GSSX_RES_unwrap_sz 0
259#define GSSX_ARG_wrap_size_limit_sz 0
260#define GSSX_RES_wrap_size_limit_sz 0
261
262
263
264#endif /* _LINUX_GSS_RPC_XDR_H */
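As a back-of-the-envelope check of the argument budget above, treating the macro terms as byte counts as written (sizeof on a string literal counts the NUL terminator, so LUCID_OPTION is 22 bytes, LUCID_VALUE 15, CREDS_OPTION 20, CREDS_VALUE 15):

/* GSSX_default_in_call_ctx_sz = 4+4+4 + 8+22+15 + 8+20+15      = 100
 * GSSX_default_in_ctx_hndl_sz = 4 + 12 + 4 + 4 + 24 + 24 + 16 + 12 = 100
 * cred + token + cb + deleg-bool + empty options = 5 * 4       =  20
 * GSSX_ARG_accept_sec_context_sz                               = 220
 */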
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 5ead60550895..871c73c92165 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -48,8 +48,8 @@
 #include <linux/sunrpc/svcauth.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/cache.h>
+#include "gss_rpc_upcall.h"
 
-#include "../netns.h"
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY	RPCDBG_AUTH
@@ -497,7 +497,8 @@ static int rsc_parse(struct cache_detail *cd,
 		len = qword_get(&mesg, buf, mlen);
 		if (len < 0)
 			goto out;
-		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL);
+		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx,
+						NULL, GFP_KERNEL);
 		if (status)
 			goto out;
 
@@ -505,8 +506,10 @@ static int rsc_parse(struct cache_detail *cd,
 		len = qword_get(&mesg, buf, mlen);
 		if (len > 0) {
 			rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL);
-			if (!rsci.cred.cr_principal)
+			if (!rsci.cred.cr_principal) {
+				status = -ENOMEM;
 				goto out;
+			}
 		}
 
 	}
@@ -987,13 +990,10 @@ gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp,
 }
 
 static inline int
-gss_read_verf(struct rpc_gss_wire_cred *gc,
-	      struct kvec *argv, __be32 *authp,
-	      struct xdr_netobj *in_handle,
-	      struct xdr_netobj *in_token)
+gss_read_common_verf(struct rpc_gss_wire_cred *gc,
+		     struct kvec *argv, __be32 *authp,
+		     struct xdr_netobj *in_handle)
 {
-	struct xdr_netobj tmpobj;
-
 	/* Read the verifier; should be NULL: */
 	*authp = rpc_autherr_badverf;
 	if (argv->iov_len < 2 * 4)
@@ -1009,6 +1009,23 @@ gss_read_verf(struct rpc_gss_wire_cred *gc,
 	if (dup_netobj(in_handle, &gc->gc_ctx))
 		return SVC_CLOSE;
 	*authp = rpc_autherr_badverf;
+
+	return 0;
+}
+
+static inline int
+gss_read_verf(struct rpc_gss_wire_cred *gc,
+	      struct kvec *argv, __be32 *authp,
+	      struct xdr_netobj *in_handle,
+	      struct xdr_netobj *in_token)
+{
+	struct xdr_netobj tmpobj;
+	int res;
+
+	res = gss_read_common_verf(gc, argv, authp, in_handle);
+	if (res)
+		return res;
+
 	if (svc_safe_getnetobj(argv, &tmpobj)) {
 		kfree(in_handle->data);
 		return SVC_DENIED;
@@ -1021,6 +1038,40 @@ gss_read_verf(struct rpc_gss_wire_cred *gc,
 	return 0;
 }
 
+/* OK, this is really heavily dependent on a set of semantics in
+ * how rqstp is set up by svc_recv and pages laid down by the
+ * server when reading a request. We are basically guaranteed that
+ * the token is laid down linearly across a set of pages, starting
+ * at iov_base in rq_arg.head[0] which happens to be the first of a
+ * set of pages stored in rq_pages[].
+ * rq_arg.head[0].iov_base will provide us the page_base to pass
+ * to the upcall.
+ */
+static inline int
+gss_read_proxy_verf(struct svc_rqst *rqstp,
+		    struct rpc_gss_wire_cred *gc, __be32 *authp,
+		    struct xdr_netobj *in_handle,
+		    struct gssp_in_token *in_token)
+{
+	struct kvec *argv = &rqstp->rq_arg.head[0];
+	u32 inlen;
+	int res;
+
+	res = gss_read_common_verf(gc, argv, authp, in_handle);
+	if (res)
+		return res;
+
+	inlen = svc_getnl(argv);
+	if (inlen > (argv->iov_len + rqstp->rq_arg.page_len))
+		return SVC_DENIED;
+
+	in_token->pages = rqstp->rq_pages;
+	in_token->page_base = (ulong)argv->iov_base & ~PAGE_MASK;
+	in_token->page_len = inlen;
+
+	return 0;
+}
+
 static inline int
 gss_write_resv(struct kvec *resv, size_t size_limit,
 		struct xdr_netobj *out_handle, struct xdr_netobj *out_token,
@@ -1048,7 +1099,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
  * the upcall results are available, write the verifier and result.
  * Otherwise, drop the request pending an answer to the upcall.
  */
-static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
+static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
 		struct rpc_gss_wire_cred *gc, __be32 *authp)
 {
 	struct kvec *argv = &rqstp->rq_arg.head[0];
@@ -1088,6 +1139,287 @@ out:
1088 return ret; 1139 return ret;
1089} 1140}
1090 1141
1142static int gss_proxy_save_rsc(struct cache_detail *cd,
1143 struct gssp_upcall_data *ud,
1144 uint64_t *handle)
1145{
1146 struct rsc rsci, *rscp = NULL;
1147 static atomic64_t ctxhctr;
1148 long long ctxh;
1149 struct gss_api_mech *gm = NULL;
1150 time_t expiry;
1151 int status = -EINVAL;
1152
1153 memset(&rsci, 0, sizeof(rsci));
1154 /* context handle */
1155 status = -ENOMEM;
1156 /* the handle needs to be just a unique id,
1157 * use a static counter */
1158 ctxh = atomic64_inc_return(&ctxhctr);
1159
1160 /* make a copy for the caller */
1161 *handle = ctxh;
1162
1163 /* make a copy for the rsc cache */
1164 if (dup_to_netobj(&rsci.handle, (char *)handle, sizeof(uint64_t)))
1165 goto out;
1166 rscp = rsc_lookup(cd, &rsci);
1167 if (!rscp)
1168 goto out;
1169
1170 /* creds */
1171 if (!ud->found_creds) {
1172 /* userspace seem buggy, we should always get at least a
1173 * mapping to nobody */
1174 dprintk("RPC: No creds found, marking Negative!\n");
1175 set_bit(CACHE_NEGATIVE, &rsci.h.flags);
1176 } else {
1177
1178 /* steal creds */
1179 rsci.cred = ud->creds;
1180 memset(&ud->creds, 0, sizeof(struct svc_cred));
1181
1182 status = -EOPNOTSUPP;
1183 /* get mech handle from OID */
1184 gm = gss_mech_get_by_OID(&ud->mech_oid);
1185 if (!gm)
1186 goto out;
1187
1188 status = -EINVAL;
1189 /* mech-specific data: */
1190 status = gss_import_sec_context(ud->out_handle.data,
1191 ud->out_handle.len,
1192 gm, &rsci.mechctx,
1193 &expiry, GFP_KERNEL);
1194 if (status)
1195 goto out;
1196 }
1197
1198 rsci.h.expiry_time = expiry;
1199 rscp = rsc_update(cd, &rsci, rscp);
1200 status = 0;
1201out:
1202 gss_mech_put(gm);
1203 rsc_free(&rsci);
1204 if (rscp)
1205 cache_put(&rscp->h, cd);
1206 else
1207 status = -ENOMEM;
1208 return status;
1209}
1210
1211static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
1212 struct rpc_gss_wire_cred *gc, __be32 *authp)
1213{
1214 struct kvec *resv = &rqstp->rq_res.head[0];
1215 struct xdr_netobj cli_handle;
1216 struct gssp_upcall_data ud;
1217 uint64_t handle;
1218 int status;
1219 int ret;
1220 struct net *net = rqstp->rq_xprt->xpt_net;
1221 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1222
1223 memset(&ud, 0, sizeof(ud));
1224 ret = gss_read_proxy_verf(rqstp, gc, authp,
1225 &ud.in_handle, &ud.in_token);
1226 if (ret)
1227 return ret;
1228
1229 ret = SVC_CLOSE;
1230
1231 /* Perform synchronous upcall to gss-proxy */
1232 status = gssp_accept_sec_context_upcall(net, &ud);
1233 if (status)
1234 goto out;
1235
1236 dprintk("RPC: svcauth_gss: gss major status = %d\n",
1237 ud.major_status);
1238
1239 switch (ud.major_status) {
1240 case GSS_S_CONTINUE_NEEDED:
1241 cli_handle = ud.out_handle;
1242 break;
1243 case GSS_S_COMPLETE:
1244 status = gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle);
1245 if (status)
1246 goto out;
1247 cli_handle.data = (u8 *)&handle;
1248 cli_handle.len = sizeof(handle);
1249 break;
1250 default:
1251 ret = SVC_CLOSE;
1252 goto out;
1253 }
1254
1255 /* Got an answer to the upcall; use it: */
1256 if (gss_write_init_verf(sn->rsc_cache, rqstp,
1257 &cli_handle, &ud.major_status))
1258 goto out;
1259 if (gss_write_resv(resv, PAGE_SIZE,
1260 &cli_handle, &ud.out_token,
1261 ud.major_status, ud.minor_status))
1262 goto out;
1263
1264 ret = SVC_COMPLETE;
1265out:
1266 gssp_free_upcall_data(&ud);
1267 return ret;
1268}
1269
1270DEFINE_SPINLOCK(use_gssp_lock);
1271
1272static bool use_gss_proxy(struct net *net)
1273{
1274 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1275
1276 if (sn->use_gss_proxy != -1)
1277 return sn->use_gss_proxy;
1278 spin_lock(&use_gssp_lock);
1279 /*
1280 * If you wanted gss-proxy, you should have said so before
1281 * starting to accept requests:
1282 */
1283 sn->use_gss_proxy = 0;
1284 spin_unlock(&use_gssp_lock);
1285 return 0;
1286}
1287
1288#ifdef CONFIG_PROC_FS
1289
1290static bool set_gss_proxy(struct net *net, int type)
1291{
1292 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1293 int ret = 0;
1294
1295 WARN_ON_ONCE(type != 0 && type != 1);
1296 spin_lock(&use_gssp_lock);
1297 if (sn->use_gss_proxy == -1 || sn->use_gss_proxy == type)
1298 sn->use_gss_proxy = type;
1299 else
1300 ret = -EBUSY;
1301 spin_unlock(&use_gssp_lock);
1302 wake_up(&sn->gssp_wq);
1303 return ret;
1304}
1305
1306static inline bool gssp_ready(struct sunrpc_net *sn)
1307{
1308 switch (sn->use_gss_proxy) {
1309 case -1:
1310 return false;
1311 case 0:
1312 return true;
1313 case 1:
1314 return sn->gssp_clnt;
1315 }
1316 WARN_ON_ONCE(1);
1317 return false;
1318}
1319
1320static int wait_for_gss_proxy(struct net *net)
1321{
1322 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1323
1324 return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn));
1325}
1326
1327
1328static ssize_t write_gssp(struct file *file, const char __user *buf,
1329 size_t count, loff_t *ppos)
1330{
1331 struct net *net = PDE_DATA(file->f_path.dentry->d_inode);
1332 char tbuf[20];
1333 unsigned long i;
1334 int res;
1335
1336 if (*ppos || count > sizeof(tbuf)-1)
1337 return -EINVAL;
1338 if (copy_from_user(tbuf, buf, count))
1339 return -EFAULT;
1340
1341 tbuf[count] = 0;
1342 res = kstrtoul(tbuf, 0, &i);
1343 if (res)
1344 return res;
1345 if (i != 1)
1346 return -EINVAL;
1347 res = set_gss_proxy(net, 1);
1348 if (res)
1349 return res;
1350 res = set_gssp_clnt(net);
1351 if (res)
1352 return res;
1353 return count;
1354}
1355
1356static ssize_t read_gssp(struct file *file, char __user *buf,
1357 size_t count, loff_t *ppos)
1358{
1359 struct net *net = PDE_DATA(file->f_path.dentry->d_inode);
1360 unsigned long p = *ppos;
1361 char tbuf[10];
1362 size_t len;
1363 int ret;
1364
1365 ret = wait_for_gss_proxy(net);
1366 if (ret)
1367 return ret;
1368
1369 snprintf(tbuf, sizeof(tbuf), "%d\n", use_gss_proxy(net));
1370 len = strlen(tbuf);
1371 if (p >= len)
1372 return 0;
1373 len -= p;
1374 if (len > count)
1375 len = count;
1376 if (copy_to_user(buf, (void *)(tbuf+p), len))
1377 return -EFAULT;
1378 *ppos += len;
1379 return len;
1380}
1381
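
read_gssp() above serves a tiny formatted buffer through the usual procfs
offset protocol: format, clamp to [*ppos, *ppos + count), copy, advance.
A minimal user-space model of that slicing (buf_read() and its caller are
illustrative, not kernel code):

#include <stdio.h>
#include <string.h>

/* Serve the slice [*ppos, *ppos + count) of the formatted value. */
static size_t buf_read(int value, char *out, size_t count, size_t *ppos)
{
	char tbuf[10];
	size_t len;

	snprintf(tbuf, sizeof(tbuf), "%d\n", value);
	len = strlen(tbuf);
	if (*ppos >= len)
		return 0;		/* offset past the data: EOF */
	len -= *ppos;
	if (len > count)
		len = count;		/* short read into a small buffer */
	memcpy(out, tbuf + *ppos, len);
	*ppos += len;
	return len;
}

int main(void)
{
	char out[4];
	size_t pos = 0, n;

	while ((n = buf_read(1, out, sizeof(out), &pos)) > 0)
		fwrite(out, 1, n, stdout);	/* prints "1\n", then hits EOF */
	return 0;
}
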
1382static const struct file_operations use_gss_proxy_ops = {
1383 .open = nonseekable_open,
1384 .write = write_gssp,
1385 .read = read_gssp,
1386};
1387
1388static int create_use_gss_proxy_proc_entry(struct net *net)
1389{
1390 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1391 struct proc_dir_entry **p = &sn->use_gssp_proc;
1392
1393 sn->use_gss_proxy = -1;
1394 *p = proc_create_data("use-gss-proxy", S_IFREG|S_IRUSR|S_IWUSR,
1395 sn->proc_net_rpc,
1396 &use_gss_proxy_ops, net);
1397 if (!*p)
1398 return -ENOMEM;
1399 init_gssp_clnt(sn);
1400 return 0;
1401}
1402
1403static void destroy_use_gss_proxy_proc_entry(struct net *net)
1404{
1405 struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
1406
1407 if (sn->use_gssp_proc) {
1408 remove_proc_entry("use-gss-proxy", sn->proc_net_rpc);
1409 clear_gssp_clnt(sn);
1410 }
1411}
1412#else /* CONFIG_PROC_FS */
1413
1414static int create_use_gss_proxy_proc_entry(struct net *net)
1415{
1416 return 0;
1417}
1418
1419static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
1420
1421#endif /* CONFIG_PROC_FS */
1422
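
use_gss_proxy() and set_gss_proxy() above implement a write-once latch:
sn->use_gss_proxy starts at -1 ("undecided"), the proc write may set it to 1,
and otherwise the first request processed pins it to 0, so the mode can never
flip while requests are in flight. A user-space sketch of the same idea, with
a pthread mutex standing in for the spinlock and illustrative names
throughout (the kernel variant simply writes 0 under the lock instead of
rechecking):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int use_proxy = -1;		/* -1: undecided, 0: legacy, 1: proxy */

static int get_mode(void)		/* mirrors use_gss_proxy() */
{
	if (use_proxy != -1)
		return use_proxy;	/* already latched: lock-free fast path */
	pthread_mutex_lock(&lock);
	if (use_proxy == -1)
		use_proxy = 0;		/* first request wins: default to legacy */
	pthread_mutex_unlock(&lock);
	return use_proxy;
}

static int set_mode(int type)		/* mirrors set_gss_proxy() */
{
	int ret = 0;

	pthread_mutex_lock(&lock);
	if (use_proxy == -1 || use_proxy == type)
		use_proxy = type;
	else
		ret = -1;		/* too late: latched the other way */
	pthread_mutex_unlock(&lock);
	return ret;
}

int main(void)
{
	printf("mode=%d\n", get_mode());	/* latches 0 */
	printf("set=%d\n", set_mode(1));	/* now refused */
	return 0;
}
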
1091/* 1423/*
1092 * Accept an rpcsec packet. 1424 * Accept an rpcsec packet.
1093 * If context establishment, punt to user space 1425 * If context establishment, punt to user space
@@ -1154,7 +1486,10 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
1154 switch (gc->gc_proc) { 1486 switch (gc->gc_proc) {
1155 case RPC_GSS_PROC_INIT: 1487 case RPC_GSS_PROC_INIT:
1156 case RPC_GSS_PROC_CONTINUE_INIT: 1488 case RPC_GSS_PROC_CONTINUE_INIT:
1157 return svcauth_gss_handle_init(rqstp, gc, authp); 1489 if (use_gss_proxy(SVC_NET(rqstp)))
1490 return svcauth_gss_proxy_init(rqstp, gc, authp);
1491 else
1492 return svcauth_gss_legacy_init(rqstp, gc, authp);
1158 case RPC_GSS_PROC_DATA: 1493 case RPC_GSS_PROC_DATA:
1159 case RPC_GSS_PROC_DESTROY: 1494 case RPC_GSS_PROC_DESTROY:
1160 /* Look up the context, and check the verifier: */ 1495 /* Look up the context, and check the verifier: */
@@ -1220,7 +1555,9 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
1220 svcdata->rsci = rsci; 1555 svcdata->rsci = rsci;
1221 cache_get(&rsci->h); 1556 cache_get(&rsci->h);
1222 rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor( 1557 rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor(
1223 rsci->mechctx->mech_type, gc->gc_svc); 1558 rsci->mechctx->mech_type,
1559 GSS_C_QOP_DEFAULT,
1560 gc->gc_svc);
1224 ret = SVC_OK; 1561 ret = SVC_OK;
1225 goto out; 1562 goto out;
1226 } 1563 }
@@ -1529,7 +1866,12 @@ gss_svc_init_net(struct net *net)
1529 rv = rsi_cache_create_net(net); 1866 rv = rsi_cache_create_net(net);
1530 if (rv) 1867 if (rv)
1531 goto out1; 1868 goto out1;
1869 rv = create_use_gss_proxy_proc_entry(net);
1870 if (rv)
1871 goto out2;
1532 return 0; 1872 return 0;
1873out2:
1874 destroy_use_gss_proxy_proc_entry(net);
1533out1: 1875out1:
1534 rsc_cache_destroy_net(net); 1876 rsc_cache_destroy_net(net);
1535 return rv; 1877 return rv;
@@ -1538,6 +1880,7 @@ out1:
1538void 1880void
1539gss_svc_shutdown_net(struct net *net) 1881gss_svc_shutdown_net(struct net *net)
1540{ 1882{
1883 destroy_use_gss_proxy_proc_entry(net);
1541 rsi_cache_destroy_net(net); 1884 rsi_cache_destroy_net(net);
1542 rsc_cache_destroy_net(net); 1885 rsc_cache_destroy_net(net);
1543} 1886}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 25d58e766014..80fe5c86efd1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -986,8 +986,10 @@ static int cache_open(struct inode *inode, struct file *filp,
986 nonseekable_open(inode, filp); 986 nonseekable_open(inode, filp);
987 if (filp->f_mode & FMODE_READ) { 987 if (filp->f_mode & FMODE_READ) {
988 rp = kmalloc(sizeof(*rp), GFP_KERNEL); 988 rp = kmalloc(sizeof(*rp), GFP_KERNEL);
989 if (!rp) 989 if (!rp) {
990 module_put(cd->owner);
990 return -ENOMEM; 991 return -ENOMEM;
992 }
991 rp->offset = 0; 993 rp->offset = 0;
992 rp->q.reader = 1; 994 rp->q.reader = 1;
993 atomic_inc(&cd->readers); 995 atomic_inc(&cd->readers);
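
The cache_open() fix above is a reference-balancing repair: the function
takes a module reference with try_module_get() on entry, so the
allocation-failure path must drop it before returning. The generic shape,
as a runnable sketch with illustrative names:

#include <stdlib.h>

struct owner { int refs; };

static void owner_get(struct owner *o) { o->refs++; }
static void owner_put(struct owner *o) { o->refs--; }

static int open_thing(struct owner *o)
{
	char *state;

	owner_get(o);			/* taken unconditionally on entry */
	state = malloc(64);
	if (!state) {
		owner_put(o);		/* the fix: undo the get before failing */
		return -1;
	}
	/* ...normal setup; release() later frees state and drops the ref... */
	free(state);
	owner_put(o);
	return 0;
}

int main(void)
{
	struct owner o = { 0 };

	return open_thing(&o) == 0 && o.refs == 0 ? 0 : 1;
}
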
@@ -1208,7 +1210,6 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall);
1208 * key and content are both parsed by cache 1210 * key and content are both parsed by cache
1209 */ 1211 */
1210 1212
1211#define isodigit(c) (isdigit(c) && c <= '7')
1212int qword_get(char **bpp, char *dest, int bufsize) 1213int qword_get(char **bpp, char *dest, int bufsize)
1213{ 1214{
1214 /* return bytes copied, or -1 on error */ 1215 /* return bytes copied, or -1 on error */
@@ -1461,7 +1462,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
1461static ssize_t cache_read_procfs(struct file *filp, char __user *buf, 1462static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
1462 size_t count, loff_t *ppos) 1463 size_t count, loff_t *ppos)
1463{ 1464{
1464 struct cache_detail *cd = PDE(file_inode(filp))->data; 1465 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1465 1466
1466 return cache_read(filp, buf, count, ppos, cd); 1467 return cache_read(filp, buf, count, ppos, cd);
1467} 1468}
@@ -1469,14 +1470,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
1469static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, 1470static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
1470 size_t count, loff_t *ppos) 1471 size_t count, loff_t *ppos)
1471{ 1472{
1472 struct cache_detail *cd = PDE(file_inode(filp))->data; 1473 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1473 1474
1474 return cache_write(filp, buf, count, ppos, cd); 1475 return cache_write(filp, buf, count, ppos, cd);
1475} 1476}
1476 1477
1477static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) 1478static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
1478{ 1479{
1479 struct cache_detail *cd = PDE(file_inode(filp))->data; 1480 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1480 1481
1481 return cache_poll(filp, wait, cd); 1482 return cache_poll(filp, wait, cd);
1482} 1483}
@@ -1485,21 +1486,21 @@ static long cache_ioctl_procfs(struct file *filp,
1485 unsigned int cmd, unsigned long arg) 1486 unsigned int cmd, unsigned long arg)
1486{ 1487{
1487 struct inode *inode = file_inode(filp); 1488 struct inode *inode = file_inode(filp);
1488 struct cache_detail *cd = PDE(inode)->data; 1489 struct cache_detail *cd = PDE_DATA(inode);
1489 1490
1490 return cache_ioctl(inode, filp, cmd, arg, cd); 1491 return cache_ioctl(inode, filp, cmd, arg, cd);
1491} 1492}
1492 1493
1493static int cache_open_procfs(struct inode *inode, struct file *filp) 1494static int cache_open_procfs(struct inode *inode, struct file *filp)
1494{ 1495{
1495 struct cache_detail *cd = PDE(inode)->data; 1496 struct cache_detail *cd = PDE_DATA(inode);
1496 1497
1497 return cache_open(inode, filp, cd); 1498 return cache_open(inode, filp, cd);
1498} 1499}
1499 1500
1500static int cache_release_procfs(struct inode *inode, struct file *filp) 1501static int cache_release_procfs(struct inode *inode, struct file *filp)
1501{ 1502{
1502 struct cache_detail *cd = PDE(inode)->data; 1503 struct cache_detail *cd = PDE_DATA(inode);
1503 1504
1504 return cache_release(inode, filp, cd); 1505 return cache_release(inode, filp, cd);
1505} 1506}
@@ -1517,14 +1518,14 @@ static const struct file_operations cache_file_operations_procfs = {
1517 1518
1518static int content_open_procfs(struct inode *inode, struct file *filp) 1519static int content_open_procfs(struct inode *inode, struct file *filp)
1519{ 1520{
1520 struct cache_detail *cd = PDE(inode)->data; 1521 struct cache_detail *cd = PDE_DATA(inode);
1521 1522
1522 return content_open(inode, filp, cd); 1523 return content_open(inode, filp, cd);
1523} 1524}
1524 1525
1525static int content_release_procfs(struct inode *inode, struct file *filp) 1526static int content_release_procfs(struct inode *inode, struct file *filp)
1526{ 1527{
1527 struct cache_detail *cd = PDE(inode)->data; 1528 struct cache_detail *cd = PDE_DATA(inode);
1528 1529
1529 return content_release(inode, filp, cd); 1530 return content_release(inode, filp, cd);
1530} 1531}
@@ -1538,14 +1539,14 @@ static const struct file_operations content_file_operations_procfs = {
1538 1539
1539static int open_flush_procfs(struct inode *inode, struct file *filp) 1540static int open_flush_procfs(struct inode *inode, struct file *filp)
1540{ 1541{
1541 struct cache_detail *cd = PDE(inode)->data; 1542 struct cache_detail *cd = PDE_DATA(inode);
1542 1543
1543 return open_flush(inode, filp, cd); 1544 return open_flush(inode, filp, cd);
1544} 1545}
1545 1546
1546static int release_flush_procfs(struct inode *inode, struct file *filp) 1547static int release_flush_procfs(struct inode *inode, struct file *filp)
1547{ 1548{
1548 struct cache_detail *cd = PDE(inode)->data; 1549 struct cache_detail *cd = PDE_DATA(inode);
1549 1550
1550 return release_flush(inode, filp, cd); 1551 return release_flush(inode, filp, cd);
1551} 1552}
@@ -1553,7 +1554,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp)
1553static ssize_t read_flush_procfs(struct file *filp, char __user *buf, 1554static ssize_t read_flush_procfs(struct file *filp, char __user *buf,
1554 size_t count, loff_t *ppos) 1555 size_t count, loff_t *ppos)
1555{ 1556{
1556 struct cache_detail *cd = PDE(file_inode(filp))->data; 1557 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1557 1558
1558 return read_flush(filp, buf, count, ppos, cd); 1559 return read_flush(filp, buf, count, ppos, cd);
1559} 1560}
@@ -1562,7 +1563,7 @@ static ssize_t write_flush_procfs(struct file *filp,
1562 const char __user *buf, 1563 const char __user *buf,
1563 size_t count, loff_t *ppos) 1564 size_t count, loff_t *ppos)
1564{ 1565{
1565 struct cache_detail *cd = PDE(file_inode(filp))->data; 1566 struct cache_detail *cd = PDE_DATA(file_inode(filp));
1566 1567
1567 return write_flush(filp, buf, count, ppos, cd); 1568 return write_flush(filp, buf, count, ppos, cd);
1568} 1569}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index dcc446e7fbf6..5a750b9c3640 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -304,10 +304,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
304 err = rpciod_up(); 304 err = rpciod_up();
305 if (err) 305 if (err)
306 goto out_no_rpciod; 306 goto out_no_rpciod;
307 err = -EINVAL;
308 if (!xprt)
309 goto out_no_xprt;
310 307
308 err = -EINVAL;
311 if (args->version >= program->nrvers) 309 if (args->version >= program->nrvers)
312 goto out_err; 310 goto out_err;
313 version = program->version[args->version]; 311 version = program->version[args->version];
@@ -362,7 +360,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
362 360
363 auth = rpcauth_create(args->authflavor, clnt); 361 auth = rpcauth_create(args->authflavor, clnt);
364 if (IS_ERR(auth)) { 362 if (IS_ERR(auth)) {
365 printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n", 363 dprintk("RPC: Couldn't create auth handle (flavor %u)\n",
366 args->authflavor); 364 args->authflavor);
367 err = PTR_ERR(auth); 365 err = PTR_ERR(auth);
368 goto out_no_auth; 366 goto out_no_auth;
@@ -382,10 +380,9 @@ out_no_principal:
382out_no_stats: 380out_no_stats:
383 kfree(clnt); 381 kfree(clnt);
384out_err: 382out_err:
385 xprt_put(xprt);
386out_no_xprt:
387 rpciod_down(); 383 rpciod_down();
388out_no_rpciod: 384out_no_rpciod:
385 xprt_put(xprt);
389 return ERR_PTR(err); 386 return ERR_PTR(err);
390} 387}
391 388
@@ -414,6 +411,10 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
414 }; 411 };
415 char servername[48]; 412 char servername[48];
416 413
414 if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
415 xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
416 if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
417 xprtargs.flags |= XPRT_CREATE_NO_IDLE_TIMEOUT;
417 /* 418 /*
418 * If the caller chooses not to specify a hostname, whip 419 * If the caller chooses not to specify a hostname, whip
419 * up a string representation of the passed-in address. 420 * up a string representation of the passed-in address.
@@ -512,7 +513,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
512 new = rpc_new_client(args, xprt); 513 new = rpc_new_client(args, xprt);
513 if (IS_ERR(new)) { 514 if (IS_ERR(new)) {
514 err = PTR_ERR(new); 515 err = PTR_ERR(new);
515 goto out_put; 516 goto out_err;
516 } 517 }
517 518
518 atomic_inc(&clnt->cl_count); 519 atomic_inc(&clnt->cl_count);
@@ -525,8 +526,6 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
525 new->cl_chatty = clnt->cl_chatty; 526 new->cl_chatty = clnt->cl_chatty;
526 return new; 527 return new;
527 528
528out_put:
529 xprt_put(xprt);
530out_err: 529out_err:
531 dprintk("RPC: %s: returned error %d\n", __func__, err); 530 dprintk("RPC: %s: returned error %d\n", __func__, err);
532 return ERR_PTR(err); 531 return ERR_PTR(err);
@@ -684,6 +683,7 @@ rpc_release_client(struct rpc_clnt *clnt)
684 if (atomic_dec_and_test(&clnt->cl_count)) 683 if (atomic_dec_and_test(&clnt->cl_count))
685 rpc_free_auth(clnt); 684 rpc_free_auth(clnt);
686} 685}
686EXPORT_SYMBOL_GPL(rpc_release_client);
687 687
688/** 688/**
689 * rpc_bind_new_program - bind a new RPC program to an existing client 689 * rpc_bind_new_program - bind a new RPC program to an existing client
@@ -1306,6 +1306,8 @@ call_reserve(struct rpc_task *task)
1306 xprt_reserve(task); 1306 xprt_reserve(task);
1307} 1307}
1308 1308
1309static void call_retry_reserve(struct rpc_task *task);
1310
1309/* 1311/*
1310 * 1b. Grok the result of xprt_reserve() 1312 * 1b. Grok the result of xprt_reserve()
1311 */ 1313 */
@@ -1347,7 +1349,7 @@ call_reserveresult(struct rpc_task *task)
1347 case -ENOMEM: 1349 case -ENOMEM:
1348 rpc_delay(task, HZ >> 2); 1350 rpc_delay(task, HZ >> 2);
1349 case -EAGAIN: /* woken up; retry */ 1351 case -EAGAIN: /* woken up; retry */
1350 task->tk_action = call_reserve; 1352 task->tk_action = call_retry_reserve;
1351 return; 1353 return;
1352 case -EIO: /* probably a shutdown */ 1354 case -EIO: /* probably a shutdown */
1353 break; 1355 break;
@@ -1360,6 +1362,19 @@ call_reserveresult(struct rpc_task *task)
1360} 1362}
1361 1363
1362/* 1364/*
1365 * 1c. Retry reserving an RPC call slot
1366 */
1367static void
1368call_retry_reserve(struct rpc_task *task)
1369{
1370 dprint_status(task);
1371
1372 task->tk_status = 0;
1373 task->tk_action = call_reserveresult;
1374 xprt_retry_reserve(task);
1375}
1376
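
call_retry_reserve() above is a new state in the RPC client's finite-state
machine: states are just functions assigned to tk_action, and
call_reserveresult() now routes -EAGAIN wakeups to the retry state instead
of the initial one. A stripped-down model of that dispatch style (all names
and the retry count are illustrative):

#include <errno.h>
#include <stdio.h>

struct task;
typedef void (*action_t)(struct task *);

struct task {
	action_t action;	/* plays the role of tk_action */
	int status;
	int attempts;
};

static void reserve_result(struct task *t);

static void retry_reserve(struct task *t)
{
	t->action = reserve_result;			/* next state */
	t->status = (t->attempts++ < 2) ? -EAGAIN : 0;	/* fail twice, then ok */
}

static void reserve_result(struct task *t)
{
	if (t->status == -EAGAIN) {
		t->action = retry_reserve;		/* woken up; retry */
		return;
	}
	t->action = NULL;				/* slot acquired; done */
}

int main(void)
{
	struct task t = { retry_reserve, 0, 0 };

	while (t.action)
		t.action(&t);		/* the rpc_execute()-style loop */
	printf("done after %d attempts\n", t.attempts);
	return 0;
}
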
1377/*
1363 * 2. Bind and/or refresh the credentials 1378 * 2. Bind and/or refresh the credentials
1364 */ 1379 */
1365static void 1380static void
@@ -1644,22 +1659,26 @@ call_connect_status(struct rpc_task *task)
1644 1659
1645 dprint_status(task); 1660 dprint_status(task);
1646 1661
1647 task->tk_status = 0;
1648 if (status >= 0 || status == -EAGAIN) {
1649 clnt->cl_stats->netreconn++;
1650 task->tk_action = call_transmit;
1651 return;
1652 }
1653
1654 trace_rpc_connect_status(task, status); 1662 trace_rpc_connect_status(task, status);
1655 switch (status) { 1663 switch (status) {
1656 /* if soft mounted, test if we've timed out */ 1664 /* if soft mounted, test if we've timed out */
1657 case -ETIMEDOUT: 1665 case -ETIMEDOUT:
1658 task->tk_action = call_timeout; 1666 task->tk_action = call_timeout;
1659 break; 1667 return;
1660 default: 1668 case -ECONNREFUSED:
1661 rpc_exit(task, -EIO); 1669 case -ECONNRESET:
1670 case -ENETUNREACH:
1671 if (RPC_IS_SOFTCONN(task))
1672 break;
1673 /* retry with existing socket, after a delay */
1674 case 0:
1675 case -EAGAIN:
1676 task->tk_status = 0;
1677 clnt->cl_stats->netreconn++;
1678 task->tk_action = call_transmit;
1679 return;
1662 } 1680 }
1681 rpc_exit(task, status);
1663} 1682}
1664 1683
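
The call_connect_status() rewrite above replaces "anything but a timeout is
fatal" with per-errno policy: connection errors retry on the existing socket
unless the task is soft-connect, 0/-EAGAIN proceed to transmit, and
everything else exits with the real error instead of a blanket -EIO. A
compact model of that classification (enum and names are illustrative):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum next { NEXT_TIMEOUT, NEXT_TRANSMIT, NEXT_EXIT };

static enum next classify(int status, bool softconn)
{
	switch (status) {
	case -ETIMEDOUT:
		return NEXT_TIMEOUT;		/* let the timeout logic decide */
	case -ECONNREFUSED:
	case -ECONNRESET:
	case -ENETUNREACH:
		if (softconn)
			return NEXT_EXIT;	/* soft connect: report the error */
		/* else retry with the existing socket: fall through */
	case 0:
	case -EAGAIN:
		return NEXT_TRANSMIT;		/* connected, or worth retrying */
	}
	return NEXT_EXIT;			/* anything else is fatal */
}

int main(void)
{
	printf("%d %d %d\n",
	       classify(0, false),		/* NEXT_TRANSMIT */
	       classify(-ECONNREFUSED, false),	/* NEXT_TRANSMIT: retry */
	       classify(-ECONNREFUSED, true));	/* NEXT_EXIT */
	return 0;
}
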
1665/* 1684/*
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index ce7bd449173d..7111a4c9113b 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -23,6 +23,12 @@ struct sunrpc_net {
23 struct rpc_clnt *rpcb_local_clnt4; 23 struct rpc_clnt *rpcb_local_clnt4;
24 spinlock_t rpcb_clnt_lock; 24 spinlock_t rpcb_clnt_lock;
25 unsigned int rpcb_users; 25 unsigned int rpcb_users;
26
27 struct mutex gssp_lock;
28 wait_queue_head_t gssp_wq;
29 struct rpc_clnt *gssp_clnt;
30 int use_gss_proxy;
31 struct proc_dir_entry *use_gssp_proc;
26}; 32};
27 33
28extern int sunrpc_net_id; 34extern int sunrpc_net_id;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index bc2068ee795b..21b75cb08c03 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -64,7 +64,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) {
64 64
65static int rpc_proc_open(struct inode *inode, struct file *file) 65static int rpc_proc_open(struct inode *inode, struct file *file)
66{ 66{
67 return single_open(file, rpc_proc_show, PDE(inode)->data); 67 return single_open(file, rpc_proc_show, PDE_DATA(inode));
68} 68}
69 69
70static const struct file_operations rpc_proc_fops = { 70static const struct file_operations rpc_proc_fops = {
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b7478d5e7ffd..095363eee764 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -948,6 +948,34 @@ void xprt_transmit(struct rpc_task *task)
948 spin_unlock_bh(&xprt->transport_lock); 948 spin_unlock_bh(&xprt->transport_lock);
949} 949}
950 950
951static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
952{
953 set_bit(XPRT_CONGESTED, &xprt->state);
954 rpc_sleep_on(&xprt->backlog, task, NULL);
955}
956
957static void xprt_wake_up_backlog(struct rpc_xprt *xprt)
958{
959 if (rpc_wake_up_next(&xprt->backlog) == NULL)
960 clear_bit(XPRT_CONGESTED, &xprt->state);
961}
962
963static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
964{
965 bool ret = false;
966
967 if (!test_bit(XPRT_CONGESTED, &xprt->state))
968 goto out;
969 spin_lock(&xprt->reserve_lock);
970 if (test_bit(XPRT_CONGESTED, &xprt->state)) {
971 rpc_sleep_on(&xprt->backlog, task, NULL);
972 ret = true;
973 }
974 spin_unlock(&xprt->reserve_lock);
975out:
976 return ret;
977}
978
951static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) 979static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
952{ 980{
953 struct rpc_rqst *req = ERR_PTR(-EAGAIN); 981 struct rpc_rqst *req = ERR_PTR(-EAGAIN);
@@ -992,7 +1020,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
992 task->tk_status = -ENOMEM; 1020 task->tk_status = -ENOMEM;
993 break; 1021 break;
994 case -EAGAIN: 1022 case -EAGAIN:
995 rpc_sleep_on(&xprt->backlog, task, NULL); 1023 xprt_add_backlog(xprt, task);
996 dprintk("RPC: waiting for request slot\n"); 1024 dprintk("RPC: waiting for request slot\n");
997 default: 1025 default:
998 task->tk_status = -EAGAIN; 1026 task->tk_status = -EAGAIN;
@@ -1028,7 +1056,7 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
1028 memset(req, 0, sizeof(*req)); /* mark unused */ 1056 memset(req, 0, sizeof(*req)); /* mark unused */
1029 list_add(&req->rq_list, &xprt->free); 1057 list_add(&req->rq_list, &xprt->free);
1030 } 1058 }
1031 rpc_wake_up_next(&xprt->backlog); 1059 xprt_wake_up_backlog(xprt);
1032 spin_unlock(&xprt->reserve_lock); 1060 spin_unlock(&xprt->reserve_lock);
1033} 1061}
1034 1062
@@ -1092,7 +1120,8 @@ EXPORT_SYMBOL_GPL(xprt_free);
1092 * xprt_reserve - allocate an RPC request slot 1120 * xprt_reserve - allocate an RPC request slot
1093 * @task: RPC task requesting a slot allocation 1121 * @task: RPC task requesting a slot allocation
1094 * 1122 *
1095 * If no more slots are available, place the task on the transport's 1123 * If the transport is marked as being congested, or if no more
1124 * slots are available, place the task on the transport's
1096 * backlog queue. 1125 * backlog queue.
1097 */ 1126 */
1098void xprt_reserve(struct rpc_task *task) 1127void xprt_reserve(struct rpc_task *task)
@@ -1107,6 +1136,32 @@ void xprt_reserve(struct rpc_task *task)
1107 task->tk_status = -EAGAIN; 1136 task->tk_status = -EAGAIN;
1108 rcu_read_lock(); 1137 rcu_read_lock();
1109 xprt = rcu_dereference(task->tk_client->cl_xprt); 1138 xprt = rcu_dereference(task->tk_client->cl_xprt);
1139 if (!xprt_throttle_congested(xprt, task))
1140 xprt->ops->alloc_slot(xprt, task);
1141 rcu_read_unlock();
1142}
1143
1144/**
1145 * xprt_retry_reserve - allocate an RPC request slot
1146 * @task: RPC task requesting a slot allocation
1147 *
1148 * If no more slots are available, place the task on the transport's
1149 * backlog queue.
1150 * Note that the only difference with xprt_reserve is that we now
1151 * ignore the value of the XPRT_CONGESTED flag.
1152 */
1153void xprt_retry_reserve(struct rpc_task *task)
1154{
1155 struct rpc_xprt *xprt;
1156
1157 task->tk_status = 0;
1158 if (task->tk_rqstp != NULL)
1159 return;
1160
1161 task->tk_timeout = 0;
1162 task->tk_status = -EAGAIN;
1163 rcu_read_lock();
1164 xprt = rcu_dereference(task->tk_client->cl_xprt);
1110 xprt->ops->alloc_slot(xprt, task); 1165 xprt->ops->alloc_slot(xprt, task);
1111 rcu_read_unlock(); 1166 rcu_read_unlock();
1112} 1167}
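
xprt_throttle_congested() above is a double-checked gate: a lock-free peek
at XPRT_CONGESTED, then a recheck under reserve_lock so a task cannot park
itself on the backlog just after the final wakeup cleared the flag. (In the
kernel, rpc_sleep_on() only queues the async task; it does not block while
the spinlock is held.) A user-space sketch of the gate, with a mutex
standing in for the spinlock and illustrative names:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static pthread_mutex_t reserve_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool congested;

static bool throttle(void (*sleep_on_backlog)(void))
{
	bool queued = false;

	if (!atomic_load(&congested))
		return false;			/* fast path: no contention */
	pthread_mutex_lock(&reserve_lock);
	if (atomic_load(&congested)) {		/* recheck under the lock */
		sleep_on_backlog();		/* park until a slot frees up */
		queued = true;
	}
	pthread_mutex_unlock(&reserve_lock);
	return queued;
}

static void noop(void) { }

int main(void)
{
	atomic_store(&congested, true);
	return throttle(noop) ? 0 : 1;
}
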
@@ -1245,6 +1300,8 @@ found:
1245 -PTR_ERR(xprt)); 1300 -PTR_ERR(xprt));
1246 goto out; 1301 goto out;
1247 } 1302 }
1303 if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)
1304 xprt->idle_timeout = 0;
1248 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1305 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1249 if (xprt_has_timer(xprt)) 1306 if (xprt_has_timer(xprt))
1250 setup_timer(&xprt->timer, xprt_init_autodisconnect, 1307 setup_timer(&xprt->timer, xprt_init_autodisconnect,
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3d02130828da..ffd50348a509 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2207,10 +2207,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2207 */ 2207 */
2208 xs_tcp_force_close(xprt); 2208 xs_tcp_force_close(xprt);
2209 break; 2209 break;
2210 case -ECONNREFUSED:
2211 case -ECONNRESET:
2212 case -ENETUNREACH:
2213 /* retry with existing socket, after a delay */
2214 case 0: 2210 case 0:
2215 case -EINPROGRESS: 2211 case -EINPROGRESS:
2216 case -EALREADY: 2212 case -EALREADY:
@@ -2221,6 +2217,10 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2221 /* Happens, for instance, if the user specified a link 2217 /* Happens, for instance, if the user specified a link
2222 * local IPv6 address without a scope-id. 2218 * local IPv6 address without a scope-id.
2223 */ 2219 */
2220 case -ECONNREFUSED:
2221 case -ECONNRESET:
2222 case -ENETUNREACH:
2223 /* retry with existing socket, after a delay */
2224 goto out; 2224 goto out;
2225 } 2225 }
2226out_eagain: 2226out_eagain:
@@ -2655,6 +2655,9 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
2655 } 2655 }
2656 xprt_set_bound(xprt); 2656 xprt_set_bound(xprt);
2657 xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); 2657 xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
2658 ret = ERR_PTR(xs_local_setup_socket(transport));
2659 if (ret)
2660 goto out_err;
2658 break; 2661 break;
2659 default: 2662 default:
2660 ret = ERR_PTR(-EAFNOSUPPORT); 2663 ret = ERR_PTR(-EAFNOSUPPORT);
@@ -2767,9 +2770,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2767 struct rpc_xprt *xprt; 2770 struct rpc_xprt *xprt;
2768 struct sock_xprt *transport; 2771 struct sock_xprt *transport;
2769 struct rpc_xprt *ret; 2772 struct rpc_xprt *ret;
2773 unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries;
2774
2775 if (args->flags & XPRT_CREATE_INFINITE_SLOTS)
2776 max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT;
2770 2777
2771 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, 2778 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
2772 xprt_max_tcp_slot_table_entries); 2779 max_slot_table_size);
2773 if (IS_ERR(xprt)) 2780 if (IS_ERR(xprt))
2774 return xprt; 2781 return xprt;
2775 transport = container_of(xprt, struct sock_xprt, xprt); 2782 transport = container_of(xprt, struct sock_xprt, xprt);
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 4f99600a5fed..c890848f9d56 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -31,3 +31,10 @@ config TIPC_PORTS
31 31
32 Setting this to a smaller value saves some memory, 32 Setting this to a smaller value saves some memory,
33 setting it to higher allows for more ports. 33 setting it to higher allows for more ports.
34
35config TIPC_MEDIA_IB
36 bool "InfiniBand media type support"
37 depends on TIPC && INFINIBAND_IPOIB
38 help
39 Saying Y here will enable support for running TIPC on
40 IP-over-InfiniBand devices.
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 6cd55d671d3a..4df8e02d9008 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,3 +9,5 @@ tipc-y += addr.o bcast.o bearer.o config.o \
9 name_distr.o subscr.o name_table.o net.o \ 9 name_distr.o subscr.o name_table.o net.o \
10 netlink.o node.o node_subscr.o port.o ref.o \ 10 netlink.o node.o node_subscr.o port.o ref.o \
11 socket.o log.o eth_media.o 11 socket.o log.o eth_media.o
12
13tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 2655c9f4ecad..e5f3da507823 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -584,8 +584,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
584{ 584{
585 int bp_index; 585 int bp_index;
586 586
587 /* 587 /* Prepare broadcast link message for reliable transmission,
588 * Prepare broadcast link message for reliable transmission,
589 * if first time trying to send it; 588 * if first time trying to send it;
590 * preparation is skipped for broadcast link protocol messages 589 * preparation is skipped for broadcast link protocol messages
591 * since they are sent in an unreliable manner and don't need it 590 * since they are sent in an unreliable manner and don't need it
@@ -611,30 +610,43 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
611 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { 610 for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) {
612 struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; 611 struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary;
613 struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; 612 struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary;
613 struct tipc_bearer *b = p;
614 struct sk_buff *tbuf;
614 615
615 if (!p) 616 if (!p)
616 break; /* no more bearers to try */ 617 break; /* No more bearers to try */
618
619 if (tipc_bearer_blocked(p)) {
620 if (!s || tipc_bearer_blocked(s))
621 continue; /* Can't use either bearer */
622 b = s;
623 }
617 624
618 tipc_nmap_diff(&bcbearer->remains, &p->nodes, &bcbearer->remains_new); 625 tipc_nmap_diff(&bcbearer->remains, &b->nodes,
626 &bcbearer->remains_new);
619 if (bcbearer->remains_new.count == bcbearer->remains.count) 627 if (bcbearer->remains_new.count == bcbearer->remains.count)
620 continue; /* bearer pair doesn't add anything */ 628 continue; /* Nothing added by bearer pair */
621 629
622 if (!tipc_bearer_blocked(p)) 630 if (bp_index == 0) {
623 tipc_bearer_send(p, buf, &p->media->bcast_addr); 631 /* Use original buffer for first bearer */
624 else if (s && !tipc_bearer_blocked(s)) 632 tipc_bearer_send(b, buf, &b->bcast_addr);
625 /* unable to send on primary bearer */ 633 } else {
626 tipc_bearer_send(s, buf, &s->media->bcast_addr); 634 /* Avoid concurrent buffer access */
627 else 635 tbuf = pskb_copy(buf, GFP_ATOMIC);
628 /* unable to send on either bearer */ 636 if (!tbuf)
629 continue; 637 break;
638 tipc_bearer_send(b, tbuf, &b->bcast_addr);
639 kfree_skb(tbuf); /* Bearer keeps a clone */
640 }
630 641
642 /* Swap bearers for next packet */
631 if (s) { 643 if (s) {
632 bcbearer->bpairs[bp_index].primary = s; 644 bcbearer->bpairs[bp_index].primary = s;
633 bcbearer->bpairs[bp_index].secondary = p; 645 bcbearer->bpairs[bp_index].secondary = p;
634 } 646 }
635 647
636 if (bcbearer->remains_new.count == 0) 648 if (bcbearer->remains_new.count == 0)
637 break; /* all targets reached */ 649 break; /* All targets reached */
638 650
639 bcbearer->remains = bcbearer->remains_new; 651 bcbearer->remains = bcbearer->remains_new;
640 } 652 }
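
The tipc_bcbearer_send() hunk above stops pushing one shared skb down every
bearer: transmission rewrites the link-level header, so each bearer after
the first now gets its own pskb_copy(). A loose user-space analogy with
plain buffers (all names illustrative; real skbs share payload pages, which
is exactly why the header must be private):

#include <stdlib.h>
#include <string.h>

struct buf { unsigned char hdr[14]; unsigned char data[64]; };

static void xmit(struct buf *b, const unsigned char *dest)
{
	memcpy(b->hdr, dest, 6);	/* header rewrite: sharing is unsafe */
}

static void bcast(struct buf *orig, unsigned char (*dests)[6], int n)
{
	struct buf *copy;
	int i;

	if (n <= 0)
		return;
	xmit(orig, dests[0]);		/* first bearer keeps the original */
	for (i = 1; i < n; i++) {
		copy = malloc(sizeof(*copy));	/* pskb_copy() analogue */
		if (!copy)
			break;
		memcpy(copy, orig, sizeof(*copy));
		xmit(copy, dests[i]);
		free(copy);		/* each bearer used a private copy */
	}
}

int main(void)
{
	struct buf b = { { 0 }, { 0 } };
	unsigned char dests[2][6] = { { 1 }, { 2 } };

	bcast(&b, dests, 2);
	return 0;
}
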
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index aa62f93a9127..cb29ef7ba2f0 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -39,7 +39,7 @@
39#include "bearer.h" 39#include "bearer.h"
40#include "discover.h" 40#include "discover.h"
41 41
42#define MAX_ADDR_STR 32 42#define MAX_ADDR_STR 60
43 43
44static struct tipc_media *media_list[MAX_MEDIA]; 44static struct tipc_media *media_list[MAX_MEDIA];
45static u32 media_count; 45static u32 media_count;
@@ -89,9 +89,6 @@ int tipc_register_media(struct tipc_media *m_ptr)
89 89
90 if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME) 90 if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME)
91 goto exit; 91 goto exit;
92 if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) ||
93 !m_ptr->bcast_addr.broadcast)
94 goto exit;
95 if (m_ptr->priority > TIPC_MAX_LINK_PRI) 92 if (m_ptr->priority > TIPC_MAX_LINK_PRI)
96 goto exit; 93 goto exit;
97 if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) || 94 if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) ||
@@ -407,7 +404,7 @@ restart:
407 INIT_LIST_HEAD(&b_ptr->links); 404 INIT_LIST_HEAD(&b_ptr->links);
408 spin_lock_init(&b_ptr->lock); 405 spin_lock_init(&b_ptr->lock);
409 406
410 res = tipc_disc_create(b_ptr, &m_ptr->bcast_addr, disc_domain); 407 res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain);
411 if (res) { 408 if (res) {
412 bearer_disable(b_ptr); 409 bearer_disable(b_ptr);
413 pr_warn("Bearer <%s> rejected, discovery object creation failed\n", 410 pr_warn("Bearer <%s> rejected, discovery object creation failed\n",
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 39f1192d04bf..09c869adcfcf 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -56,6 +56,7 @@
56 * Identifiers of supported TIPC media types 56 * Identifiers of supported TIPC media types
57 */ 57 */
58#define TIPC_MEDIA_TYPE_ETH 1 58#define TIPC_MEDIA_TYPE_ETH 1
59#define TIPC_MEDIA_TYPE_IB 2
59 60
60/** 61/**
61 * struct tipc_media_addr - destination address used by TIPC bearers 62 * struct tipc_media_addr - destination address used by TIPC bearers
@@ -77,7 +78,6 @@ struct tipc_bearer;
77 * @enable_bearer: routine which enables a bearer 78 * @enable_bearer: routine which enables a bearer
78 * @disable_bearer: routine which disables a bearer 79 * @disable_bearer: routine which disables a bearer
79 * @addr2str: routine which converts media address to string 80 * @addr2str: routine which converts media address to string
80 * @str2addr: routine which converts media address from string
81 * @addr2msg: routine which converts media address to protocol message area 81 * @addr2msg: routine which converts media address to protocol message area
82 * @msg2addr: routine which converts media address from protocol message area 82 * @msg2addr: routine which converts media address from protocol message area
83 * @bcast_addr: media address used in broadcasting 83 * @bcast_addr: media address used in broadcasting
@@ -94,10 +94,9 @@ struct tipc_media {
94 int (*enable_bearer)(struct tipc_bearer *b_ptr); 94 int (*enable_bearer)(struct tipc_bearer *b_ptr);
95 void (*disable_bearer)(struct tipc_bearer *b_ptr); 95 void (*disable_bearer)(struct tipc_bearer *b_ptr);
96 int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); 96 int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size);
97 int (*str2addr)(struct tipc_media_addr *a, char *str_buf);
98 int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); 97 int (*addr2msg)(struct tipc_media_addr *a, char *msg_area);
99 int (*msg2addr)(struct tipc_media_addr *a, char *msg_area); 98 int (*msg2addr)(const struct tipc_bearer *b_ptr,
100 struct tipc_media_addr bcast_addr; 99 struct tipc_media_addr *a, char *msg_area);
101 u32 priority; 100 u32 priority;
102 u32 tolerance; 101 u32 tolerance;
103 u32 window; 102 u32 window;
@@ -136,6 +135,7 @@ struct tipc_bearer {
136 char name[TIPC_MAX_BEARER_NAME]; 135 char name[TIPC_MAX_BEARER_NAME];
137 spinlock_t lock; 136 spinlock_t lock;
138 struct tipc_media *media; 137 struct tipc_media *media;
138 struct tipc_media_addr bcast_addr;
139 u32 priority; 139 u32 priority;
140 u32 window; 140 u32 window;
141 u32 tolerance; 141 u32 tolerance;
@@ -175,6 +175,14 @@ int tipc_disable_bearer(const char *name);
175int tipc_eth_media_start(void); 175int tipc_eth_media_start(void);
176void tipc_eth_media_stop(void); 176void tipc_eth_media_stop(void);
177 177
178#ifdef CONFIG_TIPC_MEDIA_IB
179int tipc_ib_media_start(void);
180void tipc_ib_media_stop(void);
181#else
182static inline int tipc_ib_media_start(void) { return 0; }
183static inline void tipc_ib_media_stop(void) { return; }
184#endif
185
178int tipc_media_set_priority(const char *name, u32 new_value); 186int tipc_media_set_priority(const char *name, u32 new_value);
179int tipc_media_set_window(const char *name, u32 new_value); 187int tipc_media_set_window(const char *name, u32 new_value);
180void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); 188void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index fc05cecd7481..7ec2c1eb94f1 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -82,6 +82,7 @@ static void tipc_core_stop_net(void)
82{ 82{
83 tipc_net_stop(); 83 tipc_net_stop();
84 tipc_eth_media_stop(); 84 tipc_eth_media_stop();
85 tipc_ib_media_stop();
85} 86}
86 87
87/** 88/**
@@ -93,8 +94,15 @@ int tipc_core_start_net(unsigned long addr)
93 94
94 tipc_net_start(addr); 95 tipc_net_start(addr);
95 res = tipc_eth_media_start(); 96 res = tipc_eth_media_start();
96 if (res) 97 if (res < 0)
97 tipc_core_stop_net(); 98 goto err;
99 res = tipc_ib_media_start();
100 if (res < 0)
101 goto err;
102 return res;
103
104err:
105 tipc_core_stop_net();
98 return res; 106 return res;
99} 107}
100 108
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 1074b9587e81..eedff58d0387 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -129,7 +129,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)
129 int link_fully_up; 129 int link_fully_up;
130 130
131 media_addr.broadcast = 1; 131 media_addr.broadcast = 1;
132 b_ptr->media->msg2addr(&media_addr, msg_media_addr(msg)); 132 b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg));
133 kfree_skb(buf); 133 kfree_skb(buf);
134 134
135 /* Ensure message from node is valid and communication is permitted */ 135 /* Ensure message from node is valid and communication is permitted */
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 2132c1ef2951..120a676a3360 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -77,12 +77,13 @@ static struct notifier_block notifier = {
77 * Media-dependent "value" field stores MAC address in first 6 bytes 77 * Media-dependent "value" field stores MAC address in first 6 bytes
78 * and zeroes out the remaining bytes. 78 * and zeroes out the remaining bytes.
79 */ 79 */
80static void eth_media_addr_set(struct tipc_media_addr *a, char *mac) 80static void eth_media_addr_set(const struct tipc_bearer *tb_ptr,
81 struct tipc_media_addr *a, char *mac)
81{ 82{
82 memcpy(a->value, mac, ETH_ALEN); 83 memcpy(a->value, mac, ETH_ALEN);
83 memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN); 84 memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN);
84 a->media_id = TIPC_MEDIA_TYPE_ETH; 85 a->media_id = TIPC_MEDIA_TYPE_ETH;
85 a->broadcast = !memcmp(mac, eth_media_info.bcast_addr.value, ETH_ALEN); 86 a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, ETH_ALEN);
86} 87}
87 88
88/** 89/**
@@ -110,6 +111,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
110 111
111 skb_reset_network_header(clone); 112 skb_reset_network_header(clone);
112 clone->dev = dev; 113 clone->dev = dev;
114 clone->protocol = htons(ETH_P_TIPC);
113 dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, 115 dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
114 dev->dev_addr, clone->len); 116 dev->dev_addr, clone->len);
115 dev_queue_xmit(clone); 117 dev_queue_xmit(clone);
@@ -201,9 +203,13 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
201 /* Associate TIPC bearer with Ethernet bearer */ 203 /* Associate TIPC bearer with Ethernet bearer */
202 eb_ptr->bearer = tb_ptr; 204 eb_ptr->bearer = tb_ptr;
203 tb_ptr->usr_handle = (void *)eb_ptr; 205 tb_ptr->usr_handle = (void *)eb_ptr;
206 memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value));
207 memcpy(tb_ptr->bcast_addr.value, dev->broadcast, ETH_ALEN);
208 tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_ETH;
209 tb_ptr->bcast_addr.broadcast = 1;
204 tb_ptr->mtu = dev->mtu; 210 tb_ptr->mtu = dev->mtu;
205 tb_ptr->blocked = 0; 211 tb_ptr->blocked = 0;
206 eth_media_addr_set(&tb_ptr->addr, (char *)dev->dev_addr); 212 eth_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr);
207 return 0; 213 return 0;
208} 214}
209 215
@@ -302,25 +308,6 @@ static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
302} 308}
303 309
304/** 310/**
305 * eth_str2addr - convert string to Ethernet address
306 */
307static int eth_str2addr(struct tipc_media_addr *a, char *str_buf)
308{
309 char mac[ETH_ALEN];
310 int r;
311
312 r = sscanf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x",
313 (u32 *)&mac[0], (u32 *)&mac[1], (u32 *)&mac[2],
314 (u32 *)&mac[3], (u32 *)&mac[4], (u32 *)&mac[5]);
315
316 if (r != ETH_ALEN)
317 return 1;
318
319 eth_media_addr_set(a, mac);
320 return 0;
321}
322
323/**
324 * eth_addr2msg - convert Ethernet address format to message header format 311 * eth_addr2msg - convert Ethernet address format to message header format
325 */ 312 */
326static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) 313static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area)
@@ -334,12 +321,13 @@ static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area)
334/** 321/**
335 * eth_msg2addr - convert message header address format to Ethernet format 322 * eth_msg2addr - convert message header address format to Ethernet format
336 */ 323 */
337static int eth_msg2addr(struct tipc_media_addr *a, char *msg_area) 324static int eth_msg2addr(const struct tipc_bearer *tb_ptr,
325 struct tipc_media_addr *a, char *msg_area)
338{ 326{
339 if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH) 327 if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH)
340 return 1; 328 return 1;
341 329
342 eth_media_addr_set(a, msg_area + ETH_ADDR_OFFSET); 330 eth_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET);
343 return 0; 331 return 0;
344} 332}
345 333
@@ -351,11 +339,8 @@ static struct tipc_media eth_media_info = {
351 .enable_bearer = enable_bearer, 339 .enable_bearer = enable_bearer,
352 .disable_bearer = disable_bearer, 340 .disable_bearer = disable_bearer,
353 .addr2str = eth_addr2str, 341 .addr2str = eth_addr2str,
354 .str2addr = eth_str2addr,
355 .addr2msg = eth_addr2msg, 342 .addr2msg = eth_addr2msg,
356 .msg2addr = eth_msg2addr, 343 .msg2addr = eth_msg2addr,
357 .bcast_addr = { { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
358 TIPC_MEDIA_TYPE_ETH, 1 },
359 .priority = TIPC_DEF_LINK_PRI, 344 .priority = TIPC_DEF_LINK_PRI,
360 .tolerance = TIPC_DEF_LINK_TOL, 345 .tolerance = TIPC_DEF_LINK_TOL,
361 .window = TIPC_DEF_LINK_WIN, 346 .window = TIPC_DEF_LINK_WIN,
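
The eth_media.c changes above drop the hard-coded ff:ff:ff:ff:ff:ff
media-wide broadcast address: enable_bearer() now copies dev->broadcast into
the bearer, and eth_media_addr_set() compares against that per-bearer value.
A small sketch of the resulting test (sizes and names illustrative):

#include <stdbool.h>
#include <string.h>

#define ALEN 6

struct bearer {
	unsigned char bcast[ALEN];	/* from dev->broadcast at enable time */
};

/* An address is broadcast iff it matches this bearer's own device. */
static bool is_broadcast(const struct bearer *b, const unsigned char *mac)
{
	return memcmp(mac, b->bcast, ALEN) == 0;
}

int main(void)
{
	struct bearer b;
	unsigned char ff[ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

	memcpy(b.bcast, ff, ALEN);
	return is_broadcast(&b, ff) ? 0 : 1;
}
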
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
new file mode 100644
index 000000000000..2a2864c25e15
--- /dev/null
+++ b/net/tipc/ib_media.c
@@ -0,0 +1,387 @@
1/*
2 * net/tipc/ib_media.c: InfiniBand bearer support for TIPC
3 *
4 * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
5 *
6 * Based on eth_media.c, which carries the following copyright notice:
7 *
8 * Copyright (c) 2001-2007, Ericsson AB
9 * Copyright (c) 2005-2008, 2011, Wind River Systems
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the names of the copyright holders nor the names of its
21 * contributors may be used to endorse or promote products derived from
22 * this software without specific prior written permission.
23 *
24 * Alternatively, this software may be distributed under the terms of the
25 * GNU General Public License ("GPL") version 2 as published by the Free
26 * Software Foundation.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41#include <linux/if_infiniband.h>
42#include "core.h"
43#include "bearer.h"
44
45#define MAX_IB_BEARERS MAX_BEARERS
46
47/**
48 * struct ib_bearer - InfiniBand bearer data structure
49 * @bearer: ptr to associated "generic" bearer structure
50 * @dev: ptr to associated InfiniBand network device
51 * @tipc_packet_type: used in binding TIPC to InfiniBand driver
52 * @cleanup: work item used when disabling bearer
53 */
54
55struct ib_bearer {
56 struct tipc_bearer *bearer;
57 struct net_device *dev;
58 struct packet_type tipc_packet_type;
59 struct work_struct setup;
60 struct work_struct cleanup;
61};
62
63static struct tipc_media ib_media_info;
64static struct ib_bearer ib_bearers[MAX_IB_BEARERS];
65static int ib_started;
66
67/**
68 * ib_media_addr_set - initialize InfiniBand media address structure
69 *
70 * Media-dependent "value" field stores the full INFINIBAND_ALEN-byte
71 * hardware address; unlike Ethernet, no trailing bytes are zeroed out.
72 */
73static void ib_media_addr_set(const struct tipc_bearer *tb_ptr,
74 struct tipc_media_addr *a, char *mac)
75{
76 BUILD_BUG_ON(sizeof(a->value) < INFINIBAND_ALEN);
77 memcpy(a->value, mac, INFINIBAND_ALEN);
78 a->media_id = TIPC_MEDIA_TYPE_IB;
79 a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, INFINIBAND_ALEN);
80}
81
82/**
83 * send_msg - send a TIPC message out over an InfiniBand interface
84 */
85static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
86 struct tipc_media_addr *dest)
87{
88 struct sk_buff *clone;
89 struct net_device *dev;
90 int delta;
91
92 clone = skb_clone(buf, GFP_ATOMIC);
93 if (!clone)
94 return 0;
95
96 dev = ((struct ib_bearer *)(tb_ptr->usr_handle))->dev;
97 delta = dev->hard_header_len - skb_headroom(buf);
98
99 if ((delta > 0) &&
100 pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
101 kfree_skb(clone);
102 return 0;
103 }
104
105 skb_reset_network_header(clone);
106 clone->dev = dev;
107 clone->protocol = htons(ETH_P_TIPC);
108 dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,
109 dev->dev_addr, clone->len);
110 dev_queue_xmit(clone);
111 return 0;
112}
113
114/**
115 * recv_msg - handle incoming TIPC message from an InfiniBand interface
116 *
117 * Accept only packets explicitly sent to this node, or broadcast packets;
118 * ignore packets sent using InfiniBand multicast, and traffic sent to other
119 * nodes (which can happen if the interface is running in promiscuous mode).
120 */
121static int recv_msg(struct sk_buff *buf, struct net_device *dev,
122 struct packet_type *pt, struct net_device *orig_dev)
123{
124 struct ib_bearer *ib_ptr = (struct ib_bearer *)pt->af_packet_priv;
125
126 if (!net_eq(dev_net(dev), &init_net)) {
127 kfree_skb(buf);
128 return 0;
129 }
130
131 if (likely(ib_ptr->bearer)) {
132 if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
133 buf->next = NULL;
134 tipc_recv_msg(buf, ib_ptr->bearer);
135 return 0;
136 }
137 }
138 kfree_skb(buf);
139 return 0;
140}
141
142/**
143 * setup_bearer - setup association between InfiniBand bearer and interface
144 */
145static void setup_bearer(struct work_struct *work)
146{
147 struct ib_bearer *ib_ptr =
148 container_of(work, struct ib_bearer, setup);
149
150 dev_add_pack(&ib_ptr->tipc_packet_type);
151}
152
153/**
154 * enable_bearer - attach TIPC bearer to an InfiniBand interface
155 */
156static int enable_bearer(struct tipc_bearer *tb_ptr)
157{
158 struct net_device *dev = NULL;
159 struct net_device *pdev = NULL;
160 struct ib_bearer *ib_ptr = &ib_bearers[0];
161 struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
162 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
163 int pending_dev = 0;
164
165 /* Find unused InfiniBand bearer structure */
166 while (ib_ptr->dev) {
167 if (!ib_ptr->bearer)
168 pending_dev++;
169 if (++ib_ptr == stop)
170 return pending_dev ? -EAGAIN : -EDQUOT;
171 }
172
173 /* Find device with specified name */
174 read_lock(&dev_base_lock);
175 for_each_netdev(&init_net, pdev) {
176 if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
177 dev = pdev;
178 dev_hold(dev);
179 break;
180 }
181 }
182 read_unlock(&dev_base_lock);
183 if (!dev)
184 return -ENODEV;
185
186 /* Create InfiniBand bearer for device */
187 ib_ptr->dev = dev;
188 ib_ptr->tipc_packet_type.type = htons(ETH_P_TIPC);
189 ib_ptr->tipc_packet_type.dev = dev;
190 ib_ptr->tipc_packet_type.func = recv_msg;
191 ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr;
192 INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list));
193 INIT_WORK(&ib_ptr->setup, setup_bearer);
194 schedule_work(&ib_ptr->setup);
195
196 /* Associate TIPC bearer with InfiniBand bearer */
197 ib_ptr->bearer = tb_ptr;
198 tb_ptr->usr_handle = (void *)ib_ptr;
199 memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value));
200 memcpy(tb_ptr->bcast_addr.value, dev->broadcast, INFINIBAND_ALEN);
201 tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_IB;
202 tb_ptr->bcast_addr.broadcast = 1;
203 tb_ptr->mtu = dev->mtu;
204 tb_ptr->blocked = 0;
205 ib_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr);
206 return 0;
207}
208
209/**
210 * cleanup_bearer - break association between InfiniBand bearer and interface
211 *
212 * This routine must be invoked from a work queue because it can sleep.
213 */
214static void cleanup_bearer(struct work_struct *work)
215{
216 struct ib_bearer *ib_ptr =
217 container_of(work, struct ib_bearer, cleanup);
218
219 dev_remove_pack(&ib_ptr->tipc_packet_type);
220 dev_put(ib_ptr->dev);
221 ib_ptr->dev = NULL;
222}
223
224/**
225 * disable_bearer - detach TIPC bearer from an InfiniBand interface
226 *
227 * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away,
228 * then get worker thread to complete bearer cleanup. (Can't do cleanup
229 * here because cleanup code needs to sleep and caller holds spinlocks.)
230 */
231static void disable_bearer(struct tipc_bearer *tb_ptr)
232{
233 struct ib_bearer *ib_ptr = (struct ib_bearer *)tb_ptr->usr_handle;
234
235 ib_ptr->bearer = NULL;
236 INIT_WORK(&ib_ptr->cleanup, cleanup_bearer);
237 schedule_work(&ib_ptr->cleanup);
238}
239
240/**
241 * recv_notification - handle device updates from OS
242 *
243 * Change the state of the InfiniBand bearer (if any) associated with the
244 * specified device.
245 */
246static int recv_notification(struct notifier_block *nb, unsigned long evt,
247 void *dv)
248{
249 struct net_device *dev = (struct net_device *)dv;
250 struct ib_bearer *ib_ptr = &ib_bearers[0];
251 struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS];
252
253 if (!net_eq(dev_net(dev), &init_net))
254 return NOTIFY_DONE;
255
256 while (ib_ptr->dev != dev) {
257 if (++ib_ptr == stop)
258 return NOTIFY_DONE; /* couldn't find device */
259 }
260 if (!ib_ptr->bearer)
261 return NOTIFY_DONE; /* bearer had been disabled */
262
263 ib_ptr->bearer->mtu = dev->mtu;
264
265 switch (evt) {
266 case NETDEV_CHANGE:
267 if (netif_carrier_ok(dev))
268 tipc_continue(ib_ptr->bearer);
269 else
270 tipc_block_bearer(ib_ptr->bearer->name);
271 break;
272 case NETDEV_UP:
273 tipc_continue(ib_ptr->bearer);
274 break;
275 case NETDEV_DOWN:
276 tipc_block_bearer(ib_ptr->bearer->name);
277 break;
278 case NETDEV_CHANGEMTU:
279 case NETDEV_CHANGEADDR:
280 tipc_block_bearer(ib_ptr->bearer->name);
281 tipc_continue(ib_ptr->bearer);
282 break;
283 case NETDEV_UNREGISTER:
284 case NETDEV_CHANGENAME:
285 tipc_disable_bearer(ib_ptr->bearer->name);
286 break;
287 }
288 return NOTIFY_OK;
289}
290
291static struct notifier_block notifier = {
292 .notifier_call = recv_notification,
293 .priority = 0,
294};
295
296/**
297 * ib_addr2str - convert InfiniBand address to string
298 */
299static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)
300{
301 if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */
302 return 1;
303
304 sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:"
305 "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x",
306 a->value[0], a->value[1], a->value[2], a->value[3],
307 a->value[4], a->value[5], a->value[6], a->value[7],
308 a->value[8], a->value[9], a->value[10], a->value[11],
309 a->value[12], a->value[13], a->value[14], a->value[15],
310 a->value[16], a->value[17], a->value[18], a->value[19]);
311
312 return 0;
313}
314
315/**
316 * ib_addr2msg - convert InfiniBand address format to message header format
317 */
318static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area)
319{
320 memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE);
321 msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB;
322 memcpy(msg_area, a->value, INFINIBAND_ALEN);
323 return 0;
324}
325
326/**
327 * ib_msg2addr - convert message header address format to InfiniBand format
328 */
329static int ib_msg2addr(const struct tipc_bearer *tb_ptr,
330 struct tipc_media_addr *a, char *msg_area)
331{
332 ib_media_addr_set(tb_ptr, a, msg_area);
333 return 0;
334}
335
336/*
337 * InfiniBand media registration info
338 */
339static struct tipc_media ib_media_info = {
340 .send_msg = send_msg,
341 .enable_bearer = enable_bearer,
342 .disable_bearer = disable_bearer,
343 .addr2str = ib_addr2str,
344 .addr2msg = ib_addr2msg,
345 .msg2addr = ib_msg2addr,
346 .priority = TIPC_DEF_LINK_PRI,
347 .tolerance = TIPC_DEF_LINK_TOL,
348 .window = TIPC_DEF_LINK_WIN,
349 .type_id = TIPC_MEDIA_TYPE_IB,
350 .name = "ib"
351};
352
353/**
354 * tipc_ib_media_start - activate InfiniBand bearer support
355 *
356 * Register InfiniBand media type with TIPC bearer code. Also register
357 * with OS for notifications about device state changes.
358 */
359int tipc_ib_media_start(void)
360{
361 int res;
362
363 if (ib_started)
364 return -EINVAL;
365
366 res = tipc_register_media(&ib_media_info);
367 if (res)
368 return res;
369
370 res = register_netdevice_notifier(&notifier);
371 if (!res)
372 ib_started = 1;
373 return res;
374}
375
376/**
377 * tipc_ib_media_stop - deactivate InfiniBand bearer support
378 */
379void tipc_ib_media_stop(void)
380{
381 if (!ib_started)
382 return;
383
384 flush_scheduled_work();
385 unregister_netdevice_notifier(&notifier);
386 ib_started = 0;
387}
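
ib_addr2str() in the new file spells out all twenty octets in a single
sprintf(). An equivalent loop (a standalone sketch, not the kernel's code)
makes the "19 * strlen("xx:") + strlen("xx\0") = 60" sizing easy to verify:

#include <stdio.h>

#define IB_ALEN 20

static int format_ib_addr(const unsigned char *value, char *str_buf,
			  int str_size)
{
	int i, n = 0;

	if (str_size < 3 * IB_ALEN)	/* 19 * "xx:" + "xx" + NUL = 60 */
		return 1;
	for (i = 0; i < IB_ALEN; i++)
		n += sprintf(str_buf + n, i ? ":%02x" : "%02x", value[i]);
	return 0;
}

int main(void)
{
	unsigned char a[IB_ALEN] = { 0 };
	char buf[3 * IB_ALEN];

	return format_ib_addr(a, buf, sizeof(buf));
}
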
diff --git a/net/tipc/link.c b/net/tipc/link.c
index daa6080a2a0c..a80feee5197a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2306,8 +2306,11 @@ static int link_recv_changeover_msg(struct tipc_link **l_ptr,
2306 struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf); 2306 struct tipc_msg *tunnel_msg = buf_msg(tunnel_buf);
2307 u32 msg_typ = msg_type(tunnel_msg); 2307 u32 msg_typ = msg_type(tunnel_msg);
2308 u32 msg_count = msg_msgcnt(tunnel_msg); 2308 u32 msg_count = msg_msgcnt(tunnel_msg);
2309 u32 bearer_id = msg_bearer_id(tunnel_msg);
2309 2310
2310 dest_link = (*l_ptr)->owner->links[msg_bearer_id(tunnel_msg)]; 2311 if (bearer_id >= MAX_BEARERS)
2312 goto exit;
2313 dest_link = (*l_ptr)->owner->links[bearer_id];
2311 if (!dest_link) 2314 if (!dest_link)
2312 goto exit; 2315 goto exit;
2313 if (dest_link == *l_ptr) { 2316 if (dest_link == *l_ptr) {
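
The link_recv_changeover_msg() hunk above validates a bearer id taken from
the wire before using it as an array index, closing a remote out-of-bounds
read. The general shape of the fix (sizes illustrative):

#include <stddef.h>

#define MAX_BEARERS 3

static void *links[MAX_BEARERS];

static void *lookup_link(unsigned int bearer_id)
{
	if (bearer_id >= MAX_BEARERS)	/* untrusted index from the message */
		return NULL;
	return links[bearer_id];
}

int main(void)
{
	return lookup_link(7) == NULL ? 0 : 1;	/* out of range is rejected */
}
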
@@ -2521,14 +2524,16 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
2521 struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm); 2524 struct tipc_msg *imsg = (struct tipc_msg *)msg_data(fragm);
2522 u32 msg_sz = msg_size(imsg); 2525 u32 msg_sz = msg_size(imsg);
2523 u32 fragm_sz = msg_data_sz(fragm); 2526 u32 fragm_sz = msg_data_sz(fragm);
2524 u32 exp_fragm_cnt = msg_sz/fragm_sz + !!(msg_sz % fragm_sz); 2527 u32 exp_fragm_cnt;
2525 u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE; 2528 u32 max = TIPC_MAX_USER_MSG_SIZE + NAMED_H_SIZE;
2529
2526 if (msg_type(imsg) == TIPC_MCAST_MSG) 2530 if (msg_type(imsg) == TIPC_MCAST_MSG)
2527 max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE; 2531 max = TIPC_MAX_USER_MSG_SIZE + MCAST_H_SIZE;
2528 if (msg_size(imsg) > max) { 2532 if (fragm_sz == 0 || msg_size(imsg) > max) {
2529 kfree_skb(fbuf); 2533 kfree_skb(fbuf);
2530 return 0; 2534 return 0;
2531 } 2535 }
2536 exp_fragm_cnt = msg_sz / fragm_sz + !!(msg_sz % fragm_sz);
2532 pbuf = tipc_buf_acquire(msg_size(imsg)); 2537 pbuf = tipc_buf_acquire(msg_size(imsg));
2533 if (pbuf != NULL) { 2538 if (pbuf != NULL) {
2534 pbuf->next = *pending; 2539 pbuf->next = *pending;
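
The tipc_link_recv_fragment() hunk above reorders the checks so that
fragm_sz is validated before it is used as a divisor, removing a remotely
triggerable divide-by-zero. A minimal model of the computation:

#include <stdio.h>

/* Returns 0 when the header values are unusable (was a divide-by-zero). */
static unsigned int expected_fragments(unsigned int msg_sz,
				       unsigned int fragm_sz,
				       unsigned int max)
{
	if (fragm_sz == 0 || msg_sz > max)
		return 0;			/* reject before dividing */
	return msg_sz / fragm_sz + !!(msg_sz % fragm_sz);
}

int main(void)
{
	printf("%u\n", expected_fragments(1000, 300, 66000));	/* 4 */
	printf("%u\n", expected_fragments(1000, 0, 66000));	/* 0, no crash */
	return 0;
}
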
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 6675914dc592..8bcd4985d0fb 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -44,7 +44,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
44 struct nlmsghdr *rep_nlh; 44 struct nlmsghdr *rep_nlh;
45 struct nlmsghdr *req_nlh = info->nlhdr; 45 struct nlmsghdr *req_nlh = info->nlhdr;
46 struct tipc_genlmsghdr *req_userhdr = info->userhdr; 46 struct tipc_genlmsghdr *req_userhdr = info->userhdr;
47 int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN); 47 int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);
48 u16 cmd; 48 u16 cmd;
49 49
50 if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) 50 if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN)))
@@ -53,8 +53,8 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)
53 cmd = req_userhdr->cmd; 53 cmd = req_userhdr->cmd;
54 54
55 rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, 55 rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd,
56 NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, 56 nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN,
57 NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), 57 nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN),
58 hdr_space); 58 hdr_space);
59 59
60 if (rep_buf) { 60 if (rep_buf) {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a9622b6cd916..515ce38e4f4c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -790,6 +790,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
790 if (addr) { 790 if (addr) {
791 addr->family = AF_TIPC; 791 addr->family = AF_TIPC;
792 addr->addrtype = TIPC_ADDR_ID; 792 addr->addrtype = TIPC_ADDR_ID;
793 memset(&addr->addr, 0, sizeof(addr->addr));
793 addr->addr.id.ref = msg_origport(msg); 794 addr->addr.id.ref = msg_origport(msg);
794 addr->addr.id.node = msg_orignode(msg); 795 addr->addr.id.node = msg_orignode(msg);
795 addr->addr.name.domain = 0; /* could leave uninitialized */ 796 addr->addr.name.domain = 0; /* could leave uninitialized */
@@ -904,6 +905,9 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock,
904 goto exit; 905 goto exit;
905 } 906 }
906 907
908 /* will be updated in set_orig_addr() if needed */
909 m->msg_namelen = 0;
910
907 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 911 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
908restart: 912restart:
909 913
@@ -1013,6 +1017,9 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
1013 goto exit; 1017 goto exit;
1014 } 1018 }
1015 1019
1020 /* will be updated in set_orig_addr() if needed */
1021 m->msg_namelen = 0;
1022
1016 target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); 1023 target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
1017 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1024 timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
1018 1025
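
Both socket.c fixes above close the same class of infoleak: recvmsg() must
not report a name length when no name was written, and any sockaddr it does
fill must be zeroed first so struct padding cannot carry stale kernel stack
bytes to user space. The shape of the pattern, with an illustrative struct:

#include <string.h>

struct addr {
	unsigned short family;
	unsigned int ref;
	unsigned int node;	/* a real sockaddr also has implicit padding */
};

static void fill_name(struct addr *a, int *namelen, int have_source)
{
	*namelen = 0;			/* default: nothing to report */
	if (!have_source)
		return;
	memset(a, 0, sizeof(*a));	/* zero padding before a partial fill */
	a->family = 30;			/* AF_TIPC's value, for illustration */
	a->ref = 1;
	a->node = 2;
	*namelen = sizeof(*a);
}

int main(void)
{
	struct addr a;
	int len;

	fill_name(&a, &len, 1);
	return len == (int)sizeof(a) ? 0 : 1;
}
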
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2db702d82e7d..826e09938bff 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1340,7 +1340,6 @@ static void unix_destruct_scm(struct sk_buff *skb)
 	struct scm_cookie scm;
 	memset(&scm, 0, sizeof(scm));
 	scm.pid = UNIXCB(skb).pid;
-	scm.cred = UNIXCB(skb).cred;
 	if (UNIXCB(skb).fp)
 		unix_detach_fds(&scm, skb);
 
@@ -1391,8 +1390,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
 	int err = 0;
 
 	UNIXCB(skb).pid = get_pid(scm->pid);
-	if (scm->cred)
-		UNIXCB(skb).cred = get_cred(scm->cred);
+	UNIXCB(skb).uid = scm->creds.uid;
+	UNIXCB(skb).gid = scm->creds.gid;
 	UNIXCB(skb).fp = NULL;
 	if (scm->fp && send_fds)
 		err = unix_attach_fds(scm, skb);
@@ -1409,13 +1408,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
 			    const struct sock *other)
 {
-	if (UNIXCB(skb).cred)
+	if (UNIXCB(skb).pid)
 		return;
 	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
 	    !other->sk_socket ||
 	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
 		UNIXCB(skb).pid = get_pid(task_tgid(current));
-		UNIXCB(skb).cred = get_current_cred();
+		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
 	}
 }
 
@@ -1819,7 +1818,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 		siocb->scm = &tmp_scm;
 		memset(&tmp_scm, 0, sizeof(tmp_scm));
 	}
-	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
+	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
 	unix_set_secdata(siocb->scm, skb);
 
 	if (!(flags & MSG_PEEK)) {
@@ -1859,10 +1858,10 @@ out:
 }
 
 /*
- * Sleep until data has arrive. But check for races..
+ * Sleep until more data has arrived. But check for races..
  */
-
-static long unix_stream_data_wait(struct sock *sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo,
+				  struct sk_buff *last)
 {
 	DEFINE_WAIT(wait);
 
@@ -1871,7 +1870,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
 	for (;;) {
 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
-		if (!skb_queue_empty(&sk->sk_receive_queue) ||
+		if (skb_peek_tail(&sk->sk_receive_queue) != last ||
 		    sk->sk_err ||
 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
 		    signal_pending(current) ||
@@ -1890,8 +1889,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
 	return timeo;
 }
 
-
-
 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 			       struct msghdr *msg, size_t size,
 			       int flags)
@@ -1936,14 +1933,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 		goto out;
 	}
 
-	skip = sk_peek_offset(sk, flags);
-
 	do {
 		int chunk;
-		struct sk_buff *skb;
+		struct sk_buff *skb, *last;
 
 		unix_state_lock(sk);
-		skb = skb_peek(&sk->sk_receive_queue);
+		last = skb = skb_peek(&sk->sk_receive_queue);
 again:
 		if (skb == NULL) {
 			unix_sk(sk)->recursion_level = 0;
@@ -1966,7 +1961,7 @@ again:
 				break;
 			mutex_unlock(&u->readlock);
 
-			timeo = unix_stream_data_wait(sk, timeo);
+			timeo = unix_stream_data_wait(sk, timeo, last);
 
 			if (signal_pending(current)
 			    ||  mutex_lock_interruptible(&u->readlock)) {
@@ -1980,10 +1975,13 @@ again:
 			break;
 		}
 
-		if (skip >= skb->len) {
+		skip = sk_peek_offset(sk, flags);
+		while (skip >= skb->len) {
 			skip -= skb->len;
+			last = skb;
 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
-			goto again;
+			if (!skb)
+				goto again;
 		}
 
 		unix_state_unlock(sk);
@@ -1991,11 +1989,12 @@ again:
 		if (check_creds) {
 			/* Never glue messages from different writers */
 			if ((UNIXCB(skb).pid != siocb->scm->pid) ||
-			    (UNIXCB(skb).cred != siocb->scm->cred))
+			    !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
+			    !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
 				break;
 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
 			/* Copy credentials */
-			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
+			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
 			check_creds = 1;
 		}
 
@@ -2196,7 +2195,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
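
The af_unix changes replace the skb's counted struct cred reference with plain kuid_t/kgid_t values stored in the control block, compared via uid_eq()/gid_eq() rather than pointer equality. The single-member wrapper types exist so raw ids from different user namespaces cannot be compared with == by accident. A tiny sketch of that pattern (my own stand-in types, not the kernel's):

#include <assert.h>
#include <stdbool.h>

/* A single-member struct makes == a compile error, forcing every
 * comparison through an explicit, auditable helper. */
typedef struct { unsigned int val; } xuid_t;

static bool xuid_eq(xuid_t a, xuid_t b)
{
	return a.val == b.val;
}

int main(void)
{
	xuid_t sender = { 1000 }, scm_owner = { 1000 };

	assert(xuid_eq(sender, scm_owner));
	return 0;
}
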
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index d0f6545b0010..9bc73f87f64a 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -185,7 +185,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 			 * have been added to the queues after
 			 * starting the garbage collection
 			 */
-			if (u->gc_candidate) {
+			if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
 				hit = true;
 				func(u);
 			}
@@ -254,7 +254,7 @@ static void inc_inflight_move_tail(struct unix_sock *u)
 	 * of the list, so that it's checked even if it was already
 	 * passed over
 	 */
-	if (u->gc_maybe_cycle)
+	if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
 		list_move_tail(&u->link, &gc_candidates);
 }
 
@@ -315,8 +315,8 @@ void unix_gc(void)
 		BUG_ON(total_refs < inflight_refs);
 		if (total_refs == inflight_refs) {
 			list_move_tail(&u->link, &gc_candidates);
-			u->gc_candidate = 1;
-			u->gc_maybe_cycle = 1;
+			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
+			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
 		}
 	}
 
@@ -344,7 +344,7 @@ void unix_gc(void)
 
 		if (atomic_long_read(&u->inflight) > 0) {
 			list_move_tail(&u->link, &not_cycle_list);
-			u->gc_maybe_cycle = 0;
+			__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 		}
 	}
@@ -356,7 +356,7 @@ void unix_gc(void)
 	 */
 	while (!list_empty(&not_cycle_list)) {
 		u = list_entry(not_cycle_list.next, struct unix_sock, link);
-		u->gc_candidate = 0;
+		__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
 		list_move_tail(&u->link, &gc_inflight_list);
 	}
 
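
Throughout net/unix/garbage.c, two adjacent one-bit bitfields (gc_candidate, gc_maybe_cycle) become named bits in a gc_flags word driven by test_bit()/__set_bit()/__clear_bit(). Writes to neighbouring C bitfields compile to read-modify-write cycles on the same storage unit, so concurrent updates can corrupt each other; distinct bit numbers in one unsigned long manipulated through explicit helpers avoid that hazard. A userspace sketch of the same interface (simplified and non-atomic, like the double-underscore kernel variants used under a lock):

#include <assert.h>

enum { GC_CANDIDATE, GC_MAYBE_CYCLE };

static void set_bit_(int nr, unsigned long *w)        { *w |= 1UL << nr; }
static void clear_bit_(int nr, unsigned long *w)      { *w &= ~(1UL << nr); }
static int  test_bit_(int nr, const unsigned long *w) { return !!(*w & (1UL << nr)); }

int main(void)
{
	unsigned long gc_flags = 0;

	set_bit_(GC_CANDIDATE, &gc_flags);
	set_bit_(GC_MAYBE_CYCLE, &gc_flags);
	clear_bit_(GC_MAYBE_CYCLE, &gc_flags);
	assert(test_bit_(GC_CANDIDATE, &gc_flags));
	assert(!test_bit_(GC_MAYBE_CYCLE, &gc_flags));
	return 0;
}
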
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index d8079daf1bde..3f77f42a3b58 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -165,7 +165,7 @@ static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
 static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
 static DEFINE_SPINLOCK(vsock_table_lock);
 
-static __init void vsock_init_tables(void)
+static void vsock_init_tables(void)
 {
 	int i;
 
@@ -1670,6 +1670,8 @@ vsock_stream_recvmsg(struct kiocb *kiocb,
 	vsk = vsock_sk(sk);
 	err = 0;
 
+	msg->msg_namelen = 0;
+
 	lock_sock(sk);
 
 	if (sk->sk_state != SS_CONNECTED) {
@@ -1930,7 +1932,6 @@ static const struct file_operations vsock_device_ops = {
 
 static struct miscdevice vsock_device = {
 	.name		= "vsock",
-	.minor		= MISC_DYNAMIC_MINOR,
 	.fops		= &vsock_device_ops,
 };
 
@@ -1940,6 +1941,7 @@ static int __vsock_core_init(void)
 
 	vsock_init_tables();
 
+	vsock_device.minor = MISC_DYNAMIC_MINOR;
 	err = misc_register(&vsock_device);
 	if (err) {
 		pr_err("Failed to register misc device\n");
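
Two details in the af_vsock hunks: __init is dropped from vsock_init_tables() (it is now reached from a path that can run after init memory has been freed), and the MISC_DYNAMIC_MINOR assignment moves from the static initializer into __vsock_core_init(). The latter matters because misc_register() overwrites .minor with the minor it actually allocated and deregistration does not restore it; presumably re-running the init path with that stale minor is what the patch guards against. A userspace sketch of the hazard (all names invented):

#include <assert.h>

#define DYNAMIC_MINOR -1

struct miscdev_like { int minor; };

static int next_free = 100;

/* Like misc_register(): a dynamic request is overwritten in place
 * with the concrete minor that was allocated. */
static void register_like(struct miscdev_like *d)
{
	if (d->minor == DYNAMIC_MINOR)
		d->minor = next_free++;
}

int main(void)
{
	static struct miscdev_like dev = { .minor = DYNAMIC_MINOR };

	register_like(&dev);          /* first init: gets 100 */
	/* ...deregistered; the init path runs a second time... */
	dev.minor = DYNAMIC_MINOR;    /* the reset the patch adds */
	register_like(&dev);
	assert(dev.minor == 101);     /* fresh dynamic minor, not stale 100 */
	return 0;
}
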
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 1f6508e249ae..daff75200e25 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -123,6 +123,14 @@ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
 	return err > 0 ? -err : err;
 }
 
+static u32 vmci_transport_peer_rid(u32 peer_cid)
+{
+	if (VMADDR_CID_HYPERVISOR == peer_cid)
+		return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID;
+
+	return VMCI_TRANSPORT_PACKET_RID;
+}
+
 static inline void
 vmci_transport_packet_init(struct vmci_transport_packet *pkt,
 			   struct sockaddr_vm *src,
@@ -140,7 +148,7 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,
 	pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,
 				       VMCI_TRANSPORT_PACKET_RID);
 	pkt->dg.dst = vmci_make_handle(dst->svm_cid,
-				       VMCI_TRANSPORT_PACKET_RID);
+				       vmci_transport_peer_rid(dst->svm_cid));
 	pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);
 	pkt->version = VMCI_TRANSPORT_PACKET_VERSION;
 	pkt->type = type;
@@ -508,6 +516,9 @@ static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)
 
 static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)
 {
+	if (VMADDR_CID_HYPERVISOR == peer_cid)
+		return true;
+
 	if (vsock->cached_peer != peer_cid) {
 		vsock->cached_peer = peer_cid;
 		if (!vmci_transport_is_trusted(vsock, peer_cid) &&
@@ -628,7 +639,6 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
 static bool vmci_transport_stream_allow(u32 cid, u32 port)
 {
 	static const u32 non_socket_contexts[] = {
-		VMADDR_CID_HYPERVISOR,
 		VMADDR_CID_RESERVED,
 	};
 	int i;
@@ -667,7 +677,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
 	 */
 
 	if (!vmci_transport_stream_allow(dg->src.context, -1)
-	    || VMCI_TRANSPORT_PACKET_RID != dg->src.resource)
+	    || vmci_transport_peer_rid(dg->src.context) != dg->src.resource)
 		return VMCI_ERROR_NO_ACCESS;
 
 	if (VMCI_DG_SIZE(dg) < sizeof(*pkt))
@@ -1736,6 +1746,8 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
 	if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
 		return -EOPNOTSUPP;
 
+	msg->msg_namelen = 0;
+
 	/* Retrieve the head sk_buff from the socket's receive queue. */
 	err = 0;
 	skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
@@ -1768,7 +1780,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
 	if (err)
 		goto out;
 
-	msg->msg_namelen = 0;
 	if (msg->msg_name) {
 		struct sockaddr_vm *vm_addr;
 
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index 1bf991803ec0..fd88ea8924e4 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -28,6 +28,9 @@
 /* The resource ID on which control packets are sent. */
 #define VMCI_TRANSPORT_PACKET_RID 1
 
+/* The resource ID on which control packets are sent to the hypervisor. */
+#define VMCI_TRANSPORT_HYPERVISOR_PACKET_RID 15
+
 #define VSOCK_PROTO_INVALID        0
 #define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0)
 #define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY)
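
The RID change is symmetric: vmci_transport_peer_rid() picks resource ID 15 when the peer is the hypervisor context and 1 otherwise, and the receive callback validates an incoming datagram's source resource against the same helper, so addressing and validation cannot drift apart. A compact sketch of the shape (constants renamed; the real values are in the header above):

#include <assert.h>

#define CID_HYPERVISOR          0u
#define RID_CONTROL             1u
#define RID_CONTROL_HYPERVISOR 15u

static unsigned int peer_rid(unsigned int peer_cid)
{
	return peer_cid == CID_HYPERVISOR ? RID_CONTROL_HYPERVISOR : RID_CONTROL;
}

/* The receive-side check mirrors the send-side addressing. */
static int src_allowed(unsigned int src_cid, unsigned int src_rid)
{
	return peer_rid(src_cid) == src_rid;
}

int main(void)
{
	assert(src_allowed(CID_HYPERVISOR, RID_CONTROL_HYPERVISOR));
	assert(!src_allowed(CID_HYPERVISOR, RID_CONTROL));
	assert(src_allowed(42, RID_CONTROL));
	return 0;
}
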
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index a4a14e8f55cc..324e8d851dc4 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -46,65 +46,3 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
 
 	return err;
 }
-
-void cfg80211_ch_switch_notify(struct net_device *dev,
-			       struct cfg80211_chan_def *chandef)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_ch_switch_notify(dev, chandef);
-
-	wdev_lock(wdev);
-
-	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
-		    wdev->iftype != NL80211_IFTYPE_P2P_GO))
-		goto out;
-
-	wdev->channel = chandef->chan;
-	nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL);
-out:
-	wdev_unlock(wdev);
-	return;
-}
-EXPORT_SYMBOL(cfg80211_ch_switch_notify);
-
-bool cfg80211_rx_spurious_frame(struct net_device *dev,
-				const u8 *addr, gfp_t gfp)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	bool ret;
-
-	trace_cfg80211_rx_spurious_frame(dev, addr);
-
-	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
-		    wdev->iftype != NL80211_IFTYPE_P2P_GO)) {
-		trace_cfg80211_return_bool(false);
-		return false;
-	}
-	ret = nl80211_unexpected_frame(dev, addr, gfp);
-	trace_cfg80211_return_bool(ret);
-	return ret;
-}
-EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
-
-bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
-					const u8 *addr, gfp_t gfp)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	bool ret;
-
-	trace_cfg80211_rx_unexpected_4addr_frame(dev, addr);
-
-	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
-		    wdev->iftype != NL80211_IFTYPE_P2P_GO &&
-		    wdev->iftype != NL80211_IFTYPE_AP_VLAN)) {
-		trace_cfg80211_return_bool(false);
-		return false;
-	}
-	ret = nl80211_unexpected_4addr_frame(dev, addr, gfp);
-	trace_cfg80211_return_bool(ret);
-	return ret;
-}
-EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 6ddf74f0ae1e..84c9ad7e1dca 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -842,6 +842,46 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
 	rdev->num_running_monitor_ifaces += num;
 }
 
+void cfg80211_leave(struct cfg80211_registered_device *rdev,
+		    struct wireless_dev *wdev)
+{
+	struct net_device *dev = wdev->netdev;
+
+	switch (wdev->iftype) {
+	case NL80211_IFTYPE_ADHOC:
+		cfg80211_leave_ibss(rdev, dev, true);
+		break;
+	case NL80211_IFTYPE_P2P_CLIENT:
+	case NL80211_IFTYPE_STATION:
+		mutex_lock(&rdev->sched_scan_mtx);
+		__cfg80211_stop_sched_scan(rdev, false);
+		mutex_unlock(&rdev->sched_scan_mtx);
+
+		wdev_lock(wdev);
+#ifdef CONFIG_CFG80211_WEXT
+		kfree(wdev->wext.ie);
+		wdev->wext.ie = NULL;
+		wdev->wext.ie_len = 0;
+		wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
+#endif
+		__cfg80211_disconnect(rdev, dev,
+				      WLAN_REASON_DEAUTH_LEAVING, true);
+		cfg80211_mlme_down(rdev, dev);
+		wdev_unlock(wdev);
+		break;
+	case NL80211_IFTYPE_MESH_POINT:
+		cfg80211_leave_mesh(rdev, dev);
+		break;
+	case NL80211_IFTYPE_AP:
+		cfg80211_stop_ap(rdev, dev);
+		break;
+	default:
+		break;
+	}
+
+	wdev->beacon_interval = 0;
+}
+
 static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 					 unsigned long state,
 					 void *ndev)
@@ -910,38 +950,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 		dev->priv_flags |= IFF_DONT_BRIDGE;
 		break;
 	case NETDEV_GOING_DOWN:
-		switch (wdev->iftype) {
-		case NL80211_IFTYPE_ADHOC:
-			cfg80211_leave_ibss(rdev, dev, true);
-			break;
-		case NL80211_IFTYPE_P2P_CLIENT:
-		case NL80211_IFTYPE_STATION:
-			mutex_lock(&rdev->sched_scan_mtx);
-			__cfg80211_stop_sched_scan(rdev, false);
-			mutex_unlock(&rdev->sched_scan_mtx);
-
-			wdev_lock(wdev);
-#ifdef CONFIG_CFG80211_WEXT
-			kfree(wdev->wext.ie);
-			wdev->wext.ie = NULL;
-			wdev->wext.ie_len = 0;
-			wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
-#endif
-			__cfg80211_disconnect(rdev, dev,
-					      WLAN_REASON_DEAUTH_LEAVING, true);
-			cfg80211_mlme_down(rdev, dev);
-			wdev_unlock(wdev);
-			break;
-		case NL80211_IFTYPE_MESH_POINT:
-			cfg80211_leave_mesh(rdev, dev);
-			break;
-		case NL80211_IFTYPE_AP:
-			cfg80211_stop_ap(rdev, dev);
-			break;
-		default:
-			break;
-		}
-		wdev->beacon_interval = 0;
+		cfg80211_leave(rdev, wdev);
 		break;
 	case NETDEV_DOWN:
 		cfg80211_update_iface_num(rdev, wdev->iftype, -1);
@@ -1117,8 +1126,10 @@ static int __init cfg80211_init(void)
 		goto out_fail_reg;
 
 	cfg80211_wq = create_singlethread_workqueue("cfg80211");
-	if (!cfg80211_wq)
+	if (!cfg80211_wq) {
+		err = -ENOMEM;
 		goto out_fail_wq;
+	}
 
 	return 0;
 
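
The cfg80211_init() hunk fixes a classic goto-unwind bug: the old code jumped to out_fail_wq without assigning err, so after the earlier steps had all succeeded the function could return 0 even though the workqueue allocation failed. A reduced sketch of the bug class and its fix:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int init_like(int simulate_oom)
{
	int err = 0;    /* earlier, successful steps leave 0 behind */
	void *wq;

	wq = simulate_oom ? NULL : malloc(64);
	if (!wq) {
		err = -ENOMEM;  /* the fix: without this assignment, the */
		goto out_fail;  /* failure path would "return 0"         */
	}
	free(wq);
	return 0;

out_fail:
	return err;
}

int main(void)
{
	printf("%d\n", init_like(1)); /* prints -12 (-ENOMEM), not 0 */
	return 0;
}
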
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 5845c2b37aa8..fd35dae547c4 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -88,6 +88,9 @@ struct cfg80211_registered_device {
 
 	struct delayed_work dfs_update_channels_wk;
 
+	/* netlink port which started critical protocol (0 means not started) */
+	u32 crit_proto_nlportid;
+
 	/* must be last because of the way we do wiphy_priv(),
 	 * and it should at least be aligned to NETDEV_ALIGN */
 	struct wiphy wiphy __aligned(NETDEV_ALIGN);
@@ -330,20 +333,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
 			  struct ieee80211_channel *chan,
-			  const u8 *bssid, const u8 *prev_bssid,
+			  const u8 *bssid,
 			  const u8 *ssid, int ssid_len,
-			  const u8 *ie, int ie_len, bool use_mfp,
-			  struct cfg80211_crypto_settings *crypt,
-			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
-			  struct ieee80211_ht_cap *ht_capa_mask);
+			  struct cfg80211_assoc_request *req);
 int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
-			struct net_device *dev, struct ieee80211_channel *chan,
-			const u8 *bssid, const u8 *prev_bssid,
+			struct net_device *dev,
+			struct ieee80211_channel *chan,
+			const u8 *bssid,
 			const u8 *ssid, int ssid_len,
-			const u8 *ie, int ie_len, bool use_mfp,
-			struct cfg80211_crypto_settings *crypt,
-			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
-			struct ieee80211_ht_cap *ht_capa_mask);
+			struct cfg80211_assoc_request *req);
 int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
 			   const u8 *ie, int ie_len, u16 reason,
@@ -375,6 +373,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  bool no_cck, bool dont_wait_for_ack, u64 *cookie);
 void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
 			       const struct ieee80211_ht_cap *ht_capa_mask);
+void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
+				const struct ieee80211_vht_cap *vht_capa_mask);
 
 /* SME */
 int __cfg80211_connect(struct cfg80211_registered_device *rdev,
@@ -503,6 +503,9 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
 void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
 			       enum nl80211_iftype iftype, int num);
 
+void cfg80211_leave(struct cfg80211_registered_device *rdev,
+		    struct wireless_dev *wdev);
+
 void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 			      struct wireless_dev *wdev);
 
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index 1526c211db66..dc0e59e53dbf 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -430,24 +430,23 @@ static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv)
 	return CCMP_TK_LEN;
 }
 
-static char *lib80211_ccmp_print_stats(char *p, void *priv)
+static void lib80211_ccmp_print_stats(struct seq_file *m, void *priv)
 {
 	struct lib80211_ccmp_data *ccmp = priv;
 
-	p += sprintf(p, "key[%d] alg=CCMP key_set=%d "
-		     "tx_pn=%02x%02x%02x%02x%02x%02x "
-		     "rx_pn=%02x%02x%02x%02x%02x%02x "
-		     "format_errors=%d replays=%d decrypt_errors=%d\n",
-		     ccmp->key_idx, ccmp->key_set,
-		     ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2],
-		     ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5],
-		     ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2],
-		     ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5],
-		     ccmp->dot11RSNAStatsCCMPFormatErrors,
-		     ccmp->dot11RSNAStatsCCMPReplays,
-		     ccmp->dot11RSNAStatsCCMPDecryptErrors);
-
-	return p;
+	seq_printf(m,
+		   "key[%d] alg=CCMP key_set=%d "
+		   "tx_pn=%02x%02x%02x%02x%02x%02x "
+		   "rx_pn=%02x%02x%02x%02x%02x%02x "
+		   "format_errors=%d replays=%d decrypt_errors=%d\n",
+		   ccmp->key_idx, ccmp->key_set,
+		   ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2],
+		   ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5],
+		   ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2],
+		   ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5],
+		   ccmp->dot11RSNAStatsCCMPFormatErrors,
+		   ccmp->dot11RSNAStatsCCMPReplays,
+		   ccmp->dot11RSNAStatsCCMPDecryptErrors);
 }
 
 static struct lib80211_crypto_ops lib80211_crypt_ccmp = {
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index d475cfc8568f..8c90ba79e56e 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -703,30 +703,30 @@ static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv)
 	return TKIP_KEY_LEN;
 }
 
-static char *lib80211_tkip_print_stats(char *p, void *priv)
+static void lib80211_tkip_print_stats(struct seq_file *m, void *priv)
 {
 	struct lib80211_tkip_data *tkip = priv;
-	p += sprintf(p, "key[%d] alg=TKIP key_set=%d "
-		     "tx_pn=%02x%02x%02x%02x%02x%02x "
-		     "rx_pn=%02x%02x%02x%02x%02x%02x "
-		     "replays=%d icv_errors=%d local_mic_failures=%d\n",
-		     tkip->key_idx, tkip->key_set,
-		     (tkip->tx_iv32 >> 24) & 0xff,
-		     (tkip->tx_iv32 >> 16) & 0xff,
-		     (tkip->tx_iv32 >> 8) & 0xff,
-		     tkip->tx_iv32 & 0xff,
-		     (tkip->tx_iv16 >> 8) & 0xff,
-		     tkip->tx_iv16 & 0xff,
-		     (tkip->rx_iv32 >> 24) & 0xff,
-		     (tkip->rx_iv32 >> 16) & 0xff,
-		     (tkip->rx_iv32 >> 8) & 0xff,
-		     tkip->rx_iv32 & 0xff,
-		     (tkip->rx_iv16 >> 8) & 0xff,
-		     tkip->rx_iv16 & 0xff,
-		     tkip->dot11RSNAStatsTKIPReplays,
-		     tkip->dot11RSNAStatsTKIPICVErrors,
-		     tkip->dot11RSNAStatsTKIPLocalMICFailures);
-	return p;
+	seq_printf(m,
+		   "key[%d] alg=TKIP key_set=%d "
+		   "tx_pn=%02x%02x%02x%02x%02x%02x "
+		   "rx_pn=%02x%02x%02x%02x%02x%02x "
+		   "replays=%d icv_errors=%d local_mic_failures=%d\n",
+		   tkip->key_idx, tkip->key_set,
+		   (tkip->tx_iv32 >> 24) & 0xff,
+		   (tkip->tx_iv32 >> 16) & 0xff,
+		   (tkip->tx_iv32 >> 8) & 0xff,
+		   tkip->tx_iv32 & 0xff,
+		   (tkip->tx_iv16 >> 8) & 0xff,
+		   tkip->tx_iv16 & 0xff,
+		   (tkip->rx_iv32 >> 24) & 0xff,
+		   (tkip->rx_iv32 >> 16) & 0xff,
+		   (tkip->rx_iv32 >> 8) & 0xff,
+		   tkip->rx_iv32 & 0xff,
+		   (tkip->rx_iv16 >> 8) & 0xff,
+		   tkip->rx_iv16 & 0xff,
+		   tkip->dot11RSNAStatsTKIPReplays,
+		   tkip->dot11RSNAStatsTKIPICVErrors,
+		   tkip->dot11RSNAStatsTKIPLocalMICFailures);
 }
 
 static struct lib80211_crypto_ops lib80211_crypt_tkip = {
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index c1304018fc1c..1c292e4ea7b6 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -253,11 +253,10 @@ static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv)
 	return wep->key_len;
 }
 
-static char *lib80211_wep_print_stats(char *p, void *priv)
+static void lib80211_wep_print_stats(struct seq_file *m, void *priv)
 {
 	struct lib80211_wep_data *wep = priv;
-	p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
-	return p;
+	seq_printf(m, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len);
 }
 
 static struct lib80211_crypto_ops lib80211_crypt_wep = {
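
All three lib80211 print_stats callbacks change shape the same way: from appending to a caller-supplied char buffer with sprintf() (no bounds checking, and the caller has to guess a size) to printing into a struct seq_file, which sizes and pages its own buffer for /proc output. A userspace analogue of the before and after, with a FILE * standing in for the seq_file:

#include <stdio.h>

/* Before: raw pointer arithmetic; the caller must have sized `p`. */
static char *print_stats_old(char *p, int key_idx, int key_len)
{
	p += sprintf(p, "key[%d] alg=WEP len=%d\n", key_idx, key_len);
	return p;
}

/* After: print through an opaque sink that manages its own space. */
static void print_stats_new(FILE *m, int key_idx, int key_len)
{
	fprintf(m, "key[%d] alg=WEP len=%d\n", key_idx, key_len);
}

int main(void)
{
	char buf[64];

	print_stats_old(buf, 0, 13);
	fputs(buf, stdout);
	print_stats_new(stdout, 0, 13);
	return 0;
}
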
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 55957a284f6c..0bb93f3061a4 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -85,6 +85,7 @@ const struct mesh_setup default_mesh_setup = {
 	.ie = NULL,
 	.ie_len = 0,
 	.is_secure = false,
+	.user_mpm = false,
 	.beacon_interval = MESH_DEFAULT_BEACON_INTERVAL,
 	.dtim_period = MESH_DEFAULT_DTIM_PERIOD,
 };
@@ -233,20 +234,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,
 	return 0;
 }
 
-void cfg80211_notify_new_peer_candidate(struct net_device *dev,
-		const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-
-	trace_cfg80211_notify_new_peer_candidate(dev, macaddr);
-	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT))
-		return;
-
-	nl80211_send_new_peer_candidate(wiphy_to_dev(wdev->wiphy), dev,
-					macaddr, ie, ie_len, gfp);
-}
-EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
-
 static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
 				 struct net_device *dev)
 {
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index caddca35d686..0c7b7dd855f6 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -187,30 +187,6 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len)
 }
 EXPORT_SYMBOL(cfg80211_send_disassoc);
 
-void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
-				 size_t len)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_send_unprot_deauth(dev);
-	nl80211_send_unprot_deauth(rdev, dev, buf, len, GFP_ATOMIC);
-}
-EXPORT_SYMBOL(cfg80211_send_unprot_deauth);
-
-void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
-				   size_t len)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_send_unprot_disassoc(dev);
-	nl80211_send_unprot_disassoc(rdev, dev, buf, len, GFP_ATOMIC);
-}
-EXPORT_SYMBOL(cfg80211_send_unprot_disassoc);
-
 void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -367,27 +343,38 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
 		p1[i] &= p2[i];
 }
 
+/* Do a logical ht_capa &= ht_capa_mask. */
+void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
+				const struct ieee80211_vht_cap *vht_capa_mask)
+{
+	int i;
+	u8 *p1, *p2;
+	if (!vht_capa_mask) {
+		memset(vht_capa, 0, sizeof(*vht_capa));
+		return;
+	}
+
+	p1 = (u8*)(vht_capa);
+	p2 = (u8*)(vht_capa_mask);
+	for (i = 0; i < sizeof(*vht_capa); i++)
+		p1[i] &= p2[i];
+}
+
 int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
 			  struct ieee80211_channel *chan,
-			  const u8 *bssid, const u8 *prev_bssid,
+			  const u8 *bssid,
 			  const u8 *ssid, int ssid_len,
-			  const u8 *ie, int ie_len, bool use_mfp,
-			  struct cfg80211_crypto_settings *crypt,
-			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
-			  struct ieee80211_ht_cap *ht_capa_mask)
+			  struct cfg80211_assoc_request *req)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_assoc_request req;
 	int err;
 	bool was_connected = false;
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	memset(&req, 0, sizeof(req));
-
-	if (wdev->current_bss && prev_bssid &&
-	    ether_addr_equal(wdev->current_bss->pub.bssid, prev_bssid)) {
+	if (wdev->current_bss && req->prev_bssid &&
+	    ether_addr_equal(wdev->current_bss->pub.bssid, req->prev_bssid)) {
 		/*
 		 * Trying to reassociate: Allow this to proceed and let the old
 		 * association to be dropped when the new one is completed.
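
The new cfg80211_oper_and_vht_capa() applies a driver-supplied modification mask by ANDing the capability structure byte by byte, with a NULL mask meaning "nothing may be overridden", so the capabilities are cleared outright. A standalone sketch of the operation (my own struct, not the ieee80211 types):

#include <assert.h>
#include <string.h>

struct caps_like { unsigned char b[12]; };

/* Byte-wise AND against a modification mask; a NULL mask clears all. */
static void and_caps(struct caps_like *c, const struct caps_like *mask)
{
	size_t i;

	if (!mask) {
		memset(c, 0, sizeof(*c));
		return;
	}
	for (i = 0; i < sizeof(*c); i++)
		c->b[i] &= mask->b[i];
}

int main(void)
{
	struct caps_like c = { { 0xff, 0x0f } };
	struct caps_like m = { { 0xf0, 0xff } };

	and_caps(&c, &m);
	assert(c.b[0] == 0xf0 && c.b[1] == 0x0f);
	and_caps(&c, NULL);
	assert(c.b[0] == 0 && c.b[11] == 0);
	return 0;
}
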
@@ -399,40 +386,30 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 	} else if (wdev->current_bss)
 		return -EALREADY;
 
-	req.ie = ie;
-	req.ie_len = ie_len;
-	memcpy(&req.crypto, crypt, sizeof(req.crypto));
-	req.use_mfp = use_mfp;
-	req.prev_bssid = prev_bssid;
-	req.flags = assoc_flags;
-	if (ht_capa)
-		memcpy(&req.ht_capa, ht_capa, sizeof(req.ht_capa));
-	if (ht_capa_mask)
-		memcpy(&req.ht_capa_mask, ht_capa_mask,
-		       sizeof(req.ht_capa_mask));
-	cfg80211_oper_and_ht_capa(&req.ht_capa_mask,
+	cfg80211_oper_and_ht_capa(&req->ht_capa_mask,
 				  rdev->wiphy.ht_capa_mod_mask);
+	cfg80211_oper_and_vht_capa(&req->vht_capa_mask,
+				   rdev->wiphy.vht_capa_mod_mask);
 
-	req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
-				   WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
-	if (!req.bss) {
+	req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
+				    WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
+	if (!req->bss) {
 		if (was_connected)
 			wdev->sme_state = CFG80211_SME_CONNECTED;
 		return -ENOENT;
 	}
 
-	err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel,
-				    CHAN_MODE_SHARED);
+	err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED);
 	if (err)
 		goto out;
 
-	err = rdev_assoc(rdev, dev, &req);
+	err = rdev_assoc(rdev, dev, req);
 
 out:
 	if (err) {
 		if (was_connected)
 			wdev->sme_state = CFG80211_SME_CONNECTED;
-		cfg80211_put_bss(&rdev->wiphy, req.bss);
+		cfg80211_put_bss(&rdev->wiphy, req->bss);
 	}
 
 	return err;
@@ -441,21 +418,17 @@ out:
 int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			struct net_device *dev,
 			struct ieee80211_channel *chan,
-			const u8 *bssid, const u8 *prev_bssid,
+			const u8 *bssid,
 			const u8 *ssid, int ssid_len,
-			const u8 *ie, int ie_len, bool use_mfp,
-			struct cfg80211_crypto_settings *crypt,
-			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
-			struct ieee80211_ht_cap *ht_capa_mask)
+			struct cfg80211_assoc_request *req)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
 	mutex_lock(&rdev->devlist_mtx);
 	wdev_lock(wdev);
-	err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
-				    ssid, ssid_len, ie, ie_len, use_mfp, crypt,
-				    assoc_flags, ht_capa, ht_capa_mask);
+	err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid,
+				    ssid, ssid_len, req);
 	wdev_unlock(wdev);
 	mutex_unlock(&rdev->devlist_mtx);
 
@@ -577,62 +550,6 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
 	}
 }
 
-void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie,
-			       struct ieee80211_channel *chan,
-			       unsigned int duration, gfp_t gfp)
-{
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration);
-	nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, duration, gfp);
-}
-EXPORT_SYMBOL(cfg80211_ready_on_channel);
-
-void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie,
-					struct ieee80211_channel *chan,
-					gfp_t gfp)
-{
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan);
-	nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, gfp);
-}
-EXPORT_SYMBOL(cfg80211_remain_on_channel_expired);
-
-void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
-		      struct station_info *sinfo, gfp_t gfp)
-{
-	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_new_sta(dev, mac_addr, sinfo);
-	nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp);
-}
-EXPORT_SYMBOL(cfg80211_new_sta);
-
-void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
-{
-	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_del_sta(dev, mac_addr);
-	nl80211_send_sta_del_event(rdev, dev, mac_addr, gfp);
-}
-EXPORT_SYMBOL(cfg80211_del_sta);
-
-void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
-			  enum nl80211_connect_failed_reason reason,
-			  gfp_t gfp)
-{
-	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	nl80211_send_conn_failed_event(rdev, dev, mac_addr, reason, gfp);
-}
-EXPORT_SYMBOL(cfg80211_conn_failed);
-
 struct cfg80211_mgmt_registration {
 	struct list_head list;
 
@@ -731,6 +648,11 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
 
 	spin_unlock_bh(&wdev->mgmt_registrations_lock);
 
+	if (nlportid && rdev->crit_proto_nlportid == nlportid) {
+		rdev->crit_proto_nlportid = 0;
+		rdev_crit_proto_stop(rdev, wdev);
+	}
+
 	if (nlportid == wdev->ap_unexpected_nlportid)
 		wdev->ap_unexpected_nlportid = 0;
 }
@@ -909,85 +831,6 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,
 }
 EXPORT_SYMBOL(cfg80211_rx_mgmt);
 
-void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
-			     const u8 *buf, size_t len, bool ack, gfp_t gfp)
-{
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
-
-	/* Indicate TX status of the Action frame to user space */
-	nl80211_send_mgmt_tx_status(rdev, wdev, cookie, buf, len, ack, gfp);
-}
-EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
-
-void cfg80211_cqm_rssi_notify(struct net_device *dev,
-			      enum nl80211_cqm_rssi_threshold_event rssi_event,
-			      gfp_t gfp)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_cqm_rssi_notify(dev, rssi_event);
-
-	/* Indicate roaming trigger event to user space */
-	nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp);
-}
-EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
-
-void cfg80211_cqm_pktloss_notify(struct net_device *dev,
-				 const u8 *peer, u32 num_packets, gfp_t gfp)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets);
-
-	/* Indicate roaming trigger event to user space */
-	nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp);
-}
-EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);
-
-void cfg80211_cqm_txe_notify(struct net_device *dev,
-			     const u8 *peer, u32 num_packets,
-			     u32 rate, u32 intvl, gfp_t gfp)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	nl80211_send_cqm_txe_notify(rdev, dev, peer, num_packets,
-				    rate, intvl, gfp);
-}
-EXPORT_SYMBOL(cfg80211_cqm_txe_notify);
-
-void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
-			       const u8 *replay_ctr, gfp_t gfp)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_gtk_rekey_notify(dev, bssid);
-	nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp);
-}
-EXPORT_SYMBOL(cfg80211_gtk_rekey_notify);
-
-void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
-				     const u8 *bssid, bool preauth, gfp_t gfp)
-{
-	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
-
-	trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth);
-	nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
-}
-EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);
-
 void cfg80211_dfs_channels_update_work(struct work_struct *work)
 {
 	struct delayed_work *delayed_work;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 58e13a8c95f9..afa283841e8c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -370,6 +370,14 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED },
 	[NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },
 	[NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, },
+	[NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, },
+	[NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG },
+	[NL80211_ATTR_VHT_CAPABILITY_MASK] = {
+		.len = NL80211_VHT_CAPABILITY_LEN,
+	},
+	[NL80211_ATTR_MDID] = { .type = NLA_U16 },
+	[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
+				  .len = IEEE80211_MAX_DATA_LEN },
 };
 
 /* policy for the key attributes */
@@ -439,62 +447,69 @@ nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
 	[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
 };
 
-/* ifidx get helper */
-static int nl80211_get_ifidx(struct netlink_callback *cb)
+static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
+				     struct netlink_callback *cb,
+				     struct cfg80211_registered_device **rdev,
+				     struct wireless_dev **wdev)
 {
-	int res;
-
-	res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
-			  nl80211_fam.attrbuf, nl80211_fam.maxattr,
-			  nl80211_policy);
-	if (res)
-		return res;
-
-	if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX])
-		return -EINVAL;
+	int err;
 
-	res = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]);
-	if (!res)
-		return -EINVAL;
-	return res;
-}
+	rtnl_lock();
+	mutex_lock(&cfg80211_mutex);
 
-static int nl80211_prepare_netdev_dump(struct sk_buff *skb,
-				       struct netlink_callback *cb,
-				       struct cfg80211_registered_device **rdev,
-				       struct net_device **dev)
-{
-	int ifidx = cb->args[0];
-	int err;
+	if (!cb->args[0]) {
+		err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
+				  nl80211_fam.attrbuf, nl80211_fam.maxattr,
+				  nl80211_policy);
+		if (err)
+			goto out_unlock;
 
-	if (!ifidx)
-		ifidx = nl80211_get_ifidx(cb);
-	if (ifidx < 0)
-		return ifidx;
+		*wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
+						   nl80211_fam.attrbuf);
+		if (IS_ERR(*wdev)) {
+			err = PTR_ERR(*wdev);
+			goto out_unlock;
+		}
+		*rdev = wiphy_to_dev((*wdev)->wiphy);
+		cb->args[0] = (*rdev)->wiphy_idx;
+		cb->args[1] = (*wdev)->identifier;
+	} else {
+		struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]);
+		struct wireless_dev *tmp;
 
-	cb->args[0] = ifidx;
+		if (!wiphy) {
+			err = -ENODEV;
+			goto out_unlock;
+		}
+		*rdev = wiphy_to_dev(wiphy);
+		*wdev = NULL;
 
-	rtnl_lock();
+		mutex_lock(&(*rdev)->devlist_mtx);
+		list_for_each_entry(tmp, &(*rdev)->wdev_list, list) {
+			if (tmp->identifier == cb->args[1]) {
+				*wdev = tmp;
+				break;
+			}
+		}
+		mutex_unlock(&(*rdev)->devlist_mtx);
 
-	*dev = __dev_get_by_index(sock_net(skb->sk), ifidx);
-	if (!*dev) {
-		err = -ENODEV;
-		goto out_rtnl;
+		if (!*wdev) {
+			err = -ENODEV;
+			goto out_unlock;
+		}
 	}
 
-	*rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
-	if (IS_ERR(*rdev)) {
-		err = PTR_ERR(*rdev);
-		goto out_rtnl;
-	}
+	cfg80211_lock_rdev(*rdev);
 
+	mutex_unlock(&cfg80211_mutex);
 	return 0;
- out_rtnl:
+ out_unlock:
+	mutex_unlock(&cfg80211_mutex);
 	rtnl_unlock();
 	return err;
 }
 
-static void nl80211_finish_netdev_dump(struct cfg80211_registered_device *rdev)
+static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev)
 {
 	cfg80211_unlock_rdev(rdev);
 	rtnl_unlock();
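
nl80211_prepare_wdev_dump() shows the standard way netlink dump callbacks keep state: the callback runs repeatedly until the dump completes, and the only storage that survives between invocations is the small cb->args[] array of longs, so the first pass parses the attributes and caches the wiphy index and wdev identifier there while later passes simply look them up. A toy model of that control flow:

#include <stdio.h>

/* Only cb->args[] survives between repeated dump-callback runs. */
struct callback_like { long args[2]; };

static void prepare_dump(struct callback_like *cb)
{
	if (!cb->args[0]) {      /* first pass: parse attributes, cache */
		cb->args[0] = 7; /* pretend: resolved wiphy index */
		cb->args[1] = 3; /* pretend: resolved wdev identifier */
	}
	/* later passes fall through and reuse the cached values */
}

int main(void)
{
	struct callback_like cb = { { 0, 0 } };

	prepare_dump(&cb); /* parses */
	prepare_dump(&cb); /* reuses the cache */
	printf("%ld %ld\n", cb.args[0], cb.args[1]);
	return 0;
}
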
@@ -539,7 +554,8 @@ static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
 }
 
 static int nl80211_msg_put_channel(struct sk_buff *msg,
-				   struct ieee80211_channel *chan)
+				   struct ieee80211_channel *chan,
+				   bool large)
 {
 	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ,
 			chan->center_freq))
@@ -554,9 +570,37 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
 	if ((chan->flags & IEEE80211_CHAN_NO_IBSS) &&
 	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS))
 		goto nla_put_failure;
-	if ((chan->flags & IEEE80211_CHAN_RADAR) &&
-	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR))
-		goto nla_put_failure;
+	if (chan->flags & IEEE80211_CHAN_RADAR) {
+		if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR))
+			goto nla_put_failure;
+		if (large) {
+			u32 time;
+
+			time = elapsed_jiffies_msecs(chan->dfs_state_entered);
+
+			if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE,
+					chan->dfs_state))
+				goto nla_put_failure;
+			if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME,
+					time))
+				goto nla_put_failure;
+		}
+	}
+
+	if (large) {
+		if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ))
+			goto nla_put_failure;
+		if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ))
+			goto nla_put_failure;
+	}
 
 	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
 			DBM_TO_MBM(chan->max_power)))
@@ -832,7 +876,8 @@ nla_put_failure:
 }
 
 static int nl80211_put_iface_combinations(struct wiphy *wiphy,
-					  struct sk_buff *msg)
+					  struct sk_buff *msg,
+					  bool large)
 {
 	struct nlattr *nl_combis;
 	int i, j;
@@ -881,6 +926,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy,
 		    nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM,
 				c->max_interfaces))
 			goto nla_put_failure;
+		if (large &&
+		    nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
+				c->radar_detect_widths))
+			goto nla_put_failure;
 
 		nla_nest_end(msg, nl_combi);
 	}
@@ -892,412 +941,615 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags,
-			      struct cfg80211_registered_device *dev)
+#ifdef CONFIG_PM
+static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev,
+					struct sk_buff *msg)
 {
-	void *hdr;
-	struct nlattr *nl_bands, *nl_band;
-	struct nlattr *nl_freqs, *nl_freq;
-	struct nlattr *nl_rates, *nl_rate;
-	struct nlattr *nl_cmds;
-	enum ieee80211_band band;
-	struct ieee80211_channel *chan;
-	struct ieee80211_rate *rate;
-	int i;
-	const struct ieee80211_txrx_stypes *mgmt_stypes =
-		dev->wiphy.mgmt_stypes;
+	const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp;
+	struct nlattr *nl_tcp;
 
-	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
-	if (!hdr)
-		return -1;
+	if (!tcp)
+		return 0;
 
-	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) ||
-	    nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)) ||
-	    nla_put_u32(msg, NL80211_ATTR_GENERATION,
-			cfg80211_rdev_list_generation) ||
-	    nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
-		       dev->wiphy.retry_short) ||
-	    nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
-		       dev->wiphy.retry_long) ||
-	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
-			dev->wiphy.frag_threshold) ||
-	    nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
-			dev->wiphy.rts_threshold) ||
-	    nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
-		       dev->wiphy.coverage_class) ||
-	    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
-		       dev->wiphy.max_scan_ssids) ||
-	    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
-		       dev->wiphy.max_sched_scan_ssids) ||
-	    nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
-			dev->wiphy.max_scan_ie_len) ||
-	    nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
-			dev->wiphy.max_sched_scan_ie_len) ||
-	    nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
-		       dev->wiphy.max_match_sets))
-		goto nla_put_failure;
+	nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION);
+	if (!nl_tcp)
+		return -ENOBUFS;
 
-	if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
-	    nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN))
-		goto nla_put_failure;
-	if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
-	    nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))
-		goto nla_put_failure;
-	if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
-	    nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD))
-		goto nla_put_failure;
-	if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
-	    nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT))
-		goto nla_put_failure;
-	if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
-	    nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT))
-		goto nla_put_failure;
-	if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
-	    nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
-		goto nla_put_failure;
+	if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD,
+			tcp->data_payload_max))
+		return -ENOBUFS;
 
-	if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
-		    sizeof(u32) * dev->wiphy.n_cipher_suites,
-		    dev->wiphy.cipher_suites))
-		goto nla_put_failure;
+	if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD,
+			tcp->data_payload_max))
+		return -ENOBUFS;
 
-	if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
-		       dev->wiphy.max_num_pmkids))
-		goto nla_put_failure;
+	if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ))
+		return -ENOBUFS;
 
-	if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
-	    nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE))
-		goto nla_put_failure;
+	if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN,
+				sizeof(*tcp->tok), tcp->tok))
+		return -ENOBUFS;
 
-	if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
-			dev->wiphy.available_antennas_tx) ||
-	    nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
-			dev->wiphy.available_antennas_rx))
-		goto nla_put_failure;
+	if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL,
+			tcp->data_interval_max))
+		return -ENOBUFS;
 
-	if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
-	    nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
-			dev->wiphy.probe_resp_offload))
-		goto nla_put_failure;
+	if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD,
+			tcp->wake_payload_max))
+		return -ENOBUFS;
 
-	if ((dev->wiphy.available_antennas_tx ||
-	     dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) {
-		u32 tx_ant = 0, rx_ant = 0;
-		int res;
-		res = rdev_get_antenna(dev, &tx_ant, &rx_ant);
-		if (!res) {
-			if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX,
-					tx_ant) ||
-			    nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX,
-					rx_ant))
-				goto nla_put_failure;
-		}
+	nla_nest_end(msg, nl_tcp);
+	return 0;
+}
+
+static int nl80211_send_wowlan(struct sk_buff *msg,
+			       struct cfg80211_registered_device *dev,
+			       bool large)
+{
+	struct nlattr *nl_wowlan;
+
+	if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns)
+		return 0;
+
+	nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
+	if (!nl_wowlan)
+		return -ENOBUFS;
+
+	if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
+		return -ENOBUFS;
+
+	if (dev->wiphy.wowlan.n_patterns) {
+		struct nl80211_wowlan_pattern_support pat = {
+			.max_patterns = dev->wiphy.wowlan.n_patterns,
+			.min_pattern_len = dev->wiphy.wowlan.pattern_min_len,
+			.max_pattern_len = dev->wiphy.wowlan.pattern_max_len,
+			.max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset,
+		};
+
+		if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
+			    sizeof(pat), &pat))
+			return -ENOBUFS;
 	}
 
-	if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
-				dev->wiphy.interface_modes))
-		goto nla_put_failure;
+	if (large && nl80211_send_wowlan_tcp_caps(dev, msg))
+		return -ENOBUFS;
 
-	nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
-	if (!nl_bands)
-		goto nla_put_failure;
+	nla_nest_end(msg, nl_wowlan);
 
-	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
-		if (!dev->wiphy.bands[band])
-			continue;
+	return 0;
+}
+#endif
 
-		nl_band = nla_nest_start(msg, band);
-		if (!nl_band)
-			goto nla_put_failure;
+static int nl80211_send_band_rateinfo(struct sk_buff *msg,
+				      struct ieee80211_supported_band *sband)
+{
+	struct nlattr *nl_rates, *nl_rate;
+	struct ieee80211_rate *rate;
+	int i;
 
 	/* add HT info */
-	if (dev->wiphy.bands[band]->ht_cap.ht_supported &&
+	if (sband->ht_cap.ht_supported &&
 	    (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET,
-		     sizeof(dev->wiphy.bands[band]->ht_cap.mcs),
-		     &dev->wiphy.bands[band]->ht_cap.mcs) ||
+		     sizeof(sband->ht_cap.mcs),
+		     &sband->ht_cap.mcs) ||
 	     nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA,
-			 dev->wiphy.bands[band]->ht_cap.cap) ||
+			 sband->ht_cap.cap) ||
 	     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
-			dev->wiphy.bands[band]->ht_cap.ampdu_factor) ||
+			sband->ht_cap.ampdu_factor) ||
 	     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
-			dev->wiphy.bands[band]->ht_cap.ampdu_density)))
-		goto nla_put_failure;
+			sband->ht_cap.ampdu_density)))
+		return -ENOBUFS;
 
 	/* add VHT info */
-	if (dev->wiphy.bands[band]->vht_cap.vht_supported &&
+	if (sband->vht_cap.vht_supported &&
 	    (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET,
-		     sizeof(dev->wiphy.bands[band]->vht_cap.vht_mcs),
-		     &dev->wiphy.bands[band]->vht_cap.vht_mcs) ||
+		     sizeof(sband->vht_cap.vht_mcs),
+		     &sband->vht_cap.vht_mcs) ||
 	     nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA,
-			 dev->wiphy.bands[band]->vht_cap.cap)))
-		goto nla_put_failure;
+			 sband->vht_cap.cap)))
+		return -ENOBUFS;
 
1035 /* add frequencies */ 1067 /* add bitrates */
1036 nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS); 1068 nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
1037 if (!nl_freqs) 1069 if (!nl_rates)
1038 goto nla_put_failure; 1070 return -ENOBUFS;
1039 1071
1040 for (i = 0; i < dev->wiphy.bands[band]->n_channels; i++) { 1072 for (i = 0; i < sband->n_bitrates; i++) {
1041 nl_freq = nla_nest_start(msg, i); 1073 nl_rate = nla_nest_start(msg, i);
1042 if (!nl_freq) 1074 if (!nl_rate)
1043 goto nla_put_failure; 1075 return -ENOBUFS;
1044 1076
1045 chan = &dev->wiphy.bands[band]->channels[i]; 1077 rate = &sband->bitrates[i];
1078 if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE,
1079 rate->bitrate))
1080 return -ENOBUFS;
1081 if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
1082 nla_put_flag(msg,
1083 NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE))
1084 return -ENOBUFS;
1046 1085
1047 if (nl80211_msg_put_channel(msg, chan)) 1086 nla_nest_end(msg, nl_rate);
1048 goto nla_put_failure; 1087 }
1049 1088
1050 nla_nest_end(msg, nl_freq); 1089 nla_nest_end(msg, nl_rates);
1051 }
1052 1090
1053 nla_nest_end(msg, nl_freqs); 1091 return 0;
1092}
1054 1093
1055 /* add bitrates */ 1094static int
1056 nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); 1095nl80211_send_mgmt_stypes(struct sk_buff *msg,
1057 if (!nl_rates) 1096 const struct ieee80211_txrx_stypes *mgmt_stypes)
1058 goto nla_put_failure; 1097{
1098 u16 stypes;
1099 struct nlattr *nl_ftypes, *nl_ifs;
1100 enum nl80211_iftype ift;
1101 int i;
1059 1102
1060 for (i = 0; i < dev->wiphy.bands[band]->n_bitrates; i++) { 1103 if (!mgmt_stypes)
1061 nl_rate = nla_nest_start(msg, i); 1104 return 0;
1062 if (!nl_rate)
1063 goto nla_put_failure;
1064 1105
1065 rate = &dev->wiphy.bands[band]->bitrates[i]; 1106 nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
1066 if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, 1107 if (!nl_ifs)
1067 rate->bitrate)) 1108 return -ENOBUFS;
1068 goto nla_put_failure;
1069 if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
1070 nla_put_flag(msg,
1071 NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE))
1072 goto nla_put_failure;
1073 1109
1074 nla_nest_end(msg, nl_rate); 1110 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
1111 nl_ftypes = nla_nest_start(msg, ift);
1112 if (!nl_ftypes)
1113 return -ENOBUFS;
1114 i = 0;
1115 stypes = mgmt_stypes[ift].tx;
1116 while (stypes) {
1117 if ((stypes & 1) &&
1118 nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
1119 (i << 4) | IEEE80211_FTYPE_MGMT))
1120 return -ENOBUFS;
1121 stypes >>= 1;
1122 i++;
1075 } 1123 }
1124 nla_nest_end(msg, nl_ftypes);
1125 }
1126
1127 nla_nest_end(msg, nl_ifs);
1076 1128
1077 nla_nest_end(msg, nl_rates); 1129 nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
1130 if (!nl_ifs)
1131 return -ENOBUFS;
1078 1132
1079 nla_nest_end(msg, nl_band); 1133 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
1134 nl_ftypes = nla_nest_start(msg, ift);
1135 if (!nl_ftypes)
1136 return -ENOBUFS;
1137 i = 0;
1138 stypes = mgmt_stypes[ift].rx;
1139 while (stypes) {
1140 if ((stypes & 1) &&
1141 nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
1142 (i << 4) | IEEE80211_FTYPE_MGMT))
1143 return -ENOBUFS;
1144 stypes >>= 1;
1145 i++;
1146 }
1147 nla_nest_end(msg, nl_ftypes);
1080 } 1148 }
1081 nla_nest_end(msg, nl_bands); 1149 nla_nest_end(msg, nl_ifs);
1082 1150
1083 nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); 1151 return 0;
1084 if (!nl_cmds) 1152}
1085 goto nla_put_failure;
1086 1153
1087 i = 0; 1154static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
1088#define CMD(op, n) \ 1155 struct sk_buff *msg, u32 portid, u32 seq,
1089 do { \ 1156 int flags, bool split, long *split_start,
1090 if (dev->ops->op) { \ 1157 long *band_start, long *chan_start)
1091 i++; \ 1158{
1092 if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ 1159 void *hdr;
1093 goto nla_put_failure; \ 1160 struct nlattr *nl_bands, *nl_band;
1094 } \ 1161 struct nlattr *nl_freqs, *nl_freq;
1095 } while (0) 1162 struct nlattr *nl_cmds;
1096 1163 enum ieee80211_band band;
1097 CMD(add_virtual_intf, NEW_INTERFACE); 1164 struct ieee80211_channel *chan;
1098 CMD(change_virtual_intf, SET_INTERFACE); 1165 int i;
1099 CMD(add_key, NEW_KEY); 1166 const struct ieee80211_txrx_stypes *mgmt_stypes =
1100 CMD(start_ap, START_AP); 1167 dev->wiphy.mgmt_stypes;
1101 CMD(add_station, NEW_STATION); 1168 long start = 0, start_chan = 0, start_band = 0;
1102 CMD(add_mpath, NEW_MPATH); 1169 u32 features;
1103 CMD(update_mesh_config, SET_MESH_CONFIG); 1170
1104 CMD(change_bss, SET_BSS); 1171 hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
1105 CMD(auth, AUTHENTICATE); 1172 if (!hdr)
1106 CMD(assoc, ASSOCIATE); 1173 return -ENOBUFS;
1107 CMD(deauth, DEAUTHENTICATE); 1174
1108 CMD(disassoc, DISASSOCIATE); 1175 /* allow always using the variables */
1109 CMD(join_ibss, JOIN_IBSS); 1176 if (!split) {
1110 CMD(join_mesh, JOIN_MESH); 1177 split_start = &start;
1111 CMD(set_pmksa, SET_PMKSA); 1178 band_start = &start_band;
1112 CMD(del_pmksa, DEL_PMKSA); 1179 chan_start = &start_chan;
1113 CMD(flush_pmksa, FLUSH_PMKSA);
1114 if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
1115 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
1116 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
1117 CMD(mgmt_tx, FRAME);
1118 CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
1119 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
1120 i++;
1121 if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
1122 goto nla_put_failure;
1123 } 1180 }
1124 if (dev->ops->set_monitor_channel || dev->ops->start_ap || 1181
1125 dev->ops->join_mesh) { 1182 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) ||
1126 i++; 1183 nla_put_string(msg, NL80211_ATTR_WIPHY_NAME,
1127 if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) 1184 wiphy_name(&dev->wiphy)) ||
1185 nla_put_u32(msg, NL80211_ATTR_GENERATION,
1186 cfg80211_rdev_list_generation))
1187 goto nla_put_failure;
1188
1189 switch (*split_start) {
1190 case 0:
1191 if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
1192 dev->wiphy.retry_short) ||
1193 nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
1194 dev->wiphy.retry_long) ||
1195 nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
1196 dev->wiphy.frag_threshold) ||
1197 nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
1198 dev->wiphy.rts_threshold) ||
1199 nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
1200 dev->wiphy.coverage_class) ||
1201 nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
1202 dev->wiphy.max_scan_ssids) ||
1203 nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
1204 dev->wiphy.max_sched_scan_ssids) ||
1205 nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
1206 dev->wiphy.max_scan_ie_len) ||
1207 nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
1208 dev->wiphy.max_sched_scan_ie_len) ||
1209 nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
1210 dev->wiphy.max_match_sets))
1128 goto nla_put_failure; 1211 goto nla_put_failure;
1129 } 1212
1130 CMD(set_wds_peer, SET_WDS_PEER); 1213 if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
1131 if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { 1214 nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN))
1132 CMD(tdls_mgmt, TDLS_MGMT); 1215 goto nla_put_failure;
1133 CMD(tdls_oper, TDLS_OPER); 1216 if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
1134 } 1217 nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))
1135 if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) 1218 goto nla_put_failure;
1136 CMD(sched_scan_start, START_SCHED_SCAN); 1219 if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
1137 CMD(probe_client, PROBE_CLIENT); 1220 nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD))
1138 CMD(set_noack_map, SET_NOACK_MAP); 1221 goto nla_put_failure;
1139 if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { 1222 if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
1140 i++; 1223 nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT))
1141 if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) 1224 goto nla_put_failure;
1225 if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
1226 nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT))
1227 goto nla_put_failure;
1228 if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
1229 nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
1142 goto nla_put_failure; 1230 goto nla_put_failure;
1143 }
1144 CMD(start_p2p_device, START_P2P_DEVICE);
1145 CMD(set_mcast_rate, SET_MCAST_RATE);
1146 1231
1147#ifdef CONFIG_NL80211_TESTMODE 1232 (*split_start)++;
1148 CMD(testmode_cmd, TESTMODE); 1233 if (split)
1149#endif 1234 break;
1235 case 1:
1236 if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
1237 sizeof(u32) * dev->wiphy.n_cipher_suites,
1238 dev->wiphy.cipher_suites))
1239 goto nla_put_failure;
1150 1240
1151#undef CMD 1241 if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
1242 dev->wiphy.max_num_pmkids))
1243 goto nla_put_failure;
1152 1244
1153 if (dev->ops->connect || dev->ops->auth) { 1245 if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
1154 i++; 1246 nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE))
1155 if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
1156 goto nla_put_failure; 1247 goto nla_put_failure;
1157 }
1158 1248
1159 if (dev->ops->disconnect || dev->ops->deauth) { 1249 if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
1160 i++; 1250 dev->wiphy.available_antennas_tx) ||
1161 if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) 1251 nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
1252 dev->wiphy.available_antennas_rx))
1162 goto nla_put_failure; 1253 goto nla_put_failure;
1163 }
1164 1254
1165 nla_nest_end(msg, nl_cmds); 1255 if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
1256 nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
1257 dev->wiphy.probe_resp_offload))
1258 goto nla_put_failure;
1166 1259
1167 if (dev->ops->remain_on_channel && 1260 if ((dev->wiphy.available_antennas_tx ||
1168 (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && 1261 dev->wiphy.available_antennas_rx) &&
1169 nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, 1262 dev->ops->get_antenna) {
1170 dev->wiphy.max_remain_on_channel_duration)) 1263 u32 tx_ant = 0, rx_ant = 0;
1171 goto nla_put_failure; 1264 int res;
1265 res = rdev_get_antenna(dev, &tx_ant, &rx_ant);
1266 if (!res) {
1267 if (nla_put_u32(msg,
1268 NL80211_ATTR_WIPHY_ANTENNA_TX,
1269 tx_ant) ||
1270 nla_put_u32(msg,
1271 NL80211_ATTR_WIPHY_ANTENNA_RX,
1272 rx_ant))
1273 goto nla_put_failure;
1274 }
1275 }
1172 1276
1173 if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && 1277 (*split_start)++;
1174 nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) 1278 if (split)
1175 goto nla_put_failure; 1279 break;
1280 case 2:
1281 if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
1282 dev->wiphy.interface_modes))
1283 goto nla_put_failure;
1284 (*split_start)++;
1285 if (split)
1286 break;
1287 case 3:
1288 nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
1289 if (!nl_bands)
1290 goto nla_put_failure;
1176 1291
1177 if (mgmt_stypes) { 1292 for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) {
1178 u16 stypes; 1293 struct ieee80211_supported_band *sband;
1179 struct nlattr *nl_ftypes, *nl_ifs;
1180 enum nl80211_iftype ift;
1181 1294
1182 nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); 1295 sband = dev->wiphy.bands[band];
1183 if (!nl_ifs)
1184 goto nla_put_failure;
1185 1296
1186 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { 1297 if (!sband)
1187 nl_ftypes = nla_nest_start(msg, ift); 1298 continue;
1188 if (!nl_ftypes) 1299
1300 nl_band = nla_nest_start(msg, band);
1301 if (!nl_band)
1189 goto nla_put_failure; 1302 goto nla_put_failure;
1190 i = 0; 1303
1191 stypes = mgmt_stypes[ift].tx; 1304 switch (*chan_start) {
1192 while (stypes) { 1305 case 0:
1193 if ((stypes & 1) && 1306 if (nl80211_send_band_rateinfo(msg, sband))
1194 nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
1195 (i << 4) | IEEE80211_FTYPE_MGMT))
1196 goto nla_put_failure; 1307 goto nla_put_failure;
1197 stypes >>= 1; 1308 (*chan_start)++;
1198 i++; 1309 if (split)
1310 break;
1311 default:
1312 /* add frequencies */
1313 nl_freqs = nla_nest_start(
1314 msg, NL80211_BAND_ATTR_FREQS);
1315 if (!nl_freqs)
1316 goto nla_put_failure;
1317
1318 for (i = *chan_start - 1;
1319 i < sband->n_channels;
1320 i++) {
1321 nl_freq = nla_nest_start(msg, i);
1322 if (!nl_freq)
1323 goto nla_put_failure;
1324
1325 chan = &sband->channels[i];
1326
1327 if (nl80211_msg_put_channel(msg, chan,
1328 split))
1329 goto nla_put_failure;
1330
1331 nla_nest_end(msg, nl_freq);
1332 if (split)
1333 break;
1334 }
1335 if (i < sband->n_channels)
1336 *chan_start = i + 2;
1337 else
1338 *chan_start = 0;
1339 nla_nest_end(msg, nl_freqs);
1340 }
1341
1342 nla_nest_end(msg, nl_band);
1343
1344 if (split) {
1345 /* start again here */
1346 if (*chan_start)
1347 band--;
1348 break;
1199 } 1349 }
1200 nla_nest_end(msg, nl_ftypes);
1201 } 1350 }
1351 nla_nest_end(msg, nl_bands);
1202 1352
1203 nla_nest_end(msg, nl_ifs); 1353 if (band < IEEE80211_NUM_BANDS)
1354 *band_start = band + 1;
1355 else
1356 *band_start = 0;
1204 1357
1205 nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); 1358 /* if bands & channels are done, continue outside */
1206 if (!nl_ifs) 1359 if (*band_start == 0 && *chan_start == 0)
1360 (*split_start)++;
1361 if (split)
1362 break;
1363 case 4:
1364 nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
1365 if (!nl_cmds)
1207 goto nla_put_failure; 1366 goto nla_put_failure;
1208 1367
1209 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { 1368 i = 0;
1210 nl_ftypes = nla_nest_start(msg, ift); 1369#define CMD(op, n) \
1211 if (!nl_ftypes) 1370 do { \
1371 if (dev->ops->op) { \
1372 i++; \
1373 if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
1374 goto nla_put_failure; \
1375 } \
1376 } while (0)
1377
1378 CMD(add_virtual_intf, NEW_INTERFACE);
1379 CMD(change_virtual_intf, SET_INTERFACE);
1380 CMD(add_key, NEW_KEY);
1381 CMD(start_ap, START_AP);
1382 CMD(add_station, NEW_STATION);
1383 CMD(add_mpath, NEW_MPATH);
1384 CMD(update_mesh_config, SET_MESH_CONFIG);
1385 CMD(change_bss, SET_BSS);
1386 CMD(auth, AUTHENTICATE);
1387 CMD(assoc, ASSOCIATE);
1388 CMD(deauth, DEAUTHENTICATE);
1389 CMD(disassoc, DISASSOCIATE);
1390 CMD(join_ibss, JOIN_IBSS);
1391 CMD(join_mesh, JOIN_MESH);
1392 CMD(set_pmksa, SET_PMKSA);
1393 CMD(del_pmksa, DEL_PMKSA);
1394 CMD(flush_pmksa, FLUSH_PMKSA);
1395 if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
1396 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
1397 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
1398 CMD(mgmt_tx, FRAME);
1399 CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
1400 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
1401 i++;
1402 if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
1212 goto nla_put_failure; 1403 goto nla_put_failure;
1213 i = 0;
1214 stypes = mgmt_stypes[ift].rx;
1215 while (stypes) {
1216 if ((stypes & 1) &&
1217 nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
1218 (i << 4) | IEEE80211_FTYPE_MGMT))
1219 goto nla_put_failure;
1220 stypes >>= 1;
1221 i++;
1222 }
1223 nla_nest_end(msg, nl_ftypes);
1224 } 1404 }
1225 nla_nest_end(msg, nl_ifs); 1405 if (dev->ops->set_monitor_channel || dev->ops->start_ap ||
1226 } 1406 dev->ops->join_mesh) {
1407 i++;
1408 if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
1409 goto nla_put_failure;
1410 }
1411 CMD(set_wds_peer, SET_WDS_PEER);
1412 if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
1413 CMD(tdls_mgmt, TDLS_MGMT);
1414 CMD(tdls_oper, TDLS_OPER);
1415 }
1416 if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
1417 CMD(sched_scan_start, START_SCHED_SCAN);
1418 CMD(probe_client, PROBE_CLIENT);
1419 CMD(set_noack_map, SET_NOACK_MAP);
1420 if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
1421 i++;
1422 if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
1423 goto nla_put_failure;
1424 }
1425 CMD(start_p2p_device, START_P2P_DEVICE);
1426 CMD(set_mcast_rate, SET_MCAST_RATE);
1427 if (split) {
1428 CMD(crit_proto_start, CRIT_PROTOCOL_START);
1429 CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
1430 }
1227 1431
1228#ifdef CONFIG_PM 1432#ifdef CONFIG_NL80211_TESTMODE
1229 if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) { 1433 CMD(testmode_cmd, TESTMODE);
1230 struct nlattr *nl_wowlan; 1434#endif
1231 1435
1232 nl_wowlan = nla_nest_start(msg, 1436#undef CMD
1233 NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
1234 if (!nl_wowlan)
1235 goto nla_put_failure;
1236 1437
1237 if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && 1438 if (dev->ops->connect || dev->ops->auth) {
1238 nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || 1439 i++;
1239 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && 1440 if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
1240 nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
1241 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) &&
1242 nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
1243 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
1244 nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
1245 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
1246 nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
1247 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
1248 nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
1249 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
1250 nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
1251 ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
1252 nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
1253 goto nla_put_failure;
1254 if (dev->wiphy.wowlan.n_patterns) {
1255 struct nl80211_wowlan_pattern_support pat = {
1256 .max_patterns = dev->wiphy.wowlan.n_patterns,
1257 .min_pattern_len =
1258 dev->wiphy.wowlan.pattern_min_len,
1259 .max_pattern_len =
1260 dev->wiphy.wowlan.pattern_max_len,
1261 .max_pkt_offset =
1262 dev->wiphy.wowlan.max_pkt_offset,
1263 };
1264 if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
1265 sizeof(pat), &pat))
1266 goto nla_put_failure; 1441 goto nla_put_failure;
1267 } 1442 }
1268 1443
1269 nla_nest_end(msg, nl_wowlan); 1444 if (dev->ops->disconnect || dev->ops->deauth) {
1270 } 1445 i++;
1446 if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
1447 goto nla_put_failure;
1448 }
1449
1450 nla_nest_end(msg, nl_cmds);
1451 (*split_start)++;
1452 if (split)
1453 break;
1454 case 5:
1455 if (dev->ops->remain_on_channel &&
1456 (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
1457 nla_put_u32(msg,
1458 NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
1459 dev->wiphy.max_remain_on_channel_duration))
1460 goto nla_put_failure;
1461
1462 if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) &&
1463 nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK))
1464 goto nla_put_failure;
1465
1466 if (nl80211_send_mgmt_stypes(msg, mgmt_stypes))
1467 goto nla_put_failure;
1468 (*split_start)++;
1469 if (split)
1470 break;
1471 case 6:
1472#ifdef CONFIG_PM
1473 if (nl80211_send_wowlan(msg, dev, split))
1474 goto nla_put_failure;
1475 (*split_start)++;
1476 if (split)
1477 break;
1478#else
1479 (*split_start)++;
1271#endif 1480#endif
1481 case 7:
1482 if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
1483 dev->wiphy.software_iftypes))
1484 goto nla_put_failure;
1272 1485
1273 if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, 1486 if (nl80211_put_iface_combinations(&dev->wiphy, msg, split))
1274 dev->wiphy.software_iftypes)) 1487 goto nla_put_failure;
1275 goto nla_put_failure;
1276 1488
1277 if (nl80211_put_iface_combinations(&dev->wiphy, msg)) 1489 (*split_start)++;
1278 goto nla_put_failure; 1490 if (split)
1491 break;
1492 case 8:
1493 if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
1494 nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME,
1495 dev->wiphy.ap_sme_capa))
1496 goto nla_put_failure;
1279 1497
1280 if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && 1498 features = dev->wiphy.features;
1281 nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, 1499 /*
1282 dev->wiphy.ap_sme_capa)) 1500 * We can only add the per-channel limit information if the
1283 goto nla_put_failure; 1501 * dump is split, otherwise it makes it too big. Therefore
1502 * only advertise it in that case.
1503 */
1504 if (split)
1505 features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS;
1506 if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features))
1507 goto nla_put_failure;
1284 1508
1285 if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, 1509 if (dev->wiphy.ht_capa_mod_mask &&
1286 dev->wiphy.features)) 1510 nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK,
1287 goto nla_put_failure; 1511 sizeof(*dev->wiphy.ht_capa_mod_mask),
1512 dev->wiphy.ht_capa_mod_mask))
1513 goto nla_put_failure;
1288 1514
1289 if (dev->wiphy.ht_capa_mod_mask && 1515 if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME &&
1290 nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, 1516 dev->wiphy.max_acl_mac_addrs &&
1291 sizeof(*dev->wiphy.ht_capa_mod_mask), 1517 nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX,
1292 dev->wiphy.ht_capa_mod_mask)) 1518 dev->wiphy.max_acl_mac_addrs))
1293 goto nla_put_failure; 1519 goto nla_put_failure;
1294 1520
1295 if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && 1521 /*
1296 dev->wiphy.max_acl_mac_addrs && 1522 * Any information below this point is only available to
1297 nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, 1523 * applications that can deal with it being split. This
1298 dev->wiphy.max_acl_mac_addrs)) 1524 * helps ensure that newly added capabilities don't break
1299 goto nla_put_failure; 1525 * older tools by overrunning their buffers.
1526 *
1527 * We still increment split_start so that in the split
1528 * case we'll continue with more data in the next round,
1529 * but break unconditionally so unsplit data stops here.
1530 */
1531 (*split_start)++;
1532 break;
1533 case 9:
1534 if (dev->wiphy.extended_capabilities &&
1535 (nla_put(msg, NL80211_ATTR_EXT_CAPA,
1536 dev->wiphy.extended_capabilities_len,
1537 dev->wiphy.extended_capabilities) ||
1538 nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK,
1539 dev->wiphy.extended_capabilities_len,
1540 dev->wiphy.extended_capabilities_mask)))
1541 goto nla_put_failure;
1300 1542
1543 if (dev->wiphy.vht_capa_mod_mask &&
1544 nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK,
1545 sizeof(*dev->wiphy.vht_capa_mod_mask),
1546 dev->wiphy.vht_capa_mod_mask))
1547 goto nla_put_failure;
1548
1549 /* done */
1550 *split_start = 0;
1551 break;
1552 }
1301 return genlmsg_end(msg, hdr); 1553 return genlmsg_end(msg, hdr);
1302 1554
1303 nla_put_failure: 1555 nla_put_failure:
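
[Editor's note] The rewritten nl80211_send_wiphy() above hinges on a deliberately fall-through switch that records its position in *split_start: each case emits one chunk of wiphy data, increments the counter, and breaks out early only when the caller asked for a split dump, so an unsplit caller still gets everything in one pass. A minimal, self-contained sketch of that resume pattern follows; all names in it (emit_chunk, NUM_CHUNKS, send) are illustrative stand-ins, not code from the patch.

    /* Sketch: a resumable fall-through switch; illustrative names only. */
    #include <stdio.h>

    #define NUM_CHUNKS 3

    static int emit_chunk(int n)            /* pretend to serialize chunk n */
    {
        printf("chunk %d\n", n);
        return 0;
    }

    /* Leaves *state pointing at the next chunk; a split caller simply
     * re-invokes until *state wraps back to 0. */
    static void send(int split, long *state)
    {
        switch (*state) {
        case 0:
            emit_chunk(0);
            (*state)++;
            if (split)
                break;
            /* fall through */
        case 1:
            emit_chunk(1);
            (*state)++;
            if (split)
                break;
            /* fall through */
        case 2:
            emit_chunk(2);
            *state = 0;                     /* done, reset for next dump */
            break;
        }
    }

    int main(void)
    {
        long state = 0;
        do {                                /* split mode: one chunk per call */
            send(1, &state);
        } while (state != 0);
        return 0;
    }
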
@@ -1310,39 +1562,80 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
 	int idx = 0, ret;
 	int start = cb->args[0];
 	struct cfg80211_registered_device *dev;
+	s64 filter_wiphy = -1;
+	bool split = false;
+	struct nlattr **tb = nl80211_fam.attrbuf;
+	int res;
 
 	mutex_lock(&cfg80211_mutex);
+	res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
+			  tb, nl80211_fam.maxattr, nl80211_policy);
+	if (res == 0) {
+		split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP];
+		if (tb[NL80211_ATTR_WIPHY])
+			filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]);
+		if (tb[NL80211_ATTR_WDEV])
+			filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32;
+		if (tb[NL80211_ATTR_IFINDEX]) {
+			struct net_device *netdev;
+			int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]);
+
+			netdev = dev_get_by_index(sock_net(skb->sk), ifidx);
+			if (!netdev) {
+				mutex_unlock(&cfg80211_mutex);
+				return -ENODEV;
+			}
+			if (netdev->ieee80211_ptr) {
+				dev = wiphy_to_dev(
+					netdev->ieee80211_ptr->wiphy);
+				filter_wiphy = dev->wiphy_idx;
+			}
+			dev_put(netdev);
+		}
+	}
+
 	list_for_each_entry(dev, &cfg80211_rdev_list, list) {
 		if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk)))
 			continue;
 		if (++idx <= start)
 			continue;
-		ret = nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid,
-					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					 dev);
-		if (ret < 0) {
-			/*
-			 * If sending the wiphy data didn't fit (ENOBUFS or
-			 * EMSGSIZE returned), this SKB is still empty (so
-			 * it's not too big because another wiphy dataset is
-			 * already in the skb) and we've not tried to adjust
-			 * the dump allocation yet ... then adjust the alloc
-			 * size to be bigger, and return 1 but with the empty
-			 * skb. This results in an empty message being RX'ed
-			 * in userspace, but that is ignored.
-			 *
-			 * We can then retry with the larger buffer.
-			 */
-			if ((ret == -ENOBUFS || ret == -EMSGSIZE) &&
-			    !skb->len &&
-			    cb->min_dump_alloc < 4096) {
-				cb->min_dump_alloc = 4096;
-				mutex_unlock(&cfg80211_mutex);
-				return 1;
-			}
-			idx--;
-			break;
-		}
+		if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy)
+			continue;
+		/* attempt to fit multiple wiphy data chunks into the skb */
+		do {
+			ret = nl80211_send_wiphy(dev, skb,
+						 NETLINK_CB(cb->skb).portid,
+						 cb->nlh->nlmsg_seq,
+						 NLM_F_MULTI,
+						 split, &cb->args[1],
+						 &cb->args[2],
+						 &cb->args[3]);
+			if (ret < 0) {
+				/*
+				 * If sending the wiphy data didn't fit (ENOBUFS
+				 * or EMSGSIZE returned), this SKB is still
+				 * empty (so it's not too big because another
+				 * wiphy dataset is already in the skb) and
+				 * we've not tried to adjust the dump allocation
+				 * yet ... then adjust the alloc size to be
+				 * bigger, and return 1 but with the empty skb.
+				 * This results in an empty message being RX'ed
+				 * in userspace, but that is ignored.
+				 *
+				 * We can then retry with the larger buffer.
+				 */
+				if ((ret == -ENOBUFS || ret == -EMSGSIZE) &&
+				    !skb->len &&
+				    cb->min_dump_alloc < 4096) {
+					cb->min_dump_alloc = 4096;
+					mutex_unlock(&cfg80211_mutex);
+					return 1;
+				}
+				idx--;
+				break;
+			}
+		} while (cb->args[1] > 0);
+		break;
 	}
 	mutex_unlock(&cfg80211_mutex);
 
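
[Editor's note] From userspace, the NL80211_ATTR_SPLIT_WIPHY_DUMP flag parsed above is what opts a GET_WIPHY dump into the new chunked format. A hedged sketch of such a request using libnl-3's genl API; this helper is illustrative, not part of the patch, and error handling is omitted for brevity.

    /* Illustrative userspace sketch (libnl-3): request a split wiphy dump. */
    #include <netlink/netlink.h>
    #include <netlink/genl/genl.h>
    #include <netlink/genl/ctrl.h>
    #include <linux/nl80211.h>

    int request_split_wiphy_dump(void)
    {
        struct nl_sock *sk = nl_socket_alloc();
        struct nl_msg *msg;
        int family;

        genl_connect(sk);
        family = genl_ctrl_resolve(sk, "nl80211");

        msg = nlmsg_alloc();
        genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0,
                    NLM_F_DUMP, NL80211_CMD_GET_WIPHY, 0);
        /* opt in to the chunked (split) dump format */
        nla_put_flag(msg, NL80211_ATTR_SPLIT_WIPHY_DUMP);

        nl_send_auto(sk, msg);
        nlmsg_free(msg);
        nl_recvmsgs_default(sk);    /* one NEW_WIPHY message per chunk */
        nl_socket_free(sk);
        return 0;
    }
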
@@ -1360,7 +1653,8 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
 	if (!msg)
 		return -ENOMEM;
 
-	if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) {
+	if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0,
+			       false, NULL, NULL, NULL) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
 	}
@@ -2967,6 +3261,7 @@ static int parse_station_flags(struct genl_info *info,
 	sta_flags = nla_data(nla);
 	params->sta_flags_mask = sta_flags->mask;
 	params->sta_flags_set = sta_flags->set;
+	params->sta_flags_set &= params->sta_flags_mask;
 	if ((params->sta_flags_mask |
 	     params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID))
 		return -EINVAL;
@@ -3241,15 +3536,20 @@ static int nl80211_dump_station(struct sk_buff *skb,
 {
 	struct station_info sinfo;
 	struct cfg80211_registered_device *dev;
-	struct net_device *netdev;
+	struct wireless_dev *wdev;
 	u8 mac_addr[ETH_ALEN];
-	int sta_idx = cb->args[1];
+	int sta_idx = cb->args[2];
 	int err;
 
-	err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
+	err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
 	if (err)
 		return err;
 
+	if (!wdev->netdev) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
 	if (!dev->ops->dump_station) {
 		err = -EOPNOTSUPP;
 		goto out_err;
@@ -3257,7 +3557,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
 
 	while (1) {
 		memset(&sinfo, 0, sizeof(sinfo));
-		err = rdev_dump_station(dev, netdev, sta_idx,
+		err = rdev_dump_station(dev, wdev->netdev, sta_idx,
 					mac_addr, &sinfo);
 		if (err == -ENOENT)
 			break;
@@ -3267,7 +3567,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
 		if (nl80211_send_station(skb,
 				NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				dev, netdev, mac_addr,
+				dev, wdev->netdev, mac_addr,
 				&sinfo) < 0)
 			goto out;
 
@@ -3276,10 +3576,10 @@ static int nl80211_dump_station(struct sk_buff *skb,
 
 
  out:
-	cb->args[1] = sta_idx;
+	cb->args[2] = sta_idx;
 	err = skb->len;
  out_err:
-	nl80211_finish_netdev_dump(dev);
+	nl80211_finish_wdev_dump(dev);
 
 	return err;
 }
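
[Editor's note] Note the cursor shift from cb->args[1] to cb->args[2] in this and the following dump functions. The assumption behind it: the new nl80211_prepare_wdev_dump() reserves the leading args slots for re-locating the wiphy/wdev between dump invocations, so per-dump iteration state must move up one slot. A generic sketch of the convention; demo_* names are hypothetical.

    /* Sketch: a netlink dump callback resuming from cb->args[2]. */
    static int demo_fill_entry(struct sk_buff *skb, int idx); /* stub */

    static int demo_dump(struct sk_buff *skb, struct netlink_callback *cb)
    {
        int idx = cb->args[2];      /* args[0]/args[1]: assumed reserved
                                     * for the wdev lookup */

        while (demo_fill_entry(skb, idx) == 0) /* stop when the skb fills */
            idx++;

        cb->args[2] = idx;          /* persist cursor for the next call */
        return skb->len;            /* non-zero: dump continues */
    }
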
@@ -3320,6 +3620,136 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
 	return genlmsg_reply(msg, info);
 }
 
+int cfg80211_check_station_change(struct wiphy *wiphy,
+				  struct station_parameters *params,
+				  enum cfg80211_station_type statype)
+{
+	if (params->listen_interval != -1)
+		return -EINVAL;
+	if (params->aid)
+		return -EINVAL;
+
+	/* When you run into this, adjust the code below for the new flag */
+	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
+
+	switch (statype) {
+	case CFG80211_STA_MESH_PEER_KERNEL:
+	case CFG80211_STA_MESH_PEER_USER:
+		/*
+		 * No ignoring the TDLS flag here -- the userspace mesh
+		 * code doesn't have the bug of including TDLS in the
+		 * mask everywhere.
+		 */
+		if (params->sta_flags_mask &
+		    ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+		      BIT(NL80211_STA_FLAG_MFP) |
+		      BIT(NL80211_STA_FLAG_AUTHORIZED)))
+			return -EINVAL;
+		break;
+	case CFG80211_STA_TDLS_PEER_SETUP:
+	case CFG80211_STA_TDLS_PEER_ACTIVE:
+		if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
+			return -EINVAL;
+		/* ignore since it can't change */
+		params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
+		break;
+	default:
+		/* disallow mesh-specific things */
+		if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
+			return -EINVAL;
+		if (params->local_pm)
+			return -EINVAL;
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
+			return -EINVAL;
+	}
+
+	if (statype != CFG80211_STA_TDLS_PEER_SETUP &&
+	    statype != CFG80211_STA_TDLS_PEER_ACTIVE) {
+		/* TDLS can't be set, ... */
+		if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
+			return -EINVAL;
+		/*
+		 * ... but don't bother the driver with it. This works around
+		 * a hostapd/wpa_supplicant issue -- it always includes the
+		 * TLDS_PEER flag in the mask even for AP mode.
+		 */
+		params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
+	}
+
+	if (statype != CFG80211_STA_TDLS_PEER_SETUP) {
+		/* reject other things that can't change */
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD)
+			return -EINVAL;
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY)
+			return -EINVAL;
+		if (params->supported_rates)
+			return -EINVAL;
+		if (params->ext_capab || params->ht_capa || params->vht_capa)
+			return -EINVAL;
+	}
+
+	if (statype != CFG80211_STA_AP_CLIENT) {
+		if (params->vlan)
+			return -EINVAL;
+	}
+
+	switch (statype) {
+	case CFG80211_STA_AP_MLME_CLIENT:
+		/* Use this only for authorizing/unauthorizing a station */
+		if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)))
+			return -EOPNOTSUPP;
+		break;
+	case CFG80211_STA_AP_CLIENT:
+		/* accept only the listed bits */
+		if (params->sta_flags_mask &
+		    ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
+		      BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+		      BIT(NL80211_STA_FLAG_ASSOCIATED) |
+		      BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
+		      BIT(NL80211_STA_FLAG_WME) |
+		      BIT(NL80211_STA_FLAG_MFP)))
+			return -EINVAL;
+
+		/* but authenticated/associated only if driver handles it */
+		if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
+		    params->sta_flags_mask &
+				(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+				 BIT(NL80211_STA_FLAG_ASSOCIATED)))
+			return -EINVAL;
+		break;
+	case CFG80211_STA_IBSS:
+	case CFG80211_STA_AP_STA:
+		/* reject any changes other than AUTHORIZED */
+		if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
+			return -EINVAL;
+		break;
+	case CFG80211_STA_TDLS_PEER_SETUP:
+		/* reject any changes other than AUTHORIZED or WME */
+		if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
+					       BIT(NL80211_STA_FLAG_WME)))
+			return -EINVAL;
+		/* force (at least) rates when authorizing */
+		if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) &&
+		    !params->supported_rates)
+			return -EINVAL;
+		break;
+	case CFG80211_STA_TDLS_PEER_ACTIVE:
+		/* reject any changes */
+		return -EINVAL;
+	case CFG80211_STA_MESH_PEER_KERNEL:
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
+			return -EINVAL;
+		break;
+	case CFG80211_STA_MESH_PEER_USER:
+		if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
+			return -EINVAL;
+		break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(cfg80211_check_station_change);
+
 /*
  * Get vlan interface making sure it is running and on the right wiphy.
  */
@@ -3342,6 +3772,13 @@ static struct net_device *get_vlan(struct genl_info *info,
 		goto error;
 	}
 
+	if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
+	    v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
+	    v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
+		ret = -EINVAL;
+		goto error;
+	}
+
 	if (!netif_running(v)) {
 		ret = -ENETDOWN;
 		goto error;
@@ -3359,21 +3796,13 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = {
 	[NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },
 };
 
-static int nl80211_set_station_tdls(struct genl_info *info,
-				    struct station_parameters *params)
+static int nl80211_parse_sta_wme(struct genl_info *info,
+				 struct station_parameters *params)
 {
 	struct nlattr *tb[NL80211_STA_WME_MAX + 1];
 	struct nlattr *nla;
 	int err;
 
-	/* Dummy STA entry gets updated once the peer capabilities are known */
-	if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
-		params->ht_capa =
-			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
-	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
-		params->vht_capa =
-			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
-
 	/* parse WME attributes if present */
 	if (!info->attrs[NL80211_ATTR_STA_WME])
 		return 0;
@@ -3401,18 +3830,34 @@ static int nl80211_set_station_tdls(struct genl_info *info,
 	return 0;
 }
 
+static int nl80211_set_station_tdls(struct genl_info *info,
+				    struct station_parameters *params)
+{
+	/* Dummy STA entry gets updated once the peer capabilities are known */
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
+		params->ht_capa =
+			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
+	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
+		params->vht_capa =
+			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
+
+	return nl80211_parse_sta_wme(info, params);
+}
+
 static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
-	int err;
 	struct net_device *dev = info->user_ptr[1];
 	struct station_parameters params;
-	u8 *mac_addr = NULL;
+	u8 *mac_addr;
+	int err;
 
 	memset(&params, 0, sizeof(params));
 
 	params.listen_interval = -1;
-	params.plink_state = -1;
+
+	if (!rdev->ops->change_station)
+		return -EOPNOTSUPP;
 
 	if (info->attrs[NL80211_ATTR_STA_AID])
 		return -EINVAL;
@@ -3445,19 +3890,23 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
 		return -EINVAL;
 
-	if (!rdev->ops->change_station)
-		return -EOPNOTSUPP;
-
 	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 		return -EINVAL;
 
-	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
+	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
 		params.plink_action =
 			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
+		if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
+			return -EINVAL;
+	}
 
-	if (info->attrs[NL80211_ATTR_STA_PLINK_STATE])
+	if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) {
 		params.plink_state =
 			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
+		if (params.plink_state >= NUM_NL80211_PLINK_STATES)
+			return -EINVAL;
+		params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE;
+	}
 
 	if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) {
 		enum nl80211_mesh_power_mode pm = nla_get_u32(
@@ -3470,127 +3919,33 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		params.local_pm = pm;
 	}
 
+	/* Include parameters for TDLS peer (will check later) */
+	err = nl80211_set_station_tdls(info, &params);
+	if (err)
+		return err;
+
+	params.vlan = get_vlan(info, rdev);
+	if (IS_ERR(params.vlan))
+		return PTR_ERR(params.vlan);
+
 	switch (dev->ieee80211_ptr->iftype) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_P2P_GO:
-		/* disallow mesh-specific things */
-		if (params.plink_action)
-			return -EINVAL;
-		if (params.local_pm)
-			return -EINVAL;
-
-		/* TDLS can't be set, ... */
-		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
-			return -EINVAL;
-		/*
-		 * ... but don't bother the driver with it. This works around
-		 * a hostapd/wpa_supplicant issue -- it always includes the
-		 * TLDS_PEER flag in the mask even for AP mode.
-		 */
-		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
-
-		/* accept only the listed bits */
-		if (params.sta_flags_mask &
-		    ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
-		      BIT(NL80211_STA_FLAG_AUTHENTICATED) |
-		      BIT(NL80211_STA_FLAG_ASSOCIATED) |
-		      BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
-		      BIT(NL80211_STA_FLAG_WME) |
-		      BIT(NL80211_STA_FLAG_MFP)))
-			return -EINVAL;
-
-		/* but authenticated/associated only if driver handles it */
-		if (!(rdev->wiphy.features &
-				NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
-		    params.sta_flags_mask &
-				(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
-				 BIT(NL80211_STA_FLAG_ASSOCIATED)))
-			return -EINVAL;
-
-		/* reject other things that can't change */
-		if (params.supported_rates)
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_STA_CAPABILITY])
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY])
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
-		    info->attrs[NL80211_ATTR_VHT_CAPABILITY])
-			return -EINVAL;
-
-		/* must be last in here for error handling */
-		params.vlan = get_vlan(info, rdev);
-		if (IS_ERR(params.vlan))
-			return PTR_ERR(params.vlan);
-		break;
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_STATION:
-		/*
-		 * Don't allow userspace to change the TDLS_PEER flag,
-		 * but silently ignore attempts to change it since we
-		 * don't have state here to verify that it doesn't try
-		 * to change the flag.
-		 */
-		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
-		/* Include parameters for TDLS peer (driver will check) */
-		err = nl80211_set_station_tdls(info, &params);
-		if (err)
-			return err;
-		/* disallow things sta doesn't support */
-		if (params.plink_action)
-			return -EINVAL;
-		if (params.local_pm)
-			return -EINVAL;
-		/* reject any changes other than AUTHORIZED or WME (for TDLS) */
-		if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
-					      BIT(NL80211_STA_FLAG_WME)))
-			return -EINVAL;
-		break;
 	case NL80211_IFTYPE_ADHOC:
-		/* disallow things sta doesn't support */
-		if (params.plink_action)
-			return -EINVAL;
-		if (params.local_pm)
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
-		    info->attrs[NL80211_ATTR_VHT_CAPABILITY])
-			return -EINVAL;
-		/* reject any changes other than AUTHORIZED */
-		if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
-			return -EINVAL;
-		break;
 	case NL80211_IFTYPE_MESH_POINT:
-		/* disallow things mesh doesn't support */
-		if (params.vlan)
-			return -EINVAL;
-		if (params.supported_rates)
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_STA_CAPABILITY])
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY])
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
-		    info->attrs[NL80211_ATTR_VHT_CAPABILITY])
-			return -EINVAL;
-		/*
-		 * No special handling for TDLS here -- the userspace
-		 * mesh code doesn't have this bug.
-		 */
-		if (params.sta_flags_mask &
-		    ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
-		      BIT(NL80211_STA_FLAG_MFP) |
-		      BIT(NL80211_STA_FLAG_AUTHORIZED)))
-			return -EINVAL;
 		break;
 	default:
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto out_put_vlan;
 	}
 
-	/* be aware of params.vlan when changing code here */
-
+	/* driver will call cfg80211_check_station_change() */
 	err = rdev_change_station(rdev, dev, mac_addr, &params);
 
+ out_put_vlan:
 	if (params.vlan)
 		dev_put(params.vlan);
 
@@ -3607,6 +3962,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 
 	memset(&params, 0, sizeof(params));
 
+	if (!rdev->ops->add_station)
+		return -EOPNOTSUPP;
+
 	if (!info->attrs[NL80211_ATTR_MAC])
 		return -EINVAL;
 
@@ -3652,50 +4010,32 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		params.vht_capa =
 			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
 
-	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
+	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
 		params.plink_action =
 			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
+		if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
+			return -EINVAL;
+	}
 
-	if (!rdev->ops->add_station)
-		return -EOPNOTSUPP;
+	err = nl80211_parse_sta_wme(info, &params);
+	if (err)
+		return err;
 
 	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 		return -EINVAL;
 
+	/* When you run into this, adjust the code below for the new flag */
+	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
+
 	switch (dev->ieee80211_ptr->iftype) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_P2P_GO:
-		/* parse WME attributes if sta is WME capable */
-		if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
-		    (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) &&
-		    info->attrs[NL80211_ATTR_STA_WME]) {
-			struct nlattr *tb[NL80211_STA_WME_MAX + 1];
-			struct nlattr *nla;
-
-			nla = info->attrs[NL80211_ATTR_STA_WME];
-			err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla,
-					       nl80211_sta_wme_policy);
-			if (err)
-				return err;
-
-			if (tb[NL80211_STA_WME_UAPSD_QUEUES])
-				params.uapsd_queues =
-					nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]);
-			if (params.uapsd_queues &
-			    ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK)
-				return -EINVAL;
-
-			if (tb[NL80211_STA_WME_MAX_SP])
-				params.max_sp =
-					nla_get_u8(tb[NL80211_STA_WME_MAX_SP]);
-
-			if (params.max_sp &
-			    ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK)
-				return -EINVAL;
-
-			params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD;
-		}
+		/* ignore WME attributes if iface/sta is not capable */
+		if (!(rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) ||
+		    !(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)))
+			params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
+
 		/* TDLS peers cannot be added */
 		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
 			return -EINVAL;
@@ -3716,6 +4056,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 			return PTR_ERR(params.vlan);
 		break;
 	case NL80211_IFTYPE_MESH_POINT:
+		/* ignore uAPSD data */
+		params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
+
 		/* associated is disallowed */
 		if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED))
 			return -EINVAL;
@@ -3724,8 +4067,14 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;
 		break;
 	case NL80211_IFTYPE_STATION:
-		/* associated is disallowed */
-		if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED))
+	case NL80211_IFTYPE_P2P_CLIENT:
+		/* ignore uAPSD data */
+		params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
+
+		/* these are disallowed */
+		if (params.sta_flags_mask &
+		    (BIT(NL80211_STA_FLAG_ASSOCIATED) |
+		     BIT(NL80211_STA_FLAG_AUTHENTICATED)))
 			return -EINVAL;
 		/* Only TDLS peers can be added */
 		if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
@@ -3736,6 +4085,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		/* ... with external setup is supported */
 		if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP))
 			return -EOPNOTSUPP;
+		/*
+		 * Older wpa_supplicant versions always mark the TDLS peer
+		 * as authorized, but it shouldn't yet be.
+		 */
+		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED);
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -3829,13 +4183,13 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
 {
 	struct mpath_info pinfo;
 	struct cfg80211_registered_device *dev;
-	struct net_device *netdev;
+	struct wireless_dev *wdev;
 	u8 dst[ETH_ALEN];
 	u8 next_hop[ETH_ALEN];
-	int path_idx = cb->args[1];
+	int path_idx = cb->args[2];
 	int err;
 
-	err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
+	err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
 	if (err)
 		return err;
 
@@ -3844,14 +4198,14 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
 		goto out_err;
 	}
 
-	if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
+	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) {
 		err = -EOPNOTSUPP;
 		goto out_err;
 	}
 
 	while (1) {
-		err = rdev_dump_mpath(dev, netdev, path_idx, dst, next_hop,
-				      &pinfo);
+		err = rdev_dump_mpath(dev, wdev->netdev, path_idx, dst,
+				      next_hop, &pinfo);
 		if (err == -ENOENT)
 			break;
 		if (err)
@@ -3859,7 +4213,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
 
 		if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid,
 				       cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				       netdev, dst, next_hop,
+				       wdev->netdev, dst, next_hop,
 				       &pinfo) < 0)
 			goto out;
 
@@ -3868,10 +4222,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
 
 
  out:
-	cb->args[1] = path_idx;
+	cb->args[2] = path_idx;
 	err = skb->len;
  out_err:
-	nl80211_finish_netdev_dump(dev);
+	nl80211_finish_wdev_dump(dev);
 	return err;
 }
 
@@ -4280,6 +4634,7 @@ static const struct nla_policy
 	[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 },
 	[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 },
 	[NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG },
+	[NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG },
 	[NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY,
 				    .len = IEEE80211_MAX_DATA_LEN },
 	[NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG },
@@ -4418,6 +4773,7 @@ do { \
 static int nl80211_parse_mesh_setup(struct genl_info *info,
 				    struct mesh_setup *setup)
 {
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1];
 
 	if (!info->attrs[NL80211_ATTR_MESH_SETUP])
@@ -4454,8 +4810,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,
4454 setup->ie = nla_data(ieattr); 4810 setup->ie = nla_data(ieattr);
4455 setup->ie_len = nla_len(ieattr); 4811 setup->ie_len = nla_len(ieattr);
4456 } 4812 }
4813 if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] &&
4814 !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM))
4815 return -EINVAL;
4816 setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]);
4457 setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); 4817 setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]);
4458 setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]); 4818 setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]);
4819 if (setup->is_secure)
4820 setup->user_mpm = true;
4459 4821
4460 return 0; 4822 return 0;
4461} 4823}
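The two additions to nl80211_parse_mesh_setup() gate userspace mesh peering on driver support and tie it to secure mesh. Condensed into a sketch, the resulting policy is:

/* sketch restating the checks added above */
if (nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]) &&
    !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM))
	return -EINVAL;		/* driver cannot support a userspace MPM */

setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]) ||
		  setup->is_secure;	/* secure (AMPE) mesh implies a userspace MPM */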
@@ -5219,9 +5581,13 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
5219 5581
5220 genl_dump_check_consistent(cb, hdr, &nl80211_fam); 5582 genl_dump_check_consistent(cb, hdr, &nl80211_fam);
5221 5583
5222 if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation) || 5584 if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
5585 goto nla_put_failure;
5586 if (wdev->netdev &&
5223 nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) 5587 nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex))
5224 goto nla_put_failure; 5588 goto nla_put_failure;
5589 if (nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)))
5590 goto nla_put_failure;
5225 5591
5226 bss = nla_nest_start(msg, NL80211_ATTR_BSS); 5592 bss = nla_nest_start(msg, NL80211_ATTR_BSS);
5227 if (!bss) 5593 if (!bss)
@@ -5301,22 +5667,18 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
5301 return -EMSGSIZE; 5667 return -EMSGSIZE;
5302} 5668}
5303 5669
5304static int nl80211_dump_scan(struct sk_buff *skb, 5670static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
5305 struct netlink_callback *cb)
5306{ 5671{
5307 struct cfg80211_registered_device *rdev; 5672 struct cfg80211_registered_device *rdev;
5308 struct net_device *dev;
5309 struct cfg80211_internal_bss *scan; 5673 struct cfg80211_internal_bss *scan;
5310 struct wireless_dev *wdev; 5674 struct wireless_dev *wdev;
5311 int start = cb->args[1], idx = 0; 5675 int start = cb->args[2], idx = 0;
5312 int err; 5676 int err;
5313 5677
5314 err = nl80211_prepare_netdev_dump(skb, cb, &rdev, &dev); 5678 err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
5315 if (err) 5679 if (err)
5316 return err; 5680 return err;
5317 5681
5318 wdev = dev->ieee80211_ptr;
5319
5320 wdev_lock(wdev); 5682 wdev_lock(wdev);
5321 spin_lock_bh(&rdev->bss_lock); 5683 spin_lock_bh(&rdev->bss_lock);
5322 cfg80211_bss_expire(rdev); 5684 cfg80211_bss_expire(rdev);
@@ -5337,8 +5699,8 @@ static int nl80211_dump_scan(struct sk_buff *skb,
5337 spin_unlock_bh(&rdev->bss_lock); 5699 spin_unlock_bh(&rdev->bss_lock);
5338 wdev_unlock(wdev); 5700 wdev_unlock(wdev);
5339 5701
5340 cb->args[1] = idx; 5702 cb->args[2] = idx;
5341 nl80211_finish_netdev_dump(rdev); 5703 nl80211_finish_wdev_dump(rdev);
5342 5704
5343 return skb->len; 5705 return skb->len;
5344} 5706}
@@ -5407,14 +5769,19 @@ static int nl80211_dump_survey(struct sk_buff *skb,
5407{ 5769{
5408 struct survey_info survey; 5770 struct survey_info survey;
5409 struct cfg80211_registered_device *dev; 5771 struct cfg80211_registered_device *dev;
5410 struct net_device *netdev; 5772 struct wireless_dev *wdev;
5411 int survey_idx = cb->args[1]; 5773 int survey_idx = cb->args[2];
5412 int res; 5774 int res;
5413 5775
5414 res = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev); 5776 res = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);
5415 if (res) 5777 if (res)
5416 return res; 5778 return res;
5417 5779
5780 if (!wdev->netdev) {
5781 res = -EINVAL;
5782 goto out_err;
5783 }
5784
5418 if (!dev->ops->dump_survey) { 5785 if (!dev->ops->dump_survey) {
5419 res = -EOPNOTSUPP; 5786 res = -EOPNOTSUPP;
5420 goto out_err; 5787 goto out_err;
@@ -5423,7 +5790,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
5423 while (1) { 5790 while (1) {
5424 struct ieee80211_channel *chan; 5791 struct ieee80211_channel *chan;
5425 5792
5426 res = rdev_dump_survey(dev, netdev, survey_idx, &survey); 5793 res = rdev_dump_survey(dev, wdev->netdev, survey_idx, &survey);
5427 if (res == -ENOENT) 5794 if (res == -ENOENT)
5428 break; 5795 break;
5429 if (res) 5796 if (res)
@@ -5445,17 +5812,16 @@ static int nl80211_dump_survey(struct sk_buff *skb,
5445 if (nl80211_send_survey(skb, 5812 if (nl80211_send_survey(skb,
5446 NETLINK_CB(cb->skb).portid, 5813 NETLINK_CB(cb->skb).portid,
5447 cb->nlh->nlmsg_seq, NLM_F_MULTI, 5814 cb->nlh->nlmsg_seq, NLM_F_MULTI,
5448 netdev, 5815 wdev->netdev, &survey) < 0)
5449 &survey) < 0)
5450 goto out; 5816 goto out;
5451 survey_idx++; 5817 survey_idx++;
5452 } 5818 }
5453 5819
5454 out: 5820 out:
5455 cb->args[1] = survey_idx; 5821 cb->args[2] = survey_idx;
5456 res = skb->len; 5822 res = skb->len;
5457 out_err: 5823 out_err:
5458 nl80211_finish_netdev_dump(dev); 5824 nl80211_finish_wdev_dump(dev);
5459 return res; 5825 return res;
5460} 5826}
5461 5827
@@ -5663,14 +6029,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
5663{ 6029{
5664 struct cfg80211_registered_device *rdev = info->user_ptr[0]; 6030 struct cfg80211_registered_device *rdev = info->user_ptr[0];
5665 struct net_device *dev = info->user_ptr[1]; 6031 struct net_device *dev = info->user_ptr[1];
5666 struct cfg80211_crypto_settings crypto;
5667 struct ieee80211_channel *chan; 6032 struct ieee80211_channel *chan;
5668 const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; 6033 struct cfg80211_assoc_request req = {};
5669 int err, ssid_len, ie_len = 0; 6034 const u8 *bssid, *ssid;
5670 bool use_mfp = false; 6035 int err, ssid_len = 0;
5671 u32 flags = 0;
5672 struct ieee80211_ht_cap *ht_capa = NULL;
5673 struct ieee80211_ht_cap *ht_capa_mask = NULL;
5674 6036
5675 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) 6037 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
5676 return -EINVAL; 6038 return -EINVAL;
@@ -5698,41 +6060,58 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
5698 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); 6060 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
5699 6061
5700 if (info->attrs[NL80211_ATTR_IE]) { 6062 if (info->attrs[NL80211_ATTR_IE]) {
5701 ie = nla_data(info->attrs[NL80211_ATTR_IE]); 6063 req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
5702 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); 6064 req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
5703 } 6065 }
5704 6066
5705 if (info->attrs[NL80211_ATTR_USE_MFP]) { 6067 if (info->attrs[NL80211_ATTR_USE_MFP]) {
5706 enum nl80211_mfp mfp = 6068 enum nl80211_mfp mfp =
5707 nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); 6069 nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
5708 if (mfp == NL80211_MFP_REQUIRED) 6070 if (mfp == NL80211_MFP_REQUIRED)
5709 use_mfp = true; 6071 req.use_mfp = true;
5710 else if (mfp != NL80211_MFP_NO) 6072 else if (mfp != NL80211_MFP_NO)
5711 return -EINVAL; 6073 return -EINVAL;
5712 } 6074 }
5713 6075
5714 if (info->attrs[NL80211_ATTR_PREV_BSSID]) 6076 if (info->attrs[NL80211_ATTR_PREV_BSSID])
5715 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); 6077 req.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
5716 6078
5717 if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT])) 6079 if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
5718 flags |= ASSOC_REQ_DISABLE_HT; 6080 req.flags |= ASSOC_REQ_DISABLE_HT;
5719 6081
5720 if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) 6082 if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
5721 ht_capa_mask = 6083 memcpy(&req.ht_capa_mask,
5722 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]); 6084 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
6085 sizeof(req.ht_capa_mask));
5723 6086
5724 if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) { 6087 if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
5725 if (!ht_capa_mask) 6088 if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
5726 return -EINVAL; 6089 return -EINVAL;
5727 ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); 6090 memcpy(&req.ht_capa,
6091 nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
6092 sizeof(req.ht_capa));
5728 } 6093 }
5729 6094
5730 err = nl80211_crypto_settings(rdev, info, &crypto, 1); 6095 if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
6096 req.flags |= ASSOC_REQ_DISABLE_VHT;
6097
6098 if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
6099 memcpy(&req.vht_capa_mask,
6100 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
6101 sizeof(req.vht_capa_mask));
6102
6103 if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
6104 if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
6105 return -EINVAL;
6106 memcpy(&req.vht_capa,
6107 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
6108 sizeof(req.vht_capa));
6109 }
6110
6111 err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
5731 if (!err) 6112 if (!err)
5732 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, 6113 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid,
5733 ssid, ssid_len, ie, ie_len, use_mfp, 6114 ssid, ssid_len, &req);
5734 &crypto, flags, ht_capa,
5735 ht_capa_mask);
5736 6115
5737 return err; 6116 return err;
5738} 6117}
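nl80211_associate() now hands everything to MLME in one struct cfg80211_assoc_request rather than a long positional argument list. A caller-side sketch, limited to the fields visible in this hunk; extra_ies, extra_ies_len and ht_mask are placeholder variables:

struct cfg80211_assoc_request req = {};

req.ie = extra_ies;			/* optional additional IEs */
req.ie_len = extra_ies_len;
req.use_mfp = true;			/* i.e. NL80211_MFP_REQUIRED */
req.prev_bssid = prev_bssid;		/* non-NULL for reassociation */
req.flags |= ASSOC_REQ_DISABLE_HT;
memcpy(&req.ht_capa_mask, ht_mask, sizeof(req.ht_capa_mask));

err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, ssid_len, &req);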
@@ -6312,6 +6691,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
6312 sizeof(connect.ht_capa)); 6691 sizeof(connect.ht_capa));
6313 } 6692 }
6314 6693
6694 if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
6695 connect.flags |= ASSOC_REQ_DISABLE_VHT;
6696
6697 if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
6698 memcpy(&connect.vht_capa_mask,
6699 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
6700 sizeof(connect.vht_capa_mask));
6701
6702 if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
6703 if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) {
6704 kfree(connkeys);
6705 return -EINVAL;
6706 }
6707 memcpy(&connect.vht_capa,
6708 nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
6709 sizeof(connect.vht_capa));
6710 }
6711
6315 err = cfg80211_connect(rdev, dev, &connect, connkeys); 6712 err = cfg80211_connect(rdev, dev, &connect, connkeys);
6316 if (err) 6713 if (err)
6317 kfree(connkeys); 6714 kfree(connkeys);
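nl80211_connect() gains the same VHT override attributes as the associate path, with the same rule: a capability override without its mask is rejected, since only masked bits are meaningful. In sketch form (the real code above additionally frees connkeys before returning):

if (info->attrs[NL80211_ATTR_VHT_CAPABILITY] &&
    !info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
	return -EINVAL;	/* overrides are only interpretable with a mask */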
@@ -7085,6 +7482,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
7085 return err; 7482 return err;
7086 } 7483 }
7087 7484
7485 if (setup.user_mpm)
7486 cfg.auto_open_plinks = false;
7487
7088 if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { 7488 if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
7089 err = nl80211_parse_chandef(rdev, info, &setup.chandef); 7489 err = nl80211_parse_chandef(rdev, info, &setup.chandef);
7090 if (err) 7490 if (err)
@@ -7284,7 +7684,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
7284 return -EINVAL; 7684 return -EINVAL;
7285 7685
7286 if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > 7686 if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) >
7287 rdev->wiphy.wowlan.tcp->data_interval_max) 7687 rdev->wiphy.wowlan.tcp->data_interval_max ||
7688 nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0)
7288 return -EINVAL; 7689 return -EINVAL;
7289 7690
7290 wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); 7691 wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]);
@@ -7762,10 +8163,118 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
7762 if (!rdev->ops->stop_p2p_device) 8163 if (!rdev->ops->stop_p2p_device)
7763 return -EOPNOTSUPP; 8164 return -EOPNOTSUPP;
7764 8165
8166 mutex_lock(&rdev->devlist_mtx);
7765 mutex_lock(&rdev->sched_scan_mtx); 8167 mutex_lock(&rdev->sched_scan_mtx);
7766 cfg80211_stop_p2p_device(rdev, wdev); 8168 cfg80211_stop_p2p_device(rdev, wdev);
7767 mutex_unlock(&rdev->sched_scan_mtx); 8169 mutex_unlock(&rdev->sched_scan_mtx);
8170 mutex_unlock(&rdev->devlist_mtx);
8171
8172 return 0;
8173}
8174
8175static int nl80211_get_protocol_features(struct sk_buff *skb,
8176 struct genl_info *info)
8177{
8178 void *hdr;
8179 struct sk_buff *msg;
8180
8181 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
8182 if (!msg)
8183 return -ENOMEM;
8184
8185 hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
8186 NL80211_CMD_GET_PROTOCOL_FEATURES);
8187 if (!hdr)
8188 goto nla_put_failure;
8189
8190 if (nla_put_u32(msg, NL80211_ATTR_PROTOCOL_FEATURES,
8191 NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP))
8192 goto nla_put_failure;
8193
8194 genlmsg_end(msg, hdr);
8195 return genlmsg_reply(msg, info);
8196
8197 nla_put_failure:
8198 kfree_skb(msg);
8199 return -ENOBUFS;
8200}
8201
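NL80211_CMD_GET_PROTOCOL_FEATURES gives userspace a way to probe for the split wiphy dump before relying on it. A minimal libnl-3 query sketch; error handling is elided, and the reply-parsing callback (the usual genl valid-message handler) is assumed rather than shown:

#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int fam;

	genl_connect(sk);
	fam = genl_ctrl_resolve(sk, "nl80211");

	msg = nlmsg_alloc();
	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, fam, 0, 0,
		    NL80211_CMD_GET_PROTOCOL_FEATURES, 0);
	nl_send_auto(sk, msg);
	/* the reply carries NL80211_ATTR_PROTOCOL_FEATURES; the
	 * NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP bit being set means
	 * the kernel supports the split dump */
	nl_recvmsgs_default(sk);

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}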
8202static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info)
8203{
8204 struct cfg80211_registered_device *rdev = info->user_ptr[0];
8205 struct cfg80211_update_ft_ies_params ft_params;
8206 struct net_device *dev = info->user_ptr[1];
8207
8208 if (!rdev->ops->update_ft_ies)
8209 return -EOPNOTSUPP;
8210
8211 if (!info->attrs[NL80211_ATTR_MDID] ||
8212 !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
8213 return -EINVAL;
7768 8214
8215 memset(&ft_params, 0, sizeof(ft_params));
8216 ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]);
8217 ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
8218 ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
8219
8220 return rdev_update_ft_ies(rdev, dev, &ft_params);
8221}
8222
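On the driver side, NL80211_CMD_UPDATE_FT_IES lands in a new cfg80211_ops hook with the signature used by rdev_update_ft_ies() further down in this patch. A sketch of an implementation, where my_fw_set_ft_ies() is a hypothetical firmware helper:

static int drv_update_ft_ies(struct wiphy *wiphy, struct net_device *dev,
			     struct cfg80211_update_ft_ies_params *ftie)
{
	/* push the fast-transition IEs for mobility domain ftie->md
	 * down to the firmware */
	return my_fw_set_ft_ies(dev, ftie->md, ftie->ie, ftie->ie_len);
}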
8223static int nl80211_crit_protocol_start(struct sk_buff *skb,
8224 struct genl_info *info)
8225{
8226 struct cfg80211_registered_device *rdev = info->user_ptr[0];
8227 struct wireless_dev *wdev = info->user_ptr[1];
8228 enum nl80211_crit_proto_id proto = NL80211_CRIT_PROTO_UNSPEC;
8229 u16 duration;
8230 int ret;
8231
8232 if (!rdev->ops->crit_proto_start)
8233 return -EOPNOTSUPP;
8234
8235 if (WARN_ON(!rdev->ops->crit_proto_stop))
8236 return -EINVAL;
8237
8238 if (rdev->crit_proto_nlportid)
8239 return -EBUSY;
8240
8241 /* determine protocol if provided */
8242 if (info->attrs[NL80211_ATTR_CRIT_PROT_ID])
8243 proto = nla_get_u16(info->attrs[NL80211_ATTR_CRIT_PROT_ID]);
8244
8245 if (proto >= NUM_NL80211_CRIT_PROTO)
8246 return -EINVAL;
8247
8248 /* timeout must be provided */
8249 if (!info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION])
8250 return -EINVAL;
8251
8252 duration =
8253 nla_get_u16(info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]);
8254
8255 if (duration > NL80211_CRIT_PROTO_MAX_DURATION)
8256 return -ERANGE;
8257
8258 ret = rdev_crit_proto_start(rdev, wdev, proto, duration);
8259 if (!ret)
8260 rdev->crit_proto_nlportid = info->snd_portid;
8261
8262 return ret;
8263}
8264
8265static int nl80211_crit_protocol_stop(struct sk_buff *skb,
8266 struct genl_info *info)
8267{
8268 struct cfg80211_registered_device *rdev = info->user_ptr[0];
8269 struct wireless_dev *wdev = info->user_ptr[1];
8270
8271 if (!rdev->ops->crit_proto_stop)
8272 return -EOPNOTSUPP;
8273
8274 if (rdev->crit_proto_nlportid) {
8275 rdev->crit_proto_nlportid = 0;
8276 rdev_crit_proto_stop(rdev, wdev);
8277 }
7769 return 0; 8278 return 0;
7770} 8279}
7771 8280
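The critical-protocol pair records the requesting netlink portid on start and clears it on stop, so only one session can be active and the kernel knows whom to notify if the window ends early. From a driver, the early-termination path is a single call to cfg80211_crit_proto_stopped(), which appears later in this patch; 'struct my_drv' is a hypothetical driver context:

static void drv_crit_proto_window_closed(struct my_drv *drv)
{
	/* unicasts NL80211_CMD_CRIT_PROTOCOL_STOP to the portid that
	 * issued NL80211_CMD_CRIT_PROTOCOL_START */
	cfg80211_crit_proto_stopped(drv->wdev, GFP_KERNEL);
}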
@@ -8445,6 +8954,35 @@ static struct genl_ops nl80211_ops[] = {
8445 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | 8954 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
8446 NL80211_FLAG_NEED_RTNL, 8955 NL80211_FLAG_NEED_RTNL,
8447 }, 8956 },
8957 {
8958 .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
8959 .doit = nl80211_get_protocol_features,
8960 .policy = nl80211_policy,
8961 },
8962 {
8963 .cmd = NL80211_CMD_UPDATE_FT_IES,
8964 .doit = nl80211_update_ft_ies,
8965 .policy = nl80211_policy,
8966 .flags = GENL_ADMIN_PERM,
8967 .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
8968 NL80211_FLAG_NEED_RTNL,
8969 },
8970 {
8971 .cmd = NL80211_CMD_CRIT_PROTOCOL_START,
8972 .doit = nl80211_crit_protocol_start,
8973 .policy = nl80211_policy,
8974 .flags = GENL_ADMIN_PERM,
8975 .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
8976 NL80211_FLAG_NEED_RTNL,
8977 },
8978 {
8979 .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP,
8980 .doit = nl80211_crit_protocol_stop,
8981 .policy = nl80211_policy,
8982 .flags = GENL_ADMIN_PERM,
8983 .internal_flags = NL80211_FLAG_NEED_WDEV_UP |
8984 NL80211_FLAG_NEED_RTNL,
8985 }
8448}; 8986};
8449 8987
8450static struct genl_multicast_group nl80211_mlme_mcgrp = { 8988static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -8472,7 +9010,8 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
8472 if (!msg) 9010 if (!msg)
8473 return; 9011 return;
8474 9012
8475 if (nl80211_send_wiphy(msg, 0, 0, 0, rdev) < 0) { 9013 if (nl80211_send_wiphy(rdev, msg, 0, 0, 0,
9014 false, NULL, NULL, NULL) < 0) {
8476 nlmsg_free(msg); 9015 nlmsg_free(msg);
8477 return; 9016 return;
8478 } 9017 }
@@ -8796,21 +9335,31 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
8796 NL80211_CMD_DISASSOCIATE, gfp); 9335 NL80211_CMD_DISASSOCIATE, gfp);
8797} 9336}
8798 9337
8799void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, 9338void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf,
8800 struct net_device *netdev, const u8 *buf, 9339 size_t len)
8801 size_t len, gfp_t gfp)
8802{ 9340{
8803 nl80211_send_mlme_event(rdev, netdev, buf, len, 9341 struct wireless_dev *wdev = dev->ieee80211_ptr;
8804 NL80211_CMD_UNPROT_DEAUTHENTICATE, gfp); 9342 struct wiphy *wiphy = wdev->wiphy;
9343 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9344
9345 trace_cfg80211_send_unprot_deauth(dev);
9346 nl80211_send_mlme_event(rdev, dev, buf, len,
9347 NL80211_CMD_UNPROT_DEAUTHENTICATE, GFP_ATOMIC);
8805} 9348}
9349EXPORT_SYMBOL(cfg80211_send_unprot_deauth);
8806 9350
8807void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, 9351void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf,
8808 struct net_device *netdev, const u8 *buf, 9352 size_t len)
8809 size_t len, gfp_t gfp)
8810{ 9353{
8811 nl80211_send_mlme_event(rdev, netdev, buf, len, 9354 struct wireless_dev *wdev = dev->ieee80211_ptr;
8812 NL80211_CMD_UNPROT_DISASSOCIATE, gfp); 9355 struct wiphy *wiphy = wdev->wiphy;
9356 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9357
9358 trace_cfg80211_send_unprot_disassoc(dev);
9359 nl80211_send_mlme_event(rdev, dev, buf, len,
9360 NL80211_CMD_UNPROT_DISASSOCIATE, GFP_ATOMIC);
8813} 9361}
9362EXPORT_SYMBOL(cfg80211_send_unprot_disassoc);
8814 9363
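This is the refactor repeated through the rest of the patch: internal nl80211_send_*() helpers become exported cfg80211_*() entry points that derive the registered device from the netdev or wdev themselves, add a tracepoint, and drop parameters the caller no longer supplies (here even the gfp_t, since these events are sent with GFP_ATOMIC). A caller in mac80211 or a full-MAC driver is correspondingly simple; sketch, with mgmt_buf/mgmt_len as placeholders for the received frame:

/* RX path noticed an unprotected deauth while MFP is in use */
cfg80211_send_unprot_deauth(netdev, mgmt_buf, mgmt_len);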
8815static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, 9364static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,
8816 struct net_device *netdev, int cmd, 9365 struct net_device *netdev, int cmd,
@@ -9013,14 +9562,19 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
9013 nlmsg_free(msg); 9562 nlmsg_free(msg);
9014} 9563}
9015 9564
9016void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, 9565void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr,
9017 struct net_device *netdev, 9566 const u8* ie, u8 ie_len, gfp_t gfp)
9018 const u8 *macaddr, const u8* ie, u8 ie_len,
9019 gfp_t gfp)
9020{ 9567{
9568 struct wireless_dev *wdev = dev->ieee80211_ptr;
9569 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
9021 struct sk_buff *msg; 9570 struct sk_buff *msg;
9022 void *hdr; 9571 void *hdr;
9023 9572
9573 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT))
9574 return;
9575
9576 trace_cfg80211_notify_new_peer_candidate(dev, addr);
9577
9024 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 9578 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9025 if (!msg) 9579 if (!msg)
9026 return; 9580 return;
@@ -9032,8 +9586,8 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
9032 } 9586 }
9033 9587
9034 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || 9588 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
9035 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || 9589 nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
9036 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, macaddr) || 9590 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
9037 (ie_len && ie && 9591 (ie_len && ie &&
9038 nla_put(msg, NL80211_ATTR_IE, ie_len , ie))) 9592 nla_put(msg, NL80211_ATTR_IE, ie_len , ie)))
9039 goto nla_put_failure; 9593 goto nla_put_failure;
@@ -9048,6 +9602,7 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
9048 genlmsg_cancel(msg, hdr); 9602 genlmsg_cancel(msg, hdr);
9049 nlmsg_free(msg); 9603 nlmsg_free(msg);
9050} 9604}
9605EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);
9051 9606
9052void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, 9607void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
9053 struct net_device *netdev, const u8 *addr, 9608 struct net_device *netdev, const u8 *addr,
@@ -9116,7 +9671,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
9116 nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE); 9671 nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE);
9117 if (!nl_freq) 9672 if (!nl_freq)
9118 goto nla_put_failure; 9673 goto nla_put_failure;
9119 if (nl80211_msg_put_channel(msg, channel_before)) 9674 if (nl80211_msg_put_channel(msg, channel_before, false))
9120 goto nla_put_failure; 9675 goto nla_put_failure;
9121 nla_nest_end(msg, nl_freq); 9676 nla_nest_end(msg, nl_freq);
9122 9677
@@ -9124,7 +9679,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
9124 nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER); 9679 nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER);
9125 if (!nl_freq) 9680 if (!nl_freq)
9126 goto nla_put_failure; 9681 goto nla_put_failure;
9127 if (nl80211_msg_put_channel(msg, channel_after)) 9682 if (nl80211_msg_put_channel(msg, channel_after, false))
9128 goto nla_put_failure; 9683 goto nla_put_failure;
9129 nla_nest_end(msg, nl_freq); 9684 nla_nest_end(msg, nl_freq);
9130 9685
@@ -9186,31 +9741,42 @@ static void nl80211_send_remain_on_chan_event(
9186 nlmsg_free(msg); 9741 nlmsg_free(msg);
9187} 9742}
9188 9743
9189void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, 9744void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie,
9190 struct wireless_dev *wdev, u64 cookie, 9745 struct ieee80211_channel *chan,
9191 struct ieee80211_channel *chan, 9746 unsigned int duration, gfp_t gfp)
9192 unsigned int duration, gfp_t gfp)
9193{ 9747{
9748 struct wiphy *wiphy = wdev->wiphy;
9749 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9750
9751 trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration);
9194 nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, 9752 nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL,
9195 rdev, wdev, cookie, chan, 9753 rdev, wdev, cookie, chan,
9196 duration, gfp); 9754 duration, gfp);
9197} 9755}
9756EXPORT_SYMBOL(cfg80211_ready_on_channel);
9198 9757
9199void nl80211_send_remain_on_channel_cancel( 9758void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie,
9200 struct cfg80211_registered_device *rdev, 9759 struct ieee80211_channel *chan,
9201 struct wireless_dev *wdev, 9760 gfp_t gfp)
9202 u64 cookie, struct ieee80211_channel *chan, gfp_t gfp)
9203{ 9761{
9762 struct wiphy *wiphy = wdev->wiphy;
9763 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9764
9765 trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan);
9204 nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, 9766 nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
9205 rdev, wdev, cookie, chan, 0, gfp); 9767 rdev, wdev, cookie, chan, 0, gfp);
9206} 9768}
9769EXPORT_SYMBOL(cfg80211_remain_on_channel_expired);
9207 9770
9208void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, 9771void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
9209 struct net_device *dev, const u8 *mac_addr, 9772 struct station_info *sinfo, gfp_t gfp)
9210 struct station_info *sinfo, gfp_t gfp)
9211{ 9773{
9774 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
9775 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9212 struct sk_buff *msg; 9776 struct sk_buff *msg;
9213 9777
9778 trace_cfg80211_new_sta(dev, mac_addr, sinfo);
9779
9214 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 9780 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9215 if (!msg) 9781 if (!msg)
9216 return; 9782 return;
@@ -9224,14 +9790,17 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
9224 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, 9790 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
9225 nl80211_mlme_mcgrp.id, gfp); 9791 nl80211_mlme_mcgrp.id, gfp);
9226} 9792}
9793EXPORT_SYMBOL(cfg80211_new_sta);
9227 9794
9228void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, 9795void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)
9229 struct net_device *dev, const u8 *mac_addr,
9230 gfp_t gfp)
9231{ 9796{
9797 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
9798 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9232 struct sk_buff *msg; 9799 struct sk_buff *msg;
9233 void *hdr; 9800 void *hdr;
9234 9801
9802 trace_cfg80211_del_sta(dev, mac_addr);
9803
9235 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 9804 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9236 if (!msg) 9805 if (!msg)
9237 return; 9806 return;
@@ -9256,12 +9825,14 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
9256 genlmsg_cancel(msg, hdr); 9825 genlmsg_cancel(msg, hdr);
9257 nlmsg_free(msg); 9826 nlmsg_free(msg);
9258} 9827}
9828EXPORT_SYMBOL(cfg80211_del_sta);
9259 9829
9260void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, 9830void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
9261 struct net_device *dev, const u8 *mac_addr, 9831 enum nl80211_connect_failed_reason reason,
9262 enum nl80211_connect_failed_reason reason, 9832 gfp_t gfp)
9263 gfp_t gfp)
9264{ 9833{
9834 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
9835 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9265 struct sk_buff *msg; 9836 struct sk_buff *msg;
9266 void *hdr; 9837 void *hdr;
9267 9838
@@ -9290,6 +9861,7 @@ void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
9290 genlmsg_cancel(msg, hdr); 9861 genlmsg_cancel(msg, hdr);
9291 nlmsg_free(msg); 9862 nlmsg_free(msg);
9292} 9863}
9864EXPORT_SYMBOL(cfg80211_conn_failed);
9293 9865
9294static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, 9866static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
9295 const u8 *addr, gfp_t gfp) 9867 const u8 *addr, gfp_t gfp)
@@ -9334,19 +9906,47 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
9334 return true; 9906 return true;
9335} 9907}
9336 9908
9337bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp) 9909bool cfg80211_rx_spurious_frame(struct net_device *dev,
9910 const u8 *addr, gfp_t gfp)
9338{ 9911{
9339 return __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME, 9912 struct wireless_dev *wdev = dev->ieee80211_ptr;
9340 addr, gfp); 9913 bool ret;
9914
9915 trace_cfg80211_rx_spurious_frame(dev, addr);
9916
9917 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
9918 wdev->iftype != NL80211_IFTYPE_P2P_GO)) {
9919 trace_cfg80211_return_bool(false);
9920 return false;
9921 }
9922 ret = __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME,
9923 addr, gfp);
9924 trace_cfg80211_return_bool(ret);
9925 return ret;
9341} 9926}
9927EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
9342 9928
9343bool nl80211_unexpected_4addr_frame(struct net_device *dev, 9929bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
9344 const u8 *addr, gfp_t gfp) 9930 const u8 *addr, gfp_t gfp)
9345{ 9931{
9346 return __nl80211_unexpected_frame(dev, 9932 struct wireless_dev *wdev = dev->ieee80211_ptr;
9347 NL80211_CMD_UNEXPECTED_4ADDR_FRAME, 9933 bool ret;
9348 addr, gfp); 9934
9935 trace_cfg80211_rx_unexpected_4addr_frame(dev, addr);
9936
9937 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
9938 wdev->iftype != NL80211_IFTYPE_P2P_GO &&
9939 wdev->iftype != NL80211_IFTYPE_AP_VLAN)) {
9940 trace_cfg80211_return_bool(false);
9941 return false;
9942 }
9943 ret = __nl80211_unexpected_frame(dev,
9944 NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
9945 addr, gfp);
9946 trace_cfg80211_return_bool(ret);
9947 return ret;
9349} 9948}
9949EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
9350 9950
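Both spurious-frame notifiers now verify the interface type and report whether the event was actually delivered, so an AP driver can gate follow-up work on the return value. Sketch; reacting by deauthenticating the sender is an assumption about typical userspace behaviour, not something this patch mandates:

if (cfg80211_rx_spurious_frame(dev, sa, GFP_ATOMIC)) {
	/* userspace received NL80211_CMD_UNEXPECTED_FRAME and may
	 * respond, e.g. by deauthenticating the transmitter */
}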
9351int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, 9951int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
9352 struct wireless_dev *wdev, u32 nlportid, 9952 struct wireless_dev *wdev, u32 nlportid,
@@ -9386,15 +9986,17 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
9386 return -ENOBUFS; 9986 return -ENOBUFS;
9387} 9987}
9388 9988
9389void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, 9989void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
9390 struct wireless_dev *wdev, u64 cookie, 9990 const u8 *buf, size_t len, bool ack, gfp_t gfp)
9391 const u8 *buf, size_t len, bool ack,
9392 gfp_t gfp)
9393{ 9991{
9992 struct wiphy *wiphy = wdev->wiphy;
9993 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9394 struct net_device *netdev = wdev->netdev; 9994 struct net_device *netdev = wdev->netdev;
9395 struct sk_buff *msg; 9995 struct sk_buff *msg;
9396 void *hdr; 9996 void *hdr;
9397 9997
9998 trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
9999
9398 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 10000 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9399 if (!msg) 10001 if (!msg)
9400 return; 10002 return;
@@ -9422,17 +10024,21 @@ void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
9422 genlmsg_cancel(msg, hdr); 10024 genlmsg_cancel(msg, hdr);
9423 nlmsg_free(msg); 10025 nlmsg_free(msg);
9424} 10026}
10027EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
9425 10028
9426void 10029void cfg80211_cqm_rssi_notify(struct net_device *dev,
9427nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, 10030 enum nl80211_cqm_rssi_threshold_event rssi_event,
9428 struct net_device *netdev, 10031 gfp_t gfp)
9429 enum nl80211_cqm_rssi_threshold_event rssi_event,
9430 gfp_t gfp)
9431{ 10032{
10033 struct wireless_dev *wdev = dev->ieee80211_ptr;
10034 struct wiphy *wiphy = wdev->wiphy;
10035 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9432 struct sk_buff *msg; 10036 struct sk_buff *msg;
9433 struct nlattr *pinfoattr; 10037 struct nlattr *pinfoattr;
9434 void *hdr; 10038 void *hdr;
9435 10039
10040 trace_cfg80211_cqm_rssi_notify(dev, rssi_event);
10041
9436 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 10042 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9437 if (!msg) 10043 if (!msg)
9438 return; 10044 return;
@@ -9444,7 +10050,7 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
9444 } 10050 }
9445 10051
9446 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || 10052 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
9447 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) 10053 nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
9448 goto nla_put_failure; 10054 goto nla_put_failure;
9449 10055
9450 pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); 10056 pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM);
@@ -9467,10 +10073,11 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
9467 genlmsg_cancel(msg, hdr); 10073 genlmsg_cancel(msg, hdr);
9468 nlmsg_free(msg); 10074 nlmsg_free(msg);
9469} 10075}
10076EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
9470 10077
9471void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, 10078static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
9472 struct net_device *netdev, const u8 *bssid, 10079 struct net_device *netdev, const u8 *bssid,
9473 const u8 *replay_ctr, gfp_t gfp) 10080 const u8 *replay_ctr, gfp_t gfp)
9474{ 10081{
9475 struct sk_buff *msg; 10082 struct sk_buff *msg;
9476 struct nlattr *rekey_attr; 10083 struct nlattr *rekey_attr;
@@ -9512,9 +10119,22 @@ void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
9512 nlmsg_free(msg); 10119 nlmsg_free(msg);
9513} 10120}
9514 10121
9515void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, 10122void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
9516 struct net_device *netdev, int index, 10123 const u8 *replay_ctr, gfp_t gfp)
9517 const u8 *bssid, bool preauth, gfp_t gfp) 10124{
10125 struct wireless_dev *wdev = dev->ieee80211_ptr;
10126 struct wiphy *wiphy = wdev->wiphy;
10127 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
10128
10129 trace_cfg80211_gtk_rekey_notify(dev, bssid);
10130 nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp);
10131}
10132EXPORT_SYMBOL(cfg80211_gtk_rekey_notify);
10133
10134static void
10135nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
10136 struct net_device *netdev, int index,
10137 const u8 *bssid, bool preauth, gfp_t gfp)
9518{ 10138{
9519 struct sk_buff *msg; 10139 struct sk_buff *msg;
9520 struct nlattr *attr; 10140 struct nlattr *attr;
@@ -9557,9 +10177,22 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
9557 nlmsg_free(msg); 10177 nlmsg_free(msg);
9558} 10178}
9559 10179
9560void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, 10180void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
9561 struct net_device *netdev, 10181 const u8 *bssid, bool preauth, gfp_t gfp)
9562 struct cfg80211_chan_def *chandef, gfp_t gfp) 10182{
10183 struct wireless_dev *wdev = dev->ieee80211_ptr;
10184 struct wiphy *wiphy = wdev->wiphy;
10185 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
10186
10187 trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth);
10188 nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
10189}
10190EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);
10191
10192static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
10193 struct net_device *netdev,
10194 struct cfg80211_chan_def *chandef,
10195 gfp_t gfp)
9563{ 10196{
9564 struct sk_buff *msg; 10197 struct sk_buff *msg;
9565 void *hdr; 10198 void *hdr;
@@ -9591,11 +10224,36 @@ void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
9591 nlmsg_free(msg); 10224 nlmsg_free(msg);
9592} 10225}
9593 10226
9594void 10227void cfg80211_ch_switch_notify(struct net_device *dev,
9595nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, 10228 struct cfg80211_chan_def *chandef)
9596 struct net_device *netdev, const u8 *peer, 10229{
9597 u32 num_packets, u32 rate, u32 intvl, gfp_t gfp) 10230 struct wireless_dev *wdev = dev->ieee80211_ptr;
10231 struct wiphy *wiphy = wdev->wiphy;
10232 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
10233
10234 trace_cfg80211_ch_switch_notify(dev, chandef);
10235
10236 wdev_lock(wdev);
10237
10238 if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
10239 wdev->iftype != NL80211_IFTYPE_P2P_GO))
10240 goto out;
10241
10242 wdev->channel = chandef->chan;
10243 nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL);
10244out:
10245 wdev_unlock(wdev);
10246 return;
10247}
10248EXPORT_SYMBOL(cfg80211_ch_switch_notify);
10249
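cfg80211_ch_switch_notify() now takes the wdev lock, validates the iftype and updates wdev->channel itself, so a driver only reports the final chandef; note the exported variant also loses the gfp_t, since the event is sent with GFP_KERNEL. Sketch:

/* after the CSA countdown completes and the radio is on the new channel */
cfg80211_ch_switch_notify(dev, &new_chandef);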
10250void cfg80211_cqm_txe_notify(struct net_device *dev,
10251 const u8 *peer, u32 num_packets,
10252 u32 rate, u32 intvl, gfp_t gfp)
9598{ 10253{
10254 struct wireless_dev *wdev = dev->ieee80211_ptr;
10255 struct wiphy *wiphy = wdev->wiphy;
10256 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9599 struct sk_buff *msg; 10257 struct sk_buff *msg;
9600 struct nlattr *pinfoattr; 10258 struct nlattr *pinfoattr;
9601 void *hdr; 10259 void *hdr;
@@ -9611,7 +10269,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
9611 } 10269 }
9612 10270
9613 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || 10271 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
9614 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || 10272 nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
9615 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer)) 10273 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer))
9616 goto nla_put_failure; 10274 goto nla_put_failure;
9617 10275
@@ -9640,6 +10298,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
9640 genlmsg_cancel(msg, hdr); 10298 genlmsg_cancel(msg, hdr);
9641 nlmsg_free(msg); 10299 nlmsg_free(msg);
9642} 10300}
10301EXPORT_SYMBOL(cfg80211_cqm_txe_notify);
9643 10302
9644void 10303void
9645nl80211_radar_notify(struct cfg80211_registered_device *rdev, 10304nl80211_radar_notify(struct cfg80211_registered_device *rdev,
@@ -9692,15 +10351,18 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
9692 nlmsg_free(msg); 10351 nlmsg_free(msg);
9693} 10352}
9694 10353
9695void 10354void cfg80211_cqm_pktloss_notify(struct net_device *dev,
9696nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, 10355 const u8 *peer, u32 num_packets, gfp_t gfp)
9697 struct net_device *netdev, const u8 *peer,
9698 u32 num_packets, gfp_t gfp)
9699{ 10356{
10357 struct wireless_dev *wdev = dev->ieee80211_ptr;
10358 struct wiphy *wiphy = wdev->wiphy;
10359 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
9700 struct sk_buff *msg; 10360 struct sk_buff *msg;
9701 struct nlattr *pinfoattr; 10361 struct nlattr *pinfoattr;
9702 void *hdr; 10362 void *hdr;
9703 10363
10364 trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets);
10365
9704 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 10366 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
9705 if (!msg) 10367 if (!msg)
9706 return; 10368 return;
@@ -9712,7 +10374,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
9712 } 10374 }
9713 10375
9714 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || 10376 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
9715 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || 10377 nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
9716 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer)) 10378 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer))
9717 goto nla_put_failure; 10379 goto nla_put_failure;
9718 10380
@@ -9735,6 +10397,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
9735 genlmsg_cancel(msg, hdr); 10397 genlmsg_cancel(msg, hdr);
9736 nlmsg_free(msg); 10398 nlmsg_free(msg);
9737} 10399}
10400EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);
9738 10401
9739void cfg80211_probe_status(struct net_device *dev, const u8 *addr, 10402void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
9740 u64 cookie, bool acked, gfp_t gfp) 10403 u64 cookie, bool acked, gfp_t gfp)
@@ -10021,6 +10684,89 @@ static struct notifier_block nl80211_netlink_notifier = {
10021 .notifier_call = nl80211_netlink_notify, 10684 .notifier_call = nl80211_netlink_notify,
10022}; 10685};
10023 10686
10687void cfg80211_ft_event(struct net_device *netdev,
10688 struct cfg80211_ft_event_params *ft_event)
10689{
10690 struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy;
10691 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
10692 struct sk_buff *msg;
10693 void *hdr;
10694 int err;
10695
10696 trace_cfg80211_ft_event(wiphy, netdev, ft_event);
10697
10698 if (!ft_event->target_ap)
10699 return;
10700
10701 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
10702 if (!msg)
10703 return;
10704
10705 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT);
10706 if (!hdr) {
10707 nlmsg_free(msg);
10708 return;
10709 }
10710
10711 nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
10712 nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
10713 nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap);
10714 if (ft_event->ies)
10715 nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies);
10716 if (ft_event->ric_ies)
10717 nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len,
10718 ft_event->ric_ies);
10719
10720 err = genlmsg_end(msg, hdr);
10721 if (err < 0) {
10722 nlmsg_free(msg);
10723 return;
10724 }
10725
10726 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
10727 nl80211_mlme_mcgrp.id, GFP_KERNEL);
10728}
10729EXPORT_SYMBOL(cfg80211_ft_event);
10730
10731void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
10732{
10733 struct cfg80211_registered_device *rdev;
10734 struct sk_buff *msg;
10735 void *hdr;
10736 u32 nlportid;
10737
10738 rdev = wiphy_to_dev(wdev->wiphy);
10739 if (!rdev->crit_proto_nlportid)
10740 return;
10741
10742 nlportid = rdev->crit_proto_nlportid;
10743 rdev->crit_proto_nlportid = 0;
10744
10745 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
10746 if (!msg)
10747 return;
10748
10749 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CRIT_PROTOCOL_STOP);
10750 if (!hdr)
10751 goto nla_put_failure;
10752
10753 if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
10754 nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)))
10755 goto nla_put_failure;
10756
10757 genlmsg_end(msg, hdr);
10758
10759 genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
10760 return;
10761
10762 nla_put_failure:
10763 if (hdr)
10764 genlmsg_cancel(msg, hdr);
10765 nlmsg_free(msg);
10766
10767}
10768EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
10769
10024/* initialisation/exit functions */ 10770/* initialisation/exit functions */
10025 10771
10026int nl80211_init(void) 10772int nl80211_init(void)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b061da4919e1..a4073e808c13 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -29,12 +29,6 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
29void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, 29void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
30 struct net_device *netdev, 30 struct net_device *netdev,
31 const u8 *buf, size_t len, gfp_t gfp); 31 const u8 *buf, size_t len, gfp_t gfp);
32void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev,
33 struct net_device *netdev,
34 const u8 *buf, size_t len, gfp_t gfp);
35void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev,
36 struct net_device *netdev,
37 const u8 *buf, size_t len, gfp_t gfp);
38void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, 32void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev,
39 struct net_device *netdev, 33 struct net_device *netdev,
40 const u8 *addr, gfp_t gfp); 34 const u8 *addr, gfp_t gfp);
@@ -54,10 +48,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
54 struct net_device *netdev, u16 reason, 48 struct net_device *netdev, u16 reason,
55 const u8 *ie, size_t ie_len, bool from_ap); 49 const u8 *ie, size_t ie_len, bool from_ap);
56 50
57void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
58 struct net_device *netdev,
59 const u8 *macaddr, const u8* ie, u8 ie_len,
60 gfp_t gfp);
61void 51void
62nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, 52nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
63 struct net_device *netdev, const u8 *addr, 53 struct net_device *netdev, const u8 *addr,
@@ -73,41 +63,10 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
73 struct net_device *netdev, const u8 *bssid, 63 struct net_device *netdev, const u8 *bssid,
74 gfp_t gfp); 64 gfp_t gfp);
75 65
76void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev,
77 struct wireless_dev *wdev, u64 cookie,
78 struct ieee80211_channel *chan,
79 unsigned int duration, gfp_t gfp);
80void nl80211_send_remain_on_channel_cancel(
81 struct cfg80211_registered_device *rdev,
82 struct wireless_dev *wdev,
83 u64 cookie, struct ieee80211_channel *chan, gfp_t gfp);
84
85void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
86 struct net_device *dev, const u8 *mac_addr,
87 struct station_info *sinfo, gfp_t gfp);
88void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
89 struct net_device *dev, const u8 *mac_addr,
90 gfp_t gfp);
91
92void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
93 struct net_device *dev, const u8 *mac_addr,
94 enum nl80211_connect_failed_reason reason,
95 gfp_t gfp);
96
97int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, 66int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
98 struct wireless_dev *wdev, u32 nlpid, 67 struct wireless_dev *wdev, u32 nlpid,
99 int freq, int sig_dbm, 68 int freq, int sig_dbm,
100 const u8 *buf, size_t len, gfp_t gfp); 69 const u8 *buf, size_t len, gfp_t gfp);
101void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
102 struct wireless_dev *wdev, u64 cookie,
103 const u8 *buf, size_t len, bool ack,
104 gfp_t gfp);
105
106void
107nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
108 struct net_device *netdev,
109 enum nl80211_cqm_rssi_threshold_event rssi_event,
110 gfp_t gfp);
111 70
112void 71void
113nl80211_radar_notify(struct cfg80211_registered_device *rdev, 72nl80211_radar_notify(struct cfg80211_registered_device *rdev,
@@ -115,31 +74,4 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
115 enum nl80211_radar_event event, 74 enum nl80211_radar_event event,
116 struct net_device *netdev, gfp_t gfp); 75 struct net_device *netdev, gfp_t gfp);
117 76
118void
119nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
120 struct net_device *netdev, const u8 *peer,
121 u32 num_packets, gfp_t gfp);
122
123void
124nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
125 struct net_device *netdev, const u8 *peer,
126 u32 num_packets, u32 rate, u32 intvl, gfp_t gfp);
127
128void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
129 struct net_device *netdev, const u8 *bssid,
130 const u8 *replay_ctr, gfp_t gfp);
131
132void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
133 struct net_device *netdev, int index,
134 const u8 *bssid, bool preauth, gfp_t gfp);
135
136void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
137 struct net_device *dev,
138 struct cfg80211_chan_def *chandef, gfp_t gfp);
139
140bool nl80211_unexpected_frame(struct net_device *dev,
141 const u8 *addr, gfp_t gfp);
142bool nl80211_unexpected_4addr_frame(struct net_device *dev,
143 const u8 *addr, gfp_t gfp);
144
145#endif /* __NET_WIRELESS_NL80211_H */ 77#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 422d38291d66..9f15f0ac824d 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -6,11 +6,12 @@
6#include "core.h" 6#include "core.h"
7#include "trace.h" 7#include "trace.h"
8 8
9static inline int rdev_suspend(struct cfg80211_registered_device *rdev) 9static inline int rdev_suspend(struct cfg80211_registered_device *rdev,
10 struct cfg80211_wowlan *wowlan)
10{ 11{
11 int ret; 12 int ret;
12 trace_rdev_suspend(&rdev->wiphy, rdev->wowlan); 13 trace_rdev_suspend(&rdev->wiphy, wowlan);
13 ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); 14 ret = rdev->ops->suspend(&rdev->wiphy, wowlan);
14 trace_rdev_return_int(&rdev->wiphy, ret); 15 trace_rdev_return_int(&rdev->wiphy, ret);
15 return ret; 16 return ret;
16} 17}
@@ -874,7 +875,7 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev,
874 trace_rdev_stop_p2p_device(&rdev->wiphy, wdev); 875 trace_rdev_stop_p2p_device(&rdev->wiphy, wdev);
875 rdev->ops->stop_p2p_device(&rdev->wiphy, wdev); 876 rdev->ops->stop_p2p_device(&rdev->wiphy, wdev);
876 trace_rdev_return_void(&rdev->wiphy); 877 trace_rdev_return_void(&rdev->wiphy);
877} 878}
878 879
879static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, 880static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
880 struct net_device *dev, 881 struct net_device *dev,
@@ -887,4 +888,39 @@ static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
887 trace_rdev_return_int(&rdev->wiphy, ret); 888 trace_rdev_return_int(&rdev->wiphy, ret);
888 return ret; 889 return ret;
889} 890}
891
892static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev,
893 struct net_device *dev,
894 struct cfg80211_update_ft_ies_params *ftie)
895{
896 int ret;
897
898 trace_rdev_update_ft_ies(&rdev->wiphy, dev, ftie);
899 ret = rdev->ops->update_ft_ies(&rdev->wiphy, dev, ftie);
900 trace_rdev_return_int(&rdev->wiphy, ret);
901 return ret;
902}
903
904static inline int rdev_crit_proto_start(struct cfg80211_registered_device *rdev,
905 struct wireless_dev *wdev,
906 enum nl80211_crit_proto_id protocol,
907 u16 duration)
908{
909 int ret;
910
911 trace_rdev_crit_proto_start(&rdev->wiphy, wdev, protocol, duration);
912 ret = rdev->ops->crit_proto_start(&rdev->wiphy, wdev,
913 protocol, duration);
914 trace_rdev_return_int(&rdev->wiphy, ret);
915 return ret;
916}
917
918static inline void rdev_crit_proto_stop(struct cfg80211_registered_device *rdev,
919 struct wireless_dev *wdev)
920{
921 trace_rdev_crit_proto_stop(&rdev->wiphy, wdev);
922 rdev->ops->crit_proto_stop(&rdev->wiphy, wdev);
923 trace_rdev_return_void(&rdev->wiphy);
924}
925
890#endif /* __CFG80211_RDEV_OPS */ 926#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 98532c00242d..cc35fbaa4578 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -184,14 +184,14 @@ static const struct ieee80211_regdomain world_regdom = {
184 NL80211_RRF_NO_IBSS | 184 NL80211_RRF_NO_IBSS |
185 NL80211_RRF_NO_OFDM), 185 NL80211_RRF_NO_OFDM),
186 /* IEEE 802.11a, channel 36..48 */ 186 /* IEEE 802.11a, channel 36..48 */
187 REG_RULE(5180-10, 5240+10, 40, 6, 20, 187 REG_RULE(5180-10, 5240+10, 80, 6, 20,
188 NL80211_RRF_PASSIVE_SCAN | 188 NL80211_RRF_PASSIVE_SCAN |
189 NL80211_RRF_NO_IBSS), 189 NL80211_RRF_NO_IBSS),
190 190
191 /* NB: 5260 MHz - 5700 MHz requies DFS */ 191 /* NB: 5260 MHz - 5700 MHz requires DFS */
192 192
193 /* IEEE 802.11a, channel 149..165 */ 193 /* IEEE 802.11a, channel 149..165 */
194 REG_RULE(5745-10, 5825+10, 40, 6, 20, 194 REG_RULE(5745-10, 5825+10, 80, 6, 20,
195 NL80211_RRF_PASSIVE_SCAN | 195 NL80211_RRF_PASSIVE_SCAN |
196 NL80211_RRF_NO_IBSS), 196 NL80211_RRF_NO_IBSS),
197 197
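For orientation, REG_RULE() takes (start MHz, end MHz, max bandwidth MHz, max antenna gain dBi, max EIRP dBm, flags); the change above lifts the two 5 GHz world-regdomain rules from 40 to 80 MHz, which is what permits 80 MHz (VHT80) operation under the world regulatory domain:

/* REG_RULE(start, end, max_bw, max_ant_gain, max_eirp, flags) */
REG_RULE(5180-10, 5240+10, 80, 6, 20,
	 NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS),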
@@ -855,7 +855,7 @@ static void handle_channel(struct wiphy *wiphy,
855 return; 855 return;
856 856
857 REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq); 857 REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq);
858 chan->flags = IEEE80211_CHAN_DISABLED; 858 chan->flags |= IEEE80211_CHAN_DISABLED;
859 return; 859 return;
860 } 860 }
861 861
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 09d994d192ff..a9dc5c736df0 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -160,7 +160,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
160{ 160{
161 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); 161 struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
162 struct cfg80211_connect_params *params; 162 struct cfg80211_connect_params *params;
163 const u8 *prev_bssid = NULL; 163 struct cfg80211_assoc_request req = {};
164 int err; 164 int err;
165 165
166 ASSERT_WDEV_LOCK(wdev); 166 ASSERT_WDEV_LOCK(wdev);
@@ -187,16 +187,20 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
187 BUG_ON(!rdev->ops->assoc); 187 BUG_ON(!rdev->ops->assoc);
188 wdev->conn->state = CFG80211_CONN_ASSOCIATING; 188 wdev->conn->state = CFG80211_CONN_ASSOCIATING;
189 if (wdev->conn->prev_bssid_valid) 189 if (wdev->conn->prev_bssid_valid)
190 prev_bssid = wdev->conn->prev_bssid; 190 req.prev_bssid = wdev->conn->prev_bssid;
191 err = __cfg80211_mlme_assoc(rdev, wdev->netdev, 191 req.ie = params->ie;
192 params->channel, params->bssid, 192 req.ie_len = params->ie_len;
193 prev_bssid, 193 req.use_mfp = params->mfp != NL80211_MFP_NO;
194 params->ssid, params->ssid_len, 194 req.crypto = params->crypto;
195 params->ie, params->ie_len, 195 req.flags = params->flags;
196 params->mfp != NL80211_MFP_NO, 196 req.ht_capa = params->ht_capa;
197 &params->crypto, 197 req.ht_capa_mask = params->ht_capa_mask;
198 params->flags, &params->ht_capa, 198 req.vht_capa = params->vht_capa;
199 &params->ht_capa_mask); 199 req.vht_capa_mask = params->vht_capa_mask;
200
201 err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel,
202 params->bssid, params->ssid,
203 params->ssid_len, &req);
200 if (err) 204 if (err)
201 __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, 205 __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
202 NULL, 0, 206 NULL, 0,
@@ -224,6 +228,7 @@ void cfg80211_conn_work(struct work_struct *work)
224 rtnl_lock(); 228 rtnl_lock();
225 cfg80211_lock_rdev(rdev); 229 cfg80211_lock_rdev(rdev);
226 mutex_lock(&rdev->devlist_mtx); 230 mutex_lock(&rdev->devlist_mtx);
231 mutex_lock(&rdev->sched_scan_mtx);
227 232
228 list_for_each_entry(wdev, &rdev->wdev_list, list) { 233 list_for_each_entry(wdev, &rdev->wdev_list, list) {
229 wdev_lock(wdev); 234 wdev_lock(wdev);
@@ -231,7 +236,7 @@ void cfg80211_conn_work(struct work_struct *work)
231 wdev_unlock(wdev); 236 wdev_unlock(wdev);
232 continue; 237 continue;
233 } 238 }
234 if (wdev->sme_state != CFG80211_SME_CONNECTING) { 239 if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) {
235 wdev_unlock(wdev); 240 wdev_unlock(wdev);
236 continue; 241 continue;
237 } 242 }
@@ -248,6 +253,7 @@ void cfg80211_conn_work(struct work_struct *work)
248 wdev_unlock(wdev); 253 wdev_unlock(wdev);
249 } 254 }
250 255
256 mutex_unlock(&rdev->sched_scan_mtx);
251 mutex_unlock(&rdev->devlist_mtx); 257 mutex_unlock(&rdev->devlist_mtx);
252 cfg80211_unlock_rdev(rdev); 258 cfg80211_unlock_rdev(rdev);
253 rtnl_unlock(); 259 rtnl_unlock();
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 238ee49b3868..8f28b9f798d8 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -83,6 +83,14 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
83 return 0; 83 return 0;
84} 84}
85 85
86static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
87{
88 struct wireless_dev *wdev;
89
90 list_for_each_entry(wdev, &rdev->wdev_list, list)
91 cfg80211_leave(rdev, wdev);
92}
93
86static int wiphy_suspend(struct device *dev, pm_message_t state) 94static int wiphy_suspend(struct device *dev, pm_message_t state)
87{ 95{
88 struct cfg80211_registered_device *rdev = dev_to_rdev(dev); 96 struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
@@ -90,12 +98,19 @@ static int wiphy_suspend(struct device *dev, pm_message_t state)
90 98
91 rdev->suspend_at = get_seconds(); 99 rdev->suspend_at = get_seconds();
92 100
93 if (rdev->ops->suspend) { 101 rtnl_lock();
94 rtnl_lock(); 102 if (rdev->wiphy.registered) {
95 if (rdev->wiphy.registered) 103 if (!rdev->wowlan)
96 ret = rdev_suspend(rdev); 104 cfg80211_leave_all(rdev);
97 rtnl_unlock(); 105 if (rdev->ops->suspend)
106 ret = rdev_suspend(rdev, rdev->wowlan);
107 if (ret == 1) {
                                                                        108			/* Driver refused to configure wowlan */
109 cfg80211_leave_all(rdev);
110 ret = rdev_suspend(rdev, NULL);
111 }
98 } 112 }
113 rtnl_unlock();
99 114
100 return ret; 115 return ret;
101} 116}
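The reworked wiphy_suspend() gives drivers a three-way contract: return 0 to suspend with the given WoWLAN configuration, a negative error to fail, or 1 to refuse the configuration, in which case cfg80211 tears down all interfaces via cfg80211_leave_all() and retries with wowlan == NULL. A driver-side sketch, where fw_supports_wowlan() is a hypothetical capability check:

static int drv_suspend(struct wiphy *wiphy, struct cfg80211_wowlan *wowlan)
{
	if (wowlan && !fw_supports_wowlan(wowlan))
		return 1;	/* have cfg80211 drop connections, retry without WoWLAN */

	/* ... program firmware for the (possibly NULL) wowlan config ... */
	return 0;
}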
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 7586de77a2f8..ecd4fcec3c94 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1786,6 +1786,61 @@ TRACE_EVENT(rdev_set_mac_acl,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy)
 );
 
+TRACE_EVENT(rdev_update_ft_ies,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_update_ft_ies_params *ftie),
+	TP_ARGS(wiphy, netdev, ftie),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__field(u16, md)
+		__dynamic_array(u8, ie, ftie->ie_len)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		__entry->md = ftie->md;
+		memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md)
+);
+
+TRACE_EVENT(rdev_crit_proto_start,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 enum nl80211_crit_proto_id protocol, u16 duration),
+	TP_ARGS(wiphy, wdev, protocol, duration),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u16, proto)
+		__field(u16, duration)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->proto = protocol;
+		__entry->duration = duration;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", proto=%x, duration=%u",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->proto, __entry->duration)
+);
+
+TRACE_EVENT(rdev_crit_proto_stop,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+	TP_ARGS(wiphy, wdev),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT,
+		  WIPHY_PR_ARG, WDEV_PR_ARG)
+);
+
 /*************************************************************
  * cfg80211 exported functions traces *
  *************************************************************/
@@ -2414,6 +2469,32 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup,
 	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
 );
 
+TRACE_EVENT(cfg80211_ft_event,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_ft_event_params *ft_event),
+	TP_ARGS(wiphy, netdev, ft_event),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__dynamic_array(u8, ies, ft_event->ies_len)
+		MAC_ENTRY(target_ap)
+		__dynamic_array(u8, ric_ies, ft_event->ric_ies_len)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		if (ft_event->ies)
+			memcpy(__get_dynamic_array(ies), ft_event->ies,
+			       ft_event->ies_len);
+		MAC_ASSIGN(target_ap, ft_event->target_ap);
+		if (ft_event->ric_ies)
+			memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies,
+			       ft_event->ric_ies_len);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: " MAC_PR_FMT,
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap))
+);
+
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
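
The four events added above all follow the same tracepoint shape: fixed-size fields declared with __field(), variable-length payloads reserved with __dynamic_array() and filled in TP_fast_assign(), and a TP_printk() that formats only the fixed fields. A minimal sketch of that shape; demo_event and its arguments are hypothetical, not part of this patch:

TRACE_EVENT(demo_event,
	TP_PROTO(const u8 *buf, size_t len),
	TP_ARGS(buf, len),
	TP_STRUCT__entry(
		__field(size_t, len)
		__dynamic_array(u8, buf, len)	/* reserves len bytes in the trace record */
	),
	TP_fast_assign(
		__entry->len = len;
		memcpy(__get_dynamic_array(buf), buf, len);
	),
	TP_printk("len=%zu", __entry->len)
);
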
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 37a56ee1e1ed..f5ad4d94ba88 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -511,7 +511,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
 		encaps_data = bridge_tunnel_header;
 		encaps_len = sizeof(bridge_tunnel_header);
 		skip_header_bytes -= 2;
-	} else if (ethertype > 0x600) {
+	} else if (ethertype >= ETH_P_802_3_MIN) {
 		encaps_data = rfc1042_header;
 		encaps_len = sizeof(rfc1042_header);
 		skip_header_bytes -= 2;
@@ -1155,6 +1155,26 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
 }
 EXPORT_SYMBOL(cfg80211_get_p2p_attr);
 
+bool ieee80211_operating_class_to_band(u8 operating_class,
+				       enum ieee80211_band *band)
+{
+	switch (operating_class) {
+	case 112:
+	case 115 ... 127:
+		*band = IEEE80211_BAND_5GHZ;
+		return true;
+	case 81:
+	case 82:
+	case 83:
+	case 84:
+		*band = IEEE80211_BAND_2GHZ;
+		return true;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(ieee80211_operating_class_to_band);
+
 int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
 				 u32 beacon_int)
 {
@@ -1258,12 +1278,12 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
 	list_for_each_entry(wdev_iter, &rdev->wdev_list, list) {
 		if (wdev_iter == wdev)
 			continue;
-		if (wdev_iter->netdev) {
-			if (!netif_running(wdev_iter->netdev))
-				continue;
-		} else if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) {
+		if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) {
 			if (!wdev_iter->p2p_started)
 				continue;
+		} else if (wdev_iter->netdev) {
+			if (!netif_running(wdev_iter->netdev))
+				continue;
 		} else {
 			WARN_ON(1);
 		}
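
A sketch of how a caller might use the newly exported helper to validate a peer-supplied operating class before touching channel state; op_class is an arbitrary value pulled from an IE and the surrounding function is hypothetical, not part of this patch:

static int check_peer_op_class(u8 op_class)
{
	enum ieee80211_band band;

	/* Reject operating classes the helper cannot map to a band. */
	if (!ieee80211_operating_class_to_band(op_class, &band))
		return -EINVAL;

	/* band is now IEEE80211_BAND_2GHZ or IEEE80211_BAND_5GHZ. */
	return 0;
}
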
diff --git a/net/x25/x25_proc.c b/net/x25/x25_proc.c
index 2ffde4631ae2..0917f047f2cf 100644
--- a/net/x25/x25_proc.c
+++ b/net/x25/x25_proc.c
@@ -187,7 +187,6 @@ static int x25_seq_forward_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations x25_seq_socket_fops = {
-	.owner		= THIS_MODULE,
 	.open		= x25_seq_socket_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -195,7 +194,6 @@ static const struct file_operations x25_seq_socket_fops = {
 };
 
 static const struct file_operations x25_seq_route_fops = {
-	.owner		= THIS_MODULE,
 	.open		= x25_seq_route_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
@@ -203,55 +201,38 @@ static const struct file_operations x25_seq_route_fops = {
 };
 
 static const struct file_operations x25_seq_forward_fops = {
-	.owner		= THIS_MODULE,
 	.open		= x25_seq_forward_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
 	.release	= seq_release,
 };
 
-static struct proc_dir_entry *x25_proc_dir;
-
 int __init x25_proc_init(void)
 {
-	struct proc_dir_entry *p;
-	int rc = -ENOMEM;
+	if (!proc_mkdir("x25", init_net.proc_net))
+		return -ENOMEM;
 
-	x25_proc_dir = proc_mkdir("x25", init_net.proc_net);
-	if (!x25_proc_dir)
+	if (!proc_create("x25/route", S_IRUGO, init_net.proc_net,
+			 &x25_seq_route_fops))
 		goto out;
 
-	p = proc_create("route", S_IRUGO, x25_proc_dir, &x25_seq_route_fops);
-	if (!p)
-		goto out_route;
-
-	p = proc_create("socket", S_IRUGO, x25_proc_dir, &x25_seq_socket_fops);
-	if (!p)
-		goto out_socket;
+	if (!proc_create("x25/socket", S_IRUGO, init_net.proc_net,
+			 &x25_seq_socket_fops))
+		goto out;
 
-	p = proc_create("forward", S_IRUGO, x25_proc_dir,
-			&x25_seq_forward_fops);
-	if (!p)
-		goto out_forward;
-	rc = 0;
+	if (!proc_create("x25/forward", S_IRUGO, init_net.proc_net,
+			 &x25_seq_forward_fops))
+		goto out;
+	return 0;
 
 out:
-	return rc;
-out_forward:
-	remove_proc_entry("socket", x25_proc_dir);
-out_socket:
-	remove_proc_entry("route", x25_proc_dir);
-out_route:
-	remove_proc_entry("x25", init_net.proc_net);
-	goto out;
+	remove_proc_subtree("x25", init_net.proc_net);
+	return -ENOMEM;
 }
 
 void __exit x25_proc_exit(void)
 {
-	remove_proc_entry("forward", x25_proc_dir);
-	remove_proc_entry("route", x25_proc_dir);
-	remove_proc_entry("socket", x25_proc_dir);
-	remove_proc_entry("x25", init_net.proc_net);
+	remove_proc_subtree("x25", init_net.proc_net);
 }
 
 #else /* CONFIG_PROC_FS */
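
The rewrite above leans on two proc API conveniences: proc_create() accepts a path relative to the given parent ("x25/route"), so the private x25_proc_dir pointer can go away, and remove_proc_subtree() tears a directory down recursively, collapsing the old four-label error unwind into a single exit path. The same shape applied to a hypothetical "foo" module; foo_stats_fops is assumed to be defined elsewhere:

static int __init foo_proc_init(void)
{
	if (!proc_mkdir("foo", init_net.proc_net))
		return -ENOMEM;

	/* Entries are created by relative path under the net proc root. */
	if (!proc_create("foo/stats", S_IRUGO, init_net.proc_net,
			 &foo_stats_fops))
		goto out;
	return 0;

out:
	/* One call removes the directory and everything under it. */
	remove_proc_subtree("foo", init_net.proc_net);
	return -ENOMEM;
}
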
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 6fb9d00a75dc..ab4ef72f0b1d 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -311,6 +311,19 @@ static struct xfrm_algo_desc aalg_list[] = {
 		.sadb_alg_maxbits = 128
 	}
 },
+{
+	/* rfc4494 */
+	.name = "cmac(aes)",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 128,
+		}
+	},
+
+	.pfkey_supported = 0,
+},
 };
 
 static struct xfrm_algo_desc ealg_list[] = {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 167c67d46c6a..23cea0f74336 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1037,6 +1037,24 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
 	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 }
 
+static int flow_to_policy_dir(int dir)
+{
+	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
+	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
+	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
+		return dir;
+
+	switch (dir) {
+	default:
+	case FLOW_DIR_IN:
+		return XFRM_POLICY_IN;
+	case FLOW_DIR_OUT:
+		return XFRM_POLICY_OUT;
+	case FLOW_DIR_FWD:
+		return XFRM_POLICY_FWD;
+	}
+}
+
 static struct flow_cache_object *
 xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
 		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
@@ -1046,7 +1064,7 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
 	if (old_obj)
 		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 
-	pol = __xfrm_policy_lookup(net, fl, family, dir);
+	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
 	if (IS_ERR_OR_NULL(pol))
 		return ERR_CAST(pol);
 
@@ -1932,7 +1950,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 	 * previous cache entry */
 	if (xdst == NULL) {
 		num_pols = 1;
-		pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
+		pols[0] = __xfrm_policy_lookup(net, fl, family,
+					       flow_to_policy_dir(dir));
 		err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
 		if (err < 0)
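
flow_to_policy_dir() costs nothing in the common case: the FLOW_DIR_* and XFRM_POLICY_* values are compile-time constants, so the leading comparison is evaluated by the compiler and, whenever the two enum spaces coincide, the whole helper folds down to a plain return. A standalone sketch of the idiom with hypothetical enums (the casts just silence mixed-enum comparison warnings):

enum a_dir { A_IN, A_OUT };
enum b_dir { B_IN, B_OUT };

static int a_to_b(int dir)
{
	/* Constant-folded: when the enums line up, this is always taken
	 * and the switch below is dead code the compiler removes. */
	if ((int)A_IN == (int)B_IN && (int)A_OUT == (int)B_OUT)
		return dir;

	switch (dir) {
	default:
	case A_IN:
		return B_IN;
	case A_OUT:
		return B_OUT;
	}
}
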
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 2c341bdaf47c..78f66fa92449 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1187,6 +1187,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 		goto error;
 
 	x->props.flags = orig->props.flags;
+	x->props.extra_flags = orig->props.extra_flags;
 
 	x->curlft.add_time = orig->curlft.add_time;
 	x->km.state = orig->km.state;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index fbd9e6cd0fd7..aa778748c565 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	copy_from_user_state(x, p);
 
+	if (attrs[XFRMA_SA_EXTRA_FLAGS])
+		x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
+
 	if ((err = attach_aead(&x->aead, &x->props.ealgo,
 			       attrs[XFRMA_ALG_AEAD])))
 		goto error;
@@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 
 	copy_to_user_state(x, p);
 
+	if (x->props.extra_flags) {
+		ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS,
+				  x->props.extra_flags);
+		if (ret)
+			goto out;
+	}
+
 	if (x->coaddr) {
 		ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
 		if (ret)
@@ -2302,9 +2312,10 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) },
 	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
 	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
+	[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 },
 };
 
-static struct xfrm_link {
+static const struct xfrm_link {
 	int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
 	int (*dump)(struct sk_buff *, struct netlink_callback *);
 	int (*done)(struct netlink_callback *);
@@ -2338,7 +2349,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *attrs[XFRMA_MAX+1];
-	struct xfrm_link *link;
+	const struct xfrm_link *link;
 	int type, err;
 
 	type = nlh->nlmsg_type;
@@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 				    x->security->ctx_len);
 	if (x->coaddr)
 		l += nla_total_size(sizeof(*x->coaddr));
+	if (x->props.extra_flags)
+		l += nla_total_size(sizeof(x->props.extra_flags));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size(sizeof(u64));
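
Taken together, the xfrm_user.c hunks show the full checklist for plumbing a new netlink attribute through: miss any one of the four touch points and the attribute is rejected, silently dropped on dump, or overflows the sized skb. In sketch form, using the real names from this patch; these are fragments lifted from their surrounding functions, not a compilable unit:

/* 1. Validation policy, so nla_parse() accepts and type-checks it. */
[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 },

/* 2. Parse on receive. */
if (attrs[XFRMA_SA_EXTRA_FLAGS])
	x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);

/* 3. Emit on dump. */
if (x->props.extra_flags) {
	ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS, x->props.extra_flags);
	if (ret)
		goto out;
}

/* 4. Account for it when sizing the message. */
if (x->props.extra_flags)
	l += nla_total_size(sizeof(x->props.extra_flags));
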