Diffstat (limited to 'net')
-rw-r--r-- net/802/garp.c | 4
-rw-r--r-- net/8021q/vlan.c | 21
-rw-r--r-- net/8021q/vlan.h | 13
-rw-r--r-- net/8021q/vlan_core.c | 32
-rw-r--r-- net/8021q/vlan_dev.c | 182
-rw-r--r-- net/8021q/vlanproc.c | 16
-rw-r--r-- net/9p/client.c | 463
-rw-r--r-- net/9p/protocol.c | 80
-rw-r--r-- net/9p/trans_fd.c | 2
-rw-r--r-- net/9p/trans_rdma.c | 1
-rw-r--r-- net/9p/trans_virtio.c | 6
-rw-r--r-- net/Kconfig | 21
-rw-r--r-- net/Makefile | 7
-rw-r--r-- net/appletalk/ddp.c | 2
-rw-r--r-- net/atm/br2684.c | 67
-rw-r--r-- net/atm/clip.c | 2
-rw-r--r-- net/atm/common.c | 60
-rw-r--r-- net/atm/lec.c | 6
-rw-r--r-- net/atm/mpc.c | 32
-rw-r--r-- net/atm/mpoa_caches.c | 20
-rw-r--r-- net/atm/pppoatm.c | 2
-rw-r--r-- net/atm/proc.c | 10
-rw-r--r-- net/atm/signaling.c | 2
-rw-r--r-- net/atm/svc.c | 62
-rw-r--r-- net/ax25/af_ax25.c | 8
-rw-r--r-- net/bluetooth/af_bluetooth.c | 6
-rw-r--r-- net/bluetooth/bnep/bnep.h | 8
-rw-r--r-- net/bluetooth/bnep/core.c | 8
-rw-r--r-- net/bluetooth/bnep/netdev.c | 22
-rw-r--r-- net/bluetooth/cmtp/cmtp.h | 2
-rw-r--r-- net/bluetooth/cmtp/core.c | 4
-rw-r--r-- net/bluetooth/hci_conn.c | 39
-rw-r--r-- net/bluetooth/hci_core.c | 231
-rw-r--r-- net/bluetooth/hci_event.c | 41
-rw-r--r-- net/bluetooth/hci_sock.c | 88
-rw-r--r-- net/bluetooth/hci_sysfs.c | 71
-rw-r--r-- net/bluetooth/hidp/core.c | 10
-rw-r--r-- net/bluetooth/hidp/hidp.h | 4
-rw-r--r-- net/bluetooth/l2cap.c | 1546
-rw-r--r-- net/bluetooth/rfcomm/sock.c | 10
-rw-r--r-- net/bluetooth/rfcomm/tty.c | 4
-rw-r--r-- net/bluetooth/sco.c | 31
-rw-r--r-- net/bridge/Kconfig | 6
-rw-r--r-- net/bridge/br.c | 4
-rw-r--r-- net/bridge/br_device.c | 160
-rw-r--r-- net/bridge/br_fdb.c | 21
-rw-r--r-- net/bridge/br_forward.c | 45
-rw-r--r-- net/bridge/br_if.c | 37
-rw-r--r-- net/bridge/br_input.c | 36
-rw-r--r-- net/bridge/br_ioctl.c | 2
-rw-r--r-- net/bridge/br_multicast.c | 696
-rw-r--r-- net/bridge/br_netfilter.c | 326
-rw-r--r-- net/bridge/br_netlink.c | 17
-rw-r--r-- net/bridge/br_notify.c | 16
-rw-r--r-- net/bridge/br_private.h | 100
-rw-r--r-- net/bridge/br_stp.c | 11
-rw-r--r-- net/bridge/br_stp_bpdu.c | 9
-rw-r--r-- net/bridge/br_stp_if.c | 16
-rw-r--r-- net/bridge/br_stp_timer.c | 24
-rw-r--r-- net/bridge/br_sysfs_br.c | 74
-rw-r--r-- net/bridge/br_sysfs_if.c | 32
-rw-r--r-- net/bridge/netfilter/ebt_802_3.c | 8
-rw-r--r-- net/bridge/netfilter/ebt_among.c | 27
-rw-r--r-- net/bridge/netfilter/ebt_arp.c | 10
-rw-r--r-- net/bridge/netfilter/ebt_arpreply.c | 10
-rw-r--r-- net/bridge/netfilter/ebt_dnat.c | 12
-rw-r--r-- net/bridge/netfilter/ebt_ip.c | 18
-rw-r--r-- net/bridge/netfilter/ebt_ip6.c | 39
-rw-r--r-- net/bridge/netfilter/ebt_limit.c | 11
-rw-r--r-- net/bridge/netfilter/ebt_log.c | 10
-rw-r--r-- net/bridge/netfilter/ebt_mark.c | 12
-rw-r--r-- net/bridge/netfilter/ebt_mark_m.c | 12
-rw-r--r-- net/bridge/netfilter/ebt_nflog.c | 8
-rw-r--r-- net/bridge/netfilter/ebt_pkttype.c | 8
-rw-r--r-- net/bridge/netfilter/ebt_redirect.c | 15
-rw-r--r-- net/bridge/netfilter/ebt_snat.c | 12
-rw-r--r-- net/bridge/netfilter/ebt_stp.c | 10
-rw-r--r-- net/bridge/netfilter/ebt_ulog.c | 46
-rw-r--r-- net/bridge/netfilter/ebt_vlan.c | 54
-rw-r--r-- net/bridge/netfilter/ebtables.c | 67
-rw-r--r-- net/caif/Kconfig | 42
-rw-r--r-- net/caif/Makefile | 16
-rw-r--r-- net/caif/caif_config_util.c | 92
-rw-r--r-- net/caif/caif_dev.c | 421
-rw-r--r-- net/caif/caif_socket.c | 1244
-rw-r--r-- net/caif/cfcnfg.c | 504
-rw-r--r-- net/caif/cfctrl.c | 652
-rw-r--r-- net/caif/cfdbgl.c | 40
-rw-r--r-- net/caif/cfdgml.c | 113
-rw-r--r-- net/caif/cffrml.c | 151
-rw-r--r-- net/caif/cfmuxl.c | 252
-rw-r--r-- net/caif/cfpkt_skbuff.c | 579
-rw-r--r-- net/caif/cfrfml.c | 310
-rw-r--r-- net/caif/cfserl.c | 196
-rw-r--r-- net/caif/cfsrvl.c | 210
-rw-r--r-- net/caif/cfutill.c | 109
-rw-r--r-- net/caif/cfveil.c | 102
-rw-r--r-- net/caif/cfvidl.c | 65
-rw-r--r-- net/caif/chnl_net.c | 514
-rw-r--r-- net/can/bcm.c | 2
-rw-r--r-- net/can/raw.c | 15
-rw-r--r-- net/compat.c | 53
-rw-r--r-- net/core/Makefile | 4
-rw-r--r-- net/core/datagram.c | 31
-rw-r--r-- net/core/dev.c | 1850
-rw-r--r-- net/core/dev_addr_lists.c | 741
-rw-r--r-- net/core/dev_mcast.c | 232
-rw-r--r-- net/core/drop_monitor.c | 45
-rw-r--r-- net/core/dst.c | 45
-rw-r--r-- net/core/ethtool.c | 287
-rw-r--r-- net/core/fib_rules.c | 31
-rw-r--r-- net/core/filter.c | 219
-rw-r--r-- net/core/flow.c | 408
-rw-r--r-- net/core/gen_estimator.c | 16
-rw-r--r-- net/core/gen_stats.c | 14
-rw-r--r-- net/core/iovec.c | 9
-rw-r--r-- net/core/link_watch.c | 1
-rw-r--r-- net/core/neighbour.c | 6
-rw-r--r-- net/core/net-sysfs.c | 396
-rw-r--r-- net/core/net-sysfs.h | 1
-rw-r--r-- net/core/net_namespace.c | 95
-rw-r--r-- net/core/netevent.c | 5
-rw-r--r-- net/core/netpoll.c | 186
-rw-r--r-- net/core/pktgen.c | 270
-rw-r--r-- net/core/rtnetlink.c | 384
-rw-r--r-- net/core/scm.c | 33
-rw-r--r-- net/core/skbuff.c | 124
-rw-r--r-- net/core/sock.c | 179
-rw-r--r-- net/core/stream.c | 28
-rw-r--r-- net/core/sysctl_net_core.c | 75
-rw-r--r-- net/core/timestamping.c | 126
-rw-r--r-- net/core/utils.c | 3
-rw-r--r-- net/dccp/ackvec.c | 4
-rw-r--r-- net/dccp/ccids/ccid3.c | 6
-rw-r--r-- net/dccp/dccp.h | 16
-rw-r--r-- net/dccp/input.c | 21
-rw-r--r-- net/dccp/ipv4.c | 6
-rw-r--r-- net/dccp/ipv6.c | 37
-rw-r--r-- net/dccp/options.c | 22
-rw-r--r-- net/dccp/output.c | 18
-rw-r--r-- net/dccp/proto.c | 16
-rw-r--r-- net/dccp/timer.c | 4
-rw-r--r-- net/decnet/af_decnet.c | 32
-rw-r--r-- net/decnet/dn_dev.c | 15
-rw-r--r-- net/decnet/dn_neigh.c | 9
-rw-r--r-- net/decnet/dn_nsp_in.c | 3
-rw-r--r-- net/decnet/dn_route.c | 187
-rw-r--r-- net/decnet/dn_rules.c | 22
-rw-r--r-- net/dns_resolver/Kconfig | 27
-rw-r--r-- net/dns_resolver/Makefile | 7
-rw-r--r-- net/dns_resolver/dns_key.c | 211
-rw-r--r-- net/dns_resolver/dns_query.c | 160
-rw-r--r-- net/dns_resolver/internal.h | 44
-rw-r--r-- net/dsa/Kconfig | 2
-rw-r--r-- net/dsa/slave.c | 17
-rw-r--r-- net/econet/af_econet.c | 27
-rw-r--r-- net/ethernet/eth.c | 9
-rw-r--r-- net/ethernet/pe2.c | 3
-rw-r--r-- net/ieee802154/wpan-class.c | 7
-rw-r--r-- net/ipv4/Kconfig | 32
-rw-r--r-- net/ipv4/af_inet.c | 137
-rw-r--r-- net/ipv4/arp.c | 59
-rw-r--r-- net/ipv4/cipso_ipv4.c | 2
-rw-r--r-- net/ipv4/datagram.c | 4
-rw-r--r-- net/ipv4/devinet.c | 5
-rw-r--r-- net/ipv4/fib_frontend.c | 13
-rw-r--r-- net/ipv4/fib_rules.c | 22
-rw-r--r-- net/ipv4/fib_trie.c | 2
-rw-r--r-- net/ipv4/icmp.c | 48
-rw-r--r-- net/ipv4/igmp.c | 36
-rw-r--r-- net/ipv4/inet_connection_sock.c | 31
-rw-r--r-- net/ipv4/inet_fragment.c | 1
-rw-r--r-- net/ipv4/inet_hashtables.c | 6
-rw-r--r-- net/ipv4/inetpeer.c | 244
-rw-r--r-- net/ipv4/ip_forward.c | 12
-rw-r--r-- net/ipv4/ip_fragment.c | 27
-rw-r--r-- net/ipv4/ip_gre.c | 26
-rw-r--r-- net/ipv4/ip_input.c | 34
-rw-r--r-- net/ipv4/ip_options.c | 10
-rw-r--r-- net/ipv4/ip_output.c | 118
-rw-r--r-- net/ipv4/ip_sockglue.c | 65
-rw-r--r-- net/ipv4/ipconfig.c | 9
-rw-r--r-- net/ipv4/ipip.c | 15
-rw-r--r-- net/ipv4/ipmr.c | 942
-rw-r--r-- net/ipv4/netfilter.c | 18
-rw-r--r-- net/ipv4/netfilter/arp_tables.c | 124
-rw-r--r-- net/ipv4/netfilter/arpt_mangle.c | 4
-rw-r--r-- net/ipv4/netfilter/ip_queue.c | 61
-rw-r--r-- net/ipv4/netfilter/ip_tables.c | 276
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 127
-rw-r--r-- net/ipv4/netfilter/ipt_ECN.c | 23
-rw-r--r-- net/ipv4/netfilter/ipt_LOG.c | 73
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c | 18
-rw-r--r-- net/ipv4/netfilter/ipt_NETMAP.c | 22
-rw-r--r-- net/ipv4/netfilter/ipt_REDIRECT.c | 16
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c | 31
-rw-r--r-- net/ipv4/netfilter/ipt_ULOG.c | 47
-rw-r--r-- net/ipv4/netfilter/ipt_addrtype.c | 28
-rw-r--r-- net/ipv4/netfilter/ipt_ah.c | 28
-rw-r--r-- net/ipv4/netfilter/ipt_ecn.c | 19
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c | 2
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 10
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 7
-rw-r--r-- net/ipv4/netfilter/nf_defrag_ipv4.c | 5
-rw-r--r-- net/ipv4/netfilter/nf_nat_core.c | 29
-rw-r--r-- net/ipv4/netfilter/nf_nat_h323.c | 17
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_common.c | 12
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_dccp.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_gre.c | 12
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_icmp.c | 10
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_sctp.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_tcp.c | 5
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_udp.c | 5
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_udplite.c | 6
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_unknown.c | 4
-rw-r--r-- net/ipv4/netfilter/nf_nat_rule.c | 31
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c | 16
-rw-r--r-- net/ipv4/netfilter/nf_nat_standalone.c | 17
-rw-r--r-- net/ipv4/netfilter/nf_nat_tftp.c | 1
-rw-r--r-- net/ipv4/proc.c | 17
-rw-r--r-- net/ipv4/protocol.c | 3
-rw-r--r-- net/ipv4/raw.c | 26
-rw-r--r-- net/ipv4/route.c | 660
-rw-r--r-- net/ipv4/syncookies.c | 107
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c | 17
-rw-r--r-- net/ipv4/tcp.c | 132
-rw-r--r-- net/ipv4/tcp_hybla.c | 4
-rw-r--r-- net/ipv4/tcp_input.c | 38
-rw-r--r-- net/ipv4/tcp_ipv4.c | 221
-rw-r--r-- net/ipv4/tcp_minisocks.c | 10
-rw-r--r-- net/ipv4/tcp_output.c | 105
-rw-r--r-- net/ipv4/tcp_timer.c | 9
-rw-r--r-- net/ipv4/tunnel4.c | 2
-rw-r--r-- net/ipv4/udp.c | 58
-rw-r--r-- net/ipv4/udplite.c | 3
-rw-r--r-- net/ipv4/xfrm4_input.c | 7
-rw-r--r-- net/ipv4/xfrm4_output.c | 2
-rw-r--r-- net/ipv4/xfrm4_policy.c | 26
-rw-r--r-- net/ipv6/Kconfig | 14
-rw-r--r-- net/ipv6/addrconf.c | 951
-rw-r--r-- net/ipv6/addrlabel.c | 14
-rw-r--r-- net/ipv6/af_inet6.c | 35
-rw-r--r-- net/ipv6/anycast.c | 96
-rw-r--r-- net/ipv6/datagram.c | 134
-rw-r--r-- net/ipv6/exthdrs.c | 34
-rw-r--r-- net/ipv6/fib6_rules.c | 13
-rw-r--r-- net/ipv6/icmp.c | 7
-rw-r--r-- net/ipv6/inet6_connection_sock.c | 13
-rw-r--r-- net/ipv6/ip6_fib.c | 46
-rw-r--r-- net/ipv6/ip6_flowlabel.c | 3
-rw-r--r-- net/ipv6/ip6_input.c | 4
-rw-r--r-- net/ipv6/ip6_output.c | 141
-rw-r--r-- net/ipv6/ip6_tunnel.c | 16
-rw-r--r-- net/ipv6/ip6mr.c | 946
-rw-r--r-- net/ipv6/ipv6_sockglue.c | 88
-rw-r--r-- net/ipv6/mcast.c | 334
-rw-r--r-- net/ipv6/mip6.c | 3
-rw-r--r-- net/ipv6/ndisc.c | 16
-rw-r--r-- net/ipv6/netfilter.c | 29
-rw-r--r-- net/ipv6/netfilter/ip6_queue.c | 61
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c | 257
-rw-r--r-- net/ipv6/netfilter/ip6t_LOG.c | 101
-rw-r--r-- net/ipv6/netfilter/ip6t_REJECT.c | 38
-rw-r--r-- net/ipv6/netfilter/ip6t_ah.c | 18
-rw-r--r-- net/ipv6/netfilter/ip6t_eui64.c | 4
-rw-r--r-- net/ipv6/netfilter/ip6t_frag.c | 18
-rw-r--r-- net/ipv6/netfilter/ip6t_hbh.c | 33
-rw-r--r-- net/ipv6/netfilter/ip6t_ipv6header.c | 8
-rw-r--r-- net/ipv6/netfilter/ip6t_mh.c | 21
-rw-r--r-- net/ipv6/netfilter/ip6t_rt.c | 20
-rw-r--r-- net/ipv6/netfilter/ip6table_filter.c | 2
-rw-r--r-- net/ipv6/netfilter/ip6table_mangle.c | 2
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 14
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c | 22
-rw-r--r-- net/ipv6/proc.c | 19
-rw-r--r-- net/ipv6/raw.c | 48
-rw-r--r-- net/ipv6/reassembly.c | 21
-rw-r--r-- net/ipv6/route.c | 323
-rw-r--r-- net/ipv6/sit.c | 24
-rw-r--r-- net/ipv6/syncookies.c | 58
-rw-r--r-- net/ipv6/tcp_ipv6.c | 115
-rw-r--r-- net/ipv6/udp.c | 53
-rw-r--r-- net/ipv6/xfrm6_input.c | 2
-rw-r--r-- net/ipv6/xfrm6_output.c | 4
-rw-r--r-- net/ipv6/xfrm6_policy.c | 33
-rw-r--r-- net/irda/af_irda.c | 14
-rw-r--r-- net/irda/ircomm/ircomm_param.c | 2
-rw-r--r-- net/irda/iriap.c | 2
-rw-r--r-- net/irda/irnet/irnet_irda.c | 3
-rw-r--r-- net/irda/irnet/irnet_ppp.c | 12
-rw-r--r-- net/irda/irttp.c | 14
-rw-r--r-- net/iucv/af_iucv.c | 24
-rw-r--r-- net/iucv/iucv.c | 23
-rw-r--r-- net/key/af_key.c | 10
-rw-r--r-- net/l2tp/Kconfig | 107
-rw-r--r-- net/l2tp/Makefile | 12
-rw-r--r-- net/l2tp/l2tp_core.c | 1666
-rw-r--r-- net/l2tp/l2tp_core.h | 304
-rw-r--r-- net/l2tp/l2tp_debugfs.c | 341
-rw-r--r-- net/l2tp/l2tp_eth.c | 334
-rw-r--r-- net/l2tp/l2tp_ip.c | 679
-rw-r--r-- net/l2tp/l2tp_netlink.c | 840
-rw-r--r-- net/l2tp/l2tp_ppp.c | 1840
-rw-r--r-- net/llc/af_llc.c | 12
-rw-r--r-- net/llc/llc_core.c | 6
-rw-r--r-- net/llc/llc_sap.c | 2
-rw-r--r-- net/mac80211/Kconfig | 25
-rw-r--r-- net/mac80211/Makefile | 7
-rw-r--r-- net/mac80211/agg-rx.c | 153
-rw-r--r-- net/mac80211/agg-tx.c | 566
-rw-r--r-- net/mac80211/cfg.c | 230
-rw-r--r-- net/mac80211/chan.c | 127
-rw-r--r-- net/mac80211/debugfs.c | 154
-rw-r--r-- net/mac80211/debugfs.h | 1
-rw-r--r-- net/mac80211/debugfs_key.c | 2
-rw-r--r-- net/mac80211/debugfs_netdev.c | 12
-rw-r--r-- net/mac80211/debugfs_sta.c | 132
-rw-r--r-- net/mac80211/driver-ops.h | 125
-rw-r--r-- net/mac80211/driver-trace.h | 525
-rw-r--r-- net/mac80211/ht.c | 55
-rw-r--r-- net/mac80211/ibss.c | 217
-rw-r--r-- net/mac80211/ieee80211_i.h | 130
-rw-r--r-- net/mac80211/iface.c | 314
-rw-r--r-- net/mac80211/key.c | 295
-rw-r--r-- net/mac80211/key.h | 33
-rw-r--r-- net/mac80211/main.c | 152
-rw-r--r-- net/mac80211/mesh.c | 79
-rw-r--r-- net/mac80211/mesh.h | 4
-rw-r--r-- net/mac80211/mesh_hwmp.c | 9
-rw-r--r-- net/mac80211/mesh_pathtbl.c | 4
-rw-r--r-- net/mac80211/mesh_plink.c | 44
-rw-r--r-- net/mac80211/mlme.c | 565
-rw-r--r-- net/mac80211/pm.c | 20
-rw-r--r-- net/mac80211/rate.h | 13
-rw-r--r-- net/mac80211/rc80211_minstrel.c | 3
-rw-r--r-- net/mac80211/rc80211_minstrel.h | 11
-rw-r--r-- net/mac80211/rc80211_minstrel_debugfs.c | 41
-rw-r--r-- net/mac80211/rc80211_minstrel_ht.c | 827
-rw-r--r-- net/mac80211/rc80211_minstrel_ht.h | 130
-rw-r--r-- net/mac80211/rc80211_minstrel_ht_debugfs.c | 118
-rw-r--r-- net/mac80211/rx.c | 265
-rw-r--r-- net/mac80211/scan.c | 144
-rw-r--r-- net/mac80211/sta_info.c | 131
-rw-r--r-- net/mac80211/sta_info.h | 123
-rw-r--r-- net/mac80211/status.c | 27
-rw-r--r-- net/mac80211/tkip.c | 8
-rw-r--r-- net/mac80211/tkip.h | 2
-rw-r--r-- net/mac80211/tx.c | 134
-rw-r--r-- net/mac80211/util.c | 70
-rw-r--r-- net/mac80211/wep.c | 29
-rw-r--r-- net/mac80211/wep.h | 2
-rw-r--r-- net/mac80211/work.c | 105
-rw-r--r-- net/mac80211/wpa.c | 13
-rw-r--r-- net/netfilter/Kconfig | 202
-rw-r--r-- net/netfilter/Makefile | 13
-rw-r--r-- net/netfilter/ipvs/Kconfig | 11
-rw-r--r-- net/netfilter/ipvs/ip_vs_app.c | 43
-rw-r--r-- net/netfilter/ipvs/ip_vs_conn.c | 59
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c | 57
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c | 10
-rw-r--r-- net/netfilter/ipvs/ip_vs_ftp.c | 186
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblc.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto.c | 29
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 14
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 55
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 50
-rw-r--r-- net/netfilter/ipvs/ip_vs_proto_udp.c | 56
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 131
-rw-r--r-- net/netfilter/nf_conntrack_acct.c | 14
-rw-r--r-- net/netfilter/nf_conntrack_amanda.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 65
-rw-r--r-- net/netfilter/nf_conntrack_ecache.c | 12
-rw-r--r-- net/netfilter/nf_conntrack_extend.c | 22
-rw-r--r-- net/netfilter/nf_conntrack_ftp.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_h323_main.c | 21
-rw-r--r-- net/netfilter/nf_conntrack_irc.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_netbios_ns.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 32
-rw-r--r-- net/netfilter/nf_conntrack_proto.c | 8
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 42
-rw-r--r-- net/netfilter/nf_conntrack_sip.c | 16
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c | 9
-rw-r--r-- net/netfilter/nf_conntrack_tftp.c | 4
-rw-r--r-- net/netfilter/nf_internals.h | 2
-rw-r--r-- net/netfilter/nf_log.c | 6
-rw-r--r-- net/netfilter/nf_queue.c | 3
-rw-r--r-- net/netfilter/nfnetlink.c | 7
-rw-r--r-- net/netfilter/nfnetlink_log.c | 77
-rw-r--r-- net/netfilter/nfnetlink_queue.c | 42
-rw-r--r-- net/netfilter/x_tables.c | 117
-rw-r--r-- net/netfilter/xt_CHECKSUM.c | 70
-rw-r--r-- net/netfilter/xt_CLASSIFY.c | 2
-rw-r--r-- net/netfilter/xt_CONNMARK.c | 113
-rw-r--r-- net/netfilter/xt_CONNSECMARK.c | 29
-rw-r--r-- net/netfilter/xt_CT.c | 29
-rw-r--r-- net/netfilter/xt_DSCP.c | 18
-rw-r--r-- net/netfilter/xt_HL.c | 30
-rw-r--r-- net/netfilter/xt_IDLETIMER.c | 315
-rw-r--r-- net/netfilter/xt_LED.c | 93
-rw-r--r-- net/netfilter/xt_MARK.c | 56
-rw-r--r-- net/netfilter/xt_NFLOG.c | 10
-rw-r--r-- net/netfilter/xt_NFQUEUE.c | 50
-rw-r--r-- net/netfilter/xt_NOTRACK.c | 4
-rw-r--r-- net/netfilter/xt_RATEEST.c | 32
-rw-r--r-- net/netfilter/xt_SECMARK.c | 48
-rw-r--r-- net/netfilter/xt_TCPMSS.c | 49
-rw-r--r-- net/netfilter/xt_TCPOPTSTRIP.c | 7
-rw-r--r-- net/netfilter/xt_TEE.c | 309
-rw-r--r-- net/netfilter/xt_TPROXY.c | 18
-rw-r--r-- net/netfilter/xt_TRACE.c | 2
-rw-r--r-- net/netfilter/xt_cluster.c | 23
-rw-r--r-- net/netfilter/xt_comment.c | 2
-rw-r--r-- net/netfilter/xt_connbytes.c | 30
-rw-r--r-- net/netfilter/xt_connlimit.c | 24
-rw-r--r-- net/netfilter/xt_connmark.c | 104
-rw-r--r-- net/netfilter/xt_conntrack.c | 34
-rw-r--r-- net/netfilter/xt_cpu.c | 63
-rw-r--r-- net/netfilter/xt_dccp.c | 18
-rw-r--r-- net/netfilter/xt_dscp.c | 18
-rw-r--r-- net/netfilter/xt_esp.c | 28
-rw-r--r-- net/netfilter/xt_hashlimit.c | 346
-rw-r--r-- net/netfilter/xt_helper.c | 18
-rw-r--r-- net/netfilter/xt_hl.c | 16
-rw-r--r-- net/netfilter/xt_iprange.c | 5
-rw-r--r-- net/netfilter/xt_ipvs.c | 189
-rw-r--r-- net/netfilter/xt_length.c | 4
-rw-r--r-- net/netfilter/xt_limit.c | 15
-rw-r--r-- net/netfilter/xt_mac.c | 23
-rw-r--r-- net/netfilter/xt_mark.c | 37
-rw-r--r-- net/netfilter/xt_multiport.c | 103
-rw-r--r-- net/netfilter/xt_osf.c | 12
-rw-r--r-- net/netfilter/xt_owner.c | 2
-rw-r--r-- net/netfilter/xt_physdev.c | 18
-rw-r--r-- net/netfilter/xt_pkttype.c | 2
-rw-r--r-- net/netfilter/xt_policy.c | 31
-rw-r--r-- net/netfilter/xt_quota.c | 22
-rw-r--r-- net/netfilter/xt_rateest.c | 10
-rw-r--r-- net/netfilter/xt_realm.c | 2
-rw-r--r-- net/netfilter/xt_recent.c | 189
-rw-r--r-- net/netfilter/xt_sctp.c | 60
-rw-r--r-- net/netfilter/xt_socket.c | 13
-rw-r--r-- net/netfilter/xt_state.c | 64
-rw-r--r-- net/netfilter/xt_statistic.c | 33
-rw-r--r-- net/netfilter/xt_string.c | 68
-rw-r--r-- net/netfilter/xt_tcpmss.c | 4
-rw-r--r-- net/netfilter/xt_tcpudp.c | 38
-rw-r--r-- net/netfilter/xt_time.c | 16
-rw-r--r-- net/netfilter/xt_u32.c | 5
-rw-r--r-- net/netlabel/netlabel_addrlist.h | 2
-rw-r--r-- net/netlabel/netlabel_unlabeled.c | 1
-rw-r--r-- net/netlink/af_netlink.c | 65
-rw-r--r-- net/netlink/genetlink.c | 21
-rw-r--r-- net/netrom/af_netrom.c | 8
-rw-r--r-- net/packet/af_packet.c | 106
-rw-r--r-- net/phonet/pep.c | 17
-rw-r--r-- net/phonet/pn_dev.c | 38
-rw-r--r-- net/phonet/socket.c | 2
-rw-r--r-- net/rds/af_rds.c | 11
-rw-r--r-- net/rds/cong.c | 2
-rw-r--r-- net/rds/ib_cm.c | 4
-rw-r--r-- net/rds/ib_rdma.c | 5
-rw-r--r-- net/rds/ib_recv.c | 4
-rw-r--r-- net/rds/ib_send.c | 20
-rw-r--r-- net/rds/iw_cm.c | 5
-rw-r--r-- net/rds/iw_recv.c | 4
-rw-r--r-- net/rds/iw_send.c | 3
-rw-r--r-- net/rds/loop.c | 7
-rw-r--r-- net/rds/rdma.c | 4
-rw-r--r-- net/rds/rdma_transport.c | 5
-rw-r--r-- net/rds/rds.h | 4
-rw-r--r-- net/rds/recv.c | 2
-rw-r--r-- net/rds/send.c | 40
-rw-r--r-- net/rds/tcp_connect.c | 2
-rw-r--r-- net/rds/tcp_recv.c | 1
-rw-r--r-- net/rds/tcp_send.c | 4
-rw-r--r-- net/rds/threads.c | 2
-rw-r--r-- net/rfkill/core.c | 53
-rw-r--r-- net/rose/af_rose.c | 8
-rw-r--r-- net/rose/rose_route.c | 4
-rw-r--r-- net/rxrpc/af_rxrpc.c | 12
-rw-r--r-- net/rxrpc/ar-ack.c | 3
-rw-r--r-- net/rxrpc/ar-call.c | 6
-rw-r--r-- net/rxrpc/ar-peer.c | 4
-rw-r--r-- net/rxrpc/ar-recvmsg.c | 6
-rw-r--r-- net/sched/act_api.c | 76
-rw-r--r-- net/sched/act_gact.c | 4
-rw-r--r-- net/sched/act_ipt.c | 9
-rw-r--r-- net/sched/act_mirred.c | 59
-rw-r--r-- net/sched/act_nat.c | 62
-rw-r--r-- net/sched/act_pedit.c | 38
-rw-r--r-- net/sched/act_police.c | 12
-rw-r--r-- net/sched/act_simple.c | 8
-rw-r--r-- net/sched/cls_api.c | 30
-rw-r--r-- net/sched/cls_cgroup.c | 50
-rw-r--r-- net/sched/cls_flow.c | 97
-rw-r--r-- net/sched/cls_rsvp.h | 12
-rw-r--r-- net/sched/cls_u32.c | 59
-rw-r--r-- net/sched/ematch.c | 3
-rw-r--r-- net/sched/sch_api.c | 135
-rw-r--r-- net/sched/sch_atm.c | 98
-rw-r--r-- net/sched/sch_generic.c | 44
-rw-r--r-- net/sched/sch_hfsc.c | 7
-rw-r--r-- net/sched/sch_htb.c | 2
-rw-r--r-- net/sched/sch_ingress.c | 1
-rw-r--r-- net/sched/sch_mq.c | 1
-rw-r--r-- net/sched/sch_multiq.c | 1
-rw-r--r-- net/sched/sch_prio.c | 1
-rw-r--r-- net/sched/sch_red.c | 1
-rw-r--r-- net/sched/sch_sfq.c | 46
-rw-r--r-- net/sched/sch_tbf.c | 6
-rw-r--r-- net/sched/sch_teql.c | 1
-rw-r--r-- net/sctp/Kconfig | 12
-rw-r--r-- net/sctp/Makefile | 3
-rw-r--r-- net/sctp/associola.c | 17
-rw-r--r-- net/sctp/chunk.c | 4
-rw-r--r-- net/sctp/endpointola.c | 2
-rw-r--r-- net/sctp/input.c | 22
-rw-r--r-- net/sctp/ipv6.c | 27
-rw-r--r-- net/sctp/output.c | 27
-rw-r--r-- net/sctp/outqueue.c | 96
-rw-r--r-- net/sctp/probe.c | 214
-rw-r--r-- net/sctp/proc.c | 3
-rw-r--r-- net/sctp/protocol.c | 18
-rw-r--r-- net/sctp/sm_make_chunk.c | 30
-rw-r--r-- net/sctp/sm_sideeffect.c | 47
-rw-r--r-- net/sctp/socket.c | 41
-rw-r--r-- net/sctp/transport.c | 67
-rw-r--r-- net/sctp/ulpqueue.c | 2
-rw-r--r-- net/socket.c | 309
-rw-r--r-- net/sunrpc/auth.c | 178
-rw-r--r-- net/sunrpc/auth_generic.c | 23
-rw-r--r-- net/sunrpc/auth_gss/Makefile | 2
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c | 118
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 697
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_keys.c | 336
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_mech.c | 584
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_seal.c | 155
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_seqnum.c | 83
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_unseal.c | 113
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_wrap.c | 404
-rw-r--r-- net/sunrpc/auth_gss/gss_mech_switch.c | 21
-rw-r--r-- net/sunrpc/auth_gss/gss_spkm3_mech.c | 5
-rw-r--r-- net/sunrpc/auth_gss/gss_spkm3_token.c | 2
-rw-r--r-- net/sunrpc/auth_gss/svcauth_gss.c | 17
-rw-r--r-- net/sunrpc/auth_null.c | 2
-rw-r--r-- net/sunrpc/auth_unix.c | 21
-rw-r--r-- net/sunrpc/bc_svc.c | 2
-rw-r--r-- net/sunrpc/cache.c | 79
-rw-r--r-- net/sunrpc/clnt.c | 196
-rw-r--r-- net/sunrpc/rpc_pipe.c | 18
-rw-r--r-- net/sunrpc/rpcb_clnt.c | 2
-rw-r--r-- net/sunrpc/sched.c | 130
-rw-r--r-- net/sunrpc/stats.c | 29
-rw-r--r-- net/sunrpc/sunrpc_syms.c | 16
-rw-r--r-- net/sunrpc/svc_xprt.c | 6
-rw-r--r-- net/sunrpc/svcsock.c | 40
-rw-r--r-- net/sunrpc/xdr.c | 1
-rw-r--r-- net/sunrpc/xprt.c | 69
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 3
-rw-r--r-- net/sunrpc/xprtrdma/transport.c | 31
-rw-r--r-- net/sunrpc/xprtsock.c | 111
-rw-r--r-- net/sysctl_net.c | 1
-rw-r--r-- net/tipc/addr.c | 32
-rw-r--r-- net/tipc/addr.h | 37
-rw-r--r-- net/tipc/bcast.c | 149
-rw-r--r-- net/tipc/bcast.h | 117
-rw-r--r-- net/tipc/bearer.c | 16
-rw-r--r-- net/tipc/bearer.h | 16
-rw-r--r-- net/tipc/cluster.c | 2
-rw-r--r-- net/tipc/config.c | 68
-rw-r--r-- net/tipc/core.c | 26
-rw-r--r-- net/tipc/core.h | 27
-rw-r--r-- net/tipc/discover.c | 8
-rw-r--r-- net/tipc/link.c | 102
-rw-r--r-- net/tipc/link.h | 35
-rw-r--r-- net/tipc/msg.c | 94
-rw-r--r-- net/tipc/msg.h | 99
-rw-r--r-- net/tipc/name_distr.c | 2
-rw-r--r-- net/tipc/name_table.c | 2
-rw-r--r-- net/tipc/net.c | 8
-rw-r--r-- net/tipc/node.c | 14
-rw-r--r-- net/tipc/port.c | 27
-rw-r--r-- net/tipc/port.h | 2
-rw-r--r-- net/tipc/socket.c | 26
-rw-r--r-- net/tipc/subscr.c | 15
-rw-r--r-- net/unix/af_unix.c | 124
-rw-r--r-- net/unix/garbage.c | 13
-rw-r--r-- net/wanrouter/wanmain.c | 7
-rw-r--r-- net/wanrouter/wanproc.c | 7
-rw-r--r-- net/wimax/op-reset.c | 2
-rw-r--r-- net/wimax/op-rfkill.c | 2
-rw-r--r-- net/wimax/op-state-get.c | 2
-rw-r--r-- net/wimax/stack.c | 4
-rw-r--r-- net/wireless/chan.c | 61
-rw-r--r-- net/wireless/core.c | 68
-rw-r--r-- net/wireless/core.h | 28
-rw-r--r-- net/wireless/genregdb.awk | 1
-rw-r--r-- net/wireless/ibss.c | 9
-rw-r--r-- net/wireless/lib80211_crypt_ccmp.c | 1
-rw-r--r-- net/wireless/lib80211_crypt_tkip.c | 3
-rw-r--r-- net/wireless/lib80211_crypt_wep.c | 1
-rw-r--r-- net/wireless/mlme.c | 68
-rw-r--r-- net/wireless/nl80211.c | 421
-rw-r--r-- net/wireless/nl80211.h | 6
-rw-r--r-- net/wireless/reg.c | 674
-rw-r--r-- net/wireless/reg.h | 2
-rw-r--r-- net/wireless/scan.c | 9
-rw-r--r-- net/wireless/sme.c | 38
-rw-r--r-- net/wireless/util.c | 28
-rw-r--r-- net/wireless/wext-compat.c | 26
-rw-r--r-- net/wireless/wext-core.c | 134
-rw-r--r-- net/wireless/wext-sme.c | 2
-rw-r--r-- net/x25/af_x25.c | 42
-rw-r--r-- net/x25/x25_dev.c | 36
-rw-r--r-- net/x25/x25_in.c | 2
-rw-r--r-- net/x25/x25_out.c | 5
-rw-r--r-- net/xfrm/xfrm_hash.h | 9
-rw-r--r-- net/xfrm/xfrm_output.c | 4
-rw-r--r-- net/xfrm/xfrm_policy.c | 864
-rw-r--r-- net/xfrm/xfrm_state.c | 5
-rw-r--r-- net/xfrm/xfrm_user.c | 22
625 files changed, 38147 insertions(+), 14570 deletions(-)
diff --git a/net/802/garp.c b/net/802/garp.c
index 9ed7c0e7dc17..941f2a324d3a 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -576,7 +576,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
         if (!app)
                 goto err2;
 
-        err = dev_mc_add(dev, appl->proto.group_address, ETH_ALEN, 0);
+        err = dev_mc_add(dev, appl->proto.group_address);
         if (err < 0)
                 goto err3;
 
@@ -616,7 +616,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
         garp_pdu_queue(app);
         garp_queue_xmit(app);
 
-        dev_mc_delete(dev, appl->proto.group_address, ETH_ALEN, 0);
+        dev_mc_del(dev, appl->proto.group_address);
         kfree(app);
         garp_release_port(dev);
 }
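
The two garp.c hunks above show the shape of the reworked address-list API: dev_mc_add()/dev_mc_del() now take only the device and the address, with the length and global-use arguments handled inside net/core/dev_addr_lists.c. A minimal sketch of a caller before and after; the example function names and group address are illustrative, not from this patch:

#include <linux/netdevice.h>
#include <linux/etherdevice.h>

/* Hypothetical protocol joining a link-local multicast group. */
static const u8 example_group[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x21 };

static int example_attach(struct net_device *dev)
{
        /* Before this series: dev_mc_add(dev, example_group, ETH_ALEN, 0); */
        return dev_mc_add(dev, example_group);
}

static void example_detach(struct net_device *dev)
{
        /* Before this series: dev_mc_delete(dev, example_group, ETH_ALEN, 0); */
        dev_mc_del(dev, example_group);
}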
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 97da977c2a23..a2ad15250575 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -155,9 +155,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
         BUG_ON(!grp);
 
         /* Take it out of our own structures, but be sure to interlock with
-         * HW accelerating devices or SW vlan input packet processing.
+         * HW accelerating devices or SW vlan input packet processing if
+         * VLAN is not 0 (leave it there for 802.1p).
          */
-        if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+        if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
                 ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
 
         grp->nr_vlans--;
@@ -357,13 +358,13 @@ static void vlan_sync_address(struct net_device *dev,
          * the new address */
         if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
             !compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
-                dev_unicast_delete(dev, vlandev->dev_addr);
+                dev_uc_del(dev, vlandev->dev_addr);
 
         /* vlan address was equal to the old address and is different from
          * the new address */
         if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
             compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
-                dev_unicast_add(dev, vlandev->dev_addr);
+                dev_uc_add(dev, vlandev->dev_addr);
 
         memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
 }
@@ -419,6 +420,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
         if (is_vlan_dev(dev))
                 __vlan_device_event(dev, event);
 
+        if ((event == NETDEV_UP) &&
+            (dev->features & NETIF_F_HW_VLAN_FILTER) &&
+            dev->netdev_ops->ndo_vlan_rx_add_vid) {
+                pr_info("8021q: adding VLAN 0 to HW filter on device %s\n",
+                        dev->name);
+                dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
+        }
+
         grp = __vlan_find_group(dev);
         if (!grp)
                 goto out;
@@ -533,6 +542,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
                 }
                 unregister_netdevice_many(&list);
                 break;
+
+        case NETDEV_PRE_TYPE_CHANGE:
+                /* Forbid underlaying device to change its type. */
+                return NOTIFY_BAD;
         }
 
 out:
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 6abdcac1b2e8..8d9503ad01da 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -2,6 +2,7 @@
 #define __BEN_VLAN_802_1Q_INC__
 
 #include <linux/if_vlan.h>
+#include <linux/u64_stats_sync.h>
 
 
 /**
@@ -21,14 +22,16 @@ struct vlan_priority_tci_mapping {
  * struct vlan_rx_stats - VLAN percpu rx stats
  * @rx_packets: number of received packets
  * @rx_bytes: number of received bytes
- * @multicast: number of received multicast packets
+ * @rx_multicast: number of received multicast packets
+ * @syncp: synchronization point for 64bit counters
  * @rx_errors: number of errors
  */
 struct vlan_rx_stats {
-        unsigned long rx_packets;
-        unsigned long rx_bytes;
-        unsigned long multicast;
-        unsigned long rx_errors;
+        u64 rx_packets;
+        u64 rx_bytes;
+        u64 rx_multicast;
+        struct u64_stats_sync syncp;
+        unsigned long rx_errors;
 };
 
 /**
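
The syncp member added above is the standard u64_stats_sync pairing used throughout this series: writers bracket their updates, readers retry until they observe an unchanged sequence, so 64-bit counters stay consistent on 32-bit machines without atomics. A minimal sketch of both sides, assuming a per-cpu stats structure like the one in this header (names are illustrative):

#include <linux/u64_stats_sync.h>

struct example_stats {
        u64 packets;
        struct u64_stats_sync syncp;
};

/* Writer side, e.g. the rx softirq bumping its own cpu's counters. */
static void example_account(struct example_stats *s)
{
        u64_stats_update_begin(&s->syncp);
        s->packets++;
        u64_stats_update_end(&s->syncp);
}

/* Reader side: loop until a consistent 64-bit snapshot is seen. */
static u64 example_snapshot(struct example_stats *s)
{
        unsigned int start;
        u64 packets;

        do {
                start = u64_stats_fetch_begin_bh(&s->syncp);
                packets = s->packets;
        } while (u64_stats_fetch_retry_bh(&s->syncp, start));
        return packets;
}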
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index c584a0af77d3..01ddb0472f86 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -8,17 +8,23 @@
 int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
                       u16 vlan_tci, int polling)
 {
+        struct net_device *vlan_dev;
+        u16 vlan_id;
+
         if (netpoll_rx(skb))
                 return NET_RX_DROP;
 
         if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
-                goto drop;
+                skb->deliver_no_wcard = 1;
 
         skb->skb_iif = skb->dev->ifindex;
         __vlan_hwaccel_put_tag(skb, vlan_tci);
-        skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
+        vlan_id = vlan_tci & VLAN_VID_MASK;
+        vlan_dev = vlan_group_get_device(grp, vlan_id);
 
-        if (!skb->dev)
+        if (vlan_dev)
+                skb->dev = vlan_dev;
+        else if (vlan_id)
                 goto drop;
 
         return (polling ? netif_receive_skb(skb) : netif_rx(skb));
@@ -41,9 +47,9 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
         skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci);
         skb->vlan_tci = 0;
 
-        rx_stats = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats,
-                               smp_processor_id());
+        rx_stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats);
 
+        u64_stats_update_begin(&rx_stats->syncp);
         rx_stats->rx_packets++;
         rx_stats->rx_bytes += skb->len;
 
@@ -51,7 +57,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
         case PACKET_BROADCAST:
                 break;
         case PACKET_MULTICAST:
-                rx_stats->multicast++;
+                rx_stats->rx_multicast++;
                 break;
         case PACKET_OTHERHOST:
                 /* Our lower layer thinks this is not local, let's make sure.
@@ -61,7 +67,8 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
                             dev->dev_addr))
                         skb->pkt_type = PACKET_HOST;
                 break;
-        };
+        }
+        u64_stats_update_end(&rx_stats->syncp);
         return 0;
 }
 
@@ -82,15 +89,20 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
                 unsigned int vlan_tci, struct sk_buff *skb)
 {
         struct sk_buff *p;
+        struct net_device *vlan_dev;
+        u16 vlan_id;
 
         if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
-                goto drop;
+                skb->deliver_no_wcard = 1;
 
         skb->skb_iif = skb->dev->ifindex;
         __vlan_hwaccel_put_tag(skb, vlan_tci);
-        skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
+        vlan_id = vlan_tci & VLAN_VID_MASK;
+        vlan_dev = vlan_group_get_device(grp, vlan_id);
 
-        if (!skb->dev)
+        if (vlan_dev)
+                skb->dev = vlan_dev;
+        else if (vlan_id)
                 goto drop;
 
         for (p = napi->gro_list; p; p = p->next) {
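
The rewritten receive paths above stop treating a missing vlan device as an unconditional drop: VID 0 marks a priority-tagged (802.1p) frame rather than a real VLAN, so it stays on the underlying device. A condensed sketch of the decision, not a literal excerpt; the helper name is illustrative:

/* Condensed form of the delivery decision in __vlan_hwaccel_rx() and
 * vlan_gro_common() after this change.
 */
static bool vlan_rx_deliver(struct sk_buff *skb, struct net_device *vlan_dev,
                            u16 vlan_tci)
{
        u16 vlan_id = vlan_tci & VLAN_VID_MASK;

        if (vlan_dev) {
                skb->dev = vlan_dev;    /* real VLAN: switch to the vlan device */
                return true;
        }
        /* VID 0 is 802.1p priority tagging: keep the frame on the
         * original device instead of dropping it.
         */
        return vlan_id == 0;
}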
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 29b6348c8d4d..3d59c9bf8feb 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -142,6 +142,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 {
         struct vlan_hdr *vhdr;
         struct vlan_rx_stats *rx_stats;
+        struct net_device *vlan_dev;
         u16 vlan_id;
         u16 vlan_tci;
 
@@ -157,53 +158,71 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
         vlan_id = vlan_tci & VLAN_VID_MASK;
 
         rcu_read_lock();
-        skb->dev = __find_vlan_dev(dev, vlan_id);
-        if (!skb->dev) {
-                pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
-                         __func__, vlan_id, dev->name);
-                goto err_unlock;
-        }
-
-        rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
-                               smp_processor_id());
-        rx_stats->rx_packets++;
-        rx_stats->rx_bytes += skb->len;
-
-        skb_pull_rcsum(skb, VLAN_HLEN);
-
-        skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
-
-        pr_debug("%s: priority: %u for TCI: %hu\n",
-                 __func__, skb->priority, vlan_tci);
-
-        switch (skb->pkt_type) {
-        case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
-                /* stats->broadcast ++; // no such counter :-( */
-                break;
+        vlan_dev = __find_vlan_dev(dev, vlan_id);
 
-        case PACKET_MULTICAST:
-                rx_stats->multicast++;
-                break;
+        /* If the VLAN device is defined, we use it.
+         * If not, and the VID is 0, it is a 802.1p packet (not
+         * really a VLAN), so we will just netif_rx it later to the
+         * original interface, but with the skb->proto set to the
+         * wrapped proto: we do nothing here.
+         */
 
-        case PACKET_OTHERHOST:
-                /* Our lower layer thinks this is not local, let's make sure.
-                 * This allows the VLAN to have a different MAC than the
-                 * underlying device, and still route correctly.
-                 */
-                if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-                                        skb->dev->dev_addr))
-                        skb->pkt_type = PACKET_HOST;
-                break;
-        default:
-                break;
+        if (!vlan_dev) {
+                if (vlan_id) {
+                        pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
+                                 __func__, vlan_id, dev->name);
+                        goto err_unlock;
+                }
+                rx_stats = NULL;
+        } else {
+                skb->dev = vlan_dev;
+
+                rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
+                                       smp_processor_id());
+                u64_stats_update_begin(&rx_stats->syncp);
+                rx_stats->rx_packets++;
+                rx_stats->rx_bytes += skb->len;
+
+                skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
+
+                pr_debug("%s: priority: %u for TCI: %hu\n",
+                         __func__, skb->priority, vlan_tci);
+
+                switch (skb->pkt_type) {
+                case PACKET_BROADCAST:
+                        /* Yeah, stats collect these together.. */
+                        /* stats->broadcast ++; // no such counter :-( */
+                        break;
+
+                case PACKET_MULTICAST:
+                        rx_stats->rx_multicast++;
+                        break;
+
+                case PACKET_OTHERHOST:
+                        /* Our lower layer thinks this is not local, let's make
+                         * sure.
+                         * This allows the VLAN to have a different MAC than the
+                         * underlying device, and still route correctly.
+                         */
+                        if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+                                                skb->dev->dev_addr))
+                                skb->pkt_type = PACKET_HOST;
+                        break;
+                default:
+                        break;
+                }
+                u64_stats_update_end(&rx_stats->syncp);
         }
 
+        skb_pull_rcsum(skb, VLAN_HLEN);
         vlan_set_encap_proto(skb, vhdr);
 
-        skb = vlan_check_reorder_header(skb);
-        if (!skb) {
-                rx_stats->rx_errors++;
-                goto err_unlock;
+        if (vlan_dev) {
+                skb = vlan_check_reorder_header(skb);
+                if (!skb) {
+                        rx_stats->rx_errors++;
+                        goto err_unlock;
+                }
         }
 
         netif_rx(skb);
@@ -327,7 +346,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
         len = skb->len;
         ret = dev_queue_xmit(skb);
 
-        if (likely(ret == NET_XMIT_SUCCESS)) {
+        if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
                 txq->tx_packets++;
                 txq->tx_bytes += len;
         } else
@@ -353,7 +372,7 @@ static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
         len = skb->len;
         ret = dev_queue_xmit(skb);
 
-        if (likely(ret == NET_XMIT_SUCCESS)) {
+        if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
                 txq->tx_packets++;
                 txq->tx_bytes += len;
         } else
@@ -470,7 +489,7 @@ static int vlan_dev_open(struct net_device *dev)
                 return -ENETDOWN;
 
         if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
-                err = dev_unicast_add(real_dev, dev->dev_addr);
+                err = dev_uc_add(real_dev, dev->dev_addr);
                 if (err < 0)
                         goto out;
         }
@@ -499,7 +518,7 @@ clear_allmulti:
         dev_set_allmulti(real_dev, -1);
 del_unicast:
         if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-                dev_unicast_delete(real_dev, dev->dev_addr);
+                dev_uc_del(real_dev, dev->dev_addr);
 out:
         netif_carrier_off(dev);
         return err;
@@ -514,14 +533,14 @@ static int vlan_dev_stop(struct net_device *dev)
         vlan_gvrp_request_leave(dev);
 
         dev_mc_unsync(real_dev, dev);
-        dev_unicast_unsync(real_dev, dev);
+        dev_uc_unsync(real_dev, dev);
         if (dev->flags & IFF_ALLMULTI)
                 dev_set_allmulti(real_dev, -1);
         if (dev->flags & IFF_PROMISC)
                 dev_set_promiscuity(real_dev, -1);
 
         if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-                dev_unicast_delete(real_dev, dev->dev_addr);
+                dev_uc_del(real_dev, dev->dev_addr);
 
         netif_carrier_off(dev);
         return 0;
@@ -540,13 +559,13 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
                 goto out;
 
         if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) {
-                err = dev_unicast_add(real_dev, addr->sa_data);
+                err = dev_uc_add(real_dev, addr->sa_data);
                 if (err < 0)
                         return err;
         }
 
         if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-                dev_unicast_delete(real_dev, dev->dev_addr);
+                dev_uc_del(real_dev, dev->dev_addr);
 
 out:
         memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
@@ -663,7 +682,7 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 {
         dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
-        dev_unicast_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
+        dev_uc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
 }
 
 /*
@@ -708,7 +727,8 @@ static int vlan_dev_init(struct net_device *dev)
         netif_carrier_off(dev);
 
         /* IFF_BROADCAST|IFF_MULTICAST; ??? */
-        dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI);
+        dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
+                                         IFF_MASTER | IFF_SLAVE);
         dev->iflink = real_dev->ifindex;
         dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
                                          (1<<__LINK_STATE_DORMANT))) |
@@ -800,37 +820,65 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
         return dev_ethtool_get_flags(vlan->real_dev);
 }
 
-static struct net_device_stats *vlan_dev_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
-        struct net_device_stats *stats = &dev->stats;
-
         dev_txq_stats_fold(dev, stats);
 
         if (vlan_dev_info(dev)->vlan_rx_stats) {
-                struct vlan_rx_stats *p, rx = {0};
+                struct vlan_rx_stats *p, accum = {0};
                 int i;
 
                 for_each_possible_cpu(i) {
+                        u64 rxpackets, rxbytes, rxmulticast;
+                        unsigned int start;
+
                         p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i);
-                        rx.rx_packets += p->rx_packets;
-                        rx.rx_bytes += p->rx_bytes;
-                        rx.rx_errors += p->rx_errors;
-                        rx.multicast += p->multicast;
+                        do {
+                                start = u64_stats_fetch_begin_bh(&p->syncp);
+                                rxpackets = p->rx_packets;
+                                rxbytes = p->rx_bytes;
+                                rxmulticast = p->rx_multicast;
+                        } while (u64_stats_fetch_retry_bh(&p->syncp, start));
+                        accum.rx_packets += rxpackets;
+                        accum.rx_bytes += rxbytes;
+                        accum.rx_multicast += rxmulticast;
+                        /* rx_errors is an ulong, not protected by syncp */
+                        accum.rx_errors += p->rx_errors;
                 }
-                stats->rx_packets = rx.rx_packets;
-                stats->rx_bytes = rx.rx_bytes;
-                stats->rx_errors = rx.rx_errors;
-                stats->multicast = rx.multicast;
+                stats->rx_packets = accum.rx_packets;
+                stats->rx_bytes = accum.rx_bytes;
+                stats->rx_errors = accum.rx_errors;
+                stats->multicast = accum.rx_multicast;
         }
         return stats;
 }
 
+static int vlan_ethtool_set_tso(struct net_device *dev, u32 data)
+{
+        if (data) {
+                struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+
+                /* Underlying device must support TSO for VLAN-tagged packets
+                 * and must have TSO enabled now.
+                 */
+                if (!(real_dev->vlan_features & NETIF_F_TSO))
+                        return -EOPNOTSUPP;
+                if (!(real_dev->features & NETIF_F_TSO))
+                        return -EINVAL;
+                dev->features |= NETIF_F_TSO;
+        } else {
+                dev->features &= ~NETIF_F_TSO;
+        }
+        return 0;
+}
+
 static const struct ethtool_ops vlan_ethtool_ops = {
         .get_settings           = vlan_ethtool_get_settings,
         .get_drvinfo            = vlan_ethtool_get_drvinfo,
         .get_link               = ethtool_op_get_link,
         .get_rx_csum            = vlan_ethtool_get_rx_csum,
         .get_flags              = vlan_ethtool_get_flags,
+        .set_tso                = vlan_ethtool_set_tso,
 };
 
 static const struct net_device_ops vlan_netdev_ops = {
@@ -847,7 +895,7 @@ static const struct net_device_ops vlan_netdev_ops = {
         .ndo_change_rx_flags    = vlan_dev_change_rx_flags,
         .ndo_do_ioctl           = vlan_dev_ioctl,
         .ndo_neigh_setup        = vlan_dev_neigh_setup,
-        .ndo_get_stats          = vlan_dev_get_stats,
+        .ndo_get_stats64        = vlan_dev_get_stats64,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
         .ndo_fcoe_ddp_setup     = vlan_dev_fcoe_ddp_setup,
         .ndo_fcoe_ddp_done      = vlan_dev_fcoe_ddp_done,
@@ -871,7 +919,7 @@ static const struct net_device_ops vlan_netdev_accel_ops = {
         .ndo_change_rx_flags    = vlan_dev_change_rx_flags,
         .ndo_do_ioctl           = vlan_dev_ioctl,
         .ndo_neigh_setup        = vlan_dev_neigh_setup,
-        .ndo_get_stats          = vlan_dev_get_stats,
+        .ndo_get_stats64        = vlan_dev_get_stats64,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
         .ndo_fcoe_ddp_setup     = vlan_dev_fcoe_ddp_setup,
         .ndo_fcoe_ddp_done      = vlan_dev_fcoe_ddp_done,
@@ -896,7 +944,7 @@ static const struct net_device_ops vlan_netdev_ops_sq = {
         .ndo_change_rx_flags    = vlan_dev_change_rx_flags,
         .ndo_do_ioctl           = vlan_dev_ioctl,
         .ndo_neigh_setup        = vlan_dev_neigh_setup,
-        .ndo_get_stats          = vlan_dev_get_stats,
+        .ndo_get_stats64        = vlan_dev_get_stats64,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
         .ndo_fcoe_ddp_setup     = vlan_dev_fcoe_ddp_setup,
         .ndo_fcoe_ddp_done      = vlan_dev_fcoe_ddp_done,
@@ -921,7 +969,7 @@ static const struct net_device_ops vlan_netdev_accel_ops_sq = {
         .ndo_change_rx_flags    = vlan_dev_change_rx_flags,
         .ndo_do_ioctl           = vlan_dev_ioctl,
         .ndo_neigh_setup        = vlan_dev_neigh_setup,
-        .ndo_get_stats          = vlan_dev_get_stats,
+        .ndo_get_stats64        = vlan_dev_get_stats64,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
         .ndo_fcoe_ddp_setup     = vlan_dev_fcoe_ddp_setup,
         .ndo_fcoe_ddp_done      = vlan_dev_fcoe_ddp_done,
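
With the conversion above, all four vlan netdev_ops variants report through ndo_get_stats64, whose contract (in this kernel generation) is to fill the caller-supplied rtnl_link_stats64 and return it. A minimal sketch of such a handler for a hypothetical driver; the counter sources are placeholders:

#include <linux/netdevice.h>

/* Minimal ndo_get_stats64 shape: fill the buffer the core hands in and
 * return it. The per-device counters here are illustrative.
 */
static struct rtnl_link_stats64 *example_get_stats64(struct net_device *dev,
                                struct rtnl_link_stats64 *stats)
{
        stats->rx_packets = 0;  /* fold per-cpu/per-queue counters here */
        stats->tx_packets = 0;
        return stats;
}

static const struct net_device_ops example_netdev_ops = {
        .ndo_get_stats64        = example_get_stats64,
};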
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index afead353e215..80e280f56686 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -278,25 +278,27 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 {
         struct net_device *vlandev = (struct net_device *) seq->private;
         const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
-        const struct net_device_stats *stats;
+        struct rtnl_link_stats64 temp;
+        const struct rtnl_link_stats64 *stats;
         static const char fmt[] = "%30s %12lu\n";
+        static const char fmt64[] = "%30s %12llu\n";
         int i;
 
         if (!is_vlan_dev(vlandev))
                 return 0;
 
-        stats = dev_get_stats(vlandev);
+        stats = dev_get_stats(vlandev, &temp);
         seq_printf(seq,
                    "%s VID: %d REORDER_HDR: %i dev->priv_flags: %hx\n",
                    vlandev->name, dev_info->vlan_id,
                    (int)(dev_info->flags & 1), vlandev->priv_flags);
 
-        seq_printf(seq, fmt, "total frames received", stats->rx_packets);
-        seq_printf(seq, fmt, "total bytes received", stats->rx_bytes);
-        seq_printf(seq, fmt, "Broadcast/Multicast Rcvd", stats->multicast);
+        seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
+        seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
+        seq_printf(seq, fmt64, "Broadcast/Multicast Rcvd", stats->multicast);
         seq_puts(seq, "\n");
-        seq_printf(seq, fmt, "total frames transmitted", stats->tx_packets);
-        seq_printf(seq, fmt, "total bytes transmitted", stats->tx_bytes);
+        seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
+        seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
         seq_printf(seq, fmt, "total headroom inc",
                    dev_info->cnt_inc_headroom_on_tx);
         seq_printf(seq, fmt, "total encap on xmit",
diff --git a/net/9p/client.c b/net/9p/client.c
index 0aa79faa9850..dc6f2f26d023 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -460,7 +460,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
                 return err;
         }
 
-        if (p9_is_proto_dotu(c))
+        if (p9_is_proto_dotu(c) ||
+            p9_is_proto_dotl(c))
                 err = -ecode;
 
         if (!err || !IS_ERR_VALUE(err))
@@ -1015,14 +1016,18 @@ int p9_client_open(struct p9_fid *fid, int mode)
         struct p9_qid qid;
         int iounit;
 
-        P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode);
-        err = 0;
         clnt = fid->clnt;
+        P9_DPRINTK(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
+                   p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
+        err = 0;
 
         if (fid->mode != -1)
                 return -EINVAL;
 
-        req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode);
+        if (p9_is_proto_dotl(clnt))
+                req = p9_client_rpc(clnt, P9_TLOPEN, "dd", fid->fid, mode);
+        else
+                req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode);
         if (IS_ERR(req)) {
                 err = PTR_ERR(req);
                 goto error;
@@ -1034,10 +1039,9 @@ int p9_client_open(struct p9_fid *fid, int mode)
                 goto free_and_error;
         }
 
-        P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n",
-                   qid.type,
-                   (unsigned long long)qid.path,
-                   qid.version, iounit);
+        P9_DPRINTK(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n",
+                   p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type,
+                   (unsigned long long)qid.path, qid.version, iounit);
 
         fid->mode = mode;
         fid->iounit = iounit;
@@ -1049,6 +1053,50 @@ error:
 }
 EXPORT_SYMBOL(p9_client_open);
 
+int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
+                gid_t gid, struct p9_qid *qid)
+{
+        int err = 0;
+        struct p9_client *clnt;
+        struct p9_req_t *req;
+        int iounit;
+
+        P9_DPRINTK(P9_DEBUG_9P,
+                ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n",
+                ofid->fid, name, flags, mode, gid);
+        clnt = ofid->clnt;
+
+        if (ofid->mode != -1)
+                return -EINVAL;
+
+        req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags,
+                mode, gid);
+        if (IS_ERR(req)) {
+                err = PTR_ERR(req);
+                goto error;
+        }
+
+        err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);
+        if (err) {
+                p9pdu_dump(1, req->rc);
+                goto free_and_error;
+        }
+
+        P9_DPRINTK(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n",
+                qid->type,
+                (unsigned long long)qid->path,
+                qid->version, iounit);
+
+        ofid->mode = mode;
+        ofid->iounit = iounit;
+
+free_and_error:
+        p9_free_req(clnt, req);
+error:
+        return err;
+}
+EXPORT_SYMBOL(p9_client_create_dotl);
+
 int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
                 char *extension)
 {
@@ -1094,6 +1142,59 @@ error:
 }
 EXPORT_SYMBOL(p9_client_fcreate);
 
+int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid,
+                struct p9_qid *qid)
+{
+        int err = 0;
+        struct p9_client *clnt;
+        struct p9_req_t *req;
+
+        P9_DPRINTK(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n",
+                dfid->fid, name, symtgt);
+        clnt = dfid->clnt;
+
+        req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt,
+                gid);
+        if (IS_ERR(req)) {
+                err = PTR_ERR(req);
+                goto error;
+        }
+
+        err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
+        if (err) {
+                p9pdu_dump(1, req->rc);
+                goto free_and_error;
+        }
+
+        P9_DPRINTK(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n",
+                qid->type, (unsigned long long)qid->path, qid->version);
+
+free_and_error:
+        p9_free_req(clnt, req);
+error:
+        return err;
+}
+EXPORT_SYMBOL(p9_client_symlink);
+
+int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)
+{
+        struct p9_client *clnt;
+        struct p9_req_t *req;
+
+        P9_DPRINTK(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n",
+                dfid->fid, oldfid->fid, newname);
+        clnt = dfid->clnt;
+        req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid,
+                newname);
+        if (IS_ERR(req))
+                return PTR_ERR(req);
+
+        P9_DPRINTK(P9_DEBUG_9P, "<<< RLINK\n");
+        p9_free_req(clnt, req);
+        return 0;
+}
+EXPORT_SYMBOL(p9_client_link);
+
 int p9_client_clunk(struct p9_fid *fid)
 {
         int err;
@@ -1139,9 +1240,8 @@ int p9_client_remove(struct p9_fid *fid)
         P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);
 
         p9_free_req(clnt, req);
-        p9_fid_destroy(fid);
-
 error:
+        p9_fid_destroy(fid);
         return err;
 }
 EXPORT_SYMBOL(p9_client_remove);
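
The p9_client_remove() hunk above fixes a fid leak: p9_fid_destroy() now sits after the error label, so the fid is released whether or not the RPC succeeds. A simplified sketch of the resulting control flow (debug prints omitted, function renamed to mark it as a sketch):

int example_remove(struct p9_fid *fid)
{
        int err = 0;
        struct p9_client *clnt = fid->clnt;
        struct p9_req_t *req;

        req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid);
        if (IS_ERR(req)) {
                err = PTR_ERR(req);
                goto error;
        }
        p9_free_req(clnt, req);
error:
        p9_fid_destroy(fid);    /* now runs on every path */
        return err;
}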
@@ -1302,6 +1402,65 @@ error:
 }
 EXPORT_SYMBOL(p9_client_stat);
 
+struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
+                u64 request_mask)
+{
+        int err;
+        struct p9_client *clnt;
+        struct p9_stat_dotl *ret = kmalloc(sizeof(struct p9_stat_dotl),
+                GFP_KERNEL);
+        struct p9_req_t *req;
+
+        P9_DPRINTK(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n",
+                fid->fid, request_mask);
+
+        if (!ret)
+                return ERR_PTR(-ENOMEM);
+
+        err = 0;
+        clnt = fid->clnt;
+
+        req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask);
+        if (IS_ERR(req)) {
+                err = PTR_ERR(req);
+                goto error;
+        }
+
+        err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);
+        if (err) {
+                p9pdu_dump(1, req->rc);
+                p9_free_req(clnt, req);
+                goto error;
+        }
+
+        P9_DPRINTK(P9_DEBUG_9P,
+                "<<< RGETATTR st_result_mask=%lld\n"
+                "<<< qid=%x.%llx.%x\n"
+                "<<< st_mode=%8.8x st_nlink=%llu\n"
+                "<<< st_uid=%d st_gid=%d\n"
+                "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n"
+                "<<< st_atime_sec=%lld st_atime_nsec=%lld\n"
+                "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n"
+                "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n"
+                "<<< st_btime_sec=%lld st_btime_nsec=%lld\n"
+                "<<< st_gen=%lld st_data_version=%lld",
+                ret->st_result_mask, ret->qid.type, ret->qid.path,
+                ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid,
+                ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize,
+                ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec,
+                ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec,
+                ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec,
+                ret->st_gen, ret->st_data_version);
+
+        p9_free_req(clnt, req);
+        return ret;
+
+error:
+        kfree(ret);
+        return ERR_PTR(err);
+}
+EXPORT_SYMBOL(p9_client_getattr_dotl);
+
 static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
 {
         int ret;
@@ -1321,7 +1480,8 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
         if (wst->muid)
                 ret += strlen(wst->muid);
 
-        if (proto_version == p9_proto_2000u) {
+        if ((proto_version == p9_proto_2000u) ||
+            (proto_version == p9_proto_2000L)) {
                 ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */
                 if (wst->extension)
                         ret += strlen(wst->extension);
@@ -1364,3 +1524,284 @@ error:
         return err;
 }
 EXPORT_SYMBOL(p9_client_wstat);
+
+int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
+{
+        int err;
+        struct p9_req_t *req;
+        struct p9_client *clnt;
+
+        err = 0;
+        clnt = fid->clnt;
+        P9_DPRINTK(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
+        P9_DPRINTK(P9_DEBUG_9P,
+                "    valid=%x mode=%x uid=%d gid=%d size=%lld\n"
+                "    atime_sec=%lld atime_nsec=%lld\n"
+                "    mtime_sec=%lld mtime_nsec=%lld\n",
+                p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid,
+                p9attr->size, p9attr->atime_sec, p9attr->atime_nsec,
+                p9attr->mtime_sec, p9attr->mtime_nsec);
+
+        req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr);
+
+        if (IS_ERR(req)) {
+                err = PTR_ERR(req);
+                goto error;
+        }
+        P9_DPRINTK(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);
+        p9_free_req(clnt, req);
+error:
+        return err;
+}
+EXPORT_SYMBOL(p9_client_setattr);
+
+int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
+{
+        int err;
+        struct p9_req_t *req;
+        struct p9_client *clnt;
+
+        err = 0;
+        clnt = fid->clnt;
+
+        P9_DPRINTK(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);
+
+        req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid);
+        if (IS_ERR(req)) {
+                err = PTR_ERR(req);
+                goto error;
+        }
+
+        err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
+                &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
+                &sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
+        if (err) {
+                p9pdu_dump(1, req->rc);
+                p9_free_req(clnt, req);
+                goto error;
+        }
+
+        P9_DPRINTK(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld "
+                "blocks %llu bfree %llu bavail %llu files %llu ffree %llu "
+                "fsid %llu namelen %ld\n",
+                fid->fid, (long unsigned int)sb->type, (long int)sb->bsize,
+                sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree,
+                sb->fsid, (long int)sb->namelen);
+
+        p9_free_req(clnt, req);
+error:
+        return err;
+}
+EXPORT_SYMBOL(p9_client_statfs);
+
+int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name)
+{
+        int err;
+        struct p9_req_t *req;
+        struct p9_client *clnt;
+
+        err = 0;
+        clnt = fid->clnt;
+
+        P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
+                fid->fid, newdirfid->fid, name);
+
+        req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid,
+                newdirfid->fid, name);
+        if (IS_ERR(req)) {
+                err = PTR_ERR(req);
+                goto error;
+        }
+
+        P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
+
+        p9_free_req(clnt, req);
+error:
+        return err;
+}
+EXPORT_SYMBOL(p9_client_rename);
+
+/*
+ * An xattrwalk without @attr_name gives the fid for the lisxattr namespace
+ */
+struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
+                const char *attr_name, u64 *attr_size)
+{
+        int err;
+        struct p9_req_t *req;
+        struct p9_client *clnt;
+        struct p9_fid *attr_fid;
+
+        err = 0;
+        clnt = file_fid->clnt;
+        attr_fid = p9_fid_create(clnt);
+        if (IS_ERR(attr_fid)) {
+                err = PTR_ERR(attr_fid);
+                attr_fid = NULL;
+                goto error;
+        }
+        P9_DPRINTK(P9_DEBUG_9P,
+                ">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n",
+                file_fid->fid, attr_fid->fid, attr_name);
+
+        req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds",
+                file_fid->fid, attr_fid->fid, attr_name);
+        if (IS_ERR(req)) {
+                err = PTR_ERR(req);
+                goto error;
+        }
+        err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);
+        if (err) {
+                p9pdu_dump(1, req->rc);
+                p9_free_req(clnt, req);
+                goto clunk_fid;
+        }
+        p9_free_req(clnt, req);
+        P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n",
1661 attr_fid->fid, *attr_size);
1662 return attr_fid;
1663clunk_fid:
1664 p9_client_clunk(attr_fid);
1665 attr_fid = NULL;
1666error:
1667 if (attr_fid && (attr_fid != file_fid))
1668 p9_fid_destroy(attr_fid);
1669
1670 return ERR_PTR(err);
1671}
1672EXPORT_SYMBOL_GPL(p9_client_xattrwalk);
1673
1674int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
1675 u64 attr_size, int flags)
1676{
1677 int err;
1678 struct p9_req_t *req;
1679 struct p9_client *clnt;
1680
1681 P9_DPRINTK(P9_DEBUG_9P,
1682 ">>> TXATTRCREATE fid %d name %s size %lld flag %d\n",
1683 fid->fid, name, (long long)attr_size, flags);
1684 err = 0;
1685 clnt = fid->clnt;
1686 req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd",
1687 fid->fid, name, attr_size, flags);
1688 if (IS_ERR(req)) {
1689 err = PTR_ERR(req);
1690 goto error;
1691 }
1692 P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);
1693 p9_free_req(clnt, req);
1694error:
1695 return err;
1696}
1697EXPORT_SYMBOL_GPL(p9_client_xattrcreate);
1698
1699int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
1700{
1701 int err, rsize, total;
1702 struct p9_client *clnt;
1703 struct p9_req_t *req;
1704 char *dataptr;
1705
1706 P9_DPRINTK(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
1707 fid->fid, (long long unsigned) offset, count);
1708
1709 err = 0;
1710 clnt = fid->clnt;
1711 total = 0;
1712
1713 rsize = fid->iounit;
1714 if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ)
1715 rsize = clnt->msize - P9_READDIRHDRSZ;
1716
1717 if (count < rsize)
1718 rsize = count;
1719
1720 req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize);
1721 if (IS_ERR(req)) {
1722 err = PTR_ERR(req);
1723 goto error;
1724 }
1725
1726 err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);
1727 if (err) {
1728 p9pdu_dump(1, req->rc);
1729 goto free_and_error;
1730 }
1731
1732 P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
1733
1734 if (data)
1735 memmove(data, dataptr, count);
1736
1737 p9_free_req(clnt, req);
1738 return count;
1739
1740free_and_error:
1741 p9_free_req(clnt, req);
1742error:
1743 return err;
1744}
1745EXPORT_SYMBOL(p9_client_readdir);
1746
1747int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode,
1748 dev_t rdev, gid_t gid, struct p9_qid *qid)
1749{
1750 int err;
1751 struct p9_client *clnt;
1752 struct p9_req_t *req;
1753
1754 err = 0;
1755 clnt = fid->clnt;
1756 P9_DPRINTK(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d "
1757 "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev));
1758 req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode,
1759 MAJOR(rdev), MINOR(rdev), gid);
1760 if (IS_ERR(req))
1761 return PTR_ERR(req);
1762
1763 err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
1764 if (err) {
1765 p9pdu_dump(1, req->rc);
1766 goto error;
1767 }
1768 P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,
1769 (unsigned long long)qid->path, qid->version);
1770
1771error:
1772 p9_free_req(clnt, req);
1773 return err;
1774
1775}
1776EXPORT_SYMBOL(p9_client_mknod_dotl);
1777
1778int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode,
1779 gid_t gid, struct p9_qid *qid)
1780{
1781 int err;
1782 struct p9_client *clnt;
1783 struct p9_req_t *req;
1784
1785 err = 0;
1786 clnt = fid->clnt;
1787 P9_DPRINTK(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
1788 fid->fid, name, mode, gid);
1789 req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode,
1790 gid);
1791 if (IS_ERR(req))
1792 return PTR_ERR(req);
1793
1794 err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);
1795 if (err) {
1796 p9pdu_dump(1, req->rc);
1797 goto error;
1798 }
1799 P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,
1800 (unsigned long long)qid->path, qid->version);
1801
1802error:
1803 p9_free_req(clnt, req);
1804 return err;
1805
1806}
1807EXPORT_SYMBOL(p9_client_mkdir_dotl);
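
For context, a minimal sketch of how the new dotl getattr entry point is consumed. This is a hypothetical caller, not part of the patch; REQ_MASK is a stand-in for the real request-mask bits defined in the 9p headers:

	/* Illustrative only: fetch 9P2000.L attributes for a fid. */
	#define REQ_MASK 0x7ffULL	/* hypothetical "basic stats" mask */

	static int print_size(struct p9_fid *fid)
	{
		struct p9_stat_dotl *st;

		st = p9_client_getattr_dotl(fid, REQ_MASK);
		if (IS_ERR(st))
			return PTR_ERR(st);	/* buffer already freed on error */

		pr_info("9p: size=%llu nlink=%llu\n", st->st_size, st->st_nlink);
		kfree(st);	/* on success the caller owns the buffer */
		return 0;
	}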
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index e7541d5b0118..3acd3afb20c8 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -141,6 +141,7 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
 	D - data blob (int32_t size followed by void *, results are not freed)
 	T - array of strings (int16_t count, followed by strings)
 	R - array of qids (int16_t count, followed by qids)
+	A - stat for 9p2000.L (p9_stat_dotl)
 	? - if optional = 1, continue parsing
 */
 
@@ -340,8 +341,36 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				}
 			}
 			break;
+		case 'A': {
+				struct p9_stat_dotl *stbuf =
+				    va_arg(ap, struct p9_stat_dotl *);
+
+				memset(stbuf, 0, sizeof(struct p9_stat_dotl));
+				errcode =
+				    p9pdu_readf(pdu, proto_version,
+					"qQdddqqqqqqqqqqqqqqq",
+					&stbuf->st_result_mask,
+					&stbuf->qid,
+					&stbuf->st_mode,
+					&stbuf->st_uid, &stbuf->st_gid,
+					&stbuf->st_nlink,
+					&stbuf->st_rdev, &stbuf->st_size,
+					&stbuf->st_blksize, &stbuf->st_blocks,
+					&stbuf->st_atime_sec,
+					&stbuf->st_atime_nsec,
+					&stbuf->st_mtime_sec,
+					&stbuf->st_mtime_nsec,
+					&stbuf->st_ctime_sec,
+					&stbuf->st_ctime_nsec,
+					&stbuf->st_btime_sec,
+					&stbuf->st_btime_nsec,
+					&stbuf->st_gen,
+					&stbuf->st_data_version);
+			}
+			break;
 		case '?':
-			if (proto_version != p9_proto_2000u)
+			if ((proto_version != p9_proto_2000u) &&
+				(proto_version != p9_proto_2000L))
 				return 0;
 			break;
 		default:
@@ -393,7 +422,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				const char *sptr = va_arg(ap, const char *);
 				int16_t len = 0;
 				if (sptr)
-					len = MIN(strlen(sptr), USHORT_MAX);
+					len = MIN(strlen(sptr), USHRT_MAX);
 
 				errcode = p9pdu_writef(pdu, proto_version,
 								"w", len);
@@ -487,8 +516,26 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				}
 			}
 			break;
+		case 'I':{
+				struct p9_iattr_dotl *p9attr = va_arg(ap,
+							struct p9_iattr_dotl *);
+
+				errcode = p9pdu_writef(pdu, proto_version,
+							"ddddqqqqq",
+							p9attr->valid,
+							p9attr->mode,
+							p9attr->uid,
+							p9attr->gid,
+							p9attr->size,
+							p9attr->atime_sec,
+							p9attr->atime_nsec,
+							p9attr->mtime_sec,
+							p9attr->mtime_nsec);
+			}
+			break;
 		case '?':
-			if (proto_version != p9_proto_2000u)
+			if ((proto_version != p9_proto_2000u) &&
+				(proto_version != p9_proto_2000L))
 				return 0;
 			break;
 		default:
@@ -578,3 +625,30 @@ void p9pdu_reset(struct p9_fcall *pdu)
 	pdu->offset = 0;
 	pdu->size = 0;
 }
+
+int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,
+						int proto_version)
+{
+	struct p9_fcall fake_pdu;
+	int ret;
+	char *nameptr;
+
+	fake_pdu.size = len;
+	fake_pdu.capacity = len;
+	fake_pdu.sdata = buf;
+	fake_pdu.offset = 0;
+
+	ret = p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid,
+			&dirent->d_off, &dirent->d_type, &nameptr);
+	if (ret) {
+		P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret);
+		p9pdu_dump(1, &fake_pdu);
+		goto out;
+	}
+
+	strcpy(dirent->d_name, nameptr);
+
+out:
+	return fake_pdu.offset;
+}
+EXPORT_SYMBOL(p9dirent_read);
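
A sketch of how p9dirent_read() is meant to be driven over a TREADDIR buffer. The walk loop below is illustrative only; buf and count would come from a prior p9_client_readdir() call:

	/* Illustrative only: consume packed dirents from a readdir buffer.
	 * p9dirent_read() reports how many bytes it consumed. */
	static void walk_dirents(struct p9_client *clnt, char *buf, int count)
	{
		struct p9_dirent dirent;
		int offset = 0, n;

		while (offset < count) {
			n = p9dirent_read(buf + offset, count - offset,
					  &dirent, clnt->proto_version);
			if (n <= 0)
				break;	/* nothing consumed; stop */
			pr_info("9p: %s at offset %llu\n", dirent.d_name,
				(unsigned long long)dirent.d_off);
			offset += n;
		}
	}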
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 98ce9bcb0e15..c85109d809ca 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -948,7 +948,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
 
 	csocket = NULL;
 
-	if (strlen(addr) > UNIX_PATH_MAX) {
+	if (strlen(addr) >= UNIX_PATH_MAX) {
 		P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n",
 			addr);
 		return -ENAMETOOLONG;
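
The switch from '>' to '>=' is the classic NUL-terminator off-by-one: sun_path is a fixed UNIX_PATH_MAX-byte array, and the copied address must still leave room for its trailing '\0'. A sketch of the invariant (illustrative, not from this patch):

	struct sockaddr_un sun_server;

	/* sun_path holds UNIX_PATH_MAX bytes *including* the NUL that
	 * strcpy() appends, so only strlen(addr) <= UNIX_PATH_MAX - 1
	 * is safe: */
	strcpy(sun_server.sun_path, addr);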
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 041101ab4aa5..0ea20c30466c 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -308,7 +308,6 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 			req, err, status);
 		rdma->state = P9_RDMA_FLUSHING;
 		client->status = Disconnected;
-	return;
 }
 
 static void
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 7eb78ecc1618..dcfbe99ff81c 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -137,7 +137,7 @@ static void req_done(struct virtqueue *vq)
 
 	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
 
-	while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) {
+	while ((rc = virtqueue_get_buf(chan->vq, &len)) != NULL) {
 		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
 		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
 		req = p9_tag_lookup(chan->client, rc->tag);
@@ -209,13 +209,13 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 
 	req->status = REQ_STATUS_SENT;
 
-	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) {
+	if (virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) {
 		P9_DPRINTK(P9_DEBUG_TRANS,
 			"9p debug: virtio rpc add_buf returned failure");
 		return -EIO;
 	}
 
-	chan->vq->vq_ops->kick(chan->vq);
+	virtqueue_kick(chan->vq);
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
 	return 0;
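
These hunks track the virtio core's replacement of the chan->vq->vq_ops indirection with flat virtqueue_*() helpers; the calls are one-for-one renames with unchanged semantics. A condensed sketch of the producer/consumer pattern, where consume() is a hypothetical stand-in:

	/* queue a request, notify the host, then reap completions */
	if (virtqueue_add_buf(vq, sg, out, in, data) < 0)
		return -EIO;			/* ring full */
	virtqueue_kick(vq);
	while ((data = virtqueue_get_buf(vq, &len)) != NULL)
		consume(data, len);		/* completions, in any order */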
diff --git a/net/Kconfig b/net/Kconfig
index 041c35edb763..e330594d3709 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -32,7 +32,7 @@ config WANT_COMPAT_NETLINK_MESSAGES
 config COMPAT_NETLINK_MESSAGES
 	def_bool y
 	depends on COMPAT
-	depends on WIRELESS_EXT || WANT_COMPAT_NETLINK_MESSAGES
+	depends on WEXT_CORE || WANT_COMPAT_NETLINK_MESSAGES
 	help
 	  This option makes it possible to send different netlink messages
 	  to tasks depending on whether the task is a compat task or not. To
@@ -86,6 +86,16 @@ config NETWORK_SECMARK
 	  to nfmark, but designated for security purposes.
 	  If you are unsure how to answer this question, answer N.
 
+config NETWORK_PHY_TIMESTAMPING
+	bool "Timestamping in PHY devices"
+	depends on EXPERIMENTAL
+	help
+	  This allows timestamping of network packets by PHYs with
+	  hardware timestamping capabilities. This option adds some
+	  overhead in the transmit and receive paths.
+
+	  If you are unsure how to answer this question, answer N.
+
 menuconfig NETFILTER
 	bool "Network packet filtering framework (Netfilter)"
 	---help---
@@ -186,6 +196,7 @@ source "net/sctp/Kconfig"
 source "net/rds/Kconfig"
 source "net/tipc/Kconfig"
 source "net/atm/Kconfig"
+source "net/l2tp/Kconfig"
 source "net/802/Kconfig"
 source "net/bridge/Kconfig"
 source "net/dsa/Kconfig"
@@ -202,6 +213,12 @@ source "net/phonet/Kconfig"
 source "net/ieee802154/Kconfig"
 source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
+source "net/dns_resolver/Kconfig"
+
+config RPS
+	boolean
+	depends on SMP && SYSFS
+	default y
 
 menu "Network testing"
 
@@ -275,5 +292,7 @@ source "net/wimax/Kconfig"
 
 source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
+source "net/caif/Kconfig"
+
 
 endif # if NET
diff --git a/net/Makefile b/net/Makefile
index 1542e7268a7b..ea60fbce9b1b 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_BT) += bluetooth/
 obj-$(CONFIG_SUNRPC)	+= sunrpc/
 obj-$(CONFIG_AF_RXRPC)	+= rxrpc/
 obj-$(CONFIG_ATM)	+= atm/
+obj-$(CONFIG_L2TP)	+= l2tp/
 obj-$(CONFIG_DECNET)	+= decnet/
 obj-$(CONFIG_ECONET)	+= econet/
 obj-$(CONFIG_PHONET)	+= phonet/
@@ -49,19 +50,21 @@ endif
 obj-$(CONFIG_IP_DCCP)	+= dccp/
 obj-$(CONFIG_IP_SCTP)	+= sctp/
 obj-$(CONFIG_RDS)	+= rds/
-obj-y			+= wireless/
+obj-$(CONFIG_WIRELESS)	+= wireless/
 obj-$(CONFIG_MAC80211)	+= mac80211/
 obj-$(CONFIG_TIPC)	+= tipc/
 obj-$(CONFIG_NETLABEL)	+= netlabel/
 obj-$(CONFIG_IUCV)	+= iucv/
 obj-$(CONFIG_RFKILL)	+= rfkill/
 obj-$(CONFIG_NET_9P)	+= 9p/
+obj-$(CONFIG_CAIF)	+= caif/
 ifneq ($(CONFIG_DCB),)
 obj-y			+= dcb/
 endif
-obj-y			+= ieee802154/
+obj-$(CONFIG_IEEE802154)	+= ieee802154/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)	+= sysctl_net.o
 endif
 obj-$(CONFIG_WIMAX)	+= wimax/
+obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 7b02967fbbe7..c410b93fda2e 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -782,7 +782,7 @@ static int atif_ioctl(int cmd, void __user *arg)
 			atrtr_create(&rtdef, dev);
 		}
 	}
-	dev_mc_add(dev, aarp_mcast, 6, 1);
+	dev_mc_add_global(dev, aarp_mcast);
 	return 0;
 
 	case SIOCGIFADDR:
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index d6c7ceaf13e9..651babdfab38 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -139,6 +139,43 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
 	return NULL;
 }
 
+static int atm_dev_event(struct notifier_block *this, unsigned long event,
+		 void *arg)
+{
+	struct atm_dev *atm_dev = arg;
+	struct list_head *lh;
+	struct net_device *net_dev;
+	struct br2684_vcc *brvcc;
+	struct atm_vcc *atm_vcc;
+	unsigned long flags;
+
+	pr_debug("event=%ld dev=%p\n", event, atm_dev);
+
+	read_lock_irqsave(&devs_lock, flags);
+	list_for_each(lh, &br2684_devs) {
+		net_dev = list_entry_brdev(lh);
+
+		list_for_each_entry(brvcc, &BRPRIV(net_dev)->brvccs, brvccs) {
+			atm_vcc = brvcc->atmvcc;
+			if (atm_vcc && brvcc->atmvcc->dev == atm_dev) {
+
+				if (atm_vcc->dev->signal == ATM_PHY_SIG_LOST)
+					netif_carrier_off(net_dev);
+				else
+					netif_carrier_on(net_dev);
+
+			}
+		}
+	}
+	read_unlock_irqrestore(&devs_lock, flags);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block atm_dev_notifier = {
+	.notifier_call = atm_dev_event,
+};
+
 /* chained vcc->pop function. Check if we should wake the netif_queue */
 static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb)
 {
@@ -362,6 +399,12 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 			unregister_netdev(net_dev);
 			free_netdev(net_dev);
 		}
+		read_lock_irq(&devs_lock);
+		if (list_empty(&br2684_devs)) {
+			/* last br2684 device */
+			unregister_atmdevice_notifier(&atm_dev_notifier);
+		}
+		read_unlock_irq(&devs_lock);
 		return;
 	}
 
@@ -446,7 +489,6 @@ error:
 	net_dev->stats.rx_errors++;
 free_skb:
 	dev_kfree_skb(skb);
-	return;
 }
 
 /*
@@ -531,6 +573,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 
 		br2684_push(atmvcc, skb);
 	}
+
+	/* initialize netdev carrier state */
+	if (atmvcc->dev->signal == ATM_PHY_SIG_LOST)
+		netif_carrier_off(net_dev);
+	else
+		netif_carrier_on(net_dev);
+
 	__module_get(THIS_MODULE);
 	return 0;
 
@@ -621,9 +670,16 @@ static int br2684_create(void __user *arg)
 	}
 
 	write_lock_irq(&devs_lock);
+
 	brdev->payload = payload;
-	brdev->number = list_empty(&br2684_devs) ? 1 :
-		BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1;
+
+	if (list_empty(&br2684_devs)) {
+		/* 1st br2684 device */
+		register_atmdevice_notifier(&atm_dev_notifier);
+		brdev->number = 1;
+	} else
+		brdev->number = BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1;
+
 	list_add_tail(&brdev->br2684_devs, &br2684_devs);
 	write_unlock_irq(&devs_lock);
 	return 0;
@@ -773,6 +829,11 @@ static void __exit br2684_exit(void)
 	remove_proc_entry("br2684", atm_proc_root);
 #endif
 
+
+	/* if not already empty */
+	if (!list_empty(&br2684_devs))
+		unregister_atmdevice_notifier(&atm_dev_notifier);
+
 	while (!list_empty(&br2684_devs)) {
 		net_dev = list_entry_brdev(br2684_devs.next);
 		brdev = BRPRIV(net_dev);
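
The notifier plumbing above is reusable by any other ATM consumer that wants the same carrier tracking br2684 gains here; a minimal sketch, assuming only the interfaces added by this patch (my_atm_event and my_atm_notifier are hypothetical names):

	static int my_atm_event(struct notifier_block *nb, unsigned long signal,
				void *arg)
	{
		struct atm_dev *dev = arg;	/* device whose PHY signal changed */

		pr_debug("atm%d: signal now %lu\n", dev->number, signal);
		return NOTIFY_DONE;
	}

	static struct notifier_block my_atm_notifier = {
		.notifier_call = my_atm_event,
	};

	/* register_atmdevice_notifier(&my_atm_notifier) when first needed,
	 * unregister_atmdevice_notifier(&my_atm_notifier) when done. */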
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 313aba11316b..95fdd1185067 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -522,7 +522,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 	error = ip_route_output_key(&init_net, &rt, &fl);
 	if (error)
 		return error;
-	neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1);
+	neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
 	ip_rt_put(rt);
 	if (!neigh)
 		return -ENOMEM;
diff --git a/net/atm/common.c b/net/atm/common.c
index 97ed94aa0cbc..940404a73b3d 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -37,6 +37,8 @@ EXPORT_SYMBOL(vcc_hash);
 DEFINE_RWLOCK(vcc_sklist_lock);
 EXPORT_SYMBOL(vcc_sklist_lock);
 
+static ATOMIC_NOTIFIER_HEAD(atm_dev_notify_chain);
+
 static void __vcc_insert_socket(struct sock *sk)
 {
 	struct atm_vcc *vcc = atm_sk(sk);
@@ -90,10 +92,13 @@ static void vcc_sock_destruct(struct sock *sk)
 
 static void vcc_def_wakeup(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up(sk->sk_sleep);
-	read_unlock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up(&wq->wait);
+	rcu_read_unlock();
 }
 
 static inline int vcc_writable(struct sock *sk)
@@ -106,16 +111,19 @@ static inline int vcc_writable(struct sock *sk)
 
 static void vcc_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 
 	if (vcc_writable(sk)) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible(sk->sk_sleep);
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible(&wq->wait);
 
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static struct proto vcc_proto = {
@@ -206,6 +214,22 @@ void vcc_release_async(struct atm_vcc *vcc, int reply)
 }
 EXPORT_SYMBOL(vcc_release_async);
 
+void atm_dev_signal_change(struct atm_dev *dev, char signal)
+{
+	pr_debug("%s signal=%d dev=%p number=%d dev->signal=%d\n",
+		__func__, signal, dev, dev->number, dev->signal);
+
+	/* atm driver sending invalid signal */
+	WARN_ON(signal < ATM_PHY_SIG_LOST || signal > ATM_PHY_SIG_FOUND);
+
+	if (dev->signal == signal)
+		return; /* no change */
+
+	dev->signal = signal;
+
+	atomic_notifier_call_chain(&atm_dev_notify_chain, signal, dev);
+}
+EXPORT_SYMBOL(atm_dev_signal_change);
 
 void atm_dev_release_vccs(struct atm_dev *dev)
 {
@@ -549,7 +573,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
 	}
 
 	eff = (size+3) & ~3; /* align to word boundary */
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	error = 0;
 	while (!(skb = alloc_tx(vcc, eff))) {
 		if (m->msg_flags & MSG_DONTWAIT) {
@@ -568,9 +592,9 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
 			send_sig(SIGPIPE, current, 0);
 			break;
 		}
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (error)
 		goto out;
 	skb->dev = NULL; /* for paths shared with net_device interfaces */
@@ -595,7 +619,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
 	struct atm_vcc *vcc;
 	unsigned int mask;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	vcc = ATM_SD(sock);
@@ -775,6 +799,18 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
 	return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len);
 }
 
+int register_atmdevice_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&atm_dev_notify_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_atmdevice_notifier);
+
+void unregister_atmdevice_notifier(struct notifier_block *nb)
+{
+	atomic_notifier_chain_unregister(&atm_dev_notify_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_atmdevice_notifier);
+
 static int __init atm_init(void)
 {
 	int error;
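
On the driver side, the new helper replaces open-coded writes to dev->signal; a hedged sketch of how a PHY interrupt path might feed the chain (my_phy_interrupt() is hypothetical):

	static void my_phy_interrupt(struct atm_dev *dev, int link_up)
	{
		/* de-duplicates and fans out to registered listeners */
		atm_dev_signal_change(dev,
			link_up ? ATM_PHY_SIG_FOUND : ATM_PHY_SIG_LOST);
	}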
diff --git a/net/atm/lec.c b/net/atm/lec.c
index feeaf5718472..d98bde1a0ac8 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -161,8 +161,6 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
 		skb_queue_tail(&sk->sk_receive_queue, skb2);
 		sk->sk_data_ready(sk, skb2->len);
 	}
-
-	return;
 }
 #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
 
@@ -640,7 +638,6 @@ static void lec_set_multicast_list(struct net_device *dev)
 	 * by default, all multicast frames arrive over the bus.
 	 * eventually support selective multicast service
 	 */
-	return;
 }
 
 static const struct net_device_ops lec_netdev_ops = {
@@ -1199,8 +1196,6 @@ static void __exit lane_module_cleanup(void)
 			dev_lec[i] = NULL;
 		}
 	}
-
-	return;
 }
 
 module_init(lane_module_init);
@@ -1334,7 +1329,6 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
 		priv->lane2_ops->associate_indicator(dev, mac_addr,
 						tlvs, sizeoftlvs);
 	}
-	return;
 }
 
 /*
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 436f2e177657..622b471e14e0 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -455,7 +455,6 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr,
 	if (end_of_tlvs - tlvs != 0)
 		pr_info("(%s) ignoring %Zd bytes of trailing TLV garbage\n",
 			dev->name, end_of_tlvs - tlvs);
-	return;
 }
 
 /*
@@ -684,8 +683,6 @@ static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev)
 
 	if (in_entry == NULL && eg_entry == NULL)
 		dprintk("(%s) unused vcc closed\n", dev->name);
-
-	return;
 }
 
 static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
@@ -783,8 +780,6 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
 	netif_rx(new_skb);
-
-	return;
 }
 
 static struct atmdev_ops mpc_ops = { /* only send is required */
@@ -873,8 +868,6 @@ static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc)
 	mesg.type = SET_MPS_CTRL_ADDR;
 	memcpy(mesg.MPS_ctrl, addr, ATM_ESA_LEN);
 	msg_to_mpoad(&mesg, mpc);
-
-	return;
 }
 
 static void mpoad_close(struct atm_vcc *vcc)
@@ -911,8 +904,6 @@ static void mpoad_close(struct atm_vcc *vcc)
 	pr_info("(%s) going down\n",
 		(mpc->dev) ? mpc->dev->name : "<unknown>");
 	module_put(THIS_MODULE);
-
-	return;
 }
 
 /*
@@ -1122,7 +1113,6 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
 	pr_info("(%s) entry already in resolving state\n",
 		(mpc->dev) ? mpc->dev->name : "<unknown>");
 	mpc->in_ops->put(entry);
-	return;
 }
 
 /*
@@ -1166,7 +1156,6 @@ static void check_qos_and_open_shortcut(struct k_message *msg,
 	} else
 		memset(&msg->qos, 0, sizeof(struct atm_qos));
 	msg_to_mpoad(msg, client);
-	return;
 }
 
 static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc)
@@ -1240,8 +1229,6 @@ static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
 		mpc->in_ops->put(entry);
 		entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask);
 	} while (entry != NULL);
-
-	return;
 }
 
 static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
@@ -1260,8 +1247,6 @@ static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
 	write_unlock_irq(&mpc->egress_lock);
 
 	mpc->eg_ops->put(entry);
-
-	return;
 }
 
 static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry)
@@ -1295,8 +1280,6 @@ static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry)
 	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, skb->len);
 	dprintk("exiting\n");
-
-	return;
 }
 
 /*
@@ -1325,8 +1308,6 @@ static void mps_death(struct k_message *msg, struct mpoa_client *mpc)
 
 	mpc->in_ops->destroy_cache(mpc);
 	mpc->eg_ops->destroy_cache(mpc);
-
-	return;
 }
 
 static void MPOA_cache_impos_rcvd(struct k_message *msg,
@@ -1353,8 +1334,6 @@ static void MPOA_cache_impos_rcvd(struct k_message *msg,
 	write_unlock_irq(&mpc->egress_lock);
 
 	mpc->eg_ops->put(entry);
-
-	return;
 }
 
 static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg,
@@ -1392,8 +1371,6 @@ static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg,
 		pr_info("(%s) targetless LE_ARP request failed\n",
 			mpc->dev->name);
 	}
-
-	return;
 }
 
 static void set_mps_mac_addr_rcvd(struct k_message *msg,
@@ -1409,8 +1386,6 @@ static void set_mps_mac_addr_rcvd(struct k_message *msg,
 		return;
 	}
 	client->number_of_mps_macs = 1;
-
-	return;
 }
 
 /*
@@ -1436,7 +1411,6 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action)
 
 	msg->type = action;
 	msg_to_mpoad(msg, mpc);
-	return;
 }
 
 static void mpc_timer_refresh(void)
@@ -1445,8 +1419,6 @@ static void mpc_timer_refresh(void)
 	mpc_timer.data = mpc_timer.expires;
 	mpc_timer.function = mpc_cache_check;
 	add_timer(&mpc_timer);
-
-	return;
 }
 
 static void mpc_cache_check(unsigned long checking_time)
@@ -1471,8 +1443,6 @@ static void mpc_cache_check(unsigned long checking_time)
 		mpc = mpc->next;
 	}
 	mpc_timer_refresh();
-
-	return;
 }
 
 static int atm_mpoa_ioctl(struct socket *sock, unsigned int cmd,
@@ -1561,8 +1531,6 @@ static void __exit atm_mpoa_cleanup(void)
 		kfree(qos);
 		qos = nextqos;
 	}
-
-	return;
 }
 
 module_init(atm_mpoa_init);
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index e773d8336918..d1b2d9a03144 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -182,8 +182,6 @@ static void in_cache_put(in_cache_entry *entry)
 		memset(entry, 0, sizeof(in_cache_entry));
 		kfree(entry);
 	}
-
-	return;
 }
 
 /*
@@ -221,8 +219,6 @@ static void in_cache_remove_entry(in_cache_entry *entry,
 		}
 		vcc_release_async(vcc, -EPIPE);
 	}
-
-	return;
 }
 
 /* Call this every MPC-p2 seconds... Not exactly correct solution,
@@ -248,8 +244,6 @@ static void clear_count_and_expired(struct mpoa_client *client)
 		entry = next_entry;
 	}
 	write_unlock_bh(&client->ingress_lock);
-
-	return;
 }
 
 /* Call this every MPC-p4 seconds. */
@@ -334,8 +328,6 @@ static void in_destroy_cache(struct mpoa_client *mpc)
 	while (mpc->in_cache != NULL)
 		mpc->in_ops->remove_entry(mpc->in_cache, mpc);
 	write_unlock_irq(&mpc->ingress_lock);
-
-	return;
 }
 
 static eg_cache_entry *eg_cache_get_by_cache_id(__be32 cache_id,
@@ -427,8 +419,6 @@ static void eg_cache_put(eg_cache_entry *entry)
 		memset(entry, 0, sizeof(eg_cache_entry));
 		kfree(entry);
 	}
-
-	return;
 }
 
 /*
@@ -463,8 +453,6 @@ static void eg_cache_remove_entry(eg_cache_entry *entry,
 		}
 		vcc_release_async(vcc, -EPIPE);
 	}
-
-	return;
 }
 
 static eg_cache_entry *eg_cache_add_entry(struct k_message *msg,
@@ -509,8 +497,6 @@ static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time)
 	do_gettimeofday(&(entry->tv));
 	entry->entry_state = EGRESS_RESOLVED;
 	entry->ctrl_info.holding_time = holding_time;
-
-	return;
 }
 
 static void clear_expired(struct mpoa_client *client)
@@ -537,8 +523,6 @@ static void clear_expired(struct mpoa_client *client)
 		entry = next_entry;
 	}
 	write_unlock_irq(&client->egress_lock);
-
-	return;
 }
 
 static void eg_destroy_cache(struct mpoa_client *mpc)
@@ -547,8 +531,6 @@ static void eg_destroy_cache(struct mpoa_client *mpc)
 	while (mpc->eg_cache != NULL)
 		mpc->eg_ops->remove_entry(mpc->eg_cache, mpc);
 	write_unlock_irq(&mpc->egress_lock);
-
-	return;
 }
 
 
@@ -584,6 +566,4 @@ void atm_mpoa_init_cache(struct mpoa_client *mpc)
 {
 	mpc->in_ops = &ingress_ops;
 	mpc->eg_ops = &egress_ops;
-
-	return;
 }
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index e49bb6d948a1..e9aced0ec56b 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -260,7 +260,7 @@ static int pppoatm_devppp_ioctl(struct ppp_channel *chan, unsigned int cmd,
 	return -ENOTTY;
 }
 
-static /*const*/ struct ppp_channel_ops pppoatm_ops = {
+static const struct ppp_channel_ops pppoatm_ops = {
 	.start_xmit = pppoatm_send,
 	.ioctl = pppoatm_devppp_ioctl,
 };
diff --git a/net/atm/proc.c b/net/atm/proc.c
index 696e218436e5..6262aeae398e 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -407,7 +407,6 @@ EXPORT_SYMBOL(atm_proc_root);
 
 int atm_proc_dev_register(struct atm_dev *dev)
 {
-	int digits, num;
 	int error;
 
 	/* No proc info */
@@ -415,16 +414,9 @@ int atm_proc_dev_register(struct atm_dev *dev)
 		return 0;
 
 	error = -ENOMEM;
-	digits = 0;
-	for (num = dev->number; num; num /= 10)
-		digits++;
-	if (!digits)
-		digits++;
-
-	dev->proc_name = kmalloc(strlen(dev->type) + digits + 2, GFP_KERNEL);
+	dev->proc_name = kasprintf(GFP_KERNEL, "%s:%d", dev->type, dev->number);
 	if (!dev->proc_name)
 		goto err_out;
-	sprintf(dev->proc_name, "%s:%d", dev->type, dev->number);
 
 	dev->proc_entry = proc_create_data(dev->proc_name, 0, atm_proc_root,
 					   &proc_atm_dev_ops, dev);
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 6ba6e466ee54..509c8ac02b63 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -131,7 +131,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
 	}
 	sk->sk_ack_backlog++;
 	skb_queue_tail(&sk->sk_receive_queue, skb);
-	pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep);
+	pr_debug("waking sk_sleep(sk) 0x%p\n", sk_sleep(sk));
 	sk->sk_state_change(sk);
 as_indicate_complete:
 	release_sock(sk);
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 3ba9a45a51ac..754ee4791d96 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -49,14 +49,14 @@ static void svc_disconnect(struct atm_vcc *vcc)
 
 	pr_debug("%p\n", vcc);
 	if (test_bit(ATM_VF_REGIS, &vcc->flags)) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 		sigd_enq(vcc, as_close, NULL, NULL, NULL);
 		while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) {
 			schedule();
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_UNINTERRUPTIBLE);
 		}
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 	}
 	/* beware - socket is still in use by atmsigd until the last
 	   as_indicate has been answered */
@@ -125,13 +125,13 @@ static int svc_bind(struct socket *sock, struct sockaddr *sockaddr,
 	}
 	vcc->local = *addr;
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local);
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
 		schedule();
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */
 	if (!sigd) {
 		error = -EUNATCH;
@@ -201,10 +201,10 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
 	}
 	vcc->remote = *addr;
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote);
 	if (flags & O_NONBLOCK) {
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 		sock->state = SS_CONNECTING;
 		error = -EINPROGRESS;
 		goto out;
@@ -213,7 +213,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
 		schedule();
 		if (!signal_pending(current)) {
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 			continue;
 		}
@@ -232,14 +232,14 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
 		 */
 		sigd_enq(vcc, as_close, NULL, NULL, NULL);
 		while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 			schedule();
 		}
 		if (!sk->sk_err)
 			while (!test_bit(ATM_VF_RELEASED, &vcc->flags) &&
 			       sigd) {
-				prepare_to_wait(sk->sk_sleep, &wait,
+				prepare_to_wait(sk_sleep(sk), &wait,
 						TASK_INTERRUPTIBLE);
 				schedule();
 			}
@@ -250,7 +250,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
 		error = -EINTR;
 		break;
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (error)
 		goto out;
 	if (!sigd) {
@@ -302,13 +302,13 @@ static int svc_listen(struct socket *sock, int backlog)
 		goto out;
 	}
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	sigd_enq(vcc, as_listen, NULL, NULL, &vcc->local);
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
 		schedule();
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (!sigd) {
 		error = -EUNATCH;
 		goto out;
@@ -343,7 +343,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
 	while (1) {
 		DEFINE_WAIT(wait);
 
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		while (!(skb = skb_dequeue(&sk->sk_receive_queue)) &&
 		       sigd) {
 			if (test_bit(ATM_VF_RELEASED, &old_vcc->flags))
@@ -363,10 +363,10 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
 				error = -ERESTARTSYS;
 				break;
 			}
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 		}
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 		if (error)
 			goto out;
 		if (!skb) {
@@ -392,17 +392,17 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
 		}
 		/* wait should be short, so we ignore the non-blocking flag */
 		set_bit(ATM_VF_WAITING, &new_vcc->flags);
-		prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait,
+		prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
 				TASK_UNINTERRUPTIBLE);
 		sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL);
 		while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) {
 			release_sock(sk);
 			schedule();
 			lock_sock(sk);
-			prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
 					TASK_UNINTERRUPTIBLE);
 		}
-		finish_wait(sk_atm(new_vcc)->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk_atm(new_vcc)), &wait);
 		if (!sigd) {
 			error = -EUNATCH;
 			goto out;
@@ -438,14 +438,14 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
 	DEFINE_WAIT(wait);
 
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0);
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) &&
 	       !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) {
 		schedule();
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (!sigd)
 		return -EUNATCH;
 	return -sk->sk_err;
@@ -534,20 +534,20 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr,
 
 	lock_sock(sk);
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	sigd_enq(vcc, as_addparty, NULL, NULL,
 		 (struct sockaddr_atmsvc *) sockaddr);
 	if (flags & O_NONBLOCK) {
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 		error = -EINPROGRESS;
 		goto out;
 	}
 	pr_debug("added wait queue\n");
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
 		schedule();
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	error = xchg(&sk->sk_err_soft, 0);
 out:
 	release_sock(sk);
@@ -563,13 +563,13 @@ static int svc_dropparty(struct socket *sock, int ep_ref)
 
 	lock_sock(sk);
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref);
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
 		schedule();
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (!sigd) {
 		error = -EUNATCH;
 		goto out;
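
Every svc.c hunk above is the same mechanical substitution: the wait queue moved into the RCU-managed struct socket_wq, so direct sk->sk_sleep references give way to the sk_sleep(sk) accessor. The wait-loop skeleton each call site preserves looks like this, where condition() is a stand-in for the per-call-site test:

	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	while (!condition(sk) && sigd) {
		schedule();
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	}
	finish_wait(sk_sleep(sk), &wait);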
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 65c5801261f9..cfdfd7e2a172 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1281,7 +1281,7 @@ static int __must_check ax25_connect(struct socket *sock,
 		DEFINE_WAIT(wait);
 
 		for (;;) {
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
@@ -1294,7 +1294,7 @@ static int __must_check ax25_connect(struct socket *sock,
 				err = -ERESTARTSYS;
 				break;
 			}
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 
 		if (err)
 			goto out_release;
@@ -1346,7 +1346,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
 	 *	hooked into the SABM we saved
 	 */
 	for (;;) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
 			break;
@@ -1364,7 +1364,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
 			err = -ERESTARTSYS;
 			break;
 		}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 
 	if (err)
 		goto out;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 404a8500fd03..421c45bd1b95 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -288,7 +288,7 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
-	poll_wait(file, sk->sk_sleep, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
 	if (sk->sk_state == BT_LISTEN)
 		return bt_accept_poll(sk);
@@ -378,7 +378,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
 
 	BT_DBG("sk %p", sk);
 
-	add_wait_queue(sk->sk_sleep, &wait);
+	add_wait_queue(sk_sleep(sk), &wait);
 	while (sk->sk_state != state) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -401,7 +401,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
			break;
 		}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 	return err;
 }
 EXPORT_SYMBOL(bt_sock_wait_state);
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index 0d9e506f5d5a..70672544db86 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -86,26 +86,26 @@ struct bnep_setup_conn_req {
86 __u8 ctrl; 86 __u8 ctrl;
87 __u8 uuid_size; 87 __u8 uuid_size;
88 __u8 service[0]; 88 __u8 service[0];
89} __attribute__((packed)); 89} __packed;
90 90
91struct bnep_set_filter_req { 91struct bnep_set_filter_req {
92 __u8 type; 92 __u8 type;
93 __u8 ctrl; 93 __u8 ctrl;
94 __be16 len; 94 __be16 len;
95 __u8 list[0]; 95 __u8 list[0];
96} __attribute__((packed)); 96} __packed;
97 97
98struct bnep_control_rsp { 98struct bnep_control_rsp {
99 __u8 type; 99 __u8 type;
100 __u8 ctrl; 100 __u8 ctrl;
101 __be16 resp; 101 __be16 resp;
102} __attribute__((packed)); 102} __packed;
103 103
104struct bnep_ext_hdr { 104struct bnep_ext_hdr {
105 __u8 type; 105 __u8 type;
106 __u8 len; 106 __u8 len;
107 __u8 data[0]; 107 __u8 data[0];
108} __attribute__((packed)); 108} __packed;
109 109
110/* BNEP ioctl defines */ 110/* BNEP ioctl defines */
111#define BNEPCONNADD _IOW('B', 200, int) 111#define BNEPCONNADD _IOW('B', 200, int)
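
__packed is the kernel's shorthand for __attribute__((packed)), so the hunk above is a pure spelling change with no layout effect. An illustrative pair of structs; the padded size is hedged since padding is ABI-dependent:

#include <linux/compiler.h>
#include <linux/types.h>

struct hdr_padded {
	__u8   type;
	__be16 len;	/* compiler may insert one pad byte before this */
};			/* typically sizeof == 4 */

struct hdr_packed {
	__u8   type;
	__be16 len;	/* no padding */
} __packed;		/* sizeof == 3 */
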
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 8062dad6d10d..f10b41fb05a0 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -474,7 +474,7 @@ static int bnep_session(void *arg)
474 set_user_nice(current, -15); 474 set_user_nice(current, -15);
475 475
476 init_waitqueue_entry(&wait, current); 476 init_waitqueue_entry(&wait, current);
477 add_wait_queue(sk->sk_sleep, &wait); 477 add_wait_queue(sk_sleep(sk), &wait);
478 while (!atomic_read(&s->killed)) { 478 while (!atomic_read(&s->killed)) {
479 set_current_state(TASK_INTERRUPTIBLE); 479 set_current_state(TASK_INTERRUPTIBLE);
480 480
@@ -496,7 +496,7 @@ static int bnep_session(void *arg)
496 schedule(); 496 schedule();
497 } 497 }
498 set_current_state(TASK_RUNNING); 498 set_current_state(TASK_RUNNING);
499 remove_wait_queue(sk->sk_sleep, &wait); 499 remove_wait_queue(sk_sleep(sk), &wait);
500 500
501 /* Cleanup session */ 501 /* Cleanup session */
502 down_write(&bnep_session_sem); 502 down_write(&bnep_session_sem);
@@ -507,7 +507,7 @@ static int bnep_session(void *arg)
507 /* Wakeup user-space polling for socket errors */ 507 /* Wakeup user-space polling for socket errors */
508 s->sock->sk->sk_err = EUNATCH; 508 s->sock->sk->sk_err = EUNATCH;
509 509
510 wake_up_interruptible(s->sock->sk->sk_sleep); 510 wake_up_interruptible(sk_sleep(s->sock->sk));
511 511
512 /* Release the socket */ 512 /* Release the socket */
513 fput(s->sock->file); 513 fput(s->sock->file);
@@ -638,7 +638,7 @@ int bnep_del_connection(struct bnep_conndel_req *req)
638 638
639 /* Kill session thread */ 639 /* Kill session thread */
640 atomic_inc(&s->killed); 640 atomic_inc(&s->killed);
641 wake_up_interruptible(s->sock->sk->sk_sleep); 641 wake_up_interruptible(sk_sleep(s->sock->sk));
642 } else 642 } else
643 err = -ENOENT; 643 err = -ENOENT;
644 644
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 5643a2391e76..8c100c9dae28 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -88,7 +88,7 @@ static void bnep_net_set_mc_list(struct net_device *dev)
88 memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN); 88 memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
89 r->len = htons(ETH_ALEN * 2); 89 r->len = htons(ETH_ALEN * 2);
90 } else { 90 } else {
91 struct dev_mc_list *dmi = dev->mc_list; 91 struct netdev_hw_addr *ha;
92 int i, len = skb->len; 92 int i, len = skb->len;
93 93
94 if (dev->flags & IFF_BROADCAST) { 94 if (dev->flags & IFF_BROADCAST) {
@@ -98,18 +98,20 @@ static void bnep_net_set_mc_list(struct net_device *dev)
98 98
99 /* FIXME: We should group addresses here. */ 99 /* FIXME: We should group addresses here. */
100 100
101 for (i = 0; 101 i = 0;
102 i < netdev_mc_count(dev) && i < BNEP_MAX_MULTICAST_FILTERS; 102 netdev_for_each_mc_addr(ha, dev) {
103 i++) { 103 if (i == BNEP_MAX_MULTICAST_FILTERS)
104 memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); 104 break;
105 memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN); 105 memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
106 dmi = dmi->next; 106 memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
107
108 i++;
107 } 109 }
108 r->len = htons(skb->len - len); 110 r->len = htons(skb->len - len);
109 } 111 }
110 112
111 skb_queue_tail(&sk->sk_write_queue, skb); 113 skb_queue_tail(&sk->sk_write_queue, skb);
112 wake_up_interruptible(sk->sk_sleep); 114 wake_up_interruptible(sk_sleep(sk));
113#endif 115#endif
114} 116}
115 117
@@ -193,11 +195,11 @@ static netdev_tx_t bnep_net_xmit(struct sk_buff *skb,
193 /* 195 /*
194 * We cannot send L2CAP packets from here as we are potentially in a bh. 196 * We cannot send L2CAP packets from here as we are potentially in a bh.
195 * So we have to queue them and wake up session thread which is sleeping 197 * So we have to queue them and wake up session thread which is sleeping
196 * on the sk->sk_sleep. 198 * on the sk_sleep(sk).
197 */ 199 */
198 dev->trans_start = jiffies; 200 dev->trans_start = jiffies;
199 skb_queue_tail(&sk->sk_write_queue, skb); 201 skb_queue_tail(&sk->sk_write_queue, skb);
200 wake_up_interruptible(sk->sk_sleep); 202 wake_up_interruptible(sk_sleep(sk));
201 203
202 if (skb_queue_len(&sk->sk_write_queue) >= BNEP_TX_QUEUE_LEN) { 204 if (skb_queue_len(&sk->sk_write_queue) >= BNEP_TX_QUEUE_LEN) {
203 BT_DBG("tx queue is full"); 205 BT_DBG("tx queue is full");
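
The set_mc_list hunk converts the open-coded dev->mc_list walk into the netdev_for_each_mc_addr() iterator over struct netdev_hw_addr, with an explicit counter replacing the old loop bound. The new idiom in isolation; the add_filter() callback is hypothetical, and BNEP_MAX_MULTICAST_FILTERS comes from bnep.h:

#include <linux/netdevice.h>

static void copy_mc_filters(struct net_device *dev,
			    void (*add_filter)(const unsigned char *addr))
{
	struct netdev_hw_addr *ha;
	int i = 0;

	netdev_for_each_mc_addr(ha, dev) {
		if (i == BNEP_MAX_MULTICAST_FILTERS)
			break;
		add_filter(ha->addr);	/* ha->addr replaces dmi->dmi_addr */
		i++;
	}
}
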
diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h
index e4663aa14d26..785e79e953c5 100644
--- a/net/bluetooth/cmtp/cmtp.h
+++ b/net/bluetooth/cmtp/cmtp.h
@@ -125,7 +125,7 @@ static inline void cmtp_schedule(struct cmtp_session *session)
125{ 125{
126 struct sock *sk = session->sock->sk; 126 struct sock *sk = session->sock->sk;
127 127
128 wake_up_interruptible(sk->sk_sleep); 128 wake_up_interruptible(sk_sleep(sk));
129} 129}
130 130
131/* CMTP init defines */ 131/* CMTP init defines */
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 0073ec8495da..d4c6af082d48 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -284,7 +284,7 @@ static int cmtp_session(void *arg)
284 set_user_nice(current, -15); 284 set_user_nice(current, -15);
285 285
286 init_waitqueue_entry(&wait, current); 286 init_waitqueue_entry(&wait, current);
287 add_wait_queue(sk->sk_sleep, &wait); 287 add_wait_queue(sk_sleep(sk), &wait);
288 while (!atomic_read(&session->terminate)) { 288 while (!atomic_read(&session->terminate)) {
289 set_current_state(TASK_INTERRUPTIBLE); 289 set_current_state(TASK_INTERRUPTIBLE);
290 290
@@ -301,7 +301,7 @@ static int cmtp_session(void *arg)
301 schedule(); 301 schedule();
302 } 302 }
303 set_current_state(TASK_RUNNING); 303 set_current_state(TASK_RUNNING);
304 remove_wait_queue(sk->sk_sleep, &wait); 304 remove_wait_queue(sk_sleep(sk), &wait);
305 305
306 down_write(&cmtp_session_sem); 306 down_write(&cmtp_session_sem);
307 307
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index b10e3cdb08f8..0b1e460fe440 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1,6 +1,6 @@
1/* 1/*
2 BlueZ - Bluetooth protocol stack for Linux 2 BlueZ - Bluetooth protocol stack for Linux
3 Copyright (C) 2000-2001 Qualcomm Incorporated 3 Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
4 4
5 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> 5 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
6 6
@@ -155,6 +155,27 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
155 hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp); 155 hci_send_cmd(hdev, HCI_OP_SETUP_SYNC_CONN, sizeof(cp), &cp);
156} 156}
157 157
158/* Device _must_ be locked */
159void hci_sco_setup(struct hci_conn *conn, __u8 status)
160{
161 struct hci_conn *sco = conn->link;
162
163 BT_DBG("%p", conn);
164
165 if (!sco)
166 return;
167
168 if (!status) {
169 if (lmp_esco_capable(conn->hdev))
170 hci_setup_sync(sco, conn->handle);
171 else
172 hci_add_sco(sco, conn->handle);
173 } else {
174 hci_proto_connect_cfm(sco, status);
175 hci_conn_del(sco);
176 }
177}
178
158static void hci_conn_timeout(unsigned long arg) 179static void hci_conn_timeout(unsigned long arg)
159{ 180{
160 struct hci_conn *conn = (void *) arg; 181 struct hci_conn *conn = (void *) arg;
@@ -358,6 +379,11 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
358 acl->sec_level = sec_level; 379 acl->sec_level = sec_level;
359 acl->auth_type = auth_type; 380 acl->auth_type = auth_type;
360 hci_acl_connect(acl); 381 hci_acl_connect(acl);
382 } else {
383 if (acl->sec_level < sec_level)
384 acl->sec_level = sec_level;
385 if (acl->auth_type < auth_type)
386 acl->auth_type = auth_type;
361 } 387 }
362 388
363 if (type == ACL_LINK) 389 if (type == ACL_LINK)
@@ -380,10 +406,13 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
380 acl->power_save = 1; 406 acl->power_save = 1;
381 hci_conn_enter_active_mode(acl); 407 hci_conn_enter_active_mode(acl);
382 408
383 if (lmp_esco_capable(hdev)) 409 if (test_bit(HCI_CONN_MODE_CHANGE_PEND, &acl->pend)) {
384 hci_setup_sync(sco, acl->handle); 410 /* defer SCO setup until mode change completed */
385 else 411 set_bit(HCI_CONN_SCO_SETUP_PEND, &acl->pend);
386 hci_add_sco(sco, acl->handle); 412 return sco;
413 }
414
415 hci_sco_setup(acl, 0x00);
387 } 416 }
388 417
389 return sco; 418 return sco;
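
hci_sco_setup() folds the SCO-setup logic that hci_connect() and hci_conn_complete_evt() used to duplicate into one place; when a mode change is still pending, the ACL connection records the debt in HCI_CONN_SCO_SETUP_PEND and the mode-change handlers replay the setup later. A reduced sketch of the caller side of this defer-and-replay pattern, with names as in the hunks:

/* Sketch: do the SCO setup now, or mark it pending on the ACL link. */
static void try_sco_setup(struct hci_conn *acl)
{
	if (test_bit(HCI_CONN_MODE_CHANGE_PEND, &acl->pend)) {
		set_bit(HCI_CONN_SCO_SETUP_PEND, &acl->pend);
		return;		/* mode-change completion will replay */
	}
	hci_sco_setup(acl, 0x00);
}

On the completion side, test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend) gates the replayed hci_sco_setup() call, as the hci_event.c hunks below show.
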
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 4ad23192c7a5..c52f091ee6de 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -37,6 +37,7 @@
37#include <linux/fcntl.h> 37#include <linux/fcntl.h>
38#include <linux/init.h> 38#include <linux/init.h>
39#include <linux/skbuff.h> 39#include <linux/skbuff.h>
40#include <linux/workqueue.h>
40#include <linux/interrupt.h> 41#include <linux/interrupt.h>
41#include <linux/notifier.h> 42#include <linux/notifier.h>
42#include <linux/rfkill.h> 43#include <linux/rfkill.h>
@@ -561,6 +562,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
561 hci_dev_lock_bh(hdev); 562 hci_dev_lock_bh(hdev);
562 inquiry_cache_flush(hdev); 563 inquiry_cache_flush(hdev);
563 hci_conn_hash_flush(hdev); 564 hci_conn_hash_flush(hdev);
565 hci_blacklist_clear(hdev);
564 hci_dev_unlock_bh(hdev); 566 hci_dev_unlock_bh(hdev);
565 567
566 hci_notify(hdev, HCI_DEV_DOWN); 568 hci_notify(hdev, HCI_DEV_DOWN);
@@ -912,7 +914,7 @@ int hci_register_dev(struct hci_dev *hdev)
912 skb_queue_head_init(&hdev->cmd_q); 914 skb_queue_head_init(&hdev->cmd_q);
913 skb_queue_head_init(&hdev->raw_q); 915 skb_queue_head_init(&hdev->raw_q);
914 916
915 for (i = 0; i < 3; i++) 917 for (i = 0; i < NUM_REASSEMBLY; i++)
916 hdev->reassembly[i] = NULL; 918 hdev->reassembly[i] = NULL;
917 919
918 init_waitqueue_head(&hdev->req_wait_q); 920 init_waitqueue_head(&hdev->req_wait_q);
@@ -922,12 +924,18 @@ int hci_register_dev(struct hci_dev *hdev)
922 924
923 hci_conn_hash_init(hdev); 925 hci_conn_hash_init(hdev);
924 926
927 INIT_LIST_HEAD(&hdev->blacklist);
928
925 memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); 929 memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
926 930
927 atomic_set(&hdev->promisc, 0); 931 atomic_set(&hdev->promisc, 0);
928 932
929 write_unlock_bh(&hci_dev_list_lock); 933 write_unlock_bh(&hci_dev_list_lock);
930 934
935 hdev->workqueue = create_singlethread_workqueue(hdev->name);
936 if (!hdev->workqueue)
937 goto nomem;
938
931 hci_register_sysfs(hdev); 939 hci_register_sysfs(hdev);
932 940
933 hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev, 941 hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev,
@@ -942,6 +950,13 @@ int hci_register_dev(struct hci_dev *hdev)
942 hci_notify(hdev, HCI_DEV_REG); 950 hci_notify(hdev, HCI_DEV_REG);
943 951
944 return id; 952 return id;
953
954nomem:
955 write_lock_bh(&hci_dev_list_lock);
956 list_del(&hdev->list);
957 write_unlock_bh(&hci_dev_list_lock);
958
959 return -ENOMEM;
945} 960}
946EXPORT_SYMBOL(hci_register_dev); 961EXPORT_SYMBOL(hci_register_dev);
947 962
@@ -958,7 +973,7 @@ int hci_unregister_dev(struct hci_dev *hdev)
958 973
959 hci_dev_do_close(hdev); 974 hci_dev_do_close(hdev);
960 975
961 for (i = 0; i < 3; i++) 976 for (i = 0; i < NUM_REASSEMBLY; i++)
962 kfree_skb(hdev->reassembly[i]); 977 kfree_skb(hdev->reassembly[i]);
963 978
964 hci_notify(hdev, HCI_DEV_UNREG); 979 hci_notify(hdev, HCI_DEV_UNREG);
@@ -970,6 +985,8 @@ int hci_unregister_dev(struct hci_dev *hdev)
970 985
971 hci_unregister_sysfs(hdev); 986 hci_unregister_sysfs(hdev);
972 987
988 destroy_workqueue(hdev->workqueue);
989
973 __hci_dev_put(hdev); 990 __hci_dev_put(hdev);
974 991
975 return 0; 992 return 0;
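
hci_register_dev() now gives every adapter its own single-threaded workqueue named after the device, unwinding the hci_dev_list entry if creation fails, and hci_unregister_dev() destroys it; the hci_sysfs.c hunks further down drop the old global bt_workq in its favour. The lifecycle in outline (a fragment, not a complete function; labels as in the hunk above):

	hdev->workqueue = create_singlethread_workqueue(hdev->name);
	if (!hdev->workqueue)
		goto nomem;	/* unlink from hci_dev_list, return -ENOMEM */

	/* producers queue per-device work: */
	queue_work(hdev->workqueue, &conn->work_add);

	/* unregister path: */
	destroy_workqueue(hdev->workqueue);
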
@@ -1016,89 +1033,170 @@ int hci_recv_frame(struct sk_buff *skb)
1016} 1033}
1017EXPORT_SYMBOL(hci_recv_frame); 1034EXPORT_SYMBOL(hci_recv_frame);
1018 1035
1019/* Receive packet type fragment */ 1036static int hci_reassembly(struct hci_dev *hdev, int type, void *data,
1020#define __reassembly(hdev, type) ((hdev)->reassembly[(type) - 2]) 1037 int count, __u8 index, gfp_t gfp_mask)
1021
1022int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
1023{ 1038{
1024 if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) 1039 int len = 0;
1040 int hlen = 0;
1041 int remain = count;
1042 struct sk_buff *skb;
1043 struct bt_skb_cb *scb;
1044
1045 if ((type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) ||
1046 index >= NUM_REASSEMBLY)
1025 return -EILSEQ; 1047 return -EILSEQ;
1026 1048
1049 skb = hdev->reassembly[index];
1050
1051 if (!skb) {
1052 switch (type) {
1053 case HCI_ACLDATA_PKT:
1054 len = HCI_MAX_FRAME_SIZE;
1055 hlen = HCI_ACL_HDR_SIZE;
1056 break;
1057 case HCI_EVENT_PKT:
1058 len = HCI_MAX_EVENT_SIZE;
1059 hlen = HCI_EVENT_HDR_SIZE;
1060 break;
1061 case HCI_SCODATA_PKT:
1062 len = HCI_MAX_SCO_SIZE;
1063 hlen = HCI_SCO_HDR_SIZE;
1064 break;
1065 }
1066
1067 skb = bt_skb_alloc(len, gfp_mask);
1068 if (!skb)
1069 return -ENOMEM;
1070
1071 scb = (void *) skb->cb;
1072 scb->expect = hlen;
1073 scb->pkt_type = type;
1074
1075 skb->dev = (void *) hdev;
1076 hdev->reassembly[index] = skb;
1077 }
1078
1027 while (count) { 1079 while (count) {
1028 struct sk_buff *skb = __reassembly(hdev, type); 1080 scb = (void *) skb->cb;
1029 struct { int expect; } *scb; 1081 len = min(scb->expect, (__u16)count);
1030 int len = 0;
1031 1082
1032 if (!skb) { 1083 memcpy(skb_put(skb, len), data, len);
1033 /* Start of the frame */
1034 1084
1035 switch (type) { 1085 count -= len;
1036 case HCI_EVENT_PKT: 1086 data += len;
1037 if (count >= HCI_EVENT_HDR_SIZE) { 1087 scb->expect -= len;
1038 struct hci_event_hdr *h = data; 1088 remain = count;
1039 len = HCI_EVENT_HDR_SIZE + h->plen;
1040 } else
1041 return -EILSEQ;
1042 break;
1043 1089
1044 case HCI_ACLDATA_PKT: 1090 switch (type) {
1045 if (count >= HCI_ACL_HDR_SIZE) { 1091 case HCI_EVENT_PKT:
1046 struct hci_acl_hdr *h = data; 1092 if (skb->len == HCI_EVENT_HDR_SIZE) {
1047 len = HCI_ACL_HDR_SIZE + __le16_to_cpu(h->dlen); 1093 struct hci_event_hdr *h = hci_event_hdr(skb);
1048 } else 1094 scb->expect = h->plen;
1049 return -EILSEQ; 1095
1050 break; 1096 if (skb_tailroom(skb) < scb->expect) {
1097 kfree_skb(skb);
1098 hdev->reassembly[index] = NULL;
1099 return -ENOMEM;
1100 }
1101 }
1102 break;
1051 1103
1052 case HCI_SCODATA_PKT: 1104 case HCI_ACLDATA_PKT:
1053 if (count >= HCI_SCO_HDR_SIZE) { 1105 if (skb->len == HCI_ACL_HDR_SIZE) {
1054 struct hci_sco_hdr *h = data; 1106 struct hci_acl_hdr *h = hci_acl_hdr(skb);
1055 len = HCI_SCO_HDR_SIZE + h->dlen; 1107 scb->expect = __le16_to_cpu(h->dlen);
1056 } else 1108
1057 return -EILSEQ; 1109 if (skb_tailroom(skb) < scb->expect) {
1058 break; 1110 kfree_skb(skb);
1111 hdev->reassembly[index] = NULL;
1112 return -ENOMEM;
1113 }
1059 } 1114 }
1115 break;
1060 1116
1061 skb = bt_skb_alloc(len, GFP_ATOMIC); 1117 case HCI_SCODATA_PKT:
1062 if (!skb) { 1118 if (skb->len == HCI_SCO_HDR_SIZE) {
1063 BT_ERR("%s no memory for packet", hdev->name); 1119 struct hci_sco_hdr *h = hci_sco_hdr(skb);
1064 return -ENOMEM; 1120 scb->expect = h->dlen;
1121
1122 if (skb_tailroom(skb) < scb->expect) {
1123 kfree_skb(skb);
1124 hdev->reassembly[index] = NULL;
1125 return -ENOMEM;
1126 }
1065 } 1127 }
1128 break;
1129 }
1130
1131 if (scb->expect == 0) {
1132 /* Complete frame */
1066 1133
1067 skb->dev = (void *) hdev;
1068 bt_cb(skb)->pkt_type = type; 1134 bt_cb(skb)->pkt_type = type;
1135 hci_recv_frame(skb);
1069 1136
1070 __reassembly(hdev, type) = skb; 1137 hdev->reassembly[index] = NULL;
1138 return remain;
1139 }
1140 }
1071 1141
1072 scb = (void *) skb->cb; 1142 return remain;
1073 scb->expect = len; 1143}
1074 } else {
1075 /* Continuation */
1076 1144
1077 scb = (void *) skb->cb; 1145int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
1078 len = scb->expect; 1146{
1079 } 1147 int rem = 0;
1080 1148
1081 len = min(len, count); 1149 if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT)
1150 return -EILSEQ;
1082 1151
1083 memcpy(skb_put(skb, len), data, len); 1152 while (count) {
1153 rem = hci_reassembly(hdev, type, data, count,
1154 type - 1, GFP_ATOMIC);
1155 if (rem < 0)
1156 return rem;
1084 1157
1085 scb->expect -= len; 1158 data += (count - rem);
1159 count = rem;
1160 };
1086 1161
1087 if (scb->expect == 0) { 1162 return rem;
1088 /* Complete frame */ 1163}
1164EXPORT_SYMBOL(hci_recv_fragment);
1089 1165
1090 __reassembly(hdev, type) = NULL; 1166#define STREAM_REASSEMBLY 0
1091 1167
1092 bt_cb(skb)->pkt_type = type; 1168int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count)
1093 hci_recv_frame(skb); 1169{
1094 } 1170 int type;
1171 int rem = 0;
1095 1172
1096 count -= len; data += len; 1173 while (count) {
1097 } 1174 struct sk_buff *skb = hdev->reassembly[STREAM_REASSEMBLY];
1098 1175
1099 return 0; 1176 if (!skb) {
1177 struct { char type; } *pkt;
1178
1179 /* Start of the frame */
1180 pkt = data;
1181 type = pkt->type;
1182
1183 data++;
1184 count--;
1185 } else
1186 type = bt_cb(skb)->pkt_type;
1187
1188 rem = hci_reassembly(hdev, type, data,
1189 count, STREAM_REASSEMBLY, GFP_ATOMIC);
1190 if (rem < 0)
1191 return rem;
1192
1193 data += (count - rem);
1194 count = rem;
1195 };
1196
1197 return rem;
1100} 1198}
1101EXPORT_SYMBOL(hci_recv_fragment); 1199EXPORT_SYMBOL(hci_recv_stream_fragment);
1102 1200
1103/* ---- Interface to upper protocols ---- */ 1201/* ---- Interface to upper protocols ---- */
1104 1202
@@ -1260,7 +1358,7 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
1260 hdr->dlen = cpu_to_le16(len); 1358 hdr->dlen = cpu_to_le16(len);
1261} 1359}
1262 1360
1263int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) 1361void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
1264{ 1362{
1265 struct hci_dev *hdev = conn->hdev; 1363 struct hci_dev *hdev = conn->hdev;
1266 struct sk_buff *list; 1364 struct sk_buff *list;
@@ -1302,24 +1400,17 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
1302 } 1400 }
1303 1401
1304 tasklet_schedule(&hdev->tx_task); 1402 tasklet_schedule(&hdev->tx_task);
1305
1306 return 0;
1307} 1403}
1308EXPORT_SYMBOL(hci_send_acl); 1404EXPORT_SYMBOL(hci_send_acl);
1309 1405
1310/* Send SCO data */ 1406/* Send SCO data */
1311int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) 1407void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
1312{ 1408{
1313 struct hci_dev *hdev = conn->hdev; 1409 struct hci_dev *hdev = conn->hdev;
1314 struct hci_sco_hdr hdr; 1410 struct hci_sco_hdr hdr;
1315 1411
1316 BT_DBG("%s len %d", hdev->name, skb->len); 1412 BT_DBG("%s len %d", hdev->name, skb->len);
1317 1413
1318 if (skb->len > hdev->sco_mtu) {
1319 kfree_skb(skb);
1320 return -EINVAL;
1321 }
1322
1323 hdr.handle = cpu_to_le16(conn->handle); 1414 hdr.handle = cpu_to_le16(conn->handle);
1324 hdr.dlen = skb->len; 1415 hdr.dlen = skb->len;
1325 1416
@@ -1332,8 +1423,6 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
1332 1423
1333 skb_queue_tail(&conn->data_q, skb); 1424 skb_queue_tail(&conn->data_q, skb);
1334 tasklet_schedule(&hdev->tx_task); 1425 tasklet_schedule(&hdev->tx_task);
1335
1336 return 0;
1337} 1426}
1338EXPORT_SYMBOL(hci_send_sco); 1427EXPORT_SYMBOL(hci_send_sco);
1339 1428
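
hci_reassembly() returns the number of input bytes it has not yet consumed (or a negative errno), so both wrappers loop until the buffer drains; hci_recv_stream_fragment() additionally peels a one-byte packet-type prefix off each new frame, which suits byte-stream transports such as UARTs. A hedged sketch of a driver receive path feeding it (drv_receive_buf() is a hypothetical transport callback):

/* Hypothetical transport callback; error handling trimmed. */
static void drv_receive_buf(struct hci_dev *hdev, void *buf, int len)
{
	int rem = hci_recv_stream_fragment(hdev, buf, len);

	if (rem < 0)	/* -EILSEQ or -ENOMEM: drop and resynchronize */
		BT_ERR("%s reassembly failed (%d)", hdev->name, rem);
}
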
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6c57fc71c7e2..bfef5bae0b3a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1,6 +1,6 @@
1/* 1/*
2 BlueZ - Bluetooth protocol stack for Linux 2 BlueZ - Bluetooth protocol stack for Linux
3 Copyright (C) 2000-2001 Qualcomm Incorporated 3 Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
4 4
5 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> 5 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
6 6
@@ -584,7 +584,7 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
584 conn->out = 1; 584 conn->out = 1;
585 conn->link_mode |= HCI_LM_MASTER; 585 conn->link_mode |= HCI_LM_MASTER;
586 } else 586 } else
587 BT_ERR("No memmory for new connection"); 587 BT_ERR("No memory for new connection");
588 } 588 }
589 } 589 }
590 590
@@ -785,9 +785,13 @@ static void hci_cs_sniff_mode(struct hci_dev *hdev, __u8 status)
785 hci_dev_lock(hdev); 785 hci_dev_lock(hdev);
786 786
787 conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); 787 conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
788 if (conn) 788 if (conn) {
789 clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); 789 clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend);
790 790
791 if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend))
792 hci_sco_setup(conn, status);
793 }
794
791 hci_dev_unlock(hdev); 795 hci_dev_unlock(hdev);
792} 796}
793 797
@@ -808,9 +812,13 @@ static void hci_cs_exit_sniff_mode(struct hci_dev *hdev, __u8 status)
808 hci_dev_lock(hdev); 812 hci_dev_lock(hdev);
809 813
810 conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); 814 conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle));
811 if (conn) 815 if (conn) {
812 clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend); 816 clear_bit(HCI_CONN_MODE_CHANGE_PEND, &conn->pend);
813 817
818 if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend))
819 hci_sco_setup(conn, status);
820 }
821
814 hci_dev_unlock(hdev); 822 hci_dev_unlock(hdev);
815} 823}
816 824
@@ -915,20 +923,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
915 } else 923 } else
916 conn->state = BT_CLOSED; 924 conn->state = BT_CLOSED;
917 925
918 if (conn->type == ACL_LINK) { 926 if (conn->type == ACL_LINK)
919 struct hci_conn *sco = conn->link; 927 hci_sco_setup(conn, ev->status);
920 if (sco) {
921 if (!ev->status) {
922 if (lmp_esco_capable(hdev))
923 hci_setup_sync(sco, conn->handle);
924 else
925 hci_add_sco(sco, conn->handle);
926 } else {
927 hci_proto_connect_cfm(sco, ev->status);
928 hci_conn_del(sco);
929 }
930 }
931 }
932 928
933 if (ev->status) { 929 if (ev->status) {
934 hci_proto_connect_cfm(conn, ev->status); 930 hci_proto_connect_cfm(conn, ev->status);
@@ -952,7 +948,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
952 948
953 mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type); 949 mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type);
954 950
955 if (mask & HCI_LM_ACCEPT) { 951 if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) {
956 /* Connection accepted */ 952 /* Connection accepted */
957 struct inquiry_entry *ie; 953 struct inquiry_entry *ie;
958 struct hci_conn *conn; 954 struct hci_conn *conn;
@@ -965,7 +961,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
965 conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); 961 conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
966 if (!conn) { 962 if (!conn) {
967 if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) { 963 if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) {
968 BT_ERR("No memmory for new connection"); 964 BT_ERR("No memory for new connection");
969 hci_dev_unlock(hdev); 965 hci_dev_unlock(hdev);
970 return; 966 return;
971 } 967 }
@@ -1049,6 +1045,8 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
1049 if (conn) { 1045 if (conn) {
1050 if (!ev->status) 1046 if (!ev->status)
1051 conn->link_mode |= HCI_LM_AUTH; 1047 conn->link_mode |= HCI_LM_AUTH;
1048 else
1049 conn->sec_level = BT_SECURITY_LOW;
1052 1050
1053 	clear_bit(HCI_CONN_AUTH_PEND, &conn->pend); 1051 	clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
1054 1052
@@ -1479,6 +1477,9 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb
1479 else 1477 else
1480 conn->power_save = 0; 1478 conn->power_save = 0;
1481 } 1479 }
1480
1481 if (test_and_clear_bit(HCI_CONN_SCO_SETUP_PEND, &conn->pend))
1482 hci_sco_setup(conn, ev->status);
1482 } 1483 }
1483 1484
1484 hci_dev_unlock(hdev); 1485 hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 38f08f6b86f6..83acd164d39e 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -165,6 +165,84 @@ static int hci_sock_release(struct socket *sock)
165 return 0; 165 return 0;
166} 166}
167 167
168struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr)
169{
170 struct list_head *p;
171
172 list_for_each(p, &hdev->blacklist) {
173 struct bdaddr_list *b;
174
175 b = list_entry(p, struct bdaddr_list, list);
176
177 if (bacmp(bdaddr, &b->bdaddr) == 0)
178 return b;
179 }
180
181 return NULL;
182}
183
184static int hci_blacklist_add(struct hci_dev *hdev, void __user *arg)
185{
186 bdaddr_t bdaddr;
187 struct bdaddr_list *entry;
188
189 if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
190 return -EFAULT;
191
192 if (bacmp(&bdaddr, BDADDR_ANY) == 0)
193 return -EBADF;
194
195 if (hci_blacklist_lookup(hdev, &bdaddr))
196 return -EEXIST;
197
198 entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL);
199 if (!entry)
200 return -ENOMEM;
201
202 bacpy(&entry->bdaddr, &bdaddr);
203
204 list_add(&entry->list, &hdev->blacklist);
205
206 return 0;
207}
208
209int hci_blacklist_clear(struct hci_dev *hdev)
210{
211 struct list_head *p, *n;
212
213 list_for_each_safe(p, n, &hdev->blacklist) {
214 struct bdaddr_list *b;
215
216 b = list_entry(p, struct bdaddr_list, list);
217
218 list_del(p);
219 kfree(b);
220 }
221
222 return 0;
223}
224
225static int hci_blacklist_del(struct hci_dev *hdev, void __user *arg)
226{
227 bdaddr_t bdaddr;
228 struct bdaddr_list *entry;
229
230 if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
231 return -EFAULT;
232
233 if (bacmp(&bdaddr, BDADDR_ANY) == 0)
234 return hci_blacklist_clear(hdev);
235
236 entry = hci_blacklist_lookup(hdev, &bdaddr);
237 if (!entry)
238 return -ENOENT;
239
240 list_del(&entry->list);
241 kfree(entry);
242
243 return 0;
244}
245
168/* Ioctls that require bound socket */ 246/* Ioctls that require bound socket */
169static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) 247static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
170{ 248{
@@ -194,6 +272,16 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign
194 case HCIGETAUTHINFO: 272 case HCIGETAUTHINFO:
195 return hci_get_auth_info(hdev, (void __user *) arg); 273 return hci_get_auth_info(hdev, (void __user *) arg);
196 274
275 case HCIBLOCKADDR:
276 if (!capable(CAP_NET_ADMIN))
277 return -EACCES;
278 return hci_blacklist_add(hdev, (void __user *) arg);
279
280 case HCIUNBLOCKADDR:
281 if (!capable(CAP_NET_ADMIN))
282 return -EACCES;
283 return hci_blacklist_del(hdev, (void __user *) arg);
284
197 default: 285 default:
198 if (hdev->ioctl) 286 if (hdev->ioctl)
199 return hdev->ioctl(hdev, cmd, arg); 287 return hdev->ioctl(hdev, cmd, arg);
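
With HCIBLOCKADDR and HCIUNBLOCKADDR wired into hci_sock_bound_ioctl(), a CAP_NET_ADMIN process can manage the per-device blacklist through a bound raw HCI socket; passing BDADDR_ANY to HCIUNBLOCKADDR clears the whole list. An illustrative userspace call, assuming BlueZ headers that carry these ioctl numbers and an already bound socket fd:

/* Userspace sketch: block one remote address on a bound HCI socket. */
#include <sys/ioctl.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/hci.h>

static int block_addr(int hci_fd, bdaddr_t *ba)
{
	/* kernel copies a bare bdaddr_t; BDADDR_ANY here yields -EBADF */
	return ioctl(hci_fd, HCIBLOCKADDR, ba);
}
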
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 0e8e1a59856c..8fb967beee80 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -14,8 +14,6 @@ static struct class *bt_class;
14struct dentry *bt_debugfs = NULL; 14struct dentry *bt_debugfs = NULL;
15EXPORT_SYMBOL_GPL(bt_debugfs); 15EXPORT_SYMBOL_GPL(bt_debugfs);
16 16
17static struct workqueue_struct *bt_workq;
18
19static inline char *link_typetostr(int type) 17static inline char *link_typetostr(int type)
20{ 18{
21 switch (type) { 19 switch (type) {
@@ -161,14 +159,14 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
161{ 159{
162 BT_DBG("conn %p", conn); 160 BT_DBG("conn %p", conn);
163 161
164 queue_work(bt_workq, &conn->work_add); 162 queue_work(conn->hdev->workqueue, &conn->work_add);
165} 163}
166 164
167void hci_conn_del_sysfs(struct hci_conn *conn) 165void hci_conn_del_sysfs(struct hci_conn *conn)
168{ 166{
169 BT_DBG("conn %p", conn); 167 BT_DBG("conn %p", conn);
170 168
171 queue_work(bt_workq, &conn->work_del); 169 queue_work(conn->hdev->workqueue, &conn->work_del);
172} 170}
173 171
174static inline char *host_bustostr(int bus) 172static inline char *host_bustostr(int bus)
@@ -283,11 +281,9 @@ static ssize_t show_idle_timeout(struct device *dev, struct device_attribute *at
283static ssize_t store_idle_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) 281static ssize_t store_idle_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
284{ 282{
285 struct hci_dev *hdev = dev_get_drvdata(dev); 283 struct hci_dev *hdev = dev_get_drvdata(dev);
286 char *ptr; 284 unsigned long val;
287 __u32 val;
288 285
289 val = simple_strtoul(buf, &ptr, 10); 286 if (strict_strtoul(buf, 0, &val) < 0)
290 if (ptr == buf)
291 return -EINVAL; 287 return -EINVAL;
292 288
293 if (val != 0 && (val < 500 || val > 3600000)) 289 if (val != 0 && (val < 500 || val > 3600000))
@@ -307,11 +303,9 @@ static ssize_t show_sniff_max_interval(struct device *dev, struct device_attribu
307static ssize_t store_sniff_max_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) 303static ssize_t store_sniff_max_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
308{ 304{
309 struct hci_dev *hdev = dev_get_drvdata(dev); 305 struct hci_dev *hdev = dev_get_drvdata(dev);
310 char *ptr; 306 unsigned long val;
311 __u16 val;
312 307
313 val = simple_strtoul(buf, &ptr, 10); 308 if (strict_strtoul(buf, 0, &val) < 0)
314 if (ptr == buf)
315 return -EINVAL; 309 return -EINVAL;
316 310
317 if (val < 0x0002 || val > 0xFFFE || val % 2) 311 if (val < 0x0002 || val > 0xFFFE || val % 2)
@@ -334,11 +328,9 @@ static ssize_t show_sniff_min_interval(struct device *dev, struct device_attribu
334static ssize_t store_sniff_min_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) 328static ssize_t store_sniff_min_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
335{ 329{
336 struct hci_dev *hdev = dev_get_drvdata(dev); 330 struct hci_dev *hdev = dev_get_drvdata(dev);
337 char *ptr; 331 unsigned long val;
338 __u16 val;
339 332
340 val = simple_strtoul(buf, &ptr, 10); 333 if (strict_strtoul(buf, 0, &val) < 0)
341 if (ptr == buf)
342 return -EINVAL; 334 return -EINVAL;
343 335
344 if (val < 0x0002 || val > 0xFFFE || val % 2) 336 if (val < 0x0002 || val > 0xFFFE || val % 2)
@@ -444,6 +436,40 @@ static const struct file_operations inquiry_cache_fops = {
444 .release = single_release, 436 .release = single_release,
445}; 437};
446 438
439static int blacklist_show(struct seq_file *f, void *p)
440{
441 struct hci_dev *hdev = f->private;
442 struct list_head *l;
443
444 hci_dev_lock_bh(hdev);
445
446 list_for_each(l, &hdev->blacklist) {
447 struct bdaddr_list *b;
448 bdaddr_t bdaddr;
449
450 b = list_entry(l, struct bdaddr_list, list);
451
452 baswap(&bdaddr, &b->bdaddr);
453
454 seq_printf(f, "%s\n", batostr(&bdaddr));
455 }
456
457 hci_dev_unlock_bh(hdev);
458
459 return 0;
460}
461
462static int blacklist_open(struct inode *inode, struct file *file)
463{
464 return single_open(file, blacklist_show, inode->i_private);
465}
466
467static const struct file_operations blacklist_fops = {
468 .open = blacklist_open,
469 .read = seq_read,
470 .llseek = seq_lseek,
471 .release = single_release,
472};
447int hci_register_sysfs(struct hci_dev *hdev) 473int hci_register_sysfs(struct hci_dev *hdev)
448{ 474{
449 struct device *dev = &hdev->dev; 475 struct device *dev = &hdev->dev;
@@ -473,6 +499,9 @@ int hci_register_sysfs(struct hci_dev *hdev)
473 debugfs_create_file("inquiry_cache", 0444, hdev->debugfs, 499 debugfs_create_file("inquiry_cache", 0444, hdev->debugfs,
474 hdev, &inquiry_cache_fops); 500 hdev, &inquiry_cache_fops);
475 501
502 debugfs_create_file("blacklist", 0444, hdev->debugfs,
503 hdev, &blacklist_fops);
504
476 return 0; 505 return 0;
477} 506}
478 507
@@ -487,17 +516,11 @@ void hci_unregister_sysfs(struct hci_dev *hdev)
487 516
488int __init bt_sysfs_init(void) 517int __init bt_sysfs_init(void)
489{ 518{
490 bt_workq = create_singlethread_workqueue("bluetooth");
491 if (!bt_workq)
492 return -ENOMEM;
493
494 bt_debugfs = debugfs_create_dir("bluetooth", NULL); 519 bt_debugfs = debugfs_create_dir("bluetooth", NULL);
495 520
496 bt_class = class_create(THIS_MODULE, "bluetooth"); 521 bt_class = class_create(THIS_MODULE, "bluetooth");
497 if (IS_ERR(bt_class)) { 522 if (IS_ERR(bt_class))
498 destroy_workqueue(bt_workq);
499 return PTR_ERR(bt_class); 523 return PTR_ERR(bt_class);
500 }
501 524
502 return 0; 525 return 0;
503} 526}
@@ -507,6 +530,4 @@ void bt_sysfs_cleanup(void)
507 class_destroy(bt_class); 530 class_destroy(bt_class);
508 531
509 debugfs_remove_recursive(bt_debugfs); 532 debugfs_remove_recursive(bt_debugfs);
510
511 destroy_workqueue(bt_workq);
512} 533}
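
The sysfs store methods switch from simple_strtoul(), which silently stops at the first bad character, to strict_strtoul(), which rejects trailing garbage and overflow outright; base 0 also lets 0x-prefixed input through. The validated-store pattern in isolation (range limits copied from store_idle_timeout above; attribute wiring omitted):

static ssize_t store_timeout(struct device *dev,
			     struct device_attribute *attr,
			     const char *buf, size_t count)
{
	unsigned long val;

	if (strict_strtoul(buf, 0, &val) < 0)
		return -EINVAL;		/* not a clean number */

	if (val != 0 && (val < 500 || val > 3600000))
		return -EINVAL;		/* outside the permitted window */

	/* ... commit val to device state here ... */
	return count;
}
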
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 280529ad9274..bfe641b7dfaf 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -561,8 +561,8 @@ static int hidp_session(void *arg)
561 561
562 init_waitqueue_entry(&ctrl_wait, current); 562 init_waitqueue_entry(&ctrl_wait, current);
563 init_waitqueue_entry(&intr_wait, current); 563 init_waitqueue_entry(&intr_wait, current);
564 add_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait); 564 add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
565 add_wait_queue(intr_sk->sk_sleep, &intr_wait); 565 add_wait_queue(sk_sleep(intr_sk), &intr_wait);
566 while (!atomic_read(&session->terminate)) { 566 while (!atomic_read(&session->terminate)) {
567 set_current_state(TASK_INTERRUPTIBLE); 567 set_current_state(TASK_INTERRUPTIBLE);
568 568
@@ -584,8 +584,8 @@ static int hidp_session(void *arg)
584 schedule(); 584 schedule();
585 } 585 }
586 set_current_state(TASK_RUNNING); 586 set_current_state(TASK_RUNNING);
587 remove_wait_queue(intr_sk->sk_sleep, &intr_wait); 587 remove_wait_queue(sk_sleep(intr_sk), &intr_wait);
588 remove_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait); 588 remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
589 589
590 down_write(&hidp_session_sem); 590 down_write(&hidp_session_sem);
591 591
@@ -609,7 +609,7 @@ static int hidp_session(void *arg)
609 609
610 fput(session->intr_sock->file); 610 fput(session->intr_sock->file);
611 611
612 wait_event_timeout(*(ctrl_sk->sk_sleep), 612 wait_event_timeout(*(sk_sleep(ctrl_sk)),
613 (ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500)); 613 (ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500));
614 614
615 fput(session->ctrl_sock->file); 615 fput(session->ctrl_sock->file);
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index a4e215d50c10..8d934a19da0a 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -164,8 +164,8 @@ static inline void hidp_schedule(struct hidp_session *session)
164 struct sock *ctrl_sk = session->ctrl_sock->sk; 164 struct sock *ctrl_sk = session->ctrl_sock->sk;
165 struct sock *intr_sk = session->intr_sock->sk; 165 struct sock *intr_sk = session->intr_sock->sk;
166 166
167 wake_up_interruptible(ctrl_sk->sk_sleep); 167 wake_up_interruptible(sk_sleep(ctrl_sk));
168 wake_up_interruptible(intr_sk->sk_sleep); 168 wake_up_interruptible(sk_sleep(intr_sk));
169} 169}
170 170
171/* HIDP init defines */ 171/* HIDP init defines */
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 9753b690a8b3..3e3cd9d4e52c 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1,6 +1,8 @@
1/* 1/*
2 BlueZ - Bluetooth protocol stack for Linux 2 BlueZ - Bluetooth protocol stack for Linux
3 Copyright (C) 2000-2001 Qualcomm Incorporated 3 Copyright (C) 2000-2001 Qualcomm Incorporated
4 Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
5 Copyright (C) 2010 Google Inc.
4 6
5 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> 7 Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
6 8
@@ -53,27 +55,33 @@
53#include <net/bluetooth/hci_core.h> 55#include <net/bluetooth/hci_core.h>
54#include <net/bluetooth/l2cap.h> 56#include <net/bluetooth/l2cap.h>
55 57
56#define VERSION "2.14" 58#define VERSION "2.15"
57 59
58static int enable_ertm = 0; 60static int disable_ertm = 0;
59static int max_transmit = L2CAP_DEFAULT_MAX_TX;
60 61
61static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; 62static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
62static u8 l2cap_fixed_chan[8] = { 0x02, }; 63static u8 l2cap_fixed_chan[8] = { 0x02, };
63 64
64static const struct proto_ops l2cap_sock_ops; 65static const struct proto_ops l2cap_sock_ops;
65 66
67static struct workqueue_struct *_busy_wq;
68
66static struct bt_sock_list l2cap_sk_list = { 69static struct bt_sock_list l2cap_sk_list = {
67 .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock) 70 .lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock)
68}; 71};
69 72
73static void l2cap_busy_work(struct work_struct *work);
74
70static void __l2cap_sock_close(struct sock *sk, int reason); 75static void __l2cap_sock_close(struct sock *sk, int reason);
71static void l2cap_sock_close(struct sock *sk); 76static void l2cap_sock_close(struct sock *sk);
72static void l2cap_sock_kill(struct sock *sk); 77static void l2cap_sock_kill(struct sock *sk);
73 78
79static int l2cap_build_conf_req(struct sock *sk, void *data);
74static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, 80static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
75 u8 code, u8 ident, u16 dlen, void *data); 81 u8 code, u8 ident, u16 dlen, void *data);
76 82
83static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb);
84
77/* ---- L2CAP timers ---- */ 85/* ---- L2CAP timers ---- */
78static void l2cap_sock_timeout(unsigned long arg) 86static void l2cap_sock_timeout(unsigned long arg)
79{ 87{
@@ -219,7 +227,7 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so
219 227
220 l2cap_pi(sk)->conn = conn; 228 l2cap_pi(sk)->conn = conn;
221 229
222 if (sk->sk_type == SOCK_SEQPACKET) { 230 if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) {
223 /* Alloc CID for connection-oriented socket */ 231 /* Alloc CID for connection-oriented socket */
224 l2cap_pi(sk)->scid = l2cap_alloc_cid(l); 232 l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
225 } else if (sk->sk_type == SOCK_DGRAM) { 233 } else if (sk->sk_type == SOCK_DGRAM) {
@@ -269,6 +277,24 @@ static void l2cap_chan_del(struct sock *sk, int err)
269 parent->sk_data_ready(parent, 0); 277 parent->sk_data_ready(parent, 0);
270 } else 278 } else
271 sk->sk_state_change(sk); 279 sk->sk_state_change(sk);
280
281 skb_queue_purge(TX_QUEUE(sk));
282
283 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
284 struct srej_list *l, *tmp;
285
286 del_timer(&l2cap_pi(sk)->retrans_timer);
287 del_timer(&l2cap_pi(sk)->monitor_timer);
288 del_timer(&l2cap_pi(sk)->ack_timer);
289
290 skb_queue_purge(SREJ_QUEUE(sk));
291 skb_queue_purge(BUSY_QUEUE(sk));
292
293 list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) {
294 list_del(&l->list);
295 kfree(l);
296 }
297 }
272} 298}
273 299
274/* Service level security */ 300/* Service level security */
@@ -325,25 +351,29 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
325 return id; 351 return id;
326} 352}
327 353
328static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) 354static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data)
329{ 355{
330 struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data); 356 struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data);
331 357
332 BT_DBG("code 0x%2.2x", code); 358 BT_DBG("code 0x%2.2x", code);
333 359
334 if (!skb) 360 if (!skb)
335 return -ENOMEM; 361 return;
336 362
337 return hci_send_acl(conn->hcon, skb, 0); 363 hci_send_acl(conn->hcon, skb, 0);
338} 364}
339 365
340static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) 366static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
341{ 367{
342 struct sk_buff *skb; 368 struct sk_buff *skb;
343 struct l2cap_hdr *lh; 369 struct l2cap_hdr *lh;
344 struct l2cap_conn *conn = pi->conn; 370 struct l2cap_conn *conn = pi->conn;
371 struct sock *sk = (struct sock *)pi;
345 int count, hlen = L2CAP_HDR_SIZE + 2; 372 int count, hlen = L2CAP_HDR_SIZE + 2;
346 373
374 if (sk->sk_state != BT_CONNECTED)
375 return;
376
347 if (pi->fcs == L2CAP_FCS_CRC16) 377 if (pi->fcs == L2CAP_FCS_CRC16)
348 hlen += 2; 378 hlen += 2;
349 379
@@ -352,9 +382,19 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
352 count = min_t(unsigned int, conn->mtu, hlen); 382 count = min_t(unsigned int, conn->mtu, hlen);
353 control |= L2CAP_CTRL_FRAME_TYPE; 383 control |= L2CAP_CTRL_FRAME_TYPE;
354 384
385 if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
386 control |= L2CAP_CTRL_FINAL;
387 pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
388 }
389
390 if (pi->conn_state & L2CAP_CONN_SEND_PBIT) {
391 control |= L2CAP_CTRL_POLL;
392 pi->conn_state &= ~L2CAP_CONN_SEND_PBIT;
393 }
394
355 skb = bt_skb_alloc(count, GFP_ATOMIC); 395 skb = bt_skb_alloc(count, GFP_ATOMIC);
356 if (!skb) 396 if (!skb)
357 return -ENOMEM; 397 return;
358 398
359 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); 399 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
360 lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE); 400 lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE);
@@ -366,19 +406,25 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
366 put_unaligned_le16(fcs, skb_put(skb, 2)); 406 put_unaligned_le16(fcs, skb_put(skb, 2));
367 } 407 }
368 408
369 return hci_send_acl(pi->conn->hcon, skb, 0); 409 hci_send_acl(pi->conn->hcon, skb, 0);
370} 410}
371 411
372static inline int l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control) 412static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
373{ 413{
374 if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) 414 if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
375 control |= L2CAP_SUPER_RCV_NOT_READY; 415 control |= L2CAP_SUPER_RCV_NOT_READY;
376 else 416 pi->conn_state |= L2CAP_CONN_RNR_SENT;
417 } else
377 control |= L2CAP_SUPER_RCV_READY; 418 control |= L2CAP_SUPER_RCV_READY;
378 419
379 control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; 420 control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
380 421
381 return l2cap_send_sframe(pi, control); 422 l2cap_send_sframe(pi, control);
423}
424
425static inline int __l2cap_no_conn_pending(struct sock *sk)
426{
427 return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND);
382} 428}
383 429
384static void l2cap_do_start(struct sock *sk) 430static void l2cap_do_start(struct sock *sk)
@@ -389,12 +435,13 @@ static void l2cap_do_start(struct sock *sk)
389 if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) 435 if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
390 return; 436 return;
391 437
392 if (l2cap_check_security(sk)) { 438 if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) {
393 struct l2cap_conn_req req; 439 struct l2cap_conn_req req;
394 req.scid = cpu_to_le16(l2cap_pi(sk)->scid); 440 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
395 req.psm = l2cap_pi(sk)->psm; 441 req.psm = l2cap_pi(sk)->psm;
396 442
397 l2cap_pi(sk)->ident = l2cap_get_ident(conn); 443 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
444 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
398 445
399 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 446 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
400 L2CAP_CONN_REQ, sizeof(req), &req); 447 L2CAP_CONN_REQ, sizeof(req), &req);
@@ -414,47 +461,101 @@ static void l2cap_do_start(struct sock *sk)
414 } 461 }
415} 462}
416 463
417static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk) 464static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
465{
466 u32 local_feat_mask = l2cap_feat_mask;
467 if (!disable_ertm)
468 local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
469
470 switch (mode) {
471 case L2CAP_MODE_ERTM:
472 return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
473 case L2CAP_MODE_STREAMING:
474 return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
475 default:
476 return 0x00;
477 }
478}
479
480static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err)
418{ 481{
419 struct l2cap_disconn_req req; 482 struct l2cap_disconn_req req;
420 483
484 if (!conn)
485 return;
486
487 skb_queue_purge(TX_QUEUE(sk));
488
489 if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
490 del_timer(&l2cap_pi(sk)->retrans_timer);
491 del_timer(&l2cap_pi(sk)->monitor_timer);
492 del_timer(&l2cap_pi(sk)->ack_timer);
493 }
494
421 req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid); 495 req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
422 req.scid = cpu_to_le16(l2cap_pi(sk)->scid); 496 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
423 l2cap_send_cmd(conn, l2cap_get_ident(conn), 497 l2cap_send_cmd(conn, l2cap_get_ident(conn),
424 L2CAP_DISCONN_REQ, sizeof(req), &req); 498 L2CAP_DISCONN_REQ, sizeof(req), &req);
499
500 sk->sk_state = BT_DISCONN;
501 sk->sk_err = err;
425} 502}
426 503
427/* ---- L2CAP connections ---- */ 504/* ---- L2CAP connections ---- */
428static void l2cap_conn_start(struct l2cap_conn *conn) 505static void l2cap_conn_start(struct l2cap_conn *conn)
429{ 506{
430 struct l2cap_chan_list *l = &conn->chan_list; 507 struct l2cap_chan_list *l = &conn->chan_list;
508 struct sock_del_list del, *tmp1, *tmp2;
431 struct sock *sk; 509 struct sock *sk;
432 510
433 BT_DBG("conn %p", conn); 511 BT_DBG("conn %p", conn);
434 512
513 INIT_LIST_HEAD(&del.list);
514
435 read_lock(&l->lock); 515 read_lock(&l->lock);
436 516
437 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { 517 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
438 bh_lock_sock(sk); 518 bh_lock_sock(sk);
439 519
440 if (sk->sk_type != SOCK_SEQPACKET) { 520 if (sk->sk_type != SOCK_SEQPACKET &&
521 sk->sk_type != SOCK_STREAM) {
441 bh_unlock_sock(sk); 522 bh_unlock_sock(sk);
442 continue; 523 continue;
443 } 524 }
444 525
445 if (sk->sk_state == BT_CONNECT) { 526 if (sk->sk_state == BT_CONNECT) {
446 if (l2cap_check_security(sk)) { 527 struct l2cap_conn_req req;
447 struct l2cap_conn_req req;
448 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
449 req.psm = l2cap_pi(sk)->psm;
450 528
451 l2cap_pi(sk)->ident = l2cap_get_ident(conn); 529 if (!l2cap_check_security(sk) ||
530 !__l2cap_no_conn_pending(sk)) {
531 bh_unlock_sock(sk);
532 continue;
533 }
452 534
453 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 535 if (!l2cap_mode_supported(l2cap_pi(sk)->mode,
454 L2CAP_CONN_REQ, sizeof(req), &req); 536 conn->feat_mask)
537 && l2cap_pi(sk)->conf_state &
538 L2CAP_CONF_STATE2_DEVICE) {
539 tmp1 = kzalloc(sizeof(struct sock_del_list),
540 GFP_ATOMIC);
541 tmp1->sk = sk;
542 list_add_tail(&tmp1->list, &del.list);
543 bh_unlock_sock(sk);
544 continue;
455 } 545 }
546
547 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
548 req.psm = l2cap_pi(sk)->psm;
549
550 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
551 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
552
553 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
554 L2CAP_CONN_REQ, sizeof(req), &req);
555
456 } else if (sk->sk_state == BT_CONNECT2) { 556 } else if (sk->sk_state == BT_CONNECT2) {
457 struct l2cap_conn_rsp rsp; 557 struct l2cap_conn_rsp rsp;
558 char buf[128];
458 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); 559 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
459 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); 560 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
460 561
@@ -477,12 +578,31 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
477 578
478 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 579 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
479 L2CAP_CONN_RSP, sizeof(rsp), &rsp); 580 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
581
582 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT ||
583 rsp.result != L2CAP_CR_SUCCESS) {
584 bh_unlock_sock(sk);
585 continue;
586 }
587
588 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
589 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
590 l2cap_build_conf_req(sk, buf), buf);
591 l2cap_pi(sk)->num_conf_req++;
480 } 592 }
481 593
482 bh_unlock_sock(sk); 594 bh_unlock_sock(sk);
483 } 595 }
484 596
485 read_unlock(&l->lock); 597 read_unlock(&l->lock);
598
599 list_for_each_entry_safe(tmp1, tmp2, &del.list, list) {
600 bh_lock_sock(tmp1->sk);
601 __l2cap_sock_close(tmp1->sk, ECONNRESET);
602 bh_unlock_sock(tmp1->sk);
603 list_del(&tmp1->list);
604 kfree(tmp1);
605 }
486} 606}
487 607
488static void l2cap_conn_ready(struct l2cap_conn *conn) 608static void l2cap_conn_ready(struct l2cap_conn *conn)
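
l2cap_conn_start() may not call __l2cap_sock_close() while it holds the channel-list read lock, so sockets whose configured mode the remote cannot support are parked on a local del.list and closed only after read_unlock(). The collect-then-process idiom in outline (a fragment; sock_del_list as in the hunk, and the kzalloc is GFP_ATOMIC because the lock is held):

	struct sock_del_list del, *tmp1, *tmp2;

	INIT_LIST_HEAD(&del.list);

	read_lock(&l->lock);
	/* ... for each condemned sk: kzalloc a node, link it, continue ... */
	read_unlock(&l->lock);

	list_for_each_entry_safe(tmp1, tmp2, &del.list, list) {
		bh_lock_sock(tmp1->sk);
		__l2cap_sock_close(tmp1->sk, ECONNRESET);
		bh_unlock_sock(tmp1->sk);
		list_del(&tmp1->list);
		kfree(tmp1);
	}
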
@@ -497,7 +617,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
497 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { 617 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
498 bh_lock_sock(sk); 618 bh_lock_sock(sk);
499 619
500 if (sk->sk_type != SOCK_SEQPACKET) { 620 if (sk->sk_type != SOCK_SEQPACKET &&
621 sk->sk_type != SOCK_STREAM) {
501 l2cap_sock_clear_timer(sk); 622 l2cap_sock_clear_timer(sk);
502 sk->sk_state = BT_CONNECTED; 623 sk->sk_state = BT_CONNECTED;
503 sk->sk_state_change(sk); 624 sk->sk_state_change(sk);
@@ -706,18 +827,19 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
706 827
707 case BT_CONNECTED: 828 case BT_CONNECTED:
708 case BT_CONFIG: 829 case BT_CONFIG:
709 if (sk->sk_type == SOCK_SEQPACKET) { 830 if (sk->sk_type == SOCK_SEQPACKET ||
831 sk->sk_type == SOCK_STREAM) {
710 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 832 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
711 833
712 sk->sk_state = BT_DISCONN;
713 l2cap_sock_set_timer(sk, sk->sk_sndtimeo); 834 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
714 l2cap_send_disconn_req(conn, sk); 835 l2cap_send_disconn_req(conn, sk, reason);
715 } else 836 } else
716 l2cap_chan_del(sk, reason); 837 l2cap_chan_del(sk, reason);
717 break; 838 break;
718 839
719 case BT_CONNECT2: 840 case BT_CONNECT2:
720 if (sk->sk_type == SOCK_SEQPACKET) { 841 if (sk->sk_type == SOCK_SEQPACKET ||
842 sk->sk_type == SOCK_STREAM) {
721 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 843 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
722 struct l2cap_conn_rsp rsp; 844 struct l2cap_conn_rsp rsp;
723 __u16 result; 845 __u16 result;
@@ -770,16 +892,26 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
770 892
771 pi->imtu = l2cap_pi(parent)->imtu; 893 pi->imtu = l2cap_pi(parent)->imtu;
772 pi->omtu = l2cap_pi(parent)->omtu; 894 pi->omtu = l2cap_pi(parent)->omtu;
895 pi->conf_state = l2cap_pi(parent)->conf_state;
773 pi->mode = l2cap_pi(parent)->mode; 896 pi->mode = l2cap_pi(parent)->mode;
774 pi->fcs = l2cap_pi(parent)->fcs; 897 pi->fcs = l2cap_pi(parent)->fcs;
898 pi->max_tx = l2cap_pi(parent)->max_tx;
899 pi->tx_win = l2cap_pi(parent)->tx_win;
775 pi->sec_level = l2cap_pi(parent)->sec_level; 900 pi->sec_level = l2cap_pi(parent)->sec_level;
776 pi->role_switch = l2cap_pi(parent)->role_switch; 901 pi->role_switch = l2cap_pi(parent)->role_switch;
777 pi->force_reliable = l2cap_pi(parent)->force_reliable; 902 pi->force_reliable = l2cap_pi(parent)->force_reliable;
778 } else { 903 } else {
779 pi->imtu = L2CAP_DEFAULT_MTU; 904 pi->imtu = L2CAP_DEFAULT_MTU;
780 pi->omtu = 0; 905 pi->omtu = 0;
781 pi->mode = L2CAP_MODE_BASIC; 906 if (!disable_ertm && sk->sk_type == SOCK_STREAM) {
907 pi->mode = L2CAP_MODE_ERTM;
908 pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
909 } else {
910 pi->mode = L2CAP_MODE_BASIC;
911 }
912 pi->max_tx = L2CAP_DEFAULT_MAX_TX;
782 pi->fcs = L2CAP_FCS_CRC16; 913 pi->fcs = L2CAP_FCS_CRC16;
914 pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
783 pi->sec_level = BT_SECURITY_LOW; 915 pi->sec_level = BT_SECURITY_LOW;
784 pi->role_switch = 0; 916 pi->role_switch = 0;
785 pi->force_reliable = 0; 917 pi->force_reliable = 0;
@@ -790,6 +922,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
790 pi->flush_to = L2CAP_DEFAULT_FLUSH_TO; 922 pi->flush_to = L2CAP_DEFAULT_FLUSH_TO;
791 skb_queue_head_init(TX_QUEUE(sk)); 923 skb_queue_head_init(TX_QUEUE(sk));
792 skb_queue_head_init(SREJ_QUEUE(sk)); 924 skb_queue_head_init(SREJ_QUEUE(sk));
925 skb_queue_head_init(BUSY_QUEUE(sk));
793 INIT_LIST_HEAD(SREJ_LIST(sk)); 926 INIT_LIST_HEAD(SREJ_LIST(sk));
794} 927}
795 928
@@ -833,7 +966,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
833 966
834 sock->state = SS_UNCONNECTED; 967 sock->state = SS_UNCONNECTED;
835 968
836 if (sock->type != SOCK_SEQPACKET && 969 if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM &&
837 sock->type != SOCK_DGRAM && sock->type != SOCK_RAW) 970 sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
838 return -ESOCKTNOSUPPORT; 971 return -ESOCKTNOSUPPORT;
839 972
@@ -981,7 +1114,8 @@ static int l2cap_do_connect(struct sock *sk)
981 l2cap_sock_set_timer(sk, sk->sk_sndtimeo); 1114 l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
982 1115
983 if (hcon->state == BT_CONNECTED) { 1116 if (hcon->state == BT_CONNECTED) {
984 if (sk->sk_type != SOCK_SEQPACKET) { 1117 if (sk->sk_type != SOCK_SEQPACKET &&
1118 sk->sk_type != SOCK_STREAM) {
985 l2cap_sock_clear_timer(sk); 1119 l2cap_sock_clear_timer(sk);
986 sk->sk_state = BT_CONNECTED; 1120 sk->sk_state = BT_CONNECTED;
987 } else 1121 } else
@@ -1015,7 +1149,8 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
1015 1149
1016 lock_sock(sk); 1150 lock_sock(sk);
1017 1151
1018 if (sk->sk_type == SOCK_SEQPACKET && !la.l2_psm) { 1152 if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM)
1153 && !la.l2_psm) {
1019 err = -EINVAL; 1154 err = -EINVAL;
1020 goto done; 1155 goto done;
1021 } 1156 }
@@ -1025,7 +1160,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
1025 break; 1160 break;
1026 case L2CAP_MODE_ERTM: 1161 case L2CAP_MODE_ERTM:
1027 case L2CAP_MODE_STREAMING: 1162 case L2CAP_MODE_STREAMING:
1028 if (enable_ertm) 1163 if (!disable_ertm)
1029 break; 1164 break;
1030 /* fall through */ 1165 /* fall through */
1031 default: 1166 default:
@@ -1042,6 +1177,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
1042 1177
1043 case BT_CONNECTED: 1178 case BT_CONNECTED:
1044 /* Already connected */ 1179 /* Already connected */
1180 err = -EISCONN;
1045 goto done; 1181 goto done;
1046 1182
1047 case BT_OPEN: 1183 case BT_OPEN:
@@ -1079,7 +1215,8 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
1079 1215
1080 lock_sock(sk); 1216 lock_sock(sk);
1081 1217
1082 if (sk->sk_state != BT_BOUND || sock->type != SOCK_SEQPACKET) { 1218 if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
1219 || sk->sk_state != BT_BOUND) {
1083 err = -EBADFD; 1220 err = -EBADFD;
1084 goto done; 1221 goto done;
1085 } 1222 }
@@ -1089,7 +1226,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
1089 break; 1226 break;
1090 case L2CAP_MODE_ERTM: 1227 case L2CAP_MODE_ERTM:
1091 case L2CAP_MODE_STREAMING: 1228 case L2CAP_MODE_STREAMING:
1092 if (enable_ertm) 1229 if (!disable_ertm)
1093 break; 1230 break;
1094 /* fall through */ 1231 /* fall through */
1095 default: 1232 default:
@@ -1147,7 +1284,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl
 	BT_DBG("sk %p timeo %ld", sk, timeo);
 
 	/* Wait for an incoming connection. (wake-one). */
-	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
 	while (!(nsk = bt_accept_dequeue(sk, newsock))) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (!timeo) {
@@ -1170,7 +1307,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl
 		}
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 
 	if (err)
 		goto done;
@@ -1207,14 +1344,46 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
 	return 0;
 }
 
+static int __l2cap_wait_ack(struct sock *sk)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int err = 0;
+	int timeo = HZ/5;
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	while ((l2cap_pi(sk)->unacked_frames > 0 && l2cap_pi(sk)->conn)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!timeo)
+			timeo = HZ/5;
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+
+		err = sock_error(sk);
+		if (err)
+			break;
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return err;
+}
+
 static void l2cap_monitor_timeout(unsigned long arg)
 {
 	struct sock *sk = (void *) arg;
-	u16 control;
+
+	BT_DBG("sk %p", sk);
 
 	bh_lock_sock(sk);
 	if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) {
-		l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk);
+		l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk, ECONNABORTED);
 		bh_unlock_sock(sk);
 		return;
 	}
@@ -1222,15 +1391,15 @@ static void l2cap_monitor_timeout(unsigned long arg)
 	l2cap_pi(sk)->retry_count++;
 	__mod_monitor_timer();
 
-	control = L2CAP_CTRL_POLL;
-	l2cap_send_rr_or_rnr(l2cap_pi(sk), control);
+	l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL);
 	bh_unlock_sock(sk);
 }
 
 static void l2cap_retrans_timeout(unsigned long arg)
 {
 	struct sock *sk = (void *) arg;
-	u16 control;
+
+	BT_DBG("sk %p", sk);
 
 	bh_lock_sock(sk);
 	l2cap_pi(sk)->retry_count = 1;
@@ -1238,8 +1407,7 @@ static void l2cap_retrans_timeout(unsigned long arg)
 
 	l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F;
 
-	control = L2CAP_CTRL_POLL;
-	l2cap_send_rr_or_rnr(l2cap_pi(sk), control);
+	l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL);
 	bh_unlock_sock(sk);
 }
 
@@ -1247,7 +1415,8 @@ static void l2cap_drop_acked_frames(struct sock *sk)
 {
 	struct sk_buff *skb;
 
-	while ((skb = skb_peek(TX_QUEUE(sk)))) {
+	while ((skb = skb_peek(TX_QUEUE(sk))) &&
+			l2cap_pi(sk)->unacked_frames) {
 		if (bt_cb(skb)->tx_seq == l2cap_pi(sk)->expected_ack_seq)
 			break;
 
@@ -1259,30 +1428,22 @@ static void l2cap_drop_acked_frames(struct sock *sk)
 
 	if (!l2cap_pi(sk)->unacked_frames)
 		del_timer(&l2cap_pi(sk)->retrans_timer);
-
-	return;
 }
 
-static inline int l2cap_do_send(struct sock *sk, struct sk_buff *skb)
+static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
-	int err;
 
 	BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len);
 
-	err = hci_send_acl(pi->conn->hcon, skb, 0);
-	if (err < 0)
-		kfree_skb(skb);
-
-	return err;
+	hci_send_acl(pi->conn->hcon, skb, 0);
 }
 
-static int l2cap_streaming_send(struct sock *sk)
+static void l2cap_streaming_send(struct sock *sk)
 {
 	struct sk_buff *skb, *tx_skb;
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	u16 control, fcs;
-	int err;
 
 	while ((skb = sk->sk_send_head)) {
 		tx_skb = skb_clone(skb, GFP_ATOMIC);
@@ -1291,16 +1452,12 @@ static int l2cap_streaming_send(struct sock *sk)
 		control |= pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT;
 		put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
 
-		if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) {
+		if (pi->fcs == L2CAP_FCS_CRC16) {
 			fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2);
 			put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
 		}
 
-		err = l2cap_do_send(sk, tx_skb);
-		if (err < 0) {
-			l2cap_send_disconn_req(pi->conn, sk);
-			return err;
-		}
+		l2cap_do_send(sk, tx_skb);
 
 		pi->next_tx_seq = (pi->next_tx_seq + 1) % 64;
 
@@ -1312,51 +1469,53 @@ static int l2cap_streaming_send(struct sock *sk)
 		skb = skb_dequeue(TX_QUEUE(sk));
 		kfree_skb(skb);
 	}
-	return 0;
 }
 
-static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq)
+static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct sk_buff *skb, *tx_skb;
 	u16 control, fcs;
-	int err;
 
 	skb = skb_peek(TX_QUEUE(sk));
-	do {
-		if (bt_cb(skb)->tx_seq != tx_seq) {
-			if (skb_queue_is_last(TX_QUEUE(sk), skb))
-				break;
-			skb = skb_queue_next(TX_QUEUE(sk), skb);
-			continue;
-		}
-
-		if (pi->remote_max_tx &&
-				bt_cb(skb)->retries == pi->remote_max_tx) {
-			l2cap_send_disconn_req(pi->conn, sk);
-			break;
-		}
+	if (!skb)
+		return;
 
-		tx_skb = skb_clone(skb, GFP_ATOMIC);
-		bt_cb(skb)->retries++;
-		control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
-		control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
-				| (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
-		put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
+	do {
+		if (bt_cb(skb)->tx_seq == tx_seq)
+			break;
 
-		if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) {
-			fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2);
-			put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
-		}
+		if (skb_queue_is_last(TX_QUEUE(sk), skb))
+			return;
 
-		err = l2cap_do_send(sk, tx_skb);
-		if (err < 0) {
-			l2cap_send_disconn_req(pi->conn, sk);
-			return err;
-		}
-		break;
-	} while(1);
-	return 0;
+	} while ((skb = skb_queue_next(TX_QUEUE(sk), skb)));
+
+	if (pi->remote_max_tx &&
+			bt_cb(skb)->retries == pi->remote_max_tx) {
+		l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED);
+		return;
+	}
+
+	tx_skb = skb_clone(skb, GFP_ATOMIC);
+	bt_cb(skb)->retries++;
+	control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
+
+	if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
+		control |= L2CAP_CTRL_FINAL;
+		pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
+	}
+
+	control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
+			| (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
+
+	put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
+
+	if (pi->fcs == L2CAP_FCS_CRC16) {
+		fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2);
+		put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
+	}
+
+	l2cap_do_send(sk, tx_skb);
 }
 
 static int l2cap_ertm_send(struct sock *sk)
@@ -1364,17 +1523,16 @@ static int l2cap_ertm_send(struct sock *sk)
 	struct sk_buff *skb, *tx_skb;
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	u16 control, fcs;
-	int err;
+	int nsent = 0;
 
-	if (pi->conn_state & L2CAP_CONN_WAIT_F)
-		return 0;
+	if (sk->sk_state != BT_CONNECTED)
+		return -ENOTCONN;
 
-	while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) &&
-			!(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) {
+	while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) {
 
 		if (pi->remote_max_tx &&
 				bt_cb(skb)->retries == pi->remote_max_tx) {
-			l2cap_send_disconn_req(pi->conn, sk);
+			l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED);
 			break;
 		}
 
@@ -1383,35 +1541,89 @@ static int l2cap_ertm_send(struct sock *sk)
 		bt_cb(skb)->retries++;
 
 		control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
+		control &= L2CAP_CTRL_SAR;
+
+		if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
+			control |= L2CAP_CTRL_FINAL;
+			pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
+		}
 		control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
 				| (pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
 		put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
 
 
-		if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) {
+		if (pi->fcs == L2CAP_FCS_CRC16) {
 			fcs = crc16(0, (u8 *)skb->data, tx_skb->len - 2);
 			put_unaligned_le16(fcs, skb->data + tx_skb->len - 2);
 		}
 
-		err = l2cap_do_send(sk, tx_skb);
-		if (err < 0) {
-			l2cap_send_disconn_req(pi->conn, sk);
-			return err;
-		}
+		l2cap_do_send(sk, tx_skb);
+
 		__mod_retrans_timer();
 
 		bt_cb(skb)->tx_seq = pi->next_tx_seq;
 		pi->next_tx_seq = (pi->next_tx_seq + 1) % 64;
 
 		pi->unacked_frames++;
+		pi->frames_sent++;
 
 		if (skb_queue_is_last(TX_QUEUE(sk), skb))
 			sk->sk_send_head = NULL;
 		else
 			sk->sk_send_head = skb_queue_next(TX_QUEUE(sk), skb);
+
+		nsent++;
 	}
 
-	return 0;
+	return nsent;
+}
+
+static int l2cap_retransmit_frames(struct sock *sk)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	int ret;
+
+	if (!skb_queue_empty(TX_QUEUE(sk)))
+		sk->sk_send_head = TX_QUEUE(sk)->next;
+
+	pi->next_tx_seq = pi->expected_ack_seq;
+	ret = l2cap_ertm_send(sk);
+	return ret;
+}
+
+static void l2cap_send_ack(struct l2cap_pinfo *pi)
+{
+	struct sock *sk = (struct sock *)pi;
+	u16 control = 0;
+
+	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+
+	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
+		control |= L2CAP_SUPER_RCV_NOT_READY;
+		pi->conn_state |= L2CAP_CONN_RNR_SENT;
+		l2cap_send_sframe(pi, control);
+		return;
+	}
+
+	if (l2cap_ertm_send(sk) > 0)
+		return;
+
+	control |= L2CAP_SUPER_RCV_READY;
+	l2cap_send_sframe(pi, control);
+}
+
+static void l2cap_send_srejtail(struct sock *sk)
+{
+	struct srej_list *tail;
+	u16 control;
+
+	control = L2CAP_SUPER_SELECT_REJECT;
+	control |= L2CAP_CTRL_FINAL;
+
+	tail = list_entry(SREJ_LIST(sk)->prev, struct srej_list, list);
+	control |= tail->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+
+	l2cap_send_sframe(l2cap_pi(sk), control);
 }
 
 static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, int len, int count, struct sk_buff *skb)
@@ -1420,9 +1632,8 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in
 	struct sk_buff **frag;
 	int err, sent = 0;
 
-	if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) {
+	if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count))
 		return -EFAULT;
-	}
 
 	sent += count;
 	len -= count;
@@ -1513,6 +1724,9 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m
 
 	BT_DBG("sk %p len %d", sk, (int)len);
 
+	if (!conn)
+		return ERR_PTR(-ENOTCONN);
+
 	if (sdulen)
 		hlen += 2;
 
@@ -1554,25 +1768,24 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 	u16 control;
 	size_t size = 0;
 
-	__skb_queue_head_init(&sar_queue);
+	skb_queue_head_init(&sar_queue);
 	control = L2CAP_SDU_START;
-	skb = l2cap_create_iframe_pdu(sk, msg, pi->max_pdu_size, control, len);
+	skb = l2cap_create_iframe_pdu(sk, msg, pi->remote_mps, control, len);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
 	__skb_queue_tail(&sar_queue, skb);
-	len -= pi->max_pdu_size;
-	size +=pi->max_pdu_size;
-	control = 0;
+	len -= pi->remote_mps;
+	size += pi->remote_mps;
 
 	while (len > 0) {
 		size_t buflen;
 
-		if (len > pi->max_pdu_size) {
-			control |= L2CAP_SDU_CONTINUE;
-			buflen = pi->max_pdu_size;
+		if (len > pi->remote_mps) {
+			control = L2CAP_SDU_CONTINUE;
+			buflen = pi->remote_mps;
 		} else {
-			control |= L2CAP_SDU_END;
+			control = L2CAP_SDU_END;
 			buflen = len;
 		}
 
@@ -1585,7 +1798,6 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 		__skb_queue_tail(&sar_queue, skb);
 		len -= buflen;
 		size += buflen;
-		control = 0;
 	}
 	skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk));
 	if (sk->sk_send_head == NULL)
@@ -1611,11 +1823,6 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 	if (msg->msg_flags & MSG_OOB)
 		return -EOPNOTSUPP;
 
-	/* Check outgoing MTU */
-	if (sk->sk_type == SOCK_SEQPACKET && pi->mode == L2CAP_MODE_BASIC &&
-			len > pi->omtu)
-		return -EINVAL;
-
 	lock_sock(sk);
 
 	if (sk->sk_state != BT_CONNECTED) {
@@ -1626,15 +1833,23 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 	/* Connectionless channel */
 	if (sk->sk_type == SOCK_DGRAM) {
 		skb = l2cap_create_connless_pdu(sk, msg, len);
-		if (IS_ERR(skb))
+		if (IS_ERR(skb)) {
 			err = PTR_ERR(skb);
-		else
-			err = l2cap_do_send(sk, skb);
+		} else {
+			l2cap_do_send(sk, skb);
+			err = len;
+		}
 		goto done;
 	}
 
 	switch (pi->mode) {
 	case L2CAP_MODE_BASIC:
+		/* Check outgoing MTU */
+		if (len > pi->omtu) {
+			err = -EMSGSIZE;
+			goto done;
+		}
+
 		/* Create a basic PDU */
 		skb = l2cap_create_basic_pdu(sk, msg, len);
 		if (IS_ERR(skb)) {
@@ -1642,15 +1857,14 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 			goto done;
 		}
 
-		err = l2cap_do_send(sk, skb);
-		if (!err)
-			err = len;
+		l2cap_do_send(sk, skb);
+		err = len;
 		break;
 
 	case L2CAP_MODE_ERTM:
 	case L2CAP_MODE_STREAMING:
 		/* Entire SDU fits into one PDU */
-		if (len <= pi->max_pdu_size) {
+		if (len <= pi->remote_mps) {
 			control = L2CAP_SDU_UNSEGMENTED;
 			skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0);
 			if (IS_ERR(skb)) {
@@ -1658,8 +1872,10 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 				goto done;
 			}
 			__skb_queue_tail(TX_QUEUE(sk), skb);
+
 			if (sk->sk_send_head == NULL)
 				sk->sk_send_head = skb;
+
 		} else {
 			/* Segment SDU into multiples PDUs */
 			err = l2cap_sar_segment_sdu(sk, msg, len);
@@ -1667,18 +1883,24 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 				goto done;
 		}
 
-		if (pi->mode == L2CAP_MODE_STREAMING)
-			err = l2cap_streaming_send(sk);
-		else
+		if (pi->mode == L2CAP_MODE_STREAMING) {
+			l2cap_streaming_send(sk);
+		} else {
+			if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY &&
+					pi->conn_state && L2CAP_CONN_WAIT_F) {
+				err = len;
+				break;
+			}
 			err = l2cap_ertm_send(sk);
+		}
 
-		if (!err)
+		if (err >= 0)
 			err = len;
 		break;
 
 	default:
 		BT_DBG("bad state %1.1x", pi->mode);
-		err = -EINVAL;
+		err = -EBADFD;
 	}
 
 done:
@@ -1694,6 +1916,8 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms
 
 	if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
 		struct l2cap_conn_rsp rsp;
+		struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+		u8 buf[128];
 
 		sk->sk_state = BT_CONFIG;
 
@@ -1704,6 +1928,16 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms
 		l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 
+		if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) {
+			release_sock(sk);
+			return 0;
+		}
+
+		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
+		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
+				l2cap_build_conf_req(sk, buf), buf);
+		l2cap_pi(sk)->num_conf_req++;
+
 		release_sock(sk);
 		return 0;
 	}
@@ -1731,6 +1965,8 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
 		opts.flush_to = l2cap_pi(sk)->flush_to;
 		opts.mode     = l2cap_pi(sk)->mode;
 		opts.fcs      = l2cap_pi(sk)->fcs;
+		opts.max_tx   = l2cap_pi(sk)->max_tx;
+		opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
 
 		len = min_t(unsigned int, sizeof(opts), optlen);
 		if (copy_from_user((char *) &opts, optval, len)) {
@@ -1738,10 +1974,31 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
 			break;
 		}
 
+		if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) {
+			err = -EINVAL;
+			break;
+		}
+
+		l2cap_pi(sk)->mode = opts.mode;
+		switch (l2cap_pi(sk)->mode) {
+		case L2CAP_MODE_BASIC:
+			l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE;
+			break;
+		case L2CAP_MODE_ERTM:
+		case L2CAP_MODE_STREAMING:
+			if (!disable_ertm)
+				break;
+			/* fall through */
+		default:
+			err = -EINVAL;
+			break;
+		}
+
 		l2cap_pi(sk)->imtu = opts.imtu;
 		l2cap_pi(sk)->omtu = opts.omtu;
-		l2cap_pi(sk)->mode = opts.mode;
 		l2cap_pi(sk)->fcs  = opts.fcs;
+		l2cap_pi(sk)->max_tx = opts.max_tx;
+		l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size;
 		break;
 
 	case L2CAP_LM:
@@ -1789,7 +2046,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch
 
 	switch (optname) {
 	case BT_SECURITY:
-		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) {
+		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
+				&& sk->sk_type != SOCK_RAW) {
 			err = -EINVAL;
 			break;
 		}
@@ -1856,6 +2114,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us
 		opts.flush_to = l2cap_pi(sk)->flush_to;
 		opts.mode     = l2cap_pi(sk)->mode;
 		opts.fcs      = l2cap_pi(sk)->fcs;
+		opts.max_tx   = l2cap_pi(sk)->max_tx;
+		opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
 
 		len = min_t(unsigned int, len, sizeof(opts));
 		if (copy_to_user(optval, (char *) &opts, len))
@@ -1937,7 +2197,8 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch
 
 	switch (optname) {
 	case BT_SECURITY:
-		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) {
+		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
+				&& sk->sk_type != SOCK_RAW) {
 			err = -EINVAL;
 			break;
 		}
@@ -1982,6 +2243,9 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
 
 	lock_sock(sk);
 	if (!sk->sk_shutdown) {
+		if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM)
+			err = __l2cap_wait_ack(sk);
+
 		sk->sk_shutdown = SHUTDOWN_MASK;
 		l2cap_sock_clear_timer(sk);
 		__l2cap_sock_close(sk, 0);
@@ -1990,6 +2254,10 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
 			err = bt_sock_wait_state(sk, BT_CLOSED,
 							sk->sk_lingertime);
 	}
+
+	if (!err && sk->sk_err)
+		err = -sk->sk_err;
+
 	release_sock(sk);
 	return err;
 }
@@ -2184,35 +2452,36 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
 	*ptr += L2CAP_CONF_OPT_SIZE + len;
 }
 
+static void l2cap_ack_timeout(unsigned long arg)
+{
+	struct sock *sk = (void *) arg;
+
+	bh_lock_sock(sk);
+	l2cap_send_ack(l2cap_pi(sk));
+	bh_unlock_sock(sk);
+}
+
 static inline void l2cap_ertm_init(struct sock *sk)
 {
 	l2cap_pi(sk)->expected_ack_seq = 0;
 	l2cap_pi(sk)->unacked_frames = 0;
 	l2cap_pi(sk)->buffer_seq = 0;
-	l2cap_pi(sk)->num_to_ack = 0;
+	l2cap_pi(sk)->num_acked = 0;
+	l2cap_pi(sk)->frames_sent = 0;
 
 	setup_timer(&l2cap_pi(sk)->retrans_timer,
 			l2cap_retrans_timeout, (unsigned long) sk);
 	setup_timer(&l2cap_pi(sk)->monitor_timer,
 			l2cap_monitor_timeout, (unsigned long) sk);
+	setup_timer(&l2cap_pi(sk)->ack_timer,
+			l2cap_ack_timeout, (unsigned long) sk);
 
 	__skb_queue_head_init(SREJ_QUEUE(sk));
-}
+	__skb_queue_head_init(BUSY_QUEUE(sk));
 
-static int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
-{
-	u32 local_feat_mask = l2cap_feat_mask;
-	if (enable_ertm)
-		local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
+	INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
 
-	switch (mode) {
-	case L2CAP_MODE_ERTM:
-		return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
-	case L2CAP_MODE_STREAMING:
-		return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
-	default:
-		return 0x00;
-	}
+	sk->sk_backlog_rcv = l2cap_ertm_data_rcv;
 }
 
 static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
@@ -2232,7 +2501,7 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct l2cap_conf_req *req = data;
-	struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
+	struct l2cap_conf_rfc rfc = { .mode = pi->mode };
 	void *ptr = req->data;
 
 	BT_DBG("sk %p", sk);
@@ -2243,10 +2512,10 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
 	switch (pi->mode) {
 	case L2CAP_MODE_STREAMING:
 	case L2CAP_MODE_ERTM:
-		pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
-		if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask))
-			l2cap_send_disconn_req(pi->conn, sk);
-		break;
+		if (pi->conf_state & L2CAP_CONF_STATE2_DEVICE)
+			break;
+
+		/* fall through */
 	default:
 		pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask);
 		break;
@@ -2257,18 +2526,34 @@ done:
 	case L2CAP_MODE_BASIC:
 		if (pi->imtu != L2CAP_DEFAULT_MTU)
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu);
+
+		if (!(pi->conn->feat_mask & L2CAP_FEAT_ERTM) &&
+				!(pi->conn->feat_mask & L2CAP_FEAT_STREAMING))
+			break;
+
+		rfc.mode = L2CAP_MODE_BASIC;
+		rfc.txwin_size = 0;
+		rfc.max_transmit = 0;
+		rfc.retrans_timeout = 0;
+		rfc.monitor_timeout = 0;
+		rfc.max_pdu_size = 0;
+
+		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
+						(unsigned long) &rfc);
 		break;
 
 	case L2CAP_MODE_ERTM:
 		rfc.mode = L2CAP_MODE_ERTM;
-		rfc.txwin_size = L2CAP_DEFAULT_TX_WINDOW;
-		rfc.max_transmit = max_transmit;
+		rfc.txwin_size = pi->tx_win;
+		rfc.max_transmit = pi->max_tx;
 		rfc.retrans_timeout = 0;
 		rfc.monitor_timeout = 0;
 		rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
+		if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
+			rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
 
-		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
-					sizeof(rfc), (unsigned long) &rfc);
+		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
+						(unsigned long) &rfc);
 
 		if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS))
 			break;
@@ -2287,9 +2572,11 @@ done:
 		rfc.retrans_timeout = 0;
 		rfc.monitor_timeout = 0;
 		rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
+		if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
+			rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
 
-		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
-					sizeof(rfc), (unsigned long) &rfc);
+		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
+						(unsigned long) &rfc);
 
 		if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS))
 			break;
@@ -2366,18 +2653,21 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data)
 		}
 	}
 
-	if (pi->num_conf_rsp || pi->num_conf_req)
+	if (pi->num_conf_rsp || pi->num_conf_req > 1)
 		goto done;
 
 	switch (pi->mode) {
 	case L2CAP_MODE_STREAMING:
 	case L2CAP_MODE_ERTM:
-		pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
-		if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask))
+		if (!(pi->conf_state & L2CAP_CONF_STATE2_DEVICE)) {
+			pi->mode = l2cap_select_mode(rfc.mode,
+					pi->conn->feat_mask);
+			break;
+		}
+
+		if (pi->mode != rfc.mode)
 			return -ECONNREFUSED;
-		break;
-	default:
-		pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask);
+
 		break;
 	}
 
@@ -2415,10 +2705,15 @@ done:
 	case L2CAP_MODE_ERTM:
 		pi->remote_tx_win = rfc.txwin_size;
 		pi->remote_max_tx = rfc.max_transmit;
-		pi->max_pdu_size = rfc.max_pdu_size;
+		if (rfc.max_pdu_size > pi->conn->mtu - 10)
+			rfc.max_pdu_size = le16_to_cpu(pi->conn->mtu - 10);
+
+		pi->remote_mps = le16_to_cpu(rfc.max_pdu_size);
 
-		rfc.retrans_timeout = L2CAP_DEFAULT_RETRANS_TO;
-		rfc.monitor_timeout = L2CAP_DEFAULT_MONITOR_TO;
+		rfc.retrans_timeout =
+			le16_to_cpu(L2CAP_DEFAULT_RETRANS_TO);
+		rfc.monitor_timeout =
+			le16_to_cpu(L2CAP_DEFAULT_MONITOR_TO);
 
 		pi->conf_state |= L2CAP_CONF_MODE_DONE;
 
@@ -2428,8 +2723,10 @@ done:
 		break;
 
 	case L2CAP_MODE_STREAMING:
-		pi->remote_tx_win = rfc.txwin_size;
-		pi->max_pdu_size = rfc.max_pdu_size;
+		if (rfc.max_pdu_size > pi->conn->mtu - 10)
+			rfc.max_pdu_size = le16_to_cpu(pi->conn->mtu - 10);
+
+		pi->remote_mps = le16_to_cpu(rfc.max_pdu_size);
 
 		pi->conf_state |= L2CAP_CONF_MODE_DONE;
 
@@ -2493,7 +2790,6 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data,
 					rfc.mode != pi->mode)
 				return -ECONNREFUSED;
 
-			pi->mode = rfc.mode;
 			pi->fcs = 0;
 
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
@@ -2502,17 +2798,21 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data,
 		}
 	}
 
+	if (pi->mode == L2CAP_MODE_BASIC && pi->mode != rfc.mode)
+		return -ECONNREFUSED;
+
+	pi->mode = rfc.mode;
+
 	if (*result == L2CAP_CONF_SUCCESS) {
 		switch (rfc.mode) {
 		case L2CAP_MODE_ERTM:
 			pi->remote_tx_win = rfc.txwin_size;
-			pi->retrans_timeout = rfc.retrans_timeout;
-			pi->monitor_timeout = rfc.monitor_timeout;
-			pi->max_pdu_size = le16_to_cpu(rfc.max_pdu_size);
+			pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout);
+			pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout);
+			pi->mps = le16_to_cpu(rfc.max_pdu_size);
 			break;
 		case L2CAP_MODE_STREAMING:
-			pi->max_pdu_size = le16_to_cpu(rfc.max_pdu_size);
-			break;
+			pi->mps = le16_to_cpu(rfc.max_pdu_size);
 		}
 	}
 
@@ -2536,6 +2836,42 @@ static int l2cap_build_conf_rsp(struct sock *sk, void *data, u16 result, u16 fla
 	return ptr - data;
 }
 
+static void l2cap_conf_rfc_get(struct sock *sk, void *rsp, int len)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	int type, olen;
+	unsigned long val;
+	struct l2cap_conf_rfc rfc;
+
+	BT_DBG("sk %p, rsp %p, len %d", sk, rsp, len);
+
+	if ((pi->mode != L2CAP_MODE_ERTM) && (pi->mode != L2CAP_MODE_STREAMING))
+		return;
+
+	while (len >= L2CAP_CONF_OPT_SIZE) {
+		len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val);
+
+		switch (type) {
+		case L2CAP_CONF_RFC:
+			if (olen == sizeof(rfc))
+				memcpy(&rfc, (void *)val, olen);
+			goto done;
+		}
+	}
+
+done:
+	switch (rfc.mode) {
+	case L2CAP_MODE_ERTM:
+		pi->remote_tx_win = rfc.txwin_size;
+		pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout);
+		pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout);
+		pi->mps = le16_to_cpu(rfc.max_pdu_size);
+		break;
+	case L2CAP_MODE_STREAMING:
+		pi->mps = le16_to_cpu(rfc.max_pdu_size);
+	}
+}
+
 static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
 {
 	struct l2cap_cmd_rej *rej = (struct l2cap_cmd_rej *) data;
@@ -2561,7 +2897,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
 	struct l2cap_chan_list *list = &conn->chan_list;
 	struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
 	struct l2cap_conn_rsp rsp;
-	struct sock *sk, *parent;
+	struct sock *parent, *uninitialized_var(sk);
 	int result, status = L2CAP_CS_NO_INFO;
 
 	u16 dcid = 0, scid = __le16_to_cpu(req->scid);
@@ -2670,6 +3006,15 @@ sendresp:
 				L2CAP_INFO_REQ, sizeof(info), &info);
 	}
 
+	if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) &&
+				result == L2CAP_CR_SUCCESS) {
+		u8 buf[128];
+		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
+		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
+					l2cap_build_conf_req(sk, buf), buf);
+		l2cap_pi(sk)->num_conf_req++;
+	}
+
 	return 0;
 }
 
@@ -2690,11 +3035,11 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
 	if (scid) {
 		sk = l2cap_get_chan_by_scid(&conn->chan_list, scid);
 		if (!sk)
-			return 0;
+			return -EFAULT;
 	} else {
 		sk = l2cap_get_chan_by_ident(&conn->chan_list, cmd->ident);
 		if (!sk)
-			return 0;
+			return -EFAULT;
 	}
 
 	switch (result) {
@@ -2702,10 +3047,13 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
 		sk->sk_state = BT_CONFIG;
 		l2cap_pi(sk)->ident = 0;
 		l2cap_pi(sk)->dcid = dcid;
-		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
-
 		l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND;
 
+		if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)
+			break;
+
+		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
+
 		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
 					l2cap_build_conf_req(sk, req), req);
 		l2cap_pi(sk)->num_conf_req++;
@@ -2741,8 +3089,14 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 	if (!sk)
 		return -ENOENT;
 
-	if (sk->sk_state == BT_DISCONN)
+	if (sk->sk_state != BT_CONFIG) {
+		struct l2cap_cmd_rej rej;
+
+		rej.reason = cpu_to_le16(0x0002);
+		l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ,
+				sizeof(rej), &rej);
 		goto unlock;
+	}
 
 	/* Reject if config buffer is too small. */
 	len = cmd_len - sizeof(*req);
@@ -2768,7 +3122,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 	/* Complete config. */
 	len = l2cap_parse_conf_req(sk, rsp);
 	if (len < 0) {
-		l2cap_send_disconn_req(conn, sk);
+		l2cap_send_disconn_req(conn, sk, ECONNRESET);
 		goto unlock;
 	}
 
@@ -2815,6 +3169,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 	struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data;
 	u16 scid, flags, result;
 	struct sock *sk;
+	int len = cmd->len - sizeof(*rsp);
 
 	scid   = __le16_to_cpu(rsp->scid);
 	flags  = __le16_to_cpu(rsp->flags);
@@ -2829,15 +3184,15 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 
 	switch (result) {
 	case L2CAP_CONF_SUCCESS:
+		l2cap_conf_rfc_get(sk, rsp->data, len);
 		break;
 
 	case L2CAP_CONF_UNACCEPT:
 		if (l2cap_pi(sk)->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) {
-			int len = cmd->len - sizeof(*rsp);
 			char req[64];
 
 			if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) {
-				l2cap_send_disconn_req(conn, sk);
+				l2cap_send_disconn_req(conn, sk, ECONNRESET);
 				goto done;
 			}
 
@@ -2846,7 +3201,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 			len = l2cap_parse_conf_rsp(sk, rsp->data,
 							len, req, &result);
 			if (len < 0) {
-				l2cap_send_disconn_req(conn, sk);
+				l2cap_send_disconn_req(conn, sk, ECONNRESET);
 				goto done;
 			}
 
@@ -2859,10 +3214,9 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 		}
 
 	default:
-		sk->sk_state = BT_DISCONN;
 		sk->sk_err = ECONNRESET;
 		l2cap_sock_set_timer(sk, HZ * 5);
-		l2cap_send_disconn_req(conn, sk);
+		l2cap_send_disconn_req(conn, sk, ECONNRESET);
 		goto done;
 	}
 
@@ -2913,14 +3267,6 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
 
 	sk->sk_shutdown = SHUTDOWN_MASK;
 
-	skb_queue_purge(TX_QUEUE(sk));
-
-	if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
-		skb_queue_purge(SREJ_QUEUE(sk));
-		del_timer(&l2cap_pi(sk)->retrans_timer);
-		del_timer(&l2cap_pi(sk)->monitor_timer);
-	}
-
 	l2cap_chan_del(sk, ECONNRESET);
 	bh_unlock_sock(sk);
 
@@ -2943,14 +3289,6 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd
 	if (!sk)
 		return 0;
 
-	skb_queue_purge(TX_QUEUE(sk));
-
-	if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
-		skb_queue_purge(SREJ_QUEUE(sk));
-		del_timer(&l2cap_pi(sk)->retrans_timer);
-		del_timer(&l2cap_pi(sk)->monitor_timer);
-	}
-
 	l2cap_chan_del(sk, 0);
 	bh_unlock_sock(sk);
 
@@ -2973,7 +3311,7 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm
 		struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf;
 		rsp->type   = cpu_to_le16(L2CAP_IT_FEAT_MASK);
 		rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
-		if (enable_ertm)
+		if (!disable_ertm)
 			feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING
 							| L2CAP_FEAT_FCS;
 		put_unaligned_le32(feat_mask, rsp->data);
@@ -3010,6 +3348,15 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm
 
 	del_timer(&conn->info_timer);
 
+	if (result != L2CAP_IR_SUCCESS) {
+		conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
+		conn->info_ident = 0;
+
+		l2cap_conn_start(conn);
+
+		return 0;
+	}
+
 	if (type == L2CAP_IT_FEAT_MASK) {
 		conn->feat_mask = get_unaligned_le32(rsp->data);
 
@@ -3138,14 +3485,43 @@ static int l2cap_check_fcs(struct l2cap_pinfo *pi, struct sk_buff *skb)
 		our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size);
 
 		if (our_fcs != rcv_fcs)
-			return -EINVAL;
+			return -EBADMSG;
 	}
 	return 0;
 }
 
-static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar)
+static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	u16 control = 0;
+
+	pi->frames_sent = 0;
+
+	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+
+	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
+		control |= L2CAP_SUPER_RCV_NOT_READY;
+		l2cap_send_sframe(pi, control);
+		pi->conn_state |= L2CAP_CONN_RNR_SENT;
+	}
+
+	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY)
+		l2cap_retransmit_frames(sk);
+
+	l2cap_ertm_send(sk);
+
+	if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) &&
+			pi->frames_sent == 0) {
+		control |= L2CAP_SUPER_RCV_READY;
+		l2cap_send_sframe(pi, control);
+	}
+}
+
+static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar)
 {
 	struct sk_buff *next_skb;
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	int tx_seq_offset, next_tx_seq_offset;
 
 	bt_cb(skb)->tx_seq = tx_seq;
 	bt_cb(skb)->sar = sar;
@@ -3153,29 +3529,282 @@ static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_
 	next_skb = skb_peek(SREJ_QUEUE(sk));
 	if (!next_skb) {
 		__skb_queue_tail(SREJ_QUEUE(sk), skb);
-		return;
+		return 0;
 	}
 
+	tx_seq_offset = (tx_seq - pi->buffer_seq) % 64;
+	if (tx_seq_offset < 0)
+		tx_seq_offset += 64;
+
 	do {
-		if (bt_cb(next_skb)->tx_seq > tx_seq) {
+		if (bt_cb(next_skb)->tx_seq == tx_seq)
+			return -EINVAL;
+
+		next_tx_seq_offset = (bt_cb(next_skb)->tx_seq -
+						pi->buffer_seq) % 64;
+		if (next_tx_seq_offset < 0)
+			next_tx_seq_offset += 64;
+
+		if (next_tx_seq_offset > tx_seq_offset) {
 			__skb_queue_before(SREJ_QUEUE(sk), next_skb, skb);
-			return;
+			return 0;
 		}
 
 		if (skb_queue_is_last(SREJ_QUEUE(sk), next_skb))
 			break;
 
-	} while((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb)));
+	} while ((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb)));
 
 	__skb_queue_tail(SREJ_QUEUE(sk), skb);
+
+	return 0;
+}
+
+static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	struct sk_buff *_skb;
+	int err;
+
+	switch (control & L2CAP_CTRL_SAR) {
+	case L2CAP_SDU_UNSEGMENTED:
+		if (pi->conn_state & L2CAP_CONN_SAR_SDU)
+			goto drop;
+
+		err = sock_queue_rcv_skb(sk, skb);
+		if (!err)
+			return err;
+
+		break;
+
+	case L2CAP_SDU_START:
+		if (pi->conn_state & L2CAP_CONN_SAR_SDU)
+			goto drop;
+
+		pi->sdu_len = get_unaligned_le16(skb->data);
+
+		if (pi->sdu_len > pi->imtu)
+			goto disconnect;
+
+		pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC);
+		if (!pi->sdu)
+			return -ENOMEM;
+
+		/* pull sdu_len bytes only after alloc, because of Local Busy
+		 * condition we have to be sure that this will be executed
+		 * only once, i.e., when alloc does not fail */
+		skb_pull(skb, 2);
+
+		memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
+
+		pi->conn_state |= L2CAP_CONN_SAR_SDU;
+		pi->partial_sdu_len = skb->len;
+		break;
+
+	case L2CAP_SDU_CONTINUE:
+		if (!(pi->conn_state & L2CAP_CONN_SAR_SDU))
+			goto disconnect;
+
+		if (!pi->sdu)
+			goto disconnect;
+
+		pi->partial_sdu_len += skb->len;
+		if (pi->partial_sdu_len > pi->sdu_len)
+			goto drop;
+
+		memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
+
+		break;
+
+	case L2CAP_SDU_END:
+		if (!(pi->conn_state & L2CAP_CONN_SAR_SDU))
+			goto disconnect;
+
+		if (!pi->sdu)
+			goto disconnect;
+
+		if (!(pi->conn_state & L2CAP_CONN_SAR_RETRY)) {
+			pi->partial_sdu_len += skb->len;
+
+			if (pi->partial_sdu_len > pi->imtu)
+				goto drop;
+
+			if (pi->partial_sdu_len != pi->sdu_len)
+				goto drop;
+
+			memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
+		}
+
+		_skb = skb_clone(pi->sdu, GFP_ATOMIC);
+		if (!_skb) {
+			pi->conn_state |= L2CAP_CONN_SAR_RETRY;
+			return -ENOMEM;
+		}
+
+		err = sock_queue_rcv_skb(sk, _skb);
+		if (err < 0) {
+			kfree_skb(_skb);
+			pi->conn_state |= L2CAP_CONN_SAR_RETRY;
+			return err;
+		}
+
+		pi->conn_state &= ~L2CAP_CONN_SAR_RETRY;
+		pi->conn_state &= ~L2CAP_CONN_SAR_SDU;
+
+		kfree_skb(pi->sdu);
+		break;
+	}
+
+	kfree_skb(skb);
+	return 0;
+
+drop:
+	kfree_skb(pi->sdu);
+	pi->sdu = NULL;
+
+disconnect:
+	l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
+	kfree_skb(skb);
+	return 0;
+}
+
+static int l2cap_try_push_rx_skb(struct sock *sk)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	struct sk_buff *skb;
+	u16 control;
+	int err;
+
+	while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) {
+		control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
+		err = l2cap_ertm_reassembly_sdu(sk, skb, control);
+		if (err < 0) {
+			skb_queue_head(BUSY_QUEUE(sk), skb);
+			return -EBUSY;
+		}
+
+		pi->buffer_seq = (pi->buffer_seq + 1) % 64;
+	}
+
+	if (!(pi->conn_state & L2CAP_CONN_RNR_SENT))
+		goto done;
+
+	control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+	control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL;
+	l2cap_send_sframe(pi, control);
+	l2cap_pi(sk)->retry_count = 1;
+
+	del_timer(&pi->retrans_timer);
+	__mod_monitor_timer();
+
+	l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F;
+
+done:
+	pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY;
+	pi->conn_state &= ~L2CAP_CONN_RNR_SENT;
+
+	BT_DBG("sk %p, Exit local busy", sk);
+
+	return 0;
+}
+
+static void l2cap_busy_work(struct work_struct *work)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct l2cap_pinfo *pi =
+		container_of(work, struct l2cap_pinfo, busy_work);
+	struct sock *sk = (struct sock *)pi;
+	int n_tries = 0, timeo = HZ/5, err;
+	struct sk_buff *skb;
+
+	lock_sock(sk);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	while ((skb = skb_peek(BUSY_QUEUE(sk)))) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) {
+			err = -EBUSY;
+			l2cap_send_disconn_req(pi->conn, sk, EBUSY);
+			break;
+		}
+
+		if (!timeo)
+			timeo = HZ/5;
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+
+		err = sock_error(sk);
+		if (err)
+			break;
+
+		if (l2cap_try_push_rx_skb(sk) == 0)
+			break;
+	}
+
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	release_sock(sk);
+}
+
+static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	int sctrl, err;
+
+	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
+		bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
+		__skb_queue_tail(BUSY_QUEUE(sk), skb);
+		return l2cap_try_push_rx_skb(sk);
+
+
+	}
+
+	err = l2cap_ertm_reassembly_sdu(sk, skb, control);
+	if (err >= 0) {
+		pi->buffer_seq = (pi->buffer_seq + 1) % 64;
+		return err;
+	}
+
+	/* Busy Condition */
+	BT_DBG("sk %p, Enter local busy", sk);
+
+	pi->conn_state |= L2CAP_CONN_LOCAL_BUSY;
+	bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
+	__skb_queue_tail(BUSY_QUEUE(sk), skb);
+
+	sctrl = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+	sctrl |= L2CAP_SUPER_RCV_NOT_READY;
+	l2cap_send_sframe(pi, sctrl);
+
+	pi->conn_state |= L2CAP_CONN_RNR_SENT;
+
+	del_timer(&pi->ack_timer);
+
+	queue_work(_busy_wq, &pi->busy_work);
+
+	return err;
 }
 
-static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control)
+static int l2cap_streaming_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct sk_buff *_skb;
 	int err = -EINVAL;
 
+	/*
+	 * TODO: We have to notify the userland if some data is lost with the
+	 * Streaming Mode.
+	 */
+
 	switch (control & L2CAP_CTRL_SAR) {
 	case L2CAP_SDU_UNSEGMENTED:
 		if (pi->conn_state & L2CAP_CONN_SAR_SDU) {
@@ -3198,6 +3827,11 @@ static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 co
 		pi->sdu_len = get_unaligned_le16(skb->data);
 		skb_pull(skb, 2);
 
+		if (pi->sdu_len > pi->imtu) {
+			err = -EMSGSIZE;
+			break;
+		}
+
 		pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC);
 		if (!pi->sdu) {
 			err = -ENOMEM;
@@ -3234,15 +3868,19 @@ static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 co
 		pi->conn_state &= ~L2CAP_CONN_SAR_SDU;
 		pi->partial_sdu_len += skb->len;
 
+		if (pi->partial_sdu_len > pi->imtu)
+			goto drop;
+
 		if (pi->partial_sdu_len == pi->sdu_len) {
 			_skb = skb_clone(pi->sdu, GFP_ATOMIC);
 			err = sock_queue_rcv_skb(sk, _skb);
 			if (err < 0)
 				kfree_skb(_skb);
 		}
-		kfree_skb(pi->sdu);
 		err = 0;
 
+drop:
+		kfree_skb(pi->sdu);
 		break;
 	}
 
@@ -3253,18 +3891,18 @@ static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 co
3253static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq) 3891static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq)
3254{ 3892{
3255 struct sk_buff *skb; 3893 struct sk_buff *skb;
3256 u16 control = 0; 3894 u16 control;
3257 3895
3258 while((skb = skb_peek(SREJ_QUEUE(sk)))) { 3896 while ((skb = skb_peek(SREJ_QUEUE(sk)))) {
3259 if (bt_cb(skb)->tx_seq != tx_seq) 3897 if (bt_cb(skb)->tx_seq != tx_seq)
3260 break; 3898 break;
3261 3899
3262 skb = skb_dequeue(SREJ_QUEUE(sk)); 3900 skb = skb_dequeue(SREJ_QUEUE(sk));
3263 control |= bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; 3901 control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
3264 l2cap_sar_reassembly_sdu(sk, skb, control); 3902 l2cap_ertm_reassembly_sdu(sk, skb, control);
3265 l2cap_pi(sk)->buffer_seq_srej = 3903 l2cap_pi(sk)->buffer_seq_srej =
3266 (l2cap_pi(sk)->buffer_seq_srej + 1) % 64; 3904 (l2cap_pi(sk)->buffer_seq_srej + 1) % 64;
3267 tx_seq++; 3905 tx_seq = (tx_seq + 1) % 64;
3268 } 3906 }
3269} 3907}
3270 3908
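The hunks above replace bare increments with arithmetic reduced modulo 64: ERTM sequence numbers are 6-bit counters, so ordering them across a wrap only works on offsets taken from a base sequence number. A minimal userspace sketch of the idiom, assuming only the C standard library (seq_offset is an illustrative helper, not a function from this patch):

    #include <stdio.h>

    /* Offset of seq from base in a 6-bit (mod 64) sequence space. */
    static int seq_offset(int seq, int base)
    {
            int offset = (seq - base) % 64;

            if (offset < 0)         /* C division truncates toward zero */
                    offset += 64;
            return offset;
    }

    int main(void)
    {
            /* seq 2 is three frames past base 63 across the wrap */
            printf("%d\n", seq_offset(2, 63));      /* prints 3 */
            return 0;
    }

The same computation appears below as tx_seq_offset, expected_tx_seq_offset and req_seq_offset; an offset at or beyond the transmit window marks a frame as invalid rather than merely out of order.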
@@ -3274,7 +3912,7 @@ static void l2cap_resend_srejframe(struct sock *sk, u8 tx_seq)
3274 struct srej_list *l, *tmp; 3912 struct srej_list *l, *tmp;
3275 u16 control; 3913 u16 control;
3276 3914
3277 list_for_each_entry_safe(l,tmp, SREJ_LIST(sk), list) { 3915 list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) {
3278 if (l->tx_seq == tx_seq) { 3916 if (l->tx_seq == tx_seq) {
3279 list_del(&l->list); 3917 list_del(&l->list);
3280 kfree(l); 3918 kfree(l);
@@ -3297,17 +3935,14 @@ static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq)
3297 while (tx_seq != pi->expected_tx_seq) { 3935 while (tx_seq != pi->expected_tx_seq) {
3298 control = L2CAP_SUPER_SELECT_REJECT; 3936 control = L2CAP_SUPER_SELECT_REJECT;
3299 control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; 3937 control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
3300 if (pi->conn_state & L2CAP_CONN_SEND_PBIT) {
3301 control |= L2CAP_CTRL_POLL;
3302 pi->conn_state &= ~L2CAP_CONN_SEND_PBIT;
3303 }
3304 l2cap_send_sframe(pi, control); 3938 l2cap_send_sframe(pi, control);
3305 3939
3306 new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); 3940 new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC);
3307 new->tx_seq = pi->expected_tx_seq++; 3941 new->tx_seq = pi->expected_tx_seq;
3942 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
3308 list_add_tail(&new->list, SREJ_LIST(sk)); 3943 list_add_tail(&new->list, SREJ_LIST(sk));
3309 } 3944 }
3310 pi->expected_tx_seq++; 3945 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
3311} 3946}
3312 3947
3313static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) 3948static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb)
@@ -3315,11 +3950,21 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
3315 struct l2cap_pinfo *pi = l2cap_pi(sk); 3950 struct l2cap_pinfo *pi = l2cap_pi(sk);
3316 u8 tx_seq = __get_txseq(rx_control); 3951 u8 tx_seq = __get_txseq(rx_control);
3317 u8 req_seq = __get_reqseq(rx_control); 3952 u8 req_seq = __get_reqseq(rx_control);
3318 u16 tx_control = 0;
3319 u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; 3953 u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT;
3954 int tx_seq_offset, expected_tx_seq_offset;
3955 int num_to_ack = (pi->tx_win/6) + 1;
3320 int err = 0; 3956 int err = 0;
3321 3957
3322 BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); 3958 BT_DBG("sk %p len %d tx_seq %d rx_control 0x%4.4x", sk, skb->len, tx_seq,
3959 rx_control);
3960
3961 if (L2CAP_CTRL_FINAL & rx_control &&
3962 l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) {
3963 del_timer(&pi->monitor_timer);
3964 if (pi->unacked_frames > 0)
3965 __mod_retrans_timer();
3966 pi->conn_state &= ~L2CAP_CONN_WAIT_F;
3967 }
3323 3968
3324 pi->expected_ack_seq = req_seq; 3969 pi->expected_ack_seq = req_seq;
3325 l2cap_drop_acked_frames(sk); 3970 l2cap_drop_acked_frames(sk);
@@ -3327,6 +3972,19 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
3327 if (tx_seq == pi->expected_tx_seq) 3972 if (tx_seq == pi->expected_tx_seq)
3328 goto expected; 3973 goto expected;
3329 3974
3975 tx_seq_offset = (tx_seq - pi->buffer_seq) % 64;
3976 if (tx_seq_offset < 0)
3977 tx_seq_offset += 64;
3978
3979 /* invalid tx_seq */
3980 if (tx_seq_offset >= pi->tx_win) {
3981 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
3982 goto drop;
3983 }
3984
3985 if (pi->conn_state == L2CAP_CONN_LOCAL_BUSY)
3986 goto drop;
3987
3330 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { 3988 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
3331 struct srej_list *first; 3989 struct srej_list *first;
3332 3990
@@ -3342,10 +4000,15 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
3342 if (list_empty(SREJ_LIST(sk))) { 4000 if (list_empty(SREJ_LIST(sk))) {
3343 pi->buffer_seq = pi->buffer_seq_srej; 4001 pi->buffer_seq = pi->buffer_seq_srej;
3344 pi->conn_state &= ~L2CAP_CONN_SREJ_SENT; 4002 pi->conn_state &= ~L2CAP_CONN_SREJ_SENT;
4003 l2cap_send_ack(pi);
4004 BT_DBG("sk %p, Exit SREJ_SENT", sk);
3345 } 4005 }
3346 } else { 4006 } else {
3347 struct srej_list *l; 4007 struct srej_list *l;
3348 l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); 4008
4009 /* duplicated tx_seq */
4010 if (l2cap_add_to_srej_queue(sk, skb, tx_seq, sar) < 0)
4011 goto drop;
3349 4012
3350 list_for_each_entry(l, SREJ_LIST(sk), list) { 4013 list_for_each_entry(l, SREJ_LIST(sk), list) {
3351 if (l->tx_seq == tx_seq) { 4014 if (l->tx_seq == tx_seq) {
@@ -3356,17 +4019,31 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
3356 l2cap_send_srejframe(sk, tx_seq); 4019 l2cap_send_srejframe(sk, tx_seq);
3357 } 4020 }
3358 } else { 4021 } else {
4022 expected_tx_seq_offset =
4023 (pi->expected_tx_seq - pi->buffer_seq) % 64;
4024 if (expected_tx_seq_offset < 0)
4025 expected_tx_seq_offset += 64;
4026
4027 /* duplicated tx_seq */
4028 if (tx_seq_offset < expected_tx_seq_offset)
4029 goto drop;
4030
3359 pi->conn_state |= L2CAP_CONN_SREJ_SENT; 4031 pi->conn_state |= L2CAP_CONN_SREJ_SENT;
3360 4032
4033 BT_DBG("sk %p, Enter SREJ", sk);
4034
3361 INIT_LIST_HEAD(SREJ_LIST(sk)); 4035 INIT_LIST_HEAD(SREJ_LIST(sk));
3362 pi->buffer_seq_srej = pi->buffer_seq; 4036 pi->buffer_seq_srej = pi->buffer_seq;
3363 4037
3364 __skb_queue_head_init(SREJ_QUEUE(sk)); 4038 __skb_queue_head_init(SREJ_QUEUE(sk));
4039 __skb_queue_head_init(BUSY_QUEUE(sk));
3365 l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); 4040 l2cap_add_to_srej_queue(sk, skb, tx_seq, sar);
3366 4041
3367 pi->conn_state |= L2CAP_CONN_SEND_PBIT; 4042 pi->conn_state |= L2CAP_CONN_SEND_PBIT;
3368 4043
3369 l2cap_send_srejframe(sk, tx_seq); 4044 l2cap_send_srejframe(sk, tx_seq);
4045
4046 del_timer(&pi->ack_timer);
3370 } 4047 }
3371 return 0; 4048 return 0;
3372 4049
@@ -3374,162 +4051,280 @@ expected:
3374 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; 4051 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
3375 4052
3376 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { 4053 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
3377 l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); 4054 bt_cb(skb)->tx_seq = tx_seq;
4055 bt_cb(skb)->sar = sar;
4056 __skb_queue_tail(SREJ_QUEUE(sk), skb);
3378 return 0; 4057 return 0;
3379 } 4058 }
3380 4059
4060 err = l2cap_push_rx_skb(sk, skb, rx_control);
4061 if (err < 0)
4062 return 0;
4063
3381 if (rx_control & L2CAP_CTRL_FINAL) { 4064 if (rx_control & L2CAP_CTRL_FINAL) {
3382 if (pi->conn_state & L2CAP_CONN_REJ_ACT) 4065 if (pi->conn_state & L2CAP_CONN_REJ_ACT)
3383 pi->conn_state &= ~L2CAP_CONN_REJ_ACT; 4066 pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
3384 else { 4067 else
3385 sk->sk_send_head = TX_QUEUE(sk)->next; 4068 l2cap_retransmit_frames(sk);
3386 pi->next_tx_seq = pi->expected_ack_seq;
3387 l2cap_ertm_send(sk);
3388 }
3389 } 4069 }
3390 4070
3391 pi->buffer_seq = (pi->buffer_seq + 1) % 64; 4071 __mod_ack_timer();
3392 4072
3393 err = l2cap_sar_reassembly_sdu(sk, skb, rx_control); 4073 pi->num_acked = (pi->num_acked + 1) % num_to_ack;
3394 if (err < 0) 4074 if (pi->num_acked == num_to_ack - 1)
3395 return err; 4075 l2cap_send_ack(pi);
3396 4076
3397 pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK; 4077 return 0;
3398 if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) { 4078
3399 tx_control |= L2CAP_SUPER_RCV_READY; 4079drop:
3400 tx_control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; 4080 kfree_skb(skb);
3401 l2cap_send_sframe(pi, tx_control);
3402 }
3403 return 0; 4081 return 0;
3404} 4082}
3405 4083
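The reworked I-frame path above also changes ack pacing: instead of the fixed L2CAP_DEFAULT_NUM_TO_ACK, an acknowledgement now goes out after roughly a sixth of the negotiated transmit window, so larger windows earn proportionally fewer explicit RR frames. A sketch of just that counter, with an illustrative struct standing in for the l2cap_pinfo fields involved:

    /* Illustrative stand-in for the relevant l2cap_pinfo fields. */
    struct ertm_rx_state {
            unsigned int tx_win;            /* negotiated transmit window */
            unsigned int num_acked;         /* in-sequence frames since last ack */
    };

    /* Nonzero when the frame just accepted should trigger an ack,
     * mirroring the num_acked/num_to_ack logic in the hunk above. */
    static int frame_triggers_ack(struct ertm_rx_state *rx)
    {
            unsigned int num_to_ack = (rx->tx_win / 6) + 1;

            rx->num_acked = (rx->num_acked + 1) % num_to_ack;
            return rx->num_acked == num_to_ack - 1;
    }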
3406static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) 4084static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
3407{ 4085{
3408 struct l2cap_pinfo *pi = l2cap_pi(sk); 4086 struct l2cap_pinfo *pi = l2cap_pi(sk);
3409 u8 tx_seq = __get_reqseq(rx_control);
3410
3411 BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
3412
3413 switch (rx_control & L2CAP_CTRL_SUPERVISE) {
3414 case L2CAP_SUPER_RCV_READY:
3415 if (rx_control & L2CAP_CTRL_POLL) {
3416 u16 control = L2CAP_CTRL_FINAL;
3417 control |= L2CAP_SUPER_RCV_READY |
3418 (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT);
3419 l2cap_send_sframe(l2cap_pi(sk), control);
3420 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3421 4087
3422 } else if (rx_control & L2CAP_CTRL_FINAL) { 4088 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, __get_reqseq(rx_control),
3423 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; 4089 rx_control);
3424 pi->expected_ack_seq = tx_seq;
3425 l2cap_drop_acked_frames(sk);
3426
3427 if (pi->conn_state & L2CAP_CONN_REJ_ACT)
3428 pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
3429 else {
3430 sk->sk_send_head = TX_QUEUE(sk)->next;
3431 pi->next_tx_seq = pi->expected_ack_seq;
3432 l2cap_ertm_send(sk);
3433 }
3434
3435 if (!(pi->conn_state & L2CAP_CONN_WAIT_F))
3436 break;
3437
3438 pi->conn_state &= ~L2CAP_CONN_WAIT_F;
3439 del_timer(&pi->monitor_timer);
3440 4090
3441 if (pi->unacked_frames > 0) 4091 pi->expected_ack_seq = __get_reqseq(rx_control);
3442 __mod_retrans_timer(); 4092 l2cap_drop_acked_frames(sk);
3443 } else {
3444 pi->expected_ack_seq = tx_seq;
3445 l2cap_drop_acked_frames(sk);
3446 4093
4094 if (rx_control & L2CAP_CTRL_POLL) {
4095 pi->conn_state |= L2CAP_CONN_SEND_FBIT;
4096 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
3447 if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) && 4097 if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
3448 (pi->unacked_frames > 0)) 4098 (pi->unacked_frames > 0))
3449 __mod_retrans_timer(); 4099 __mod_retrans_timer();
3450 4100
3451 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; 4101 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3452 l2cap_ertm_send(sk); 4102 l2cap_send_srejtail(sk);
4103 } else {
4104 l2cap_send_i_or_rr_or_rnr(sk);
3453 } 4105 }
3454 break;
3455 4106
3456 case L2CAP_SUPER_REJECT: 4107 } else if (rx_control & L2CAP_CTRL_FINAL) {
3457 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; 4108 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3458 4109
3459 pi->expected_ack_seq = __get_reqseq(rx_control); 4110 if (pi->conn_state & L2CAP_CONN_REJ_ACT)
3460 l2cap_drop_acked_frames(sk); 4111 pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
4112 else
4113 l2cap_retransmit_frames(sk);
3461 4114
3462 if (rx_control & L2CAP_CTRL_FINAL) { 4115 } else {
3463 if (pi->conn_state & L2CAP_CONN_REJ_ACT) 4116 if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
3464 pi->conn_state &= ~L2CAP_CONN_REJ_ACT; 4117 (pi->unacked_frames > 0))
3465 else { 4118 __mod_retrans_timer();
3466 sk->sk_send_head = TX_QUEUE(sk)->next; 4119
3467 pi->next_tx_seq = pi->expected_ack_seq; 4120 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3468 l2cap_ertm_send(sk); 4121 if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
3469 } 4122 l2cap_send_ack(pi);
3470 } else { 4123 } else {
3471 sk->sk_send_head = TX_QUEUE(sk)->next;
3472 pi->next_tx_seq = pi->expected_ack_seq;
3473 l2cap_ertm_send(sk); 4124 l2cap_ertm_send(sk);
3474
3475 if (pi->conn_state & L2CAP_CONN_WAIT_F) {
3476 pi->srej_save_reqseq = tx_seq;
3477 pi->conn_state |= L2CAP_CONN_REJ_ACT;
3478 }
3479 } 4125 }
4126 }
4127}
3480 4128
3481 break; 4129static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control)
4130{
4131 struct l2cap_pinfo *pi = l2cap_pi(sk);
4132 u8 tx_seq = __get_reqseq(rx_control);
3482 4133
3483 case L2CAP_SUPER_SELECT_REJECT: 4134 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
3484 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3485 4135
3486 if (rx_control & L2CAP_CTRL_POLL) { 4136 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
3487 pi->expected_ack_seq = tx_seq; 4137
3488 l2cap_drop_acked_frames(sk); 4138 pi->expected_ack_seq = tx_seq;
3489 l2cap_retransmit_frame(sk, tx_seq); 4139 l2cap_drop_acked_frames(sk);
3490 l2cap_ertm_send(sk); 4140
3491 if (pi->conn_state & L2CAP_CONN_WAIT_F) { 4141 if (rx_control & L2CAP_CTRL_FINAL) {
3492 pi->srej_save_reqseq = tx_seq; 4142 if (pi->conn_state & L2CAP_CONN_REJ_ACT)
3493 pi->conn_state |= L2CAP_CONN_SREJ_ACT; 4143 pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
3494 } 4144 else
3495 } else if (rx_control & L2CAP_CTRL_FINAL) { 4145 l2cap_retransmit_frames(sk);
3496 if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) && 4146 } else {
3497 pi->srej_save_reqseq == tx_seq) 4147 l2cap_retransmit_frames(sk);
3498 pi->conn_state &= ~L2CAP_CONN_SREJ_ACT; 4148
3499 else 4149 if (pi->conn_state & L2CAP_CONN_WAIT_F)
3500 l2cap_retransmit_frame(sk, tx_seq); 4150 pi->conn_state |= L2CAP_CONN_REJ_ACT;
4151 }
4152}
4153static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
4154{
4155 struct l2cap_pinfo *pi = l2cap_pi(sk);
4156 u8 tx_seq = __get_reqseq(rx_control);
4157
4158 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
4159
4160 pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
4161
4162 if (rx_control & L2CAP_CTRL_POLL) {
4163 pi->expected_ack_seq = tx_seq;
4164 l2cap_drop_acked_frames(sk);
4165
4166 pi->conn_state |= L2CAP_CONN_SEND_FBIT;
4167 l2cap_retransmit_one_frame(sk, tx_seq);
4168
4169 l2cap_ertm_send(sk);
4170
4171 if (pi->conn_state & L2CAP_CONN_WAIT_F) {
4172 pi->srej_save_reqseq = tx_seq;
4173 pi->conn_state |= L2CAP_CONN_SREJ_ACT;
3501 } 4174 }
3502 else { 4175 } else if (rx_control & L2CAP_CTRL_FINAL) {
3503 l2cap_retransmit_frame(sk, tx_seq); 4176 if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) &&
3504 if (pi->conn_state & L2CAP_CONN_WAIT_F) { 4177 pi->srej_save_reqseq == tx_seq)
3505 pi->srej_save_reqseq = tx_seq; 4178 pi->conn_state &= ~L2CAP_CONN_SREJ_ACT;
3506 pi->conn_state |= L2CAP_CONN_SREJ_ACT; 4179 else
3507 } 4180 l2cap_retransmit_one_frame(sk, tx_seq);
4181 } else {
4182 l2cap_retransmit_one_frame(sk, tx_seq);
4183 if (pi->conn_state & L2CAP_CONN_WAIT_F) {
4184 pi->srej_save_reqseq = tx_seq;
4185 pi->conn_state |= L2CAP_CONN_SREJ_ACT;
3508 } 4186 }
4187 }
4188}
4189
4190static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control)
4191{
4192 struct l2cap_pinfo *pi = l2cap_pi(sk);
4193 u8 tx_seq = __get_reqseq(rx_control);
4194
4195 BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
4196
4197 pi->conn_state |= L2CAP_CONN_REMOTE_BUSY;
4198 pi->expected_ack_seq = tx_seq;
4199 l2cap_drop_acked_frames(sk);
4200
4201 if (rx_control & L2CAP_CTRL_POLL)
4202 pi->conn_state |= L2CAP_CONN_SEND_FBIT;
4203
4204 if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) {
4205 del_timer(&pi->retrans_timer);
4206 if (rx_control & L2CAP_CTRL_POLL)
4207 l2cap_send_rr_or_rnr(pi, L2CAP_CTRL_FINAL);
4208 return;
4209 }
4210
4211 if (rx_control & L2CAP_CTRL_POLL)
4212 l2cap_send_srejtail(sk);
4213 else
4214 l2cap_send_sframe(pi, L2CAP_SUPER_RCV_READY);
4215}
4216
4217static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb)
4218{
4219 BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
4220
4221 if (L2CAP_CTRL_FINAL & rx_control &&
4222 l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) {
4223 del_timer(&l2cap_pi(sk)->monitor_timer);
4224 if (l2cap_pi(sk)->unacked_frames > 0)
4225 __mod_retrans_timer();
4226 l2cap_pi(sk)->conn_state &= ~L2CAP_CONN_WAIT_F;
4227 }
4228
4229 switch (rx_control & L2CAP_CTRL_SUPERVISE) {
4230 case L2CAP_SUPER_RCV_READY:
4231 l2cap_data_channel_rrframe(sk, rx_control);
4232 break;
4233
4234 case L2CAP_SUPER_REJECT:
4235 l2cap_data_channel_rejframe(sk, rx_control);
4236 break;
4237
4238 case L2CAP_SUPER_SELECT_REJECT:
4239 l2cap_data_channel_srejframe(sk, rx_control);
3509 break; 4240 break;
3510 4241
3511 case L2CAP_SUPER_RCV_NOT_READY: 4242 case L2CAP_SUPER_RCV_NOT_READY:
3512 pi->conn_state |= L2CAP_CONN_REMOTE_BUSY; 4243 l2cap_data_channel_rnrframe(sk, rx_control);
3513 pi->expected_ack_seq = tx_seq; 4244 break;
3514 l2cap_drop_acked_frames(sk); 4245 }
3515 4246
3516 del_timer(&l2cap_pi(sk)->retrans_timer); 4247 kfree_skb(skb);
3517 if (rx_control & L2CAP_CTRL_POLL) { 4248 return 0;
3518 u16 control = L2CAP_CTRL_FINAL; 4249}
3519 l2cap_send_rr_or_rnr(l2cap_pi(sk), control); 4250
4251static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb)
4252{
4253 struct l2cap_pinfo *pi = l2cap_pi(sk);
4254 u16 control;
4255 u8 req_seq;
4256 int len, next_tx_seq_offset, req_seq_offset;
4257
4258 control = get_unaligned_le16(skb->data);
4259 skb_pull(skb, 2);
4260 len = skb->len;
4261
4262 /*
4263 * We can just drop the corrupted I-frame here.
 4264 * The receiver will miss it, start the proper recovery
 4265 * procedure and request retransmission.
4266 */
4267 if (l2cap_check_fcs(pi, skb))
4268 goto drop;
4269
4270 if (__is_sar_start(control) && __is_iframe(control))
4271 len -= 2;
4272
4273 if (pi->fcs == L2CAP_FCS_CRC16)
4274 len -= 2;
4275
4276 if (len > pi->mps) {
4277 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4278 goto drop;
4279 }
4280
4281 req_seq = __get_reqseq(control);
4282 req_seq_offset = (req_seq - pi->expected_ack_seq) % 64;
4283 if (req_seq_offset < 0)
4284 req_seq_offset += 64;
4285
4286 next_tx_seq_offset =
4287 (pi->next_tx_seq - pi->expected_ack_seq) % 64;
4288 if (next_tx_seq_offset < 0)
4289 next_tx_seq_offset += 64;
4290
4291 /* check for invalid req-seq */
4292 if (req_seq_offset > next_tx_seq_offset) {
4293 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4294 goto drop;
4295 }
4296
4297 if (__is_iframe(control)) {
4298 if (len < 0) {
4299 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4300 goto drop;
3520 } 4301 }
3521 break; 4302
4303 l2cap_data_channel_iframe(sk, control, skb);
4304 } else {
4305 if (len != 0) {
4306 BT_ERR("%d", len);
4307 l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
4308 goto drop;
4309 }
4310
4311 l2cap_data_channel_sframe(sk, control, skb);
3522 } 4312 }
3523 4313
3524 return 0; 4314 return 0;
4315
4316drop:
4317 kfree_skb(skb);
4318 return 0;
3525} 4319}
3526 4320
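The new l2cap_ertm_data_rcv() above does all of its dispatching on the 16-bit control field that prefixes every ERTM PDU. For orientation, a sketch of the layout behind __is_iframe(), __get_txseq() and __get_reqseq(); the mask values are quoted from the l2cap.h of this period and should be re-checked against the header before being reused:

    /* I-frame: bit 0 = 0, bits 1-6 TxSeq, bit 7 F, bits 8-13 ReqSeq,
     *          bits 14-15 SAR
     * S-frame: bit 0 = 1, bits 2-3 supervisory function, bit 4 P,
     *          bit 7 F, bits 8-13 ReqSeq
     */
    #define CTRL_FRAME_TYPE 0x0001          /* 0 = I-frame, 1 = S-frame */
    #define CTRL_TXSEQ      0x007E
    #define CTRL_FINAL      0x0080
    #define CTRL_REQSEQ     0x3F00
    #define CTRL_SAR        0xC000

    static int is_sframe(unsigned short ctrl)
    {
            return ctrl & CTRL_FRAME_TYPE;
    }

    static unsigned char get_txseq(unsigned short ctrl)
    {
            return (ctrl & CTRL_TXSEQ) >> 1;
    }

    static unsigned char get_reqseq(unsigned short ctrl)
    {
            return (ctrl & CTRL_REQSEQ) >> 8;
    }

This is also why the length bookkeeping above subtracts 2 from len for a SAR start fragment, and why an S-frame must end up with len == 0 once control and FCS bytes are accounted for: the SDU length rides in the payload, not in the control word.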
3527static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) 4321static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb)
3528{ 4322{
3529 struct sock *sk; 4323 struct sock *sk;
3530 struct l2cap_pinfo *pi; 4324 struct l2cap_pinfo *pi;
3531 u16 control, len; 4325 u16 control;
3532 u8 tx_seq; 4326 u8 tx_seq;
4327 int len;
3533 4328
3534 sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); 4329 sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
3535 if (!sk) { 4330 if (!sk) {
@@ -3559,31 +4354,12 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
3559 break; 4354 break;
3560 4355
3561 case L2CAP_MODE_ERTM: 4356 case L2CAP_MODE_ERTM:
3562 control = get_unaligned_le16(skb->data); 4357 if (!sock_owned_by_user(sk)) {
3563 skb_pull(skb, 2); 4358 l2cap_ertm_data_rcv(sk, skb);
3564 len = skb->len; 4359 } else {
3565 4360 if (sk_add_backlog(sk, skb))
3566 if (__is_sar_start(control)) 4361 goto drop;
3567 len -= 2; 4362 }
3568
3569 if (pi->fcs == L2CAP_FCS_CRC16)
3570 len -= 2;
3571
3572 /*
3573 * We can just drop the corrupted I-frame here.
3574 * Receiver will miss it and start proper recovery
3575 * procedures and ask retransmission.
3576 */
3577 if (len > L2CAP_DEFAULT_MAX_PDU_SIZE)
3578 goto drop;
3579
3580 if (l2cap_check_fcs(pi, skb))
3581 goto drop;
3582
3583 if (__is_iframe(control))
3584 l2cap_data_channel_iframe(sk, control, skb);
3585 else
3586 l2cap_data_channel_sframe(sk, control, skb);
3587 4363
3588 goto done; 4364 goto done;
3589 4365
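The ERTM receive path now honours the socket lock: if a process context currently owns the socket, the frame is parked on the backlog instead of racing with the state machine, and release_sock() later replays it through sk->sk_backlog_rcv. The generic shape of the pattern, sketched on the assumption that the caller runs in BH context with the socket already looked up (not a verbatim quote of the L2CAP code):

    bh_lock_sock(sk);
    if (!sock_owned_by_user(sk)) {
            /* we own the lock: run the state machine in place */
            l2cap_ertm_data_rcv(sk, skb);
    } else if (sk_add_backlog(sk, skb)) {
            /* backlog limit hit: drop and let ERTM recovery
             * request a retransmission later */
            kfree_skb(skb);
    }
    bh_unlock_sock(sk);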
@@ -3592,16 +4368,16 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
3592 skb_pull(skb, 2); 4368 skb_pull(skb, 2);
3593 len = skb->len; 4369 len = skb->len;
3594 4370
4371 if (l2cap_check_fcs(pi, skb))
4372 goto drop;
4373
3595 if (__is_sar_start(control)) 4374 if (__is_sar_start(control))
3596 len -= 2; 4375 len -= 2;
3597 4376
3598 if (pi->fcs == L2CAP_FCS_CRC16) 4377 if (pi->fcs == L2CAP_FCS_CRC16)
3599 len -= 2; 4378 len -= 2;
3600 4379
3601 if (len > L2CAP_DEFAULT_MAX_PDU_SIZE || __is_sframe(control)) 4380 if (len > pi->mps || len < 0 || __is_sframe(control))
3602 goto drop;
3603
3604 if (l2cap_check_fcs(pi, skb))
3605 goto drop; 4381 goto drop;
3606 4382
3607 tx_seq = __get_txseq(control); 4383 tx_seq = __get_txseq(control);
@@ -3609,14 +4385,14 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
3609 if (pi->expected_tx_seq == tx_seq) 4385 if (pi->expected_tx_seq == tx_seq)
3610 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; 4386 pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
3611 else 4387 else
3612 pi->expected_tx_seq = tx_seq + 1; 4388 pi->expected_tx_seq = (tx_seq + 1) % 64;
3613 4389
3614 l2cap_sar_reassembly_sdu(sk, skb, control); 4390 l2cap_streaming_reassembly_sdu(sk, skb, control);
3615 4391
3616 goto done; 4392 goto done;
3617 4393
3618 default: 4394 default:
3619 BT_DBG("sk %p: bad mode 0x%2.2x", sk, l2cap_pi(sk)->mode); 4395 BT_DBG("sk %p: bad mode 0x%2.2x", sk, pi->mode);
3620 break; 4396 break;
3621 } 4397 }
3622 4398
@@ -3701,7 +4477,7 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
3701 struct hlist_node *node; 4477 struct hlist_node *node;
3702 4478
3703 if (type != ACL_LINK) 4479 if (type != ACL_LINK)
3704 return 0; 4480 return -EINVAL;
3705 4481
3706 BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); 4482 BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr));
3707 4483
@@ -3734,7 +4510,7 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
3734 BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status); 4510 BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
3735 4511
3736 if (hcon->type != ACL_LINK) 4512 if (hcon->type != ACL_LINK)
3737 return 0; 4513 return -EINVAL;
3738 4514
3739 if (!status) { 4515 if (!status) {
3740 conn = l2cap_conn_add(hcon, status); 4516 conn = l2cap_conn_add(hcon, status);
@@ -3763,7 +4539,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
3763 BT_DBG("hcon %p reason %d", hcon, reason); 4539 BT_DBG("hcon %p reason %d", hcon, reason);
3764 4540
3765 if (hcon->type != ACL_LINK) 4541 if (hcon->type != ACL_LINK)
3766 return 0; 4542 return -EINVAL;
3767 4543
3768 l2cap_conn_del(hcon, bt_err(reason)); 4544 l2cap_conn_del(hcon, bt_err(reason));
3769 4545
@@ -3772,7 +4548,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
3772 4548
3773static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt) 4549static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt)
3774{ 4550{
3775 if (sk->sk_type != SOCK_SEQPACKET) 4551 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM)
3776 return; 4552 return;
3777 4553
3778 if (encrypt == 0x00) { 4554 if (encrypt == 0x00) {
@@ -3824,6 +4600,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
3824 req.psm = l2cap_pi(sk)->psm; 4600 req.psm = l2cap_pi(sk)->psm;
3825 4601
3826 l2cap_pi(sk)->ident = l2cap_get_ident(conn); 4602 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
4603 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
3827 4604
3828 l2cap_send_cmd(conn, l2cap_pi(sk)->ident, 4605 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
3829 L2CAP_CONN_REQ, sizeof(req), &req); 4606 L2CAP_CONN_REQ, sizeof(req), &req);
@@ -4030,6 +4807,10 @@ static int __init l2cap_init(void)
4030 if (err < 0) 4807 if (err < 0)
4031 return err; 4808 return err;
4032 4809
4810 _busy_wq = create_singlethread_workqueue("l2cap");
4811 if (!_busy_wq)
4812 goto error;
4813
4033 err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops); 4814 err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
4034 if (err < 0) { 4815 if (err < 0) {
4035 BT_ERR("L2CAP socket registration failed"); 4816 BT_ERR("L2CAP socket registration failed");
@@ -4064,6 +4845,9 @@ static void __exit l2cap_exit(void)
4064{ 4845{
4065 debugfs_remove(l2cap_debugfs); 4846 debugfs_remove(l2cap_debugfs);
4066 4847
4848 flush_workqueue(_busy_wq);
4849 destroy_workqueue(_busy_wq);
4850
4067 if (bt_sock_unregister(BTPROTO_L2CAP) < 0) 4851 if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
4068 BT_ERR("L2CAP socket unregistration failed"); 4852 BT_ERR("L2CAP socket unregistration failed");
4069 4853
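l2cap_init() and l2cap_exit() above bracket the lifetime of the new _busy_wq, the workqueue that lets the local-busy handling earlier in this patch run out of interrupt context (queue_work(_busy_wq, &pi->busy_work) on the receive path). The lifecycle, reduced to a self-contained sketch:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *_busy_wq;

    static int __init busy_wq_init(void)
    {
            /* dedicated single-threaded queue, named as in l2cap_init() */
            _busy_wq = create_singlethread_workqueue("l2cap");
            return _busy_wq ? 0 : -ENOMEM;
    }

    static void __exit busy_wq_exit(void)
    {
            flush_workqueue(_busy_wq);      /* drain queued busy work */
            destroy_workqueue(_busy_wq);    /* then free the queue itself */
    }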
@@ -4078,18 +4862,14 @@ void l2cap_load(void)
4078 /* Dummy function to trigger automatic L2CAP module loading by 4862 /* Dummy function to trigger automatic L2CAP module loading by
4079 * other modules that use L2CAP sockets but don't use any other 4863 * other modules that use L2CAP sockets but don't use any other
4080 * symbols from it. */ 4864 * symbols from it. */
4081 return;
4082} 4865}
4083EXPORT_SYMBOL(l2cap_load); 4866EXPORT_SYMBOL(l2cap_load);
4084 4867
4085module_init(l2cap_init); 4868module_init(l2cap_init);
4086module_exit(l2cap_exit); 4869module_exit(l2cap_exit);
4087 4870
4088module_param(enable_ertm, bool, 0644); 4871module_param(disable_ertm, bool, 0644);
4089MODULE_PARM_DESC(enable_ertm, "Enable enhanced retransmission mode"); 4872MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode");
4090
4091module_param(max_transmit, uint, 0644);
4092MODULE_PARM_DESC(max_transmit, "Max transmit value (default = 3)");
4093 4873
4094MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); 4874MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
4095MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION); 4875MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION);
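Note the inversion at the bottom of the file: ERTM is now enabled by default, the parameter becomes disable_ertm, and the obsolete max_transmit knob is dropped. Because the parameter is declared with mode 0644 it also stays writable at runtime, through /sys/module/l2cap/parameters/disable_ertm (standard module_param sysfs behaviour; the path is inferred from the declaration, not shown in the patch).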
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 8ed3c37684fa..44a623275951 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -503,7 +503,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
503 BT_DBG("sk %p timeo %ld", sk, timeo); 503 BT_DBG("sk %p timeo %ld", sk, timeo);
504 504
505 /* Wait for an incoming connection. (wake-one). */ 505 /* Wait for an incoming connection. (wake-one). */
506 add_wait_queue_exclusive(sk->sk_sleep, &wait); 506 add_wait_queue_exclusive(sk_sleep(sk), &wait);
507 while (!(nsk = bt_accept_dequeue(sk, newsock))) { 507 while (!(nsk = bt_accept_dequeue(sk, newsock))) {
508 set_current_state(TASK_INTERRUPTIBLE); 508 set_current_state(TASK_INTERRUPTIBLE);
509 if (!timeo) { 509 if (!timeo) {
@@ -526,7 +526,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
526 } 526 }
527 } 527 }
528 set_current_state(TASK_RUNNING); 528 set_current_state(TASK_RUNNING);
529 remove_wait_queue(sk->sk_sleep, &wait); 529 remove_wait_queue(sk_sleep(sk), &wait);
530 530
531 if (err) 531 if (err)
532 goto done; 532 goto done;
@@ -621,7 +621,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo)
621{ 621{
622 DECLARE_WAITQUEUE(wait, current); 622 DECLARE_WAITQUEUE(wait, current);
623 623
624 add_wait_queue(sk->sk_sleep, &wait); 624 add_wait_queue(sk_sleep(sk), &wait);
625 for (;;) { 625 for (;;) {
626 set_current_state(TASK_INTERRUPTIBLE); 626 set_current_state(TASK_INTERRUPTIBLE);
627 627
@@ -640,7 +640,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo)
640 } 640 }
641 641
642 __set_current_state(TASK_RUNNING); 642 __set_current_state(TASK_RUNNING);
643 remove_wait_queue(sk->sk_sleep, &wait); 643 remove_wait_queue(sk_sleep(sk), &wait);
644 return timeo; 644 return timeo;
645} 645}
646 646
@@ -1152,7 +1152,7 @@ error:
1152 return err; 1152 return err;
1153} 1153}
1154 1154
1155void rfcomm_cleanup_sockets(void) 1155void __exit rfcomm_cleanup_sockets(void)
1156{ 1156{
1157 debugfs_remove(rfcomm_sock_debugfs); 1157 debugfs_remove(rfcomm_sock_debugfs);
1158 1158
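The rfcomm hunks above are part of the tree-wide conversion from the raw sk->sk_sleep pointer to the sk_sleep() accessor, which hides where the wait queue actually lives inside struct sock. The surrounding wait-loop idiom, condensed into a sketch (condition() is a placeholder for the real predicate, e.g. bt_accept_dequeue() succeeding):

    DECLARE_WAITQUEUE(wait, current);

    add_wait_queue(sk_sleep(sk), &wait);
    while (!condition(sk)) {
            set_current_state(TASK_INTERRUPTIBLE);
            timeo = schedule_timeout(timeo);
            if (!timeo || signal_pending(current))
                    break;
    }
    __set_current_state(TASK_RUNNING);
    remove_wait_queue(sk_sleep(sk), &wait);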
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index cab71ea2796d..befc3a52aa04 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -1014,8 +1014,6 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
1014 rfcomm_send_rpn(dev->dlc->session, 1, dev->dlc->dlci, baud, 1014 rfcomm_send_rpn(dev->dlc->session, 1, dev->dlc->dlci, baud,
1015 data_bits, stop_bits, parity, 1015 data_bits, stop_bits, parity,
1016 RFCOMM_RPN_FLOW_NONE, x_on, x_off, changes); 1016 RFCOMM_RPN_FLOW_NONE, x_on, x_off, changes);
1017
1018 return;
1019} 1017}
1020 1018
1021static void rfcomm_tty_throttle(struct tty_struct *tty) 1019static void rfcomm_tty_throttle(struct tty_struct *tty)
@@ -1155,7 +1153,7 @@ static const struct tty_operations rfcomm_ops = {
1155 .tiocmset = rfcomm_tty_tiocmset, 1153 .tiocmset = rfcomm_tty_tiocmset,
1156}; 1154};
1157 1155
1158int rfcomm_init_ttys(void) 1156int __init rfcomm_init_ttys(void)
1159{ 1157{
1160 rfcomm_tty_driver = alloc_tty_driver(RFCOMM_TTY_PORTS); 1158 rfcomm_tty_driver = alloc_tty_driver(RFCOMM_TTY_PORTS);
1161 if (!rfcomm_tty_driver) 1159 if (!rfcomm_tty_driver)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index ca6b2ad1c3fc..d0927d1fdada 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -165,11 +165,11 @@ static inline int sco_chan_add(struct sco_conn *conn, struct sock *sk, struct so
165 int err = 0; 165 int err = 0;
166 166
167 sco_conn_lock(conn); 167 sco_conn_lock(conn);
168 if (conn->sk) { 168 if (conn->sk)
169 err = -EBUSY; 169 err = -EBUSY;
170 } else { 170 else
171 __sco_chan_add(conn, sk, parent); 171 __sco_chan_add(conn, sk, parent);
172 } 172
173 sco_conn_unlock(conn); 173 sco_conn_unlock(conn);
174 return err; 174 return err;
175} 175}
@@ -241,22 +241,19 @@ static inline int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
241 BT_DBG("sk %p len %d", sk, len); 241 BT_DBG("sk %p len %d", sk, len);
242 242
243 count = min_t(unsigned int, conn->mtu, len); 243 count = min_t(unsigned int, conn->mtu, len);
244 if (!(skb = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err))) 244 skb = bt_skb_send_alloc(sk, count,
245 msg->msg_flags & MSG_DONTWAIT, &err);
246 if (!skb)
245 return err; 247 return err;
246 248
247 if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) { 249 if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) {
248 err = -EFAULT; 250 kfree_skb(skb);
249 goto fail; 251 return -EFAULT;
250 } 252 }
251 253
252 if ((err = hci_send_sco(conn->hcon, skb)) < 0) 254 hci_send_sco(conn->hcon, skb);
253 return err;
254 255
255 return count; 256 return count;
256
257fail:
258 kfree_skb(skb);
259 return err;
260} 257}
261 258
262static inline void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb) 259static inline void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
@@ -276,7 +273,6 @@ static inline void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
276 273
277drop: 274drop:
278 kfree_skb(skb); 275 kfree_skb(skb);
279 return;
280} 276}
281 277
282/* -------- Socket interface ---------- */ 278/* -------- Socket interface ---------- */
@@ -567,7 +563,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag
567 BT_DBG("sk %p timeo %ld", sk, timeo); 563 BT_DBG("sk %p timeo %ld", sk, timeo);
568 564
569 /* Wait for an incoming connection. (wake-one). */ 565 /* Wait for an incoming connection. (wake-one). */
570 add_wait_queue_exclusive(sk->sk_sleep, &wait); 566 add_wait_queue_exclusive(sk_sleep(sk), &wait);
571 while (!(ch = bt_accept_dequeue(sk, newsock))) { 567 while (!(ch = bt_accept_dequeue(sk, newsock))) {
572 set_current_state(TASK_INTERRUPTIBLE); 568 set_current_state(TASK_INTERRUPTIBLE);
573 if (!timeo) { 569 if (!timeo) {
@@ -590,7 +586,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag
590 } 586 }
591 } 587 }
592 set_current_state(TASK_RUNNING); 588 set_current_state(TASK_RUNNING);
593 remove_wait_queue(sk->sk_sleep, &wait); 589 remove_wait_queue(sk_sleep(sk), &wait);
594 590
595 if (err) 591 if (err)
596 goto done; 592 goto done;
@@ -626,7 +622,7 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
626 struct msghdr *msg, size_t len) 622 struct msghdr *msg, size_t len)
627{ 623{
628 struct sock *sk = sock->sk; 624 struct sock *sk = sock->sk;
629 int err = 0; 625 int err;
630 626
631 BT_DBG("sock %p, sk %p", sock, sk); 627 BT_DBG("sock %p, sk %p", sock, sk);
632 628
@@ -851,7 +847,8 @@ static void sco_conn_ready(struct sco_conn *conn)
851 847
852 bh_lock_sock(parent); 848 bh_lock_sock(parent);
853 849
854 sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC); 850 sk = sco_sock_alloc(sock_net(parent), NULL,
851 BTPROTO_SCO, GFP_ATOMIC);
855 if (!sk) { 852 if (!sk) {
856 bh_unlock_sock(parent); 853 bh_unlock_sock(parent);
857 goto done; 854 goto done;
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index d115d5cea5b6..9190ae462cb4 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -33,14 +33,14 @@ config BRIDGE
33 If unsure, say N. 33 If unsure, say N.
34 34
35config BRIDGE_IGMP_SNOOPING 35config BRIDGE_IGMP_SNOOPING
36 bool "IGMP snooping" 36 bool "IGMP/MLD snooping"
37 depends on BRIDGE 37 depends on BRIDGE
38 depends on INET 38 depends on INET
39 default y 39 default y
40 ---help--- 40 ---help---
 41 If you say Y here, then the Ethernet bridge will be able to selectively 41 If you say Y here, then the Ethernet bridge will be able to selectively
42 forward multicast traffic based on IGMP traffic received from each 42 forward multicast traffic based on IGMP/MLD traffic received from
43 port. 43 each port.
44 44
45 Say N to exclude this support and reduce the binary size. 45 Say N to exclude this support and reduce the binary size.
46 46
diff --git a/net/bridge/br.c b/net/bridge/br.c
index e1241c76239a..c8436fa31344 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -38,7 +38,7 @@ static int __init br_init(void)
38 38
39 err = stp_proto_register(&br_stp_proto); 39 err = stp_proto_register(&br_stp_proto);
40 if (err < 0) { 40 if (err < 0) {
41 printk(KERN_ERR "bridge: can't register sap for STP\n"); 41 pr_err("bridge: can't register sap for STP\n");
42 return err; 42 return err;
43 } 43 }
44 44
@@ -63,7 +63,6 @@ static int __init br_init(void)
63 goto err_out4; 63 goto err_out4;
64 64
65 brioctl_set(br_ioctl_deviceless_stub); 65 brioctl_set(br_ioctl_deviceless_stub);
66 br_handle_frame_hook = br_handle_frame;
67 66
68#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) 67#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
69 br_fdb_test_addr_hook = br_fdb_test_addr; 68 br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
100 br_fdb_test_addr_hook = NULL; 99 br_fdb_test_addr_hook = NULL;
101#endif 100#endif
102 101
103 br_handle_frame_hook = NULL;
104 br_fdb_fini(); 102 br_fdb_fini();
105} 103}
106 104
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 90a9024e5c1e..cf09fe591fc2 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -13,31 +13,51 @@
13 13
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/netdevice.h> 15#include <linux/netdevice.h>
16#include <linux/netpoll.h>
16#include <linux/etherdevice.h> 17#include <linux/etherdevice.h>
17#include <linux/ethtool.h> 18#include <linux/ethtool.h>
19#include <linux/list.h>
20#include <linux/netfilter_bridge.h>
18 21
19#include <asm/uaccess.h> 22#include <asm/uaccess.h>
20#include "br_private.h" 23#include "br_private.h"
21 24
22/* net device transmit always called with no BH (preempt_disabled) */ 25/* net device transmit always called with BH disabled */
23netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) 26netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
24{ 27{
25 struct net_bridge *br = netdev_priv(dev); 28 struct net_bridge *br = netdev_priv(dev);
26 const unsigned char *dest = skb->data; 29 const unsigned char *dest = skb->data;
27 struct net_bridge_fdb_entry *dst; 30 struct net_bridge_fdb_entry *dst;
28 struct net_bridge_mdb_entry *mdst; 31 struct net_bridge_mdb_entry *mdst;
32 struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
29 33
30 BR_INPUT_SKB_CB(skb)->brdev = dev; 34#ifdef CONFIG_BRIDGE_NETFILTER
35 if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
36 br_nf_pre_routing_finish_bridge_slow(skb);
37 return NETDEV_TX_OK;
38 }
39#endif
31 40
32 dev->stats.tx_packets++; 41 u64_stats_update_begin(&brstats->syncp);
33 dev->stats.tx_bytes += skb->len; 42 brstats->tx_packets++;
43 brstats->tx_bytes += skb->len;
44 u64_stats_update_end(&brstats->syncp);
45
46 BR_INPUT_SKB_CB(skb)->brdev = dev;
34 47
35 skb_reset_mac_header(skb); 48 skb_reset_mac_header(skb);
36 skb_pull(skb, ETH_HLEN); 49 skb_pull(skb, ETH_HLEN);
37 50
38 if (dest[0] & 1) { 51 rcu_read_lock();
39 if (br_multicast_rcv(br, NULL, skb)) 52 if (is_multicast_ether_addr(dest)) {
53 if (unlikely(netpoll_tx_running(dev))) {
54 br_flood_deliver(br, skb);
55 goto out;
56 }
57 if (br_multicast_rcv(br, NULL, skb)) {
58 kfree_skb(skb);
40 goto out; 59 goto out;
60 }
41 61
42 mdst = br_mdb_get(br, skb); 62 mdst = br_mdb_get(br, skb);
43 if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) 63 if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb))
@@ -50,6 +70,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
50 br_flood_deliver(br, skb); 70 br_flood_deliver(br, skb);
51 71
52out: 72out:
73 rcu_read_unlock();
53 return NETDEV_TX_OK; 74 return NETDEV_TX_OK;
54} 75}
55 76
@@ -81,6 +102,35 @@ static int br_dev_stop(struct net_device *dev)
81 return 0; 102 return 0;
82} 103}
83 104
105static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
106 struct rtnl_link_stats64 *stats)
107{
108 struct net_bridge *br = netdev_priv(dev);
109 struct br_cpu_netstats tmp, sum = { 0 };
110 unsigned int cpu;
111
112 for_each_possible_cpu(cpu) {
113 unsigned int start;
114 const struct br_cpu_netstats *bstats
115 = per_cpu_ptr(br->stats, cpu);
116 do {
117 start = u64_stats_fetch_begin(&bstats->syncp);
118 memcpy(&tmp, bstats, sizeof(tmp));
119 } while (u64_stats_fetch_retry(&bstats->syncp, start));
120 sum.tx_bytes += tmp.tx_bytes;
121 sum.tx_packets += tmp.tx_packets;
122 sum.rx_bytes += tmp.rx_bytes;
123 sum.rx_packets += tmp.rx_packets;
124 }
125
126 stats->tx_bytes = sum.tx_bytes;
127 stats->tx_packets = sum.tx_packets;
128 stats->rx_bytes = sum.rx_bytes;
129 stats->rx_packets = sum.rx_packets;
130
131 return stats;
132}
133
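br_get_stats64() above is the reader half of a lockless per-CPU counter scheme: each CPU bumps its own counters inside a u64_stats_sync section (see br_dev_xmit earlier in this diff), and a reader retries until it observes a consistent snapshot. The structure it iterates over, as it would be declared for br_private.h (field order assumed here, not shown in the patch):

    struct br_cpu_netstats {
            u64                     rx_packets;
            u64                     rx_bytes;
            u64                     tx_packets;
            u64                     tx_bytes;
            struct u64_stats_sync   syncp;
    };

On 64-bit kernels the u64_stats_fetch_begin()/retry() pair effectively compiles away; the seqcount only costs anything where 64-bit loads are not naturally atomic.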
84static int br_change_mtu(struct net_device *dev, int new_mtu) 134static int br_change_mtu(struct net_device *dev, int new_mtu)
85{ 135{
86 struct net_bridge *br = netdev_priv(dev); 136 struct net_bridge *br = netdev_priv(dev);
@@ -91,7 +141,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
91 141
92#ifdef CONFIG_BRIDGE_NETFILTER 142#ifdef CONFIG_BRIDGE_NETFILTER
93 /* remember the MTU in the rtable for PMTU */ 143 /* remember the MTU in the rtable for PMTU */
94 br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu; 144 br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu;
95#endif 145#endif
96 146
97 return 0; 147 return 0;
@@ -162,6 +212,86 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
162 return 0; 212 return 0;
163} 213}
164 214
215#ifdef CONFIG_NET_POLL_CONTROLLER
216static void br_poll_controller(struct net_device *br_dev)
217{
218}
219
220static void br_netpoll_cleanup(struct net_device *dev)
221{
222 struct net_bridge *br = netdev_priv(dev);
223 struct net_bridge_port *p, *n;
224
225 list_for_each_entry_safe(p, n, &br->port_list, list) {
226 br_netpoll_disable(p);
227 }
228}
229
230static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
231{
232 struct net_bridge *br = netdev_priv(dev);
233 struct net_bridge_port *p, *n;
234 int err = 0;
235
236 list_for_each_entry_safe(p, n, &br->port_list, list) {
237 if (!p->dev)
238 continue;
239
240 err = br_netpoll_enable(p);
241 if (err)
242 goto fail;
243 }
244
245out:
246 return err;
247
248fail:
249 br_netpoll_cleanup(dev);
250 goto out;
251}
252
253int br_netpoll_enable(struct net_bridge_port *p)
254{
255 struct netpoll *np;
256 int err = 0;
257
258 np = kzalloc(sizeof(*p->np), GFP_KERNEL);
259 err = -ENOMEM;
260 if (!np)
261 goto out;
262
263 np->dev = p->dev;
264
265 err = __netpoll_setup(np);
266 if (err) {
267 kfree(np);
268 goto out;
269 }
270
271 p->np = np;
272
273out:
274 return err;
275}
276
277void br_netpoll_disable(struct net_bridge_port *p)
278{
279 struct netpoll *np = p->np;
280
281 if (!np)
282 return;
283
284 p->np = NULL;
285
286 /* Wait for transmitting packets to finish before freeing. */
287 synchronize_rcu_bh();
288
289 __netpoll_cleanup(np);
290 kfree(np);
291}
292
293#endif
294
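The teardown ordering in br_netpoll_disable() above is the subtle part: the port's np pointer is cleared first, then synchronize_rcu_bh() waits out any bottom-half transmitter that may still be using the old pointer, and only then is the netpoll structure destroyed. In outline:

    p->np = NULL;           /* new transmits no longer see netpoll */
    synchronize_rcu_bh();   /* wait for in-flight BH users to drain */
    __netpoll_cleanup(np);  /* now safe to tear down and free */
    kfree(np);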
165static const struct ethtool_ops br_ethtool_ops = { 295static const struct ethtool_ops br_ethtool_ops = {
166 .get_drvinfo = br_getinfo, 296 .get_drvinfo = br_getinfo,
167 .get_link = ethtool_op_get_link, 297 .get_link = ethtool_op_get_link,
@@ -180,19 +310,33 @@ static const struct net_device_ops br_netdev_ops = {
180 .ndo_open = br_dev_open, 310 .ndo_open = br_dev_open,
181 .ndo_stop = br_dev_stop, 311 .ndo_stop = br_dev_stop,
182 .ndo_start_xmit = br_dev_xmit, 312 .ndo_start_xmit = br_dev_xmit,
313 .ndo_get_stats64 = br_get_stats64,
183 .ndo_set_mac_address = br_set_mac_address, 314 .ndo_set_mac_address = br_set_mac_address,
184 .ndo_set_multicast_list = br_dev_set_multicast_list, 315 .ndo_set_multicast_list = br_dev_set_multicast_list,
185 .ndo_change_mtu = br_change_mtu, 316 .ndo_change_mtu = br_change_mtu,
186 .ndo_do_ioctl = br_dev_ioctl, 317 .ndo_do_ioctl = br_dev_ioctl,
318#ifdef CONFIG_NET_POLL_CONTROLLER
319 .ndo_netpoll_setup = br_netpoll_setup,
320 .ndo_netpoll_cleanup = br_netpoll_cleanup,
321 .ndo_poll_controller = br_poll_controller,
322#endif
187}; 323};
188 324
325static void br_dev_free(struct net_device *dev)
326{
327 struct net_bridge *br = netdev_priv(dev);
328
329 free_percpu(br->stats);
330 free_netdev(dev);
331}
332
189void br_dev_setup(struct net_device *dev) 333void br_dev_setup(struct net_device *dev)
190{ 334{
191 random_ether_addr(dev->dev_addr); 335 random_ether_addr(dev->dev_addr);
192 ether_setup(dev); 336 ether_setup(dev);
193 337
194 dev->netdev_ops = &br_netdev_ops; 338 dev->netdev_ops = &br_netdev_ops;
195 dev->destructor = free_netdev; 339 dev->destructor = br_dev_free;
196 SET_ETHTOOL_OPS(dev, &br_ethtool_ops); 340 SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
197 dev->tx_queue_len = 0; 341 dev->tx_queue_len = 0;
198 dev->priv_flags = IFF_EBRIDGE; 342 dev->priv_flags = IFF_EBRIDGE;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9101a4e56201..90512ccfd3e9 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -128,7 +128,7 @@ void br_fdb_cleanup(unsigned long _data)
128{ 128{
129 struct net_bridge *br = (struct net_bridge *)_data; 129 struct net_bridge *br = (struct net_bridge *)_data;
130 unsigned long delay = hold_time(br); 130 unsigned long delay = hold_time(br);
131 unsigned long next_timer = jiffies + br->forward_delay; 131 unsigned long next_timer = jiffies + br->ageing_time;
132 int i; 132 int i;
133 133
134 spin_lock_bh(&br->hash_lock); 134 spin_lock_bh(&br->hash_lock);
@@ -149,9 +149,7 @@ void br_fdb_cleanup(unsigned long _data)
149 } 149 }
150 spin_unlock_bh(&br->hash_lock); 150 spin_unlock_bh(&br->hash_lock);
151 151
152 /* Add HZ/4 to ensure we round the jiffies upwards to be after the next 152 mod_timer(&br->gc_timer, round_jiffies_up(next_timer));
153 * timer, otherwise we might round down and will have no-op run. */
154 mod_timer(&br->gc_timer, round_jiffies(next_timer + HZ/4));
155} 153}
156 154
157/* Completely flush all dynamic entries in forwarding database.*/ 155/* Completely flush all dynamic entries in forwarding database.*/
@@ -216,7 +214,7 @@ void br_fdb_delete_by_port(struct net_bridge *br,
216 spin_unlock_bh(&br->hash_lock); 214 spin_unlock_bh(&br->hash_lock);
217} 215}
218 216
219/* No locking or refcounting, assumes caller has no preempt (rcu_read_lock) */ 217/* No locking or refcounting, assumes caller has rcu_read_lock */
220struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br, 218struct net_bridge_fdb_entry *__br_fdb_get(struct net_bridge *br,
221 const unsigned char *addr) 219 const unsigned char *addr)
222{ 220{
@@ -242,11 +240,11 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
242 struct net_bridge_fdb_entry *fdb; 240 struct net_bridge_fdb_entry *fdb;
243 int ret; 241 int ret;
244 242
245 if (!dev->br_port) 243 if (!br_port_exists(dev))
246 return 0; 244 return 0;
247 245
248 rcu_read_lock(); 246 rcu_read_lock();
249 fdb = __br_fdb_get(dev->br_port->br, addr); 247 fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr);
250 ret = fdb && fdb->dst->dev != dev && 248 ret = fdb && fdb->dst->dev != dev &&
251 fdb->dst->state == BR_STATE_FORWARDING; 249 fdb->dst->state == BR_STATE_FORWARDING;
252 rcu_read_unlock(); 250 rcu_read_unlock();
@@ -353,8 +351,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
353 */ 351 */
354 if (fdb->is_local) 352 if (fdb->is_local)
355 return 0; 353 return 0;
356 354 br_warn(br, "adding interface %s with same address "
357 printk(KERN_WARNING "%s adding interface with same address "
358 "as a received packet\n", 355 "as a received packet\n",
359 source->dev->name); 356 source->dev->name);
360 fdb_delete(fdb); 357 fdb_delete(fdb);
@@ -397,9 +394,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
397 /* attempt to update an entry for a local interface */ 394 /* attempt to update an entry for a local interface */
398 if (unlikely(fdb->is_local)) { 395 if (unlikely(fdb->is_local)) {
399 if (net_ratelimit()) 396 if (net_ratelimit())
400 printk(KERN_WARNING "%s: received packet with " 397 br_warn(br, "received packet on %s with "
401 "own address as source address\n", 398 "own address as source address\n",
402 source->dev->name); 399 source->dev->name);
403 } else { 400 } else {
404 /* fastpath: update of existing entry */ 401 /* fastpath: update of existing entry */
405 fdb->dst = source; 402 fdb->dst = source;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 7a241c396981..cbfe87f0f34a 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -15,6 +15,7 @@
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/netdevice.h> 17#include <linux/netdevice.h>
18#include <linux/netpoll.h>
18#include <linux/skbuff.h> 19#include <linux/skbuff.h>
19#include <linux/if_vlan.h> 20#include <linux/if_vlan.h>
20#include <linux/netfilter_bridge.h> 21#include <linux/netfilter_bridge.h>
@@ -44,12 +45,11 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
44 if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) 45 if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
45 kfree_skb(skb); 46 kfree_skb(skb);
46 else { 47 else {
47 /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ 48 /* ip_fragment doesn't copy the MAC header */
48 if (nf_bridge_maybe_copy_header(skb)) 49 if (nf_bridge_maybe_copy_header(skb))
49 kfree_skb(skb); 50 kfree_skb(skb);
50 else { 51 else {
51 skb_push(skb, ETH_HLEN); 52 skb_push(skb, ETH_HLEN);
52
53 dev_queue_xmit(skb); 53 dev_queue_xmit(skb);
54 } 54 }
55 } 55 }
@@ -59,7 +59,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
59 59
60int br_forward_finish(struct sk_buff *skb) 60int br_forward_finish(struct sk_buff *skb)
61{ 61{
62 return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, 62 return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
63 br_dev_queue_push_xmit); 63 br_dev_queue_push_xmit);
64 64
65} 65}
@@ -67,8 +67,19 @@ int br_forward_finish(struct sk_buff *skb)
67static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) 67static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
68{ 68{
69 skb->dev = to->dev; 69 skb->dev = to->dev;
70 NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, 70
71 br_forward_finish); 71 if (unlikely(netpoll_tx_running(to->dev))) {
72 if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
73 kfree_skb(skb);
74 else {
75 skb_push(skb, ETH_HLEN);
76 br_netpoll_send_skb(to, skb);
77 }
78 return;
79 }
80
81 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
82 br_forward_finish);
72} 83}
73 84
74static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) 85static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
@@ -84,8 +95,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
84 skb->dev = to->dev; 95 skb->dev = to->dev;
85 skb_forward_csum(skb); 96 skb_forward_csum(skb);
86 97
87 NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, 98 NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
88 br_forward_finish); 99 br_forward_finish);
89} 100}
90 101
91/* called with rcu_read_lock */ 102/* called with rcu_read_lock */
@@ -119,10 +130,10 @@ static int deliver_clone(const struct net_bridge_port *prev,
119 void (*__packet_hook)(const struct net_bridge_port *p, 130 void (*__packet_hook)(const struct net_bridge_port *p,
120 struct sk_buff *skb)) 131 struct sk_buff *skb))
121{ 132{
133 struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
134
122 skb = skb_clone(skb, GFP_ATOMIC); 135 skb = skb_clone(skb, GFP_ATOMIC);
123 if (!skb) { 136 if (!skb) {
124 struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
125
126 dev->stats.tx_dropped++; 137 dev->stats.tx_dropped++;
127 return -ENOMEM; 138 return -ENOMEM;
128 } 139 }
@@ -208,17 +219,15 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
208{ 219{
209 struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; 220 struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
210 struct net_bridge *br = netdev_priv(dev); 221 struct net_bridge *br = netdev_priv(dev);
211 struct net_bridge_port *port; 222 struct net_bridge_port *prev = NULL;
212 struct net_bridge_port *lport, *rport;
213 struct net_bridge_port *prev;
214 struct net_bridge_port_group *p; 223 struct net_bridge_port_group *p;
215 struct hlist_node *rp; 224 struct hlist_node *rp;
216 225
217 prev = NULL; 226 rp = rcu_dereference(br->router_list.first);
218 227 p = mdst ? rcu_dereference(mdst->ports) : NULL;
219 rp = br->router_list.first;
220 p = mdst ? mdst->ports : NULL;
221 while (p || rp) { 228 while (p || rp) {
229 struct net_bridge_port *port, *lport, *rport;
230
222 lport = p ? p->port : NULL; 231 lport = p ? p->port : NULL;
223 rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) : 232 rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
224 NULL; 233 NULL;
@@ -231,9 +240,9 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
231 goto out; 240 goto out;
232 241
233 if ((unsigned long)lport >= (unsigned long)port) 242 if ((unsigned long)lport >= (unsigned long)port)
234 p = p->next; 243 p = rcu_dereference(p->next);
235 if ((unsigned long)rport >= (unsigned long)port) 244 if ((unsigned long)rport >= (unsigned long)port)
236 rp = rp->next; 245 rp = rcu_dereference(rp->next);
237 } 246 }
238 247
239 if (!prev) 248 if (!prev)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 0b6b1f2ff7ac..c03d2c3ff03e 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/netdevice.h> 15#include <linux/netdevice.h>
16#include <linux/netpoll.h>
16#include <linux/ethtool.h> 17#include <linux/ethtool.h>
17#include <linux/if_arp.h> 18#include <linux/if_arp.h>
18#include <linux/module.h> 19#include <linux/module.h>
@@ -132,7 +133,7 @@ static void del_nbp(struct net_bridge_port *p)
132 struct net_bridge *br = p->br; 133 struct net_bridge *br = p->br;
133 struct net_device *dev = p->dev; 134 struct net_device *dev = p->dev;
134 135
135 sysfs_remove_link(br->ifobj, dev->name); 136 sysfs_remove_link(br->ifobj, p->dev->name);
136 137
137 dev_set_promiscuity(dev, -1); 138 dev_set_promiscuity(dev, -1);
138 139
@@ -146,13 +147,17 @@ static void del_nbp(struct net_bridge_port *p)
146 147
147 list_del_rcu(&p->list); 148 list_del_rcu(&p->list);
148 149
149 rcu_assign_pointer(dev->br_port, NULL); 150 dev->priv_flags &= ~IFF_BRIDGE_PORT;
151
152 netdev_rx_handler_unregister(dev);
150 153
151 br_multicast_del_port(p); 154 br_multicast_del_port(p);
152 155
153 kobject_uevent(&p->kobj, KOBJ_REMOVE); 156 kobject_uevent(&p->kobj, KOBJ_REMOVE);
154 kobject_del(&p->kobj); 157 kobject_del(&p->kobj);
155 158
159 br_netpoll_disable(p);
160
156 call_rcu(&p->rcu, destroy_nbp_rcu); 161 call_rcu(&p->rcu, destroy_nbp_rcu);
157} 162}
158 163
@@ -186,6 +191,12 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name)
186 br = netdev_priv(dev); 191 br = netdev_priv(dev);
187 br->dev = dev; 192 br->dev = dev;
188 193
194 br->stats = alloc_percpu(struct br_cpu_netstats);
195 if (!br->stats) {
196 free_netdev(dev);
197 return NULL;
198 }
199
189 spin_lock_init(&br->lock); 200 spin_lock_init(&br->lock);
190 INIT_LIST_HEAD(&br->port_list); 201 INIT_LIST_HEAD(&br->port_list);
191 spin_lock_init(&br->hash_lock); 202 spin_lock_init(&br->hash_lock);
@@ -390,7 +401,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
390 return -ELOOP; 401 return -ELOOP;
391 402
392 /* Device is already being bridged */ 403 /* Device is already being bridged */
393 if (dev->br_port != NULL) 404 if (br_port_exists(dev))
394 return -EBUSY; 405 return -EBUSY;
395 406
396 /* No bridging devices that dislike that (e.g. wireless) */ 407 /* No bridging devices that dislike that (e.g. wireless) */
@@ -418,7 +429,15 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
418 if (err) 429 if (err)
419 goto err2; 430 goto err2;
420 431
421 rcu_assign_pointer(dev->br_port, p); 432 if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
433 goto err3;
434
435 err = netdev_rx_handler_register(dev, br_handle_frame, p);
436 if (err)
437 goto err3;
438
439 dev->priv_flags |= IFF_BRIDGE_PORT;
440
422 dev_disable_lro(dev); 441 dev_disable_lro(dev);
423 442
424 list_add_rcu(&p->list, &br->port_list); 443 list_add_rcu(&p->list, &br->port_list);
@@ -439,6 +458,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
439 kobject_uevent(&p->kobj, KOBJ_ADD); 458 kobject_uevent(&p->kobj, KOBJ_ADD);
440 459
441 return 0; 460 return 0;
461err3:
462 sysfs_remove_link(br->ifobj, p->dev->name);
442err2: 463err2:
443 br_fdb_delete_by_port(br, p, 1); 464 br_fdb_delete_by_port(br, p, 1);
444err1: 465err1:
@@ -455,9 +476,13 @@ put_back:
455/* called with RTNL */ 476/* called with RTNL */
456int br_del_if(struct net_bridge *br, struct net_device *dev) 477int br_del_if(struct net_bridge *br, struct net_device *dev)
457{ 478{
458 struct net_bridge_port *p = dev->br_port; 479 struct net_bridge_port *p;
480
481 if (!br_port_exists(dev))
482 return -EINVAL;
459 483
460 if (!p || p->br != br) 484 p = br_port_get(dev);
485 if (p->br != br)
461 return -EINVAL; 486 return -EINVAL;
462 487
463 del_nbp(p); 488 del_nbp(p);
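br_add_if()/br_del_if() above complete the move away from the dedicated dev->br_port pointer: the bridge now attaches itself through the generic rx_handler hook, where each device carries at most one handler, the handler runs under rcu_read_lock from the core receive path, and the bridge port structure rides along as private data. The contract, sketched with an illustrative stub in place of br_handle_frame():

    /* Return NULL when the skb has been consumed; return the skb
     * (possibly modified) to let normal receive processing continue. */
    static struct sk_buff *example_rx_handler(struct sk_buff *skb)
    {
            return skb;
    }

    /* attach: fails with -EBUSY if the device already has a handler */
    err = netdev_rx_handler_register(dev, example_rx_handler, p);

    /* detach before freeing whatever the private data points at */
    netdev_rx_handler_unregister(dev);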
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index a82dde2d2ead..826cd5221536 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -24,22 +24,26 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
24static int br_pass_frame_up(struct sk_buff *skb) 24static int br_pass_frame_up(struct sk_buff *skb)
25{ 25{
26 struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; 26 struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
27 struct net_bridge *br = netdev_priv(brdev);
28 struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
27 29
28 brdev->stats.rx_packets++; 30 u64_stats_update_begin(&brstats->syncp);
29 brdev->stats.rx_bytes += skb->len; 31 brstats->rx_packets++;
32 brstats->rx_bytes += skb->len;
33 u64_stats_update_end(&brstats->syncp);
30 34
31 indev = skb->dev; 35 indev = skb->dev;
32 skb->dev = brdev; 36 skb->dev = brdev;
33 37
34 return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, 38 return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
35 netif_receive_skb); 39 netif_receive_skb);
36} 40}
37 41
38/* note: already called with rcu_read_lock (preempt_disabled) */ 42/* note: already called with rcu_read_lock */
39int br_handle_frame_finish(struct sk_buff *skb) 43int br_handle_frame_finish(struct sk_buff *skb)
40{ 44{
41 const unsigned char *dest = eth_hdr(skb)->h_dest; 45 const unsigned char *dest = eth_hdr(skb)->h_dest;
42 struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); 46 struct net_bridge_port *p = br_port_get_rcu(skb->dev);
43 struct net_bridge *br; 47 struct net_bridge *br;
44 struct net_bridge_fdb_entry *dst; 48 struct net_bridge_fdb_entry *dst;
45 struct net_bridge_mdb_entry *mdst; 49 struct net_bridge_mdb_entry *mdst;
@@ -106,13 +110,12 @@ drop:
106 goto out; 110 goto out;
107} 111}
108 112
109/* note: already called with rcu_read_lock (preempt_disabled) */ 113/* note: already called with rcu_read_lock */
110static int br_handle_local_finish(struct sk_buff *skb) 114static int br_handle_local_finish(struct sk_buff *skb)
111{ 115{
112 struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); 116 struct net_bridge_port *p = br_port_get_rcu(skb->dev);
113 117
114 if (p) 118 br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
115 br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
116 return 0; /* process further */ 119 return 0; /* process further */
117} 120}
118 121
@@ -129,15 +132,18 @@ static inline int is_link_local(const unsigned char *dest)
129} 132}
130 133
131/* 134/*
132 * Called via br_handle_frame_hook.
133 * Return NULL if skb is handled 135 * Return NULL if skb is handled
134 * note: already called with rcu_read_lock (preempt_disabled) 136 * note: already called with rcu_read_lock
135 */ 137 */
136struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) 138struct sk_buff *br_handle_frame(struct sk_buff *skb)
137{ 139{
140 struct net_bridge_port *p;
138 const unsigned char *dest = eth_hdr(skb)->h_dest; 141 const unsigned char *dest = eth_hdr(skb)->h_dest;
139 int (*rhook)(struct sk_buff *skb); 142 int (*rhook)(struct sk_buff *skb);
140 143
144 if (skb->pkt_type == PACKET_LOOPBACK)
145 return skb;
146
141 if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) 147 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
142 goto drop; 148 goto drop;
143 149
@@ -145,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
145 if (!skb) 151 if (!skb)
146 return NULL; 152 return NULL;
147 153
154 p = br_port_get_rcu(skb->dev);
155
148 if (unlikely(is_link_local(dest))) { 156 if (unlikely(is_link_local(dest))) {
149 /* Pause frames shouldn't be passed up by driver anyway */ 157 /* Pause frames shouldn't be passed up by driver anyway */
150 if (skb->protocol == htons(ETH_P_PAUSE)) 158 if (skb->protocol == htons(ETH_P_PAUSE))
@@ -154,7 +162,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
154 if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) 162 if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
155 goto forward; 163 goto forward;
156 164
157 if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, 165 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
158 NULL, br_handle_local_finish)) 166 NULL, br_handle_local_finish))
159 return NULL; /* frame consumed by filter */ 167 return NULL; /* frame consumed by filter */
160 else 168 else
@@ -175,7 +183,7 @@ forward:
175 if (!compare_ether_addr(p->br->dev->dev_addr, dest)) 183 if (!compare_ether_addr(p->br->dev->dev_addr, dest))
176 skb->pkt_type = PACKET_HOST; 184 skb->pkt_type = PACKET_HOST;
177 185
178 NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, 186 NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
179 br_handle_frame_finish); 187 br_handle_frame_finish);
180 break; 188 break;
181 default: 189 default:
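br_handle_frame() now takes only the skb because it is called through the generic rx_handler registered in br_add_if(), rather than through the old br_handle_frame_hook. A sketch of the assumed caller contract in __netif_receive_skb(): the handler returns the skb to let normal delivery continue, or NULL once the bridge has consumed the frame.

/* Illustrative caller side; not the exact net/core/dev.c code. */
static inline struct sk_buff *deliver_to_rx_handler(struct sk_buff *skb)
{
	struct sk_buff *(*rx_handler)(struct sk_buff *skb);

	rx_handler = rcu_dereference(skb->dev->rx_handler);
	if (rx_handler)
		skb = rx_handler(skb);	/* br_handle_frame() on bridge ports */

	return skb;	/* NULL means the frame was consumed */
}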
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 995afc4b04dc..cb43312b846e 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -412,6 +412,6 @@ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
412 412
413 } 413 }
414 414
415 pr_debug("Bridge does not support ioctl 0x%x\n", cmd); 415 br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
416 return -EOPNOTSUPP; 416 return -EOPNOTSUPP;
417} 417}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index eaa0e1bae49b..eb5b256ffc88 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -24,22 +24,75 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/timer.h> 25#include <linux/timer.h>
26#include <net/ip.h> 26#include <net/ip.h>
27#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
28#include <net/ipv6.h>
29#include <net/mld.h>
30#include <net/addrconf.h>
31#include <net/ip6_checksum.h>
32#endif
27 33
28#include "br_private.h" 34#include "br_private.h"
29 35
30static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, __be32 ip) 36#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
37static inline int ipv6_is_local_multicast(const struct in6_addr *addr)
31{ 38{
32 return jhash_1word(mdb->secret, (u32)ip) & (mdb->max - 1); 39 if (ipv6_addr_is_multicast(addr) &&
40 IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL)
41 return 1;
42 return 0;
43}
44#endif
45
46static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
47{
48 if (a->proto != b->proto)
49 return 0;
50 switch (a->proto) {
51 case htons(ETH_P_IP):
52 return a->u.ip4 == b->u.ip4;
53#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
54 case htons(ETH_P_IPV6):
55 return ipv6_addr_equal(&a->u.ip6, &b->u.ip6);
56#endif
57 }
58 return 0;
59}
60
61static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
62{
63 return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
64}
65
66#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
67static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
68 const struct in6_addr *ip)
69{
70 return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1);
71}
72#endif
73
74static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
75 struct br_ip *ip)
76{
77 switch (ip->proto) {
78 case htons(ETH_P_IP):
79 return __br_ip4_hash(mdb, ip->u.ip4);
80#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
81 case htons(ETH_P_IPV6):
82 return __br_ip6_hash(mdb, &ip->u.ip6);
83#endif
84 }
85 return 0;
33} 86}
34 87
35static struct net_bridge_mdb_entry *__br_mdb_ip_get( 88static struct net_bridge_mdb_entry *__br_mdb_ip_get(
36 struct net_bridge_mdb_htable *mdb, __be32 dst, int hash) 89 struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash)
37{ 90{
38 struct net_bridge_mdb_entry *mp; 91 struct net_bridge_mdb_entry *mp;
39 struct hlist_node *p; 92 struct hlist_node *p;
40 93
41 hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { 94 hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
42 if (dst == mp->addr) 95 if (br_ip_equal(&mp->addr, dst))
43 return mp; 96 return mp;
44 } 97 }
45 98
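br_ip_equal() and br_ip_hash() dispatch on a protocol-tagged group key, so a single mdb hash table can hold IGMP and MLD entries side by side. The key itself is defined in br_private.h, outside this diff; its assumed shape, inferred from the u.ip4/u.ip6/proto accesses above:

/* Assumed definition of the generic group address (see br_private.h). */
struct br_ip {
	union {
		__be32 ip4;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		struct in6_addr ip6;
#endif
	} u;
	__be16 proto;	/* htons(ETH_P_IP) or htons(ETH_P_IPV6) */
};

Comparing proto first lets br_ip_equal() bail out before it ever touches the address bytes of a mismatched family.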
@@ -47,7 +100,7 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get(
47} 100}
48 101
49static struct net_bridge_mdb_entry *br_mdb_ip_get( 102static struct net_bridge_mdb_entry *br_mdb_ip_get(
50 struct net_bridge_mdb_htable *mdb, __be32 dst) 103 struct net_bridge_mdb_htable *mdb, struct br_ip *dst)
51{ 104{
52 if (!mdb) 105 if (!mdb)
53 return NULL; 106 return NULL;
@@ -55,20 +108,58 @@ static struct net_bridge_mdb_entry *br_mdb_ip_get(
55 return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst)); 108 return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
56} 109}
57 110
111static struct net_bridge_mdb_entry *br_mdb_ip4_get(
112 struct net_bridge_mdb_htable *mdb, __be32 dst)
113{
114 struct br_ip br_dst;
115
116 br_dst.u.ip4 = dst;
117 br_dst.proto = htons(ETH_P_IP);
118
119 return br_mdb_ip_get(mdb, &br_dst);
120}
121
122#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
123static struct net_bridge_mdb_entry *br_mdb_ip6_get(
124 struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst)
125{
126 struct br_ip br_dst;
127
128 ipv6_addr_copy(&br_dst.u.ip6, dst);
129 br_dst.proto = htons(ETH_P_IPV6);
130
131 return br_mdb_ip_get(mdb, &br_dst);
132}
133#endif
134
58struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, 135struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
59 struct sk_buff *skb) 136 struct sk_buff *skb)
60{ 137{
138 struct net_bridge_mdb_htable *mdb = br->mdb;
139 struct br_ip ip;
140
61 if (br->multicast_disabled) 141 if (br->multicast_disabled)
62 return NULL; 142 return NULL;
63 143
144 if (BR_INPUT_SKB_CB(skb)->igmp)
145 return NULL;
146
147 ip.proto = skb->protocol;
148
64 switch (skb->protocol) { 149 switch (skb->protocol) {
65 case htons(ETH_P_IP): 150 case htons(ETH_P_IP):
66 if (BR_INPUT_SKB_CB(skb)->igmp) 151 ip.u.ip4 = ip_hdr(skb)->daddr;
67 break; 152 break;
68 return br_mdb_ip_get(br->mdb, ip_hdr(skb)->daddr); 153#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
154 case htons(ETH_P_IPV6):
155 ipv6_addr_copy(&ip.u.ip6, &ipv6_hdr(skb)->daddr);
156 break;
157#endif
158 default:
159 return NULL;
69 } 160 }
70 161
71 return NULL; 162 return br_mdb_ip_get(mdb, &ip);
72} 163}
73 164
74static void br_mdb_free(struct rcu_head *head) 165static void br_mdb_free(struct rcu_head *head)
@@ -95,7 +186,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new,
95 for (i = 0; i < old->max; i++) 186 for (i = 0; i < old->max; i++)
96 hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver]) 187 hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver])
97 hlist_add_head(&mp->hlist[new->ver], 188 hlist_add_head(&mp->hlist[new->ver],
98 &new->mhash[br_ip_hash(new, mp->addr)]); 189 &new->mhash[br_ip_hash(new, &mp->addr)]);
99 190
100 if (!elasticity) 191 if (!elasticity)
101 return 0; 192 return 0;
@@ -163,7 +254,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
163 struct net_bridge_port_group *p; 254 struct net_bridge_port_group *p;
164 struct net_bridge_port_group **pp; 255 struct net_bridge_port_group **pp;
165 256
166 mp = br_mdb_ip_get(mdb, pg->addr); 257 mp = br_mdb_ip_get(mdb, &pg->addr);
167 if (WARN_ON(!mp)) 258 if (WARN_ON(!mp))
168 return; 259 return;
169 260
@@ -171,7 +262,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
171 if (p != pg) 262 if (p != pg)
172 continue; 263 continue;
173 264
174 *pp = p->next; 265 rcu_assign_pointer(*pp, p->next);
175 hlist_del_init(&p->mglist); 266 hlist_del_init(&p->mglist);
176 del_timer(&p->timer); 267 del_timer(&p->timer);
177 del_timer(&p->query_timer); 268 del_timer(&p->query_timer);
@@ -249,8 +340,8 @@ out:
249 return 0; 340 return 0;
250} 341}
251 342
252static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br, 343static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
253 __be32 group) 344 __be32 group)
254{ 345{
255 struct sk_buff *skb; 346 struct sk_buff *skb;
256 struct igmphdr *ih; 347 struct igmphdr *ih;
@@ -314,12 +405,104 @@ out:
314 return skb; 405 return skb;
315} 406}
316 407
408#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
409static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
410 struct in6_addr *group)
411{
412 struct sk_buff *skb;
413 struct ipv6hdr *ip6h;
414 struct mld_msg *mldq;
415 struct ethhdr *eth;
416 u8 *hopopt;
417 unsigned long interval;
418
419 skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) +
420 8 + sizeof(*mldq));
421 if (!skb)
422 goto out;
423
424 skb->protocol = htons(ETH_P_IPV6);
425
426 /* Ethernet header */
427 skb_reset_mac_header(skb);
428 eth = eth_hdr(skb);
429
430 memcpy(eth->h_source, br->dev->dev_addr, 6);
431 ipv6_eth_mc_map(group, eth->h_dest);
432 eth->h_proto = htons(ETH_P_IPV6);
433 skb_put(skb, sizeof(*eth));
434
435 /* IPv6 header + HbH option */
436 skb_set_network_header(skb, skb->len);
437 ip6h = ipv6_hdr(skb);
438
439 *(__force __be32 *)ip6h = htonl(0x60000000);
 440 ip6h->payload_len = htons(8 + sizeof(*mldq));
441 ip6h->nexthdr = IPPROTO_HOPOPTS;
442 ip6h->hop_limit = 1;
443 ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
444 ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
445
446 hopopt = (u8 *)(ip6h + 1);
447 hopopt[0] = IPPROTO_ICMPV6; /* next hdr */
448 hopopt[1] = 0; /* length of HbH */
449 hopopt[2] = IPV6_TLV_ROUTERALERT; /* Router Alert */
450 hopopt[3] = 2; /* Length of RA Option */
451 hopopt[4] = 0; /* Type = 0x0000 (MLD) */
452 hopopt[5] = 0;
453 hopopt[6] = IPV6_TLV_PAD0; /* Pad0 */
454 hopopt[7] = IPV6_TLV_PAD0; /* Pad0 */
455
456 skb_put(skb, sizeof(*ip6h) + 8);
457
458 /* ICMPv6 */
459 skb_set_transport_header(skb, skb->len);
460 mldq = (struct mld_msg *) icmp6_hdr(skb);
461
462 interval = ipv6_addr_any(group) ? br->multicast_last_member_interval :
463 br->multicast_query_response_interval;
464
465 mldq->mld_type = ICMPV6_MGM_QUERY;
466 mldq->mld_code = 0;
467 mldq->mld_cksum = 0;
468 mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
469 mldq->mld_reserved = 0;
470 ipv6_addr_copy(&mldq->mld_mca, group);
471
472 /* checksum */
473 mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
474 sizeof(*mldq), IPPROTO_ICMPV6,
475 csum_partial(mldq,
476 sizeof(*mldq), 0));
477 skb_put(skb, sizeof(*mldq));
478
479 __skb_pull(skb, sizeof(*eth));
480
481out:
482 return skb;
483}
484#endif
485
486static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
487 struct br_ip *addr)
488{
489 switch (addr->proto) {
490 case htons(ETH_P_IP):
491 return br_ip4_multicast_alloc_query(br, addr->u.ip4);
492#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
493 case htons(ETH_P_IPV6):
494 return br_ip6_multicast_alloc_query(br, &addr->u.ip6);
495#endif
496 }
497 return NULL;
498}
499
317static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp) 500static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp)
318{ 501{
319 struct net_bridge *br = mp->br; 502 struct net_bridge *br = mp->br;
320 struct sk_buff *skb; 503 struct sk_buff *skb;
321 504
322 skb = br_multicast_alloc_query(br, mp->addr); 505 skb = br_multicast_alloc_query(br, &mp->addr);
323 if (!skb) 506 if (!skb)
324 goto timer; 507 goto timer;
325 508
@@ -353,7 +536,7 @@ static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg)
353 struct net_bridge *br = port->br; 536 struct net_bridge *br = port->br;
354 struct sk_buff *skb; 537 struct sk_buff *skb;
355 538
356 skb = br_multicast_alloc_query(br, pg->addr); 539 skb = br_multicast_alloc_query(br, &pg->addr);
357 if (!skb) 540 if (!skb)
358 goto timer; 541 goto timer;
359 542
@@ -383,8 +566,8 @@ out:
383} 566}
384 567
385static struct net_bridge_mdb_entry *br_multicast_get_group( 568static struct net_bridge_mdb_entry *br_multicast_get_group(
386 struct net_bridge *br, struct net_bridge_port *port, __be32 group, 569 struct net_bridge *br, struct net_bridge_port *port,
387 int hash) 570 struct br_ip *group, int hash)
388{ 571{
389 struct net_bridge_mdb_htable *mdb = br->mdb; 572 struct net_bridge_mdb_htable *mdb = br->mdb;
390 struct net_bridge_mdb_entry *mp; 573 struct net_bridge_mdb_entry *mp;
@@ -396,9 +579,8 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
396 579
397 hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) { 580 hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
398 count++; 581 count++;
399 if (unlikely(group == mp->addr)) { 582 if (unlikely(br_ip_equal(group, &mp->addr)))
400 return mp; 583 return mp;
401 }
402 } 584 }
403 585
404 elasticity = 0; 586 elasticity = 0;
@@ -406,10 +588,9 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
406 588
407 if (unlikely(count > br->hash_elasticity && count)) { 589 if (unlikely(count > br->hash_elasticity && count)) {
408 if (net_ratelimit()) 590 if (net_ratelimit())
409 printk(KERN_INFO "%s: Multicast hash table " 591 br_info(br, "Multicast hash table "
410 "chain limit reached: %s\n", 592 "chain limit reached: %s\n",
411 br->dev->name, port ? port->dev->name : 593 port ? port->dev->name : br->dev->name);
412 br->dev->name);
413 594
414 elasticity = br->hash_elasticity; 595 elasticity = br->hash_elasticity;
415 } 596 }
@@ -417,11 +598,9 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
417 if (mdb->size >= max) { 598 if (mdb->size >= max) {
418 max *= 2; 599 max *= 2;
419 if (unlikely(max >= br->hash_max)) { 600 if (unlikely(max >= br->hash_max)) {
420 printk(KERN_WARNING "%s: Multicast hash table maximum " 601 br_warn(br, "Multicast hash table maximum "
421 "reached, disabling snooping: %s, %d\n", 602 "reached, disabling snooping: %s, %d\n",
422 br->dev->name, port ? port->dev->name : 603 port ? port->dev->name : br->dev->name, max);
423 br->dev->name,
424 max);
425 err = -E2BIG; 604 err = -E2BIG;
426disable: 605disable:
427 br->multicast_disabled = 1; 606 br->multicast_disabled = 1;
@@ -432,22 +611,19 @@ disable:
432 if (max > mdb->max || elasticity) { 611 if (max > mdb->max || elasticity) {
433 if (mdb->old) { 612 if (mdb->old) {
434 if (net_ratelimit()) 613 if (net_ratelimit())
435 printk(KERN_INFO "%s: Multicast hash table " 614 br_info(br, "Multicast hash table "
436 "on fire: %s\n", 615 "on fire: %s\n",
437 br->dev->name, port ? port->dev->name : 616 port ? port->dev->name : br->dev->name);
438 br->dev->name);
439 err = -EEXIST; 617 err = -EEXIST;
440 goto err; 618 goto err;
441 } 619 }
442 620
443 err = br_mdb_rehash(&br->mdb, max, elasticity); 621 err = br_mdb_rehash(&br->mdb, max, elasticity);
444 if (err) { 622 if (err) {
445 printk(KERN_WARNING "%s: Cannot rehash multicast " 623 br_warn(br, "Cannot rehash multicast "
446 "hash table, disabling snooping: " 624 "hash table, disabling snooping: %s, %d, %d\n",
447 "%s, %d, %d\n", 625 port ? port->dev->name : br->dev->name,
448 br->dev->name, port ? port->dev->name : 626 mdb->size, err);
449 br->dev->name,
450 mdb->size, err);
451 goto disable; 627 goto disable;
452 } 628 }
453 629
@@ -463,7 +639,8 @@ err:
463} 639}
464 640
465static struct net_bridge_mdb_entry *br_multicast_new_group( 641static struct net_bridge_mdb_entry *br_multicast_new_group(
466 struct net_bridge *br, struct net_bridge_port *port, __be32 group) 642 struct net_bridge *br, struct net_bridge_port *port,
643 struct br_ip *group)
467{ 644{
468 struct net_bridge_mdb_htable *mdb = br->mdb; 645 struct net_bridge_mdb_htable *mdb = br->mdb;
469 struct net_bridge_mdb_entry *mp; 646 struct net_bridge_mdb_entry *mp;
@@ -496,7 +673,7 @@ rehash:
496 goto out; 673 goto out;
497 674
498 mp->br = br; 675 mp->br = br;
499 mp->addr = group; 676 mp->addr = *group;
500 setup_timer(&mp->timer, br_multicast_group_expired, 677 setup_timer(&mp->timer, br_multicast_group_expired,
501 (unsigned long)mp); 678 (unsigned long)mp);
502 setup_timer(&mp->query_timer, br_multicast_group_query_expired, 679 setup_timer(&mp->query_timer, br_multicast_group_query_expired,
@@ -510,7 +687,8 @@ out:
510} 687}
511 688
512static int br_multicast_add_group(struct net_bridge *br, 689static int br_multicast_add_group(struct net_bridge *br,
513 struct net_bridge_port *port, __be32 group) 690 struct net_bridge_port *port,
691 struct br_ip *group)
514{ 692{
515 struct net_bridge_mdb_entry *mp; 693 struct net_bridge_mdb_entry *mp;
516 struct net_bridge_port_group *p; 694 struct net_bridge_port_group *p;
@@ -518,9 +696,6 @@ static int br_multicast_add_group(struct net_bridge *br,
518 unsigned long now = jiffies; 696 unsigned long now = jiffies;
519 int err; 697 int err;
520 698
521 if (ipv4_is_local_multicast(group))
522 return 0;
523
524 spin_lock(&br->multicast_lock); 699 spin_lock(&br->multicast_lock);
525 if (!netif_running(br->dev) || 700 if (!netif_running(br->dev) ||
526 (port && port->state == BR_STATE_DISABLED)) 701 (port && port->state == BR_STATE_DISABLED))
@@ -549,7 +724,7 @@ static int br_multicast_add_group(struct net_bridge *br,
549 if (unlikely(!p)) 724 if (unlikely(!p))
550 goto err; 725 goto err;
551 726
552 p->addr = group; 727 p->addr = *group;
553 p->port = port; 728 p->port = port;
554 p->next = *pp; 729 p->next = *pp;
555 hlist_add_head(&p->mglist, &port->mglist); 730 hlist_add_head(&p->mglist, &port->mglist);
@@ -570,6 +745,38 @@ err:
570 return err; 745 return err;
571} 746}
572 747
748static int br_ip4_multicast_add_group(struct net_bridge *br,
749 struct net_bridge_port *port,
750 __be32 group)
751{
752 struct br_ip br_group;
753
754 if (ipv4_is_local_multicast(group))
755 return 0;
756
757 br_group.u.ip4 = group;
758 br_group.proto = htons(ETH_P_IP);
759
760 return br_multicast_add_group(br, port, &br_group);
761}
762
763#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
764static int br_ip6_multicast_add_group(struct net_bridge *br,
765 struct net_bridge_port *port,
766 const struct in6_addr *group)
767{
768 struct br_ip br_group;
769
770 if (ipv6_is_local_multicast(group))
771 return 0;
772
773 ipv6_addr_copy(&br_group.u.ip6, group);
 774 br_group.proto = htons(ETH_P_IPV6);
775
776 return br_multicast_add_group(br, port, &br_group);
777}
778#endif
779
573static void br_multicast_router_expired(unsigned long data) 780static void br_multicast_router_expired(unsigned long data)
574{ 781{
575 struct net_bridge_port *port = (void *)data; 782 struct net_bridge_port *port = (void *)data;
@@ -591,29 +798,45 @@ static void br_multicast_local_router_expired(unsigned long data)
591{ 798{
592} 799}
593 800
594static void br_multicast_send_query(struct net_bridge *br, 801static void __br_multicast_send_query(struct net_bridge *br,
595 struct net_bridge_port *port, u32 sent) 802 struct net_bridge_port *port,
803 struct br_ip *ip)
596{ 804{
597 unsigned long time;
598 struct sk_buff *skb; 805 struct sk_buff *skb;
599 806
600 if (!netif_running(br->dev) || br->multicast_disabled || 807 skb = br_multicast_alloc_query(br, ip);
601 timer_pending(&br->multicast_querier_timer))
602 return;
603
604 skb = br_multicast_alloc_query(br, 0);
605 if (!skb) 808 if (!skb)
606 goto timer; 809 return;
607 810
608 if (port) { 811 if (port) {
609 __skb_push(skb, sizeof(struct ethhdr)); 812 __skb_push(skb, sizeof(struct ethhdr));
610 skb->dev = port->dev; 813 skb->dev = port->dev;
611 NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, 814 NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
612 dev_queue_xmit); 815 dev_queue_xmit);
613 } else 816 } else
614 netif_rx(skb); 817 netif_rx(skb);
818}
819
820static void br_multicast_send_query(struct net_bridge *br,
821 struct net_bridge_port *port, u32 sent)
822{
823 unsigned long time;
824 struct br_ip br_group;
825
826 if (!netif_running(br->dev) || br->multicast_disabled ||
827 timer_pending(&br->multicast_querier_timer))
828 return;
829
830 memset(&br_group.u, 0, sizeof(br_group.u));
831
832 br_group.proto = htons(ETH_P_IP);
833 __br_multicast_send_query(br, port, &br_group);
834
835#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
836 br_group.proto = htons(ETH_P_IPV6);
837 __br_multicast_send_query(br, port, &br_group);
838#endif
615 839
616timer:
617 time = jiffies; 840 time = jiffies;
618 time += sent < br->multicast_startup_query_count ? 841 time += sent < br->multicast_startup_query_count ?
619 br->multicast_startup_query_interval : 842 br->multicast_startup_query_interval :
@@ -698,9 +921,9 @@ void br_multicast_disable_port(struct net_bridge_port *port)
698 spin_unlock(&br->multicast_lock); 921 spin_unlock(&br->multicast_lock);
699} 922}
700 923
701static int br_multicast_igmp3_report(struct net_bridge *br, 924static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
702 struct net_bridge_port *port, 925 struct net_bridge_port *port,
703 struct sk_buff *skb) 926 struct sk_buff *skb)
704{ 927{
705 struct igmpv3_report *ih; 928 struct igmpv3_report *ih;
706 struct igmpv3_grec *grec; 929 struct igmpv3_grec *grec;
@@ -745,7 +968,7 @@ static int br_multicast_igmp3_report(struct net_bridge *br,
745 continue; 968 continue;
746 } 969 }
747 970
748 err = br_multicast_add_group(br, port, group); 971 err = br_ip4_multicast_add_group(br, port, group);
749 if (err) 972 if (err)
750 break; 973 break;
751 } 974 }
@@ -753,24 +976,87 @@ static int br_multicast_igmp3_report(struct net_bridge *br,
753 return err; 976 return err;
754} 977}
755 978
979#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
980static int br_ip6_multicast_mld2_report(struct net_bridge *br,
981 struct net_bridge_port *port,
982 struct sk_buff *skb)
983{
984 struct icmp6hdr *icmp6h;
985 struct mld2_grec *grec;
986 int i;
987 int len;
988 int num;
989 int err = 0;
990
991 if (!pskb_may_pull(skb, sizeof(*icmp6h)))
992 return -EINVAL;
993
994 icmp6h = icmp6_hdr(skb);
995 num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);
996 len = sizeof(*icmp6h);
997
998 for (i = 0; i < num; i++) {
999 __be16 *nsrcs, _nsrcs;
1000
1001 nsrcs = skb_header_pointer(skb,
1002 len + offsetof(struct mld2_grec,
 1003 grec_nsrcs),
1004 sizeof(_nsrcs), &_nsrcs);
1005 if (!nsrcs)
1006 return -EINVAL;
1007
1008 if (!pskb_may_pull(skb,
1009 len + sizeof(*grec) +
 1010 sizeof(struct in6_addr) * ntohs(*nsrcs)))
1011 return -EINVAL;
1012
1013 grec = (struct mld2_grec *)(skb->data + len);
 1014 len += sizeof(*grec) + sizeof(struct in6_addr) * ntohs(*nsrcs);
1015
1016 /* We treat these as MLDv1 reports for now. */
1017 switch (grec->grec_type) {
1018 case MLD2_MODE_IS_INCLUDE:
1019 case MLD2_MODE_IS_EXCLUDE:
1020 case MLD2_CHANGE_TO_INCLUDE:
1021 case MLD2_CHANGE_TO_EXCLUDE:
1022 case MLD2_ALLOW_NEW_SOURCES:
1023 case MLD2_BLOCK_OLD_SOURCES:
1024 break;
1025
1026 default:
1027 continue;
1028 }
1029
1030 err = br_ip6_multicast_add_group(br, port, &grec->grec_mca);
 1031 if (err)
1032 break;
1033 }
1034
1035 return err;
1036}
1037#endif
1038
1039/*
 1040 * Add port to router_list
1041 * list is maintained ordered by pointer value
1042 * and locked by br->multicast_lock and RCU
1043 */
756static void br_multicast_add_router(struct net_bridge *br, 1044static void br_multicast_add_router(struct net_bridge *br,
757 struct net_bridge_port *port) 1045 struct net_bridge_port *port)
758{ 1046{
759 struct hlist_node *p; 1047 struct net_bridge_port *p;
760 struct hlist_node **h; 1048 struct hlist_node *n, *slot = NULL;
761 1049
762 for (h = &br->router_list.first; 1050 hlist_for_each_entry(p, n, &br->router_list, rlist) {
763 (p = *h) && 1051 if ((unsigned long) port >= (unsigned long) p)
764 (unsigned long)container_of(p, struct net_bridge_port, rlist) > 1052 break;
765 (unsigned long)port; 1053 slot = n;
766 h = &p->next) 1054 }
767 ; 1055
768 1056 if (slot)
769 port->rlist.pprev = h; 1057 hlist_add_after_rcu(slot, &port->rlist);
770 port->rlist.next = p; 1058 else
771 rcu_assign_pointer(*h, &port->rlist); 1059 hlist_add_head_rcu(&port->rlist, &br->router_list);
772 if (p)
773 p->pprev = &port->rlist.next;
774} 1060}
775 1061
776static void br_multicast_mark_router(struct net_bridge *br, 1062static void br_multicast_mark_router(struct net_bridge *br,
@@ -800,7 +1086,7 @@ timer:
800 1086
801static void br_multicast_query_received(struct net_bridge *br, 1087static void br_multicast_query_received(struct net_bridge *br,
802 struct net_bridge_port *port, 1088 struct net_bridge_port *port,
803 __be32 saddr) 1089 int saddr)
804{ 1090{
805 if (saddr) 1091 if (saddr)
806 mod_timer(&br->multicast_querier_timer, 1092 mod_timer(&br->multicast_querier_timer,
@@ -811,9 +1097,9 @@ static void br_multicast_query_received(struct net_bridge *br,
811 br_multicast_mark_router(br, port); 1097 br_multicast_mark_router(br, port);
812} 1098}
813 1099
814static int br_multicast_query(struct net_bridge *br, 1100static int br_ip4_multicast_query(struct net_bridge *br,
815 struct net_bridge_port *port, 1101 struct net_bridge_port *port,
816 struct sk_buff *skb) 1102 struct sk_buff *skb)
817{ 1103{
818 struct iphdr *iph = ip_hdr(skb); 1104 struct iphdr *iph = ip_hdr(skb);
819 struct igmphdr *ih = igmp_hdr(skb); 1105 struct igmphdr *ih = igmp_hdr(skb);
@@ -831,7 +1117,7 @@ static int br_multicast_query(struct net_bridge *br,
831 (port && port->state == BR_STATE_DISABLED)) 1117 (port && port->state == BR_STATE_DISABLED))
832 goto out; 1118 goto out;
833 1119
834 br_multicast_query_received(br, port, iph->saddr); 1120 br_multicast_query_received(br, port, !!iph->saddr);
835 1121
836 group = ih->group; 1122 group = ih->group;
837 1123
@@ -859,7 +1145,7 @@ static int br_multicast_query(struct net_bridge *br,
859 if (!group) 1145 if (!group)
860 goto out; 1146 goto out;
861 1147
862 mp = br_mdb_ip_get(br->mdb, group); 1148 mp = br_mdb_ip4_get(br->mdb, group);
863 if (!mp) 1149 if (!mp)
864 goto out; 1150 goto out;
865 1151
@@ -883,9 +1169,78 @@ out:
883 return err; 1169 return err;
884} 1170}
885 1171
1172#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1173static int br_ip6_multicast_query(struct net_bridge *br,
1174 struct net_bridge_port *port,
1175 struct sk_buff *skb)
1176{
1177 struct ipv6hdr *ip6h = ipv6_hdr(skb);
1178 struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb);
1179 struct net_bridge_mdb_entry *mp;
1180 struct mld2_query *mld2q;
1181 struct net_bridge_port_group *p, **pp;
1182 unsigned long max_delay;
1183 unsigned long now = jiffies;
1184 struct in6_addr *group = NULL;
1185 int err = 0;
1186
1187 spin_lock(&br->multicast_lock);
1188 if (!netif_running(br->dev) ||
1189 (port && port->state == BR_STATE_DISABLED))
1190 goto out;
1191
1192 br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr));
1193
1194 if (skb->len == sizeof(*mld)) {
1195 if (!pskb_may_pull(skb, sizeof(*mld))) {
1196 err = -EINVAL;
1197 goto out;
1198 }
1199 mld = (struct mld_msg *) icmp6_hdr(skb);
 1200 max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay));
1201 if (max_delay)
1202 group = &mld->mld_mca;
1203 } else if (skb->len >= sizeof(*mld2q)) {
1204 if (!pskb_may_pull(skb, sizeof(*mld2q))) {
1205 err = -EINVAL;
1206 goto out;
1207 }
1208 mld2q = (struct mld2_query *)icmp6_hdr(skb);
1209 if (!mld2q->mld2q_nsrcs)
1210 group = &mld2q->mld2q_mca;
 1211 max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1;
1212 }
1213
1214 if (!group)
1215 goto out;
1216
1217 mp = br_mdb_ip6_get(br->mdb, group);
1218 if (!mp)
1219 goto out;
1220
1221 max_delay *= br->multicast_last_member_count;
1222 if (!hlist_unhashed(&mp->mglist) &&
1223 (timer_pending(&mp->timer) ?
1224 time_after(mp->timer.expires, now + max_delay) :
1225 try_to_del_timer_sync(&mp->timer) >= 0))
1226 mod_timer(&mp->timer, now + max_delay);
1227
1228 for (pp = &mp->ports; (p = *pp); pp = &p->next) {
1229 if (timer_pending(&p->timer) ?
1230 time_after(p->timer.expires, now + max_delay) :
1231 try_to_del_timer_sync(&p->timer) >= 0)
 1232 mod_timer(&p->timer, now + max_delay);
1233 }
1234
1235out:
1236 spin_unlock(&br->multicast_lock);
1237 return err;
1238}
1239#endif
1240
886static void br_multicast_leave_group(struct net_bridge *br, 1241static void br_multicast_leave_group(struct net_bridge *br,
887 struct net_bridge_port *port, 1242 struct net_bridge_port *port,
888 __be32 group) 1243 struct br_ip *group)
889{ 1244{
890 struct net_bridge_mdb_htable *mdb; 1245 struct net_bridge_mdb_htable *mdb;
891 struct net_bridge_mdb_entry *mp; 1246 struct net_bridge_mdb_entry *mp;
@@ -893,9 +1248,6 @@ static void br_multicast_leave_group(struct net_bridge *br,
893 unsigned long now; 1248 unsigned long now;
894 unsigned long time; 1249 unsigned long time;
895 1250
896 if (ipv4_is_local_multicast(group))
897 return;
898
899 spin_lock(&br->multicast_lock); 1251 spin_lock(&br->multicast_lock);
900 if (!netif_running(br->dev) || 1252 if (!netif_running(br->dev) ||
901 (port && port->state == BR_STATE_DISABLED) || 1253 (port && port->state == BR_STATE_DISABLED) ||
@@ -946,6 +1298,38 @@ out:
946 spin_unlock(&br->multicast_lock); 1298 spin_unlock(&br->multicast_lock);
947} 1299}
948 1300
1301static void br_ip4_multicast_leave_group(struct net_bridge *br,
1302 struct net_bridge_port *port,
1303 __be32 group)
1304{
1305 struct br_ip br_group;
1306
1307 if (ipv4_is_local_multicast(group))
1308 return;
1309
1310 br_group.u.ip4 = group;
1311 br_group.proto = htons(ETH_P_IP);
1312
1313 br_multicast_leave_group(br, port, &br_group);
1314}
1315
1316#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1317static void br_ip6_multicast_leave_group(struct net_bridge *br,
1318 struct net_bridge_port *port,
1319 const struct in6_addr *group)
1320{
1321 struct br_ip br_group;
1322
1323 if (ipv6_is_local_multicast(group))
1324 return;
1325
1326 ipv6_addr_copy(&br_group.u.ip6, group);
1327 br_group.proto = htons(ETH_P_IPV6);
1328
1329 br_multicast_leave_group(br, port, &br_group);
1330}
1331#endif
1332
949static int br_multicast_ipv4_rcv(struct net_bridge *br, 1333static int br_multicast_ipv4_rcv(struct net_bridge *br,
950 struct net_bridge_port *port, 1334 struct net_bridge_port *port,
951 struct sk_buff *skb) 1335 struct sk_buff *skb)
@@ -1000,8 +1384,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
1000 if (!pskb_may_pull(skb2, sizeof(*ih))) 1384 if (!pskb_may_pull(skb2, sizeof(*ih)))
1001 goto out; 1385 goto out;
1002 1386
1003 iph = ip_hdr(skb2);
1004
1005 switch (skb2->ip_summed) { 1387 switch (skb2->ip_summed) {
1006 case CHECKSUM_COMPLETE: 1388 case CHECKSUM_COMPLETE:
1007 if (!csum_fold(skb2->csum)) 1389 if (!csum_fold(skb2->csum))
@@ -1022,16 +1404,16 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
1022 case IGMP_HOST_MEMBERSHIP_REPORT: 1404 case IGMP_HOST_MEMBERSHIP_REPORT:
1023 case IGMPV2_HOST_MEMBERSHIP_REPORT: 1405 case IGMPV2_HOST_MEMBERSHIP_REPORT:
1024 BR_INPUT_SKB_CB(skb2)->mrouters_only = 1; 1406 BR_INPUT_SKB_CB(skb2)->mrouters_only = 1;
1025 err = br_multicast_add_group(br, port, ih->group); 1407 err = br_ip4_multicast_add_group(br, port, ih->group);
1026 break; 1408 break;
1027 case IGMPV3_HOST_MEMBERSHIP_REPORT: 1409 case IGMPV3_HOST_MEMBERSHIP_REPORT:
1028 err = br_multicast_igmp3_report(br, port, skb2); 1410 err = br_ip4_multicast_igmp3_report(br, port, skb2);
1029 break; 1411 break;
1030 case IGMP_HOST_MEMBERSHIP_QUERY: 1412 case IGMP_HOST_MEMBERSHIP_QUERY:
1031 err = br_multicast_query(br, port, skb2); 1413 err = br_ip4_multicast_query(br, port, skb2);
1032 break; 1414 break;
1033 case IGMP_HOST_LEAVE_MESSAGE: 1415 case IGMP_HOST_LEAVE_MESSAGE:
1034 br_multicast_leave_group(br, port, ih->group); 1416 br_ip4_multicast_leave_group(br, port, ih->group);
1035 break; 1417 break;
1036 } 1418 }
1037 1419
@@ -1043,6 +1425,123 @@ err_out:
1043 return err; 1425 return err;
1044} 1426}
1045 1427
1428#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1429static int br_multicast_ipv6_rcv(struct net_bridge *br,
1430 struct net_bridge_port *port,
1431 struct sk_buff *skb)
1432{
1433 struct sk_buff *skb2 = skb;
1434 struct ipv6hdr *ip6h;
1435 struct icmp6hdr *icmp6h;
1436 u8 nexthdr;
1437 unsigned len;
1438 int offset;
1439 int err;
1440
1441 if (!pskb_may_pull(skb, sizeof(*ip6h)))
1442 return -EINVAL;
1443
1444 ip6h = ipv6_hdr(skb);
1445
1446 /*
1447 * We're interested in MLD messages only.
1448 * - Version is 6
 1449 * - MLD always carries a Router Alert hop-by-hop option
 1450 * - But we do not support jumbograms.
1451 */
1452 if (ip6h->version != 6 ||
1453 ip6h->nexthdr != IPPROTO_HOPOPTS ||
1454 ip6h->payload_len == 0)
1455 return 0;
1456
 1457 len = ntohs(ip6h->payload_len) + sizeof(*ip6h);
1458 if (skb->len < len)
1459 return -EINVAL;
1460
1461 nexthdr = ip6h->nexthdr;
1462 offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr);
1463
1464 if (offset < 0 || nexthdr != IPPROTO_ICMPV6)
1465 return 0;
1466
 1467 /* Okay, we found the ICMPv6 header */
1468 skb2 = skb_clone(skb, GFP_ATOMIC);
1469 if (!skb2)
1470 return -ENOMEM;
1471
1472 len -= offset - skb_network_offset(skb2);
1473
1474 __skb_pull(skb2, offset);
1475 skb_reset_transport_header(skb2);
1476
1477 err = -EINVAL;
1478 if (!pskb_may_pull(skb2, sizeof(*icmp6h)))
1479 goto out;
1480
1481 icmp6h = icmp6_hdr(skb2);
1482
1483 switch (icmp6h->icmp6_type) {
1484 case ICMPV6_MGM_QUERY:
1485 case ICMPV6_MGM_REPORT:
1486 case ICMPV6_MGM_REDUCTION:
1487 case ICMPV6_MLD2_REPORT:
1488 break;
1489 default:
1490 err = 0;
1491 goto out;
1492 }
1493
 1494 /* Okay, we found an MLD message. Check further. */
1495 if (skb2->len > len) {
1496 err = pskb_trim_rcsum(skb2, len);
1497 if (err)
1498 goto out;
1499 }
1500
1501 switch (skb2->ip_summed) {
1502 case CHECKSUM_COMPLETE:
1503 if (!csum_fold(skb2->csum))
1504 break;
1505 /*FALLTHROUGH*/
1506 case CHECKSUM_NONE:
1507 skb2->csum = 0;
1508 if (skb_checksum_complete(skb2))
1509 goto out;
1510 }
1511
1512 err = 0;
1513
1514 BR_INPUT_SKB_CB(skb)->igmp = 1;
1515
1516 switch (icmp6h->icmp6_type) {
1517 case ICMPV6_MGM_REPORT:
1518 {
1519 struct mld_msg *mld = (struct mld_msg *)icmp6h;
1520 BR_INPUT_SKB_CB(skb2)->mrouters_only = 1;
1521 err = br_ip6_multicast_add_group(br, port, &mld->mld_mca);
1522 break;
1523 }
1524 case ICMPV6_MLD2_REPORT:
1525 err = br_ip6_multicast_mld2_report(br, port, skb2);
1526 break;
1527 case ICMPV6_MGM_QUERY:
1528 err = br_ip6_multicast_query(br, port, skb2);
1529 break;
1530 case ICMPV6_MGM_REDUCTION:
1531 {
1532 struct mld_msg *mld = (struct mld_msg *)icmp6h;
1533 br_ip6_multicast_leave_group(br, port, &mld->mld_mca);
1534 }
1535 }
1536
1537out:
1538 __skb_push(skb2, offset);
1539 if (skb2 != skb)
1540 kfree_skb(skb2);
1541 return err;
1542}
1543#endif
1544
1046int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, 1545int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
1047 struct sk_buff *skb) 1546 struct sk_buff *skb)
1048{ 1547{
@@ -1055,6 +1554,10 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
1055 switch (skb->protocol) { 1554 switch (skb->protocol) {
1056 case htons(ETH_P_IP): 1555 case htons(ETH_P_IP):
1057 return br_multicast_ipv4_rcv(br, port, skb); 1556 return br_multicast_ipv4_rcv(br, port, skb);
1557#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1558 case htons(ETH_P_IPV6):
1559 return br_multicast_ipv6_rcv(br, port, skb);
1560#endif
1058 } 1561 }
1059 1562
1060 return 0; 1563 return 0;
@@ -1225,13 +1728,9 @@ unlock:
1225int br_multicast_toggle(struct net_bridge *br, unsigned long val) 1728int br_multicast_toggle(struct net_bridge *br, unsigned long val)
1226{ 1729{
1227 struct net_bridge_port *port; 1730 struct net_bridge_port *port;
1228 int err = -ENOENT; 1731 int err = 0;
1229 1732
1230 spin_lock(&br->multicast_lock); 1733 spin_lock(&br->multicast_lock);
1231 if (!netif_running(br->dev))
1232 goto unlock;
1233
1234 err = 0;
1235 if (br->multicast_disabled == !val) 1734 if (br->multicast_disabled == !val)
1236 goto unlock; 1735 goto unlock;
1237 1736
@@ -1239,6 +1738,9 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
1239 if (br->multicast_disabled) 1738 if (br->multicast_disabled)
1240 goto unlock; 1739 goto unlock;
1241 1740
1741 if (!netif_running(br->dev))
1742 goto unlock;
1743
1242 if (br->mdb) { 1744 if (br->mdb) {
1243 if (br->mdb->old) { 1745 if (br->mdb->old) {
1244 err = -EEXIST; 1746 err = -EEXIST;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 4c4977d12fd6..2c911c0759c2 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -3,15 +3,8 @@
3 * Linux ethernet bridge 3 * Linux ethernet bridge
4 * 4 *
5 * Authors: 5 * Authors:
6 * Lennert Buytenhek <buytenh@gnu.org> 6 * Lennert Buytenhek <buytenh@gnu.org>
7 * Bart De Schuymer (maintainer) <bdschuym@pandora.be> 7 * Bart De Schuymer <bdschuym@pandora.be>
8 *
9 * Changes:
10 * Apr 29 2003: physdev module support (bdschuym)
11 * Jun 19 2003: let arptables see bridged ARP traffic (bdschuym)
12 * Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge
13 * (bdschuym)
14 * Sep 01 2004: add IPv6 filtering (bdschuym)
15 * 8 *
16 * This program is free software; you can redistribute it and/or 9 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License 10 * modify it under the terms of the GNU General Public License
@@ -62,6 +55,9 @@ static int brnf_call_arptables __read_mostly = 1;
62static int brnf_filter_vlan_tagged __read_mostly = 0; 55static int brnf_filter_vlan_tagged __read_mostly = 0;
63static int brnf_filter_pppoe_tagged __read_mostly = 0; 56static int brnf_filter_pppoe_tagged __read_mostly = 0;
64#else 57#else
58#define brnf_call_iptables 1
59#define brnf_call_ip6tables 1
60#define brnf_call_arptables 1
65#define brnf_filter_vlan_tagged 0 61#define brnf_filter_vlan_tagged 0
66#define brnf_filter_pppoe_tagged 0 62#define brnf_filter_pppoe_tagged 0
67#endif 63#endif
@@ -124,26 +120,27 @@ void br_netfilter_rtable_init(struct net_bridge *br)
124{ 120{
125 struct rtable *rt = &br->fake_rtable; 121 struct rtable *rt = &br->fake_rtable;
126 122
127 atomic_set(&rt->u.dst.__refcnt, 1); 123 atomic_set(&rt->dst.__refcnt, 1);
128 rt->u.dst.dev = br->dev; 124 rt->dst.dev = br->dev;
129 rt->u.dst.path = &rt->u.dst; 125 rt->dst.path = &rt->dst;
130 rt->u.dst.metrics[RTAX_MTU - 1] = 1500; 126 rt->dst.metrics[RTAX_MTU - 1] = 1500;
131 rt->u.dst.flags = DST_NOXFRM; 127 rt->dst.flags = DST_NOXFRM;
132 rt->u.dst.ops = &fake_dst_ops; 128 rt->dst.ops = &fake_dst_ops;
133} 129}
134 130
135static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) 131static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
136{ 132{
137 struct net_bridge_port *port = rcu_dereference(dev->br_port); 133 if (!br_port_exists(dev))
138 134 return NULL;
139 return port ? &port->br->fake_rtable : NULL; 135 return &br_port_get_rcu(dev)->br->fake_rtable;
140} 136}
141 137
142static inline struct net_device *bridge_parent(const struct net_device *dev) 138static inline struct net_device *bridge_parent(const struct net_device *dev)
143{ 139{
144 struct net_bridge_port *port = rcu_dereference(dev->br_port); 140 if (!br_port_exists(dev))
141 return NULL;
145 142
146 return port ? port->br->dev : NULL; 143 return br_port_get_rcu(dev)->br->dev;
147} 144}
148 145
149static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) 146static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
@@ -204,15 +201,24 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
204 skb->nf_bridge->data, header_size); 201 skb->nf_bridge->data, header_size);
205} 202}
206 203
207/* 204static inline void nf_bridge_update_protocol(struct sk_buff *skb)
208 * When forwarding bridge frames, we save a copy of the original 205{
209 * header before processing. 206 if (skb->nf_bridge->mask & BRNF_8021Q)
207 skb->protocol = htons(ETH_P_8021Q);
208 else if (skb->nf_bridge->mask & BRNF_PPPoE)
209 skb->protocol = htons(ETH_P_PPP_SES);
210}
211
212/* Fill in the header for fragmented IP packets handled by
213 * the IPv4 connection tracking code.
210 */ 214 */
211int nf_bridge_copy_header(struct sk_buff *skb) 215int nf_bridge_copy_header(struct sk_buff *skb)
212{ 216{
213 int err; 217 int err;
214 int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); 218 unsigned int header_size;
215 219
220 nf_bridge_update_protocol(skb);
221 header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
216 err = skb_cow_head(skb, header_size); 222 err = skb_cow_head(skb, header_size);
217 if (err) 223 if (err)
218 return err; 224 return err;
@@ -242,31 +248,51 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
242 kfree_skb(skb); 248 kfree_skb(skb);
243 return 0; 249 return 0;
244 } 250 }
245 dst_hold(&rt->u.dst); 251 skb_dst_set_noref(skb, &rt->dst);
246 skb_dst_set(skb, &rt->u.dst);
247 252
248 skb->dev = nf_bridge->physindev; 253 skb->dev = nf_bridge->physindev;
254 nf_bridge_update_protocol(skb);
249 nf_bridge_push_encap_header(skb); 255 nf_bridge_push_encap_header(skb);
250 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, 256 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
251 br_handle_frame_finish, 1); 257 br_handle_frame_finish, 1);
252 258
253 return 0; 259 return 0;
254} 260}
255 261
256static void __br_dnat_complain(void) 262/* Obtain the correct destination MAC address, while preserving the original
263 * source MAC address. If we already know this address, we just copy it. If we
264 * don't, we use the neighbour framework to find out. In both cases, we make
265 * sure that br_handle_frame_finish() is called afterwards.
266 */
267static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
257{ 268{
258 static unsigned long last_complaint; 269 struct nf_bridge_info *nf_bridge = skb->nf_bridge;
270 struct dst_entry *dst;
259 271
260 if (jiffies - last_complaint >= 5 * HZ) { 272 skb->dev = bridge_parent(skb->dev);
261 printk(KERN_WARNING "Performing cross-bridge DNAT requires IP " 273 if (!skb->dev)
262 "forwarding to be enabled\n"); 274 goto free_skb;
263 last_complaint = jiffies; 275 dst = skb_dst(skb);
276 if (dst->hh) {
277 neigh_hh_bridge(dst->hh, skb);
278 skb->dev = nf_bridge->physindev;
279 return br_handle_frame_finish(skb);
280 } else if (dst->neighbour) {
281 /* the neighbour function below overwrites the complete
282 * MAC header, so we save the Ethernet source address and
283 * protocol number. */
284 skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
285 /* tell br_dev_xmit to continue with forwarding */
286 nf_bridge->mask |= BRNF_BRIDGED_DNAT;
287 return dst->neighbour->output(skb);
264 } 288 }
289free_skb:
290 kfree_skb(skb);
291 return 0;
265} 292}
266 293
267/* This requires some explaining. If DNAT has taken place, 294/* This requires some explaining. If DNAT has taken place,
268 * we will need to fix up the destination Ethernet address, 295 * we will need to fix up the destination Ethernet address.
269 * and this is a tricky process.
270 * 296 *
271 * There are two cases to consider: 297 * There are two cases to consider:
272 * 1. The packet was DNAT'ed to a device in the same bridge 298 * 1. The packet was DNAT'ed to a device in the same bridge
@@ -280,62 +306,29 @@ static void __br_dnat_complain(void)
280 * call ip_route_input() and to look at skb->dst->dev, which is 306 * call ip_route_input() and to look at skb->dst->dev, which is
281 * changed to the destination device if ip_route_input() succeeds. 307 * changed to the destination device if ip_route_input() succeeds.
282 * 308 *
283 * Let us first consider the case that ip_route_input() succeeds: 309 * Let's first consider the case that ip_route_input() succeeds:
284 *
285 * If skb->dst->dev equals the logical bridge device the packet
286 * came in on, we can consider this bridging. The packet is passed
287 * through the neighbour output function to build a new destination
288 * MAC address, which will make the packet enter br_nf_local_out()
289 * not much later. In that function it is assured that the iptables
290 * FORWARD chain is traversed for the packet.
291 * 310 *
311 * If the output device equals the logical bridge device the packet
312 * came in on, we can consider this bridging. The corresponding MAC
313 * address will be obtained in br_nf_pre_routing_finish_bridge.
292 * Otherwise, the packet is considered to be routed and we just 314 * Otherwise, the packet is considered to be routed and we just
293 * change the destination MAC address so that the packet will 315 * change the destination MAC address so that the packet will
294 * later be passed up to the IP stack to be routed. For a redirected 316 * later be passed up to the IP stack to be routed. For a redirected
295 * packet, ip_route_input() will give back the localhost as output device, 317 * packet, ip_route_input() will give back the localhost as output device,
296 * which differs from the bridge device. 318 * which differs from the bridge device.
297 * 319 *
298 * Let us now consider the case that ip_route_input() fails: 320 * Let's now consider the case that ip_route_input() fails:
299 * 321 *
300 * This can be because the destination address is martian, in which case 322 * This can be because the destination address is martian, in which case
301 * the packet will be dropped. 323 * the packet will be dropped.
302 * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input() 324 * If IP forwarding is disabled, ip_route_input() will fail, while
303 * will fail, while __ip_route_output_key() will return success. The source 325 * ip_route_output_key() can return success. The source
304 * address for __ip_route_output_key() is set to zero, so __ip_route_output_key 326 * address for ip_route_output_key() is set to zero, so ip_route_output_key()
305 * thinks we're handling a locally generated packet and won't care 327 * thinks we're handling a locally generated packet and won't care
306 * if IP forwarding is allowed. We send a warning message to the users's 328 * if IP forwarding is enabled. If the output device equals the logical bridge
307 * log telling her to put IP forwarding on. 329 * device, we proceed as if ip_route_input() succeeded. If it differs from the
308 * 330 * logical bridge port or if ip_route_output_key() fails we drop the packet.
309 * ip_route_input() will also fail if there is no route available. 331 */
310 * In that case we just drop the packet.
311 *
312 * --Lennert, 20020411
313 * --Bart, 20020416 (updated)
314 * --Bart, 20021007 (updated)
315 * --Bart, 20062711 (updated) */
316static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
317{
318 if (skb->pkt_type == PACKET_OTHERHOST) {
319 skb->pkt_type = PACKET_HOST;
320 skb->nf_bridge->mask |= BRNF_PKT_TYPE;
321 }
322 skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
323
324 skb->dev = bridge_parent(skb->dev);
325 if (skb->dev) {
326 struct dst_entry *dst = skb_dst(skb);
327
328 nf_bridge_pull_encap_header(skb);
329
330 if (dst->hh)
331 return neigh_hh_output(dst->hh, skb);
332 else if (dst->neighbour)
333 return dst->neighbour->output(skb);
334 }
335 kfree_skb(skb);
336 return 0;
337}
338
339static int br_nf_pre_routing_finish(struct sk_buff *skb) 332static int br_nf_pre_routing_finish(struct sk_buff *skb)
340{ 333{
341 struct net_device *dev = skb->dev; 334 struct net_device *dev = skb->dev;
@@ -379,11 +372,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
379 skb_dst_set(skb, (struct dst_entry *)rt); 372 skb_dst_set(skb, (struct dst_entry *)rt);
380 goto bridged_dnat; 373 goto bridged_dnat;
381 } 374 }
382 /* we are sure that forwarding is disabled, so printing
383 * this message is no problem. Note that the packet could
384 * still have a martian destination address, in which case
385 * the packet could be dropped even if forwarding were enabled */
386 __br_dnat_complain();
387 dst_release((struct dst_entry *)rt); 375 dst_release((struct dst_entry *)rt);
388 } 376 }
389free_skb: 377free_skb:
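The comment above compresses a fair amount of control flow. A condensed paraphrase of the decision br_nf_pre_routing_finish() implements around the bridged_dnat label; the helper and verdict names are illustrative, not the kernel's:

enum dnat_verdict { DNAT_BRIDGE, DNAT_ROUTE, DNAT_DROP };

static enum dnat_verdict dnat_decide(struct sk_buff *skb,
				     struct net_device *brdev,
				     const struct iphdr *iph)
{
	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->daddr } } };
	struct rtable *rt;

	if (!ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, brdev))
		/* Case 1 (same bridge) vs. case 2 (really routed). */
		return skb_dst(skb)->dev == brdev ? DNAT_BRIDGE : DNAT_ROUTE;

	/* No input route, e.g. forwarding disabled: retry as an output
	 * lookup with a zero source address, which skips the forwarding
	 * policy check, and bridge only if it points back at us. */
	if (ip_route_output_key(dev_net(brdev), &rt, &fl))
		return DNAT_DROP;

	if (rt->dst.dev == brdev) {
		dst_release(&rt->dst);	/* the real code keeps this route */
		return DNAT_BRIDGE;
	}

	dst_release(&rt->dst);
	return DNAT_DROP;
}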
@@ -392,12 +380,11 @@ free_skb:
392 } else { 380 } else {
393 if (skb_dst(skb)->dev == dev) { 381 if (skb_dst(skb)->dev == dev) {
394bridged_dnat: 382bridged_dnat:
395 /* Tell br_nf_local_out this is a
396 * bridged frame */
397 nf_bridge->mask |= BRNF_BRIDGED_DNAT;
398 skb->dev = nf_bridge->physindev; 383 skb->dev = nf_bridge->physindev;
384 nf_bridge_update_protocol(skb);
399 nf_bridge_push_encap_header(skb); 385 nf_bridge_push_encap_header(skb);
400 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, 386 NF_HOOK_THRESH(NFPROTO_BRIDGE,
387 NF_BR_PRE_ROUTING,
401 skb, skb->dev, NULL, 388 skb, skb->dev, NULL,
402 br_nf_pre_routing_finish_bridge, 389 br_nf_pre_routing_finish_bridge,
403 1); 390 1);
@@ -412,13 +399,13 @@ bridged_dnat:
412 kfree_skb(skb); 399 kfree_skb(skb);
413 return 0; 400 return 0;
414 } 401 }
415 dst_hold(&rt->u.dst); 402 skb_dst_set_noref(skb, &rt->dst);
416 skb_dst_set(skb, &rt->u.dst);
417 } 403 }
418 404
419 skb->dev = nf_bridge->physindev; 405 skb->dev = nf_bridge->physindev;
406 nf_bridge_update_protocol(skb);
420 nf_bridge_push_encap_header(skb); 407 nf_bridge_push_encap_header(skb);
421 NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, 408 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
422 br_handle_frame_finish, 1); 409 br_handle_frame_finish, 1);
423 410
424 return 0; 411 return 0;
@@ -437,6 +424,10 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
437 nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; 424 nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
438 nf_bridge->physindev = skb->dev; 425 nf_bridge->physindev = skb->dev;
439 skb->dev = bridge_parent(skb->dev); 426 skb->dev = bridge_parent(skb->dev);
427 if (skb->protocol == htons(ETH_P_8021Q))
428 nf_bridge->mask |= BRNF_8021Q;
429 else if (skb->protocol == htons(ETH_P_PPP_SES))
430 nf_bridge->mask |= BRNF_PPPoE;
440 431
441 return skb->dev; 432 return skb->dev;
442} 433}
@@ -535,7 +526,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
535 if (!setup_pre_routing(skb)) 526 if (!setup_pre_routing(skb))
536 return NF_DROP; 527 return NF_DROP;
537 528
538 NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, 529 skb->protocol = htons(ETH_P_IPV6);
530 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
539 br_nf_pre_routing_finish_ipv6); 531 br_nf_pre_routing_finish_ipv6);
540 532
541 return NF_STOLEN; 533 return NF_STOLEN;
@@ -555,25 +547,30 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
555 const struct net_device *out, 547 const struct net_device *out,
556 int (*okfn)(struct sk_buff *)) 548 int (*okfn)(struct sk_buff *))
557{ 549{
550 struct net_bridge_port *p;
551 struct net_bridge *br;
558 struct iphdr *iph; 552 struct iphdr *iph;
559 __u32 len = nf_bridge_encap_header_len(skb); 553 __u32 len = nf_bridge_encap_header_len(skb);
560 554
561 if (unlikely(!pskb_may_pull(skb, len))) 555 if (unlikely(!pskb_may_pull(skb, len)))
562 goto out; 556 goto out;
563 557
558 p = br_port_get_rcu(in);
559 if (p == NULL)
560 goto out;
561 br = p->br;
562
564 if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || 563 if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
565 IS_PPPOE_IPV6(skb)) { 564 IS_PPPOE_IPV6(skb)) {
566#ifdef CONFIG_SYSCTL 565 if (!brnf_call_ip6tables && !br->nf_call_ip6tables)
567 if (!brnf_call_ip6tables)
568 return NF_ACCEPT; 566 return NF_ACCEPT;
569#endif 567
570 nf_bridge_pull_encap_header_rcsum(skb); 568 nf_bridge_pull_encap_header_rcsum(skb);
571 return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); 569 return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
572 } 570 }
573#ifdef CONFIG_SYSCTL 571
574 if (!brnf_call_iptables) 572 if (!brnf_call_iptables && !br->nf_call_iptables)
575 return NF_ACCEPT; 573 return NF_ACCEPT;
576#endif
577 574
 	if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
 	    !IS_PPPOE_IP(skb))
@@ -601,14 +598,18 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 
 	pskb_trim_rcsum(skb, len);
 
+	/* BUG: Should really parse the IP options here. */
+	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+
 	nf_bridge_put(skb->nf_bridge);
 	if (!nf_bridge_alloc(skb))
 		return NF_DROP;
 	if (!setup_pre_routing(skb))
 		return NF_DROP;
 	store_orig_dstaddr(skb);
+	skb->protocol = htons(ETH_P_IP);
 
-	NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		br_nf_pre_routing_finish);
 
 	return NF_STOLEN;
@@ -652,11 +653,13 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 			skb->pkt_type = PACKET_OTHERHOST;
 			nf_bridge->mask ^= BRNF_PKT_TYPE;
 		}
+		nf_bridge_update_protocol(skb);
 	} else {
 		in = *((struct net_device **)(skb->cb));
 	}
 	nf_bridge_push_encap_header(skb);
-	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
+
+	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in,
 		       skb->dev, br_forward_finish, 1);
 	return 0;
 }
@@ -707,6 +710,10 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 	/* The physdev module checks on this */
 	nf_bridge->mask |= BRNF_BRIDGED;
 	nf_bridge->physoutdev = skb->dev;
+	if (pf == PF_INET)
+		skb->protocol = htons(ETH_P_IP);
+	else
+		skb->protocol = htons(ETH_P_IPV6);
 
 	NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent,
 		br_nf_forward_finish);
@@ -719,12 +726,17 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
+	struct net_bridge_port *p;
+	struct net_bridge *br;
 	struct net_device **d = (struct net_device **)(skb->cb);
 
-#ifdef CONFIG_SYSCTL
-	if (!brnf_call_arptables)
+	p = br_port_get_rcu(out);
+	if (p == NULL)
+		return NF_ACCEPT;
+	br = p->br;
+
+	if (!brnf_call_arptables && !br->nf_call_arptables)
 		return NF_ACCEPT;
-#endif
 
 	if (skb->protocol != htons(ETH_P_ARP)) {
 		if (!IS_VLAN_ARP(skb))
@@ -744,60 +756,11 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-/* PF_BRIDGE/LOCAL_OUT ***********************************************
- *
- * This function sees both locally originated IP packets and forwarded
- * IP packets (in both cases the destination device is a bridge
- * device). It also sees bridged-and-DNAT'ed packets.
- *
- * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
- * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
- * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
- * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
- * will be executed.
- */
-static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
-{
-	struct net_device *realindev;
-	struct nf_bridge_info *nf_bridge;
-
-	if (!skb->nf_bridge)
-		return NF_ACCEPT;
-
-	/* Need exclusive nf_bridge_info since we might have multiple
-	 * different physoutdevs. */
-	if (!nf_bridge_unshare(skb))
-		return NF_DROP;
-
-	nf_bridge = skb->nf_bridge;
-	if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
-		return NF_ACCEPT;
-
-	/* Bridged, take PF_BRIDGE/FORWARD.
-	 * (see big note in front of br_nf_pre_routing_finish) */
-	nf_bridge->physoutdev = skb->dev;
-	realindev = nf_bridge->physindev;
-
-	if (nf_bridge->mask & BRNF_PKT_TYPE) {
-		skb->pkt_type = PACKET_OTHERHOST;
-		nf_bridge->mask ^= BRNF_PKT_TYPE;
-	}
-	nf_bridge_push_encap_header(skb);
-
-	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
-		br_forward_finish);
-	return NF_STOLEN;
-}
-
 #if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
-	if (skb->nfct != NULL &&
-	    (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) &&
-	    skb->len > skb->dev->mtu &&
+	if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
+	    skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
 	    !skb_is_gso(skb))
 		return ip_fragment(skb, br_dev_queue_push_xmit);
 	else
@@ -820,21 +783,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 	struct net_device *realoutdev = bridge_parent(skb->dev);
 	u_int8_t pf;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	/* Be very paranoid. This probably won't happen anymore, but let's
-	 * keep the check just to be sure... */
-	if (skb_mac_header(skb) < skb->head ||
-	    skb_mac_header(skb) + ETH_HLEN > skb->data) {
-		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
-		       "bad mac.raw pointer.\n");
-		goto print_error;
-	}
-#endif
-
-	if (!nf_bridge)
-		return NF_ACCEPT;
-
-	if (!(nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT)))
+	if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED))
 		return NF_ACCEPT;
 
 	if (!realoutdev)
@@ -849,13 +798,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 	else
 		return NF_ACCEPT;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	if (skb_dst(skb) == NULL) {
-		printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n");
-		goto print_error;
-	}
-#endif
-
 	/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
 	 * about the value of skb->pkt_type. */
 	if (skb->pkt_type == PACKET_OTHERHOST) {
@@ -865,24 +807,15 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 
 	nf_bridge_pull_encap_header(skb);
 	nf_bridge_save_header(skb);
+	if (pf == PF_INET)
+		skb->protocol = htons(ETH_P_IP);
+	else
+		skb->protocol = htons(ETH_P_IPV6);
 
 	NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev,
 		br_nf_dev_queue_xmit);
 
 	return NF_STOLEN;
-
-#ifdef CONFIG_NETFILTER_DEBUG
-print_error:
-	if (skb->dev != NULL) {
-		printk("[%s]", skb->dev->name);
-		if (realoutdev)
-			printk("[%s]", realoutdev->name);
-	}
-	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
-	       skb->data);
-	dump_stack();
-	return NF_ACCEPT;
-#endif
 }
 
 /* IP/SABOTAGE *****************************************************/
@@ -901,10 +834,8 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
-/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
- * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
- * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
- * ip_refrag() can return NF_STOLEN. */
+/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
+ * br_dev_queue_push_xmit is called afterwards */
 static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 	{
 		.hook = br_nf_pre_routing,
@@ -935,13 +866,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 		.priority = NF_BR_PRI_BRNF,
 	},
 	{
-		.hook = br_nf_local_out,
-		.owner = THIS_MODULE,
-		.pf = PF_BRIDGE,
-		.hooknum = NF_BR_LOCAL_OUT,
-		.priority = NF_BR_PRI_FIRST,
-	},
-	{
 		.hook = br_nf_post_routing,
 		.owner = THIS_MODULE,
 		.pf = PF_BRIDGE,
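
Note: in br_nf_forward_arp() above, the compile-time CONFIG_SYSCTL guard becomes a run-time gate that consults both the global brnf_call_arptables switch and the new per-bridge nf_call_arptables flag, so traffic is handed to arptables when either toggle is on. A stand-alone sketch of that gate, with illustrative names (not code from the patch):

    #include <stdbool.h>

    /* Stand-ins for the global "call arptables" switch and the
     * per-bridge toggle added by this series. */
    static bool brnf_call_arptables = true;

    struct bridge {
            bool nf_call_arptables;
    };

    /* The hook accepts the frame early (skips arptables) only when
     * both switches are off. */
    static bool must_call_arptables(const struct bridge *br)
    {
            return brnf_call_arptables || br->nf_call_arptables;
    }
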
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index aa56ac2c8829..4a6a378c84e3 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -42,8 +42,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
 	struct nlmsghdr *nlh;
 	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
 
-	pr_debug("br_fill_info event %d port %s master %s\n",
-		 event, dev->name, br->dev->name);
+	br_debug(br, "br_fill_info event %d port %s master %s\n",
+		 event, dev->name, br->dev->name);
 
 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
 	if (nlh == NULL)
@@ -87,7 +87,9 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
-	pr_debug("bridge notify event=%d\n", event);
+	br_debug(port->br, "port %u(%s) event %d\n",
+		 (unsigned)port->port_no, port->dev->name, event);
+
 	skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC);
 	if (skb == NULL)
 		goto errout;
@@ -118,10 +120,11 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	idx = 0;
 	for_each_netdev(net, dev) {
 		/* not a bridge port */
-		if (dev->br_port == NULL || idx < cb->args[0])
+		if (!br_port_exists(dev) || idx < cb->args[0])
 			goto skip;
 
-		if (br_fill_ifinfo(skb, dev->br_port, NETLINK_CB(cb->skb).pid,
+		if (br_fill_ifinfo(skb, br_port_get(dev),
+				   NETLINK_CB(cb->skb).pid,
 				   cb->nlh->nlmsg_seq, RTM_NEWLINK,
 				   NLM_F_MULTI) < 0)
 			break;
@@ -166,9 +169,9 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	if (!dev)
 		return -ENODEV;
 
-	p = dev->br_port;
-	if (!p)
+	if (!br_port_exists(dev))
 		return -EINVAL;
+	p = br_port_get(dev);
 
 	/* if kernel STP is running, don't allow changes */
 	if (p->br->stp_enabled == BR_KERNEL_STP)
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 763a3ec292e5..404d4e14c6a7 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -32,13 +32,15 @@ struct notifier_block br_device_notifier = {
 static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
-	struct net_bridge_port *p = dev->br_port;
+	struct net_bridge_port *p = br_port_get(dev);
 	struct net_bridge *br;
+	int err;
 
 	/* not a port of a bridge */
-	if (p == NULL)
+	if (!br_port_exists(dev))
 		return NOTIFY_DONE;
 
+	p = br_port_get(dev);
 	br = p->br;
 
 	switch (event) {
@@ -82,6 +84,16 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	case NETDEV_UNREGISTER:
 		br_del_if(br, dev);
 		break;
+
+	case NETDEV_CHANGENAME:
+		err = br_sysfs_renameif(p);
+		if (err)
+			return notifier_from_errno(err);
+		break;
+
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid underlaying device to change its type. */
+		return NOTIFY_BAD;
 	}
 
 	/* Events that may cause spanning tree to refresh */
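
Note: notifier_from_errno() is the stock way to carry an errno out of a notifier callback; a paired notifier_to_errno() on the caller's side recovers it, while NOTIFY_BAD (used for NETDEV_PRE_TYPE_CHANGE above) vetoes the event outright. A minimal sketch of the pattern, assuming only <linux/notifier.h>:

    #include <linux/notifier.h>

    /* Hypothetical callback body: wrap any -Exxx failure so the
     * notifier core can hand it back to the caller; success falls
     * through to NOTIFY_DONE. */
    static int example_rename_handler(int err)
    {
            if (err)
                    return notifier_from_errno(err);
            return NOTIFY_DONE;
    }
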
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 846d7d1e2075..75c90edaf7db 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -15,6 +15,8 @@
 
 #include <linux/netdevice.h>
 #include <linux/if_bridge.h>
+#include <linux/netpoll.h>
+#include <linux/u64_stats_sync.h>
 #include <net/route.h>
 
 #define BR_HASH_BITS 8
@@ -45,6 +47,17 @@ struct mac_addr
 	unsigned char addr[6];
 };
 
+struct br_ip
+{
+	union {
+		__be32	ip4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		struct in6_addr ip6;
+#endif
+	} u;
+	__be16		proto;
+};
+
 struct net_bridge_fdb_entry
 {
 	struct hlist_node		hlist;
@@ -64,7 +77,7 @@ struct net_bridge_port_group {
 	struct rcu_head			rcu;
 	struct timer_list		timer;
 	struct timer_list		query_timer;
-	__be32				addr;
+	struct br_ip			addr;
 	u32				queries_sent;
 };
 
@@ -77,7 +90,7 @@ struct net_bridge_mdb_entry
 	struct rcu_head			rcu;
 	struct timer_list		timer;
 	struct timer_list		query_timer;
-	__be32				addr;
+	struct br_ip			addr;
 	u32				queries_sent;
 };
 
@@ -128,6 +141,27 @@ struct net_bridge_port
 	struct hlist_head		mglist;
 	struct hlist_node		rlist;
 #endif
+
+#ifdef CONFIG_SYSFS
+	char				sysfs_name[IFNAMSIZ];
+#endif
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct netpoll			*np;
+#endif
+};
+
+#define br_port_get_rcu(dev) \
+	((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data))
+#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data)
+#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
+
+struct br_cpu_netstats {
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	syncp;
 };
 
 struct net_bridge
@@ -135,11 +169,16 @@ struct net_bridge
 	spinlock_t			lock;
 	struct list_head		port_list;
 	struct net_device		*dev;
+
+	struct br_cpu_netstats __percpu *stats;
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
 	unsigned long			feature_mask;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct rtable			fake_rtable;
+	bool				nf_call_iptables;
+	bool				nf_call_ip6tables;
+	bool				nf_call_arptables;
 #endif
 	unsigned long			flags;
 #define BR_SET_MAC_ADDR		0x00000001
@@ -220,6 +259,21 @@ struct br_input_skb_cb {
 # define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb)	(0)
 #endif
 
+#define br_printk(level, br, format, args...)	\
+	printk(level "%s: " format, (br)->dev->name, ##args)
+
+#define br_err(__br, format, args...)			\
+	br_printk(KERN_ERR, __br, format, ##args)
+#define br_warn(__br, format, args...)			\
+	br_printk(KERN_WARNING, __br, format, ##args)
+#define br_notice(__br, format, args...)		\
+	br_printk(KERN_NOTICE, __br, format, ##args)
+#define br_info(__br, format, args...)			\
+	br_printk(KERN_INFO, __br, format, ##args)
+
+#define br_debug(br, format, args...)			\
+	pr_debug("%s: " format, (br)->dev->name, ##args)
+
 extern struct notifier_block br_device_notifier;
 extern const u8 br_group_address[ETH_ALEN];
 
@@ -233,6 +287,43 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
 extern void br_dev_setup(struct net_device *dev);
 extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
 			       struct net_device *dev);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
+{
+	return br->dev->npinfo;
+}
+
+static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
+				       struct sk_buff *skb)
+{
+	struct netpoll *np = p->np;
+
+	if (np)
+		netpoll_send_skb(np, skb);
+}
+
+extern int br_netpoll_enable(struct net_bridge_port *p);
+extern void br_netpoll_disable(struct net_bridge_port *p);
+#else
+static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
+{
+	return NULL;
+}
+
+static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
+				       struct sk_buff *skb)
+{
+}
+
+static inline int br_netpoll_enable(struct net_bridge_port *p)
+{
+	return 0;
+}
+
+static inline void br_netpoll_disable(struct net_bridge_port *p)
+{
+}
+#endif
 
 /* br_fdb.c */
 extern int br_fdb_init(void);
@@ -280,8 +371,7 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
-				       struct sk_buff *skb);
+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
@@ -433,6 +523,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
 /* br_sysfs_if.c */
 extern const struct sysfs_ops brport_sysfs_ops;
 extern int br_sysfs_addif(struct net_bridge_port *p);
+extern int br_sysfs_renameif(struct net_bridge_port *p);
 
 /* br_sysfs_br.c */
 extern int br_sysfs_addbr(struct net_device *dev);
@@ -441,6 +532,7 @@ extern void br_sysfs_delbr(struct net_device *dev);
 #else
 
 #define br_sysfs_addif(p)	(0)
+#define br_sysfs_renameif(p)	(0)
 #define br_sysfs_addbr(dev)	(0)
 #define br_sysfs_delbr(dev)	do { } while(0)
 #endif /* CONFIG_SYSFS */
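
Note: the new br_cpu_netstats pairs four u64 counters with a u64_stats_sync so per-CPU counters can be updated locklessly and still be read without tearing on 32-bit SMP (on 64-bit the begin/end pair compiles away). A sketch of the writer side, assuming a structure like the one declared above; the real fast path would operate on this_cpu_ptr(br->stats):

    /* Per-packet writer: bump the local CPU's counters inside the
     * u64_stats section so readers can retry on a torn 64-bit read. */
    static void brstats_note_rx(struct br_cpu_netstats *brstats,
                                unsigned int len)
    {
            u64_stats_update_begin(&brstats->syncp);
            brstats->rx_packets++;
            brstats->rx_bytes += len;
            u64_stats_update_end(&brstats->syncp);
    }
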
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index edcf14b560f6..57186d84d2bd 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -31,10 +31,9 @@ static const char *const br_port_state_names[] = {
 
 void br_log_state(const struct net_bridge_port *p)
 {
-	pr_info("%s: port %d(%s) entering %s state\n",
-		p->br->dev->name, p->port_no, p->dev->name,
+	br_info(p->br, "port %u(%s) entering %s state\n",
+		(unsigned) p->port_no, p->dev->name,
 		br_port_state_names[p->state]);
-
 }
 
 /* called under bridge lock */
@@ -300,7 +299,7 @@ void br_topology_change_detection(struct net_bridge *br)
 	if (br->stp_enabled != BR_KERNEL_STP)
 		return;
 
-	pr_info("%s: topology change detected, %s\n", br->dev->name,
+	br_info(br, "topology change detected, %s\n",
 		isroot ? "propagating" : "sending tcn bpdu");
 
 	if (isroot) {
@@ -469,8 +468,8 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b
 void br_received_tcn_bpdu(struct net_bridge_port *p)
 {
 	if (br_is_designated_port(p)) {
-		pr_info("%s: received tcn bpdu on port %i(%s)\n",
-			p->br->dev->name, p->port_no, p->dev->name);
+		br_info(p->br, "port %u(%s) received tcn bpdu\n",
+			(unsigned) p->port_no, p->dev->name);
 
 		br_topology_change_detection(p->br);
 		br_topology_change_acknowledge(p);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index d66cce11f3bf..35cf27087b56 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -50,7 +50,7 @@ static void br_send_bpdu(struct net_bridge_port *p,
 
 	llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr);
 
-	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+	NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 		dev_queue_xmit);
 }
 
@@ -131,18 +131,19 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
 /*
  * Called from llc.
  *
- * NO locks, but rcu_read_lock (preempt_disabled)
+ * NO locks, but rcu_read_lock
  */
 void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
 		struct net_device *dev)
 {
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
-	struct net_bridge_port *p = rcu_dereference(dev->br_port);
+	struct net_bridge_port *p;
 	struct net_bridge *br;
 	const unsigned char *buf;
 
-	if (!p)
+	if (!br_port_exists(dev))
 		goto err;
+	p = br_port_get_rcu(dev);
 
 	if (!pskb_may_pull(skb, 4))
 		goto err;
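
Note: br_stp_rcv() now follows the accessor convention introduced in br_private.h: the port pointer lives in dev->rx_handler_data, its presence is flagged by IFF_BRIDGE_PORT, and packet-path readers must hold rcu_read_lock() (which the llc input path already provides here). A condensed sketch of the lookup, assuming the macros from br_private.h:

    /* NULL when the device is not a bridge port; otherwise the
     * RCU-protected port pointer. Caller must be in an RCU read-side
     * critical section. */
    static struct net_bridge_port *port_of(struct net_device *dev)
    {
            if (!br_port_exists(dev))       /* tests IFF_BRIDGE_PORT */
                    return NULL;
            return br_port_get_rcu(dev);    /* rcu_dereference() inside */
    }
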
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d527119e9f54..1d8826914cbf 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -85,17 +85,16 @@ void br_stp_enable_port(struct net_bridge_port *p)
 {
 	br_init_port(p);
 	br_port_state_selection(p->br);
+	br_log_state(p);
 }
 
 /* called under bridge lock */
 void br_stp_disable_port(struct net_bridge_port *p)
 {
-	struct net_bridge *br;
+	struct net_bridge *br = p->br;
 	int wasroot;
 
-	br = p->br;
-	printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
-	       br->dev->name, p->port_no, p->dev->name, "disabled");
+	br_log_state(p);
 
 	wasroot = br_is_root_bridge(br);
 	br_become_designated_port(p);
@@ -127,11 +126,10 @@ static void br_stp_start(struct net_bridge *br)
 	r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
 	if (r == 0) {
 		br->stp_enabled = BR_USER_STP;
-		printk(KERN_INFO "%s: userspace STP started\n", br->dev->name);
+		br_debug(br, "userspace STP started\n");
 	} else {
 		br->stp_enabled = BR_KERNEL_STP;
-		printk(KERN_INFO "%s: starting userspace STP failed, "
-				"starting kernel STP\n", br->dev->name);
+		br_debug(br, "using kernel STP\n");
 
 		/* To start timers on any ports left in blocking */
 		spin_lock_bh(&br->lock);
@@ -148,9 +146,7 @@ static void br_stp_stop(struct net_bridge *br)
 
 	if (br->stp_enabled == BR_USER_STP) {
 		r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
-		printk(KERN_INFO "%s: userspace STP stopped, return code %d\n",
-			br->dev->name, r);
-
+		br_info(br, "userspace STP stopped, return code %d\n", r);
 
 		/* To start timers on any ports left in blocking */
 		spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 772a140bfdf0..7b22456023c5 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -35,7 +35,7 @@ static void br_hello_timer_expired(unsigned long arg)
 {
 	struct net_bridge *br = (struct net_bridge *)arg;
 
-	pr_debug("%s: hello timer expired\n", br->dev->name);
+	br_debug(br, "hello timer expired\n");
 	spin_lock(&br->lock);
 	if (br->dev->flags & IFF_UP) {
 		br_config_bpdu_generation(br);
@@ -55,13 +55,9 @@ static void br_message_age_timer_expired(unsigned long arg)
 	if (p->state == BR_STATE_DISABLED)
 		return;
 
-
-	pr_info("%s: neighbor %.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x lost on port %d(%s)\n",
-		br->dev->name,
-		id->prio[0], id->prio[1],
-		id->addr[0], id->addr[1], id->addr[2],
-		id->addr[3], id->addr[4], id->addr[5],
-		p->port_no, p->dev->name);
+	br_info(br, "port %u(%s) neighbor %.2x%.2x.%pM lost\n",
+		(unsigned) p->port_no, p->dev->name,
+		id->prio[0], id->prio[1], &id->addr);
 
 	/*
 	 * According to the spec, the message age timer cannot be
@@ -87,8 +83,8 @@ static void br_forward_delay_timer_expired(unsigned long arg)
 	struct net_bridge_port *p = (struct net_bridge_port *) arg;
 	struct net_bridge *br = p->br;
 
-	pr_debug("%s: %d(%s) forward delay timer\n",
-		 br->dev->name, p->port_no, p->dev->name);
+	br_debug(br, "port %u(%s) forward delay timer\n",
+		 (unsigned) p->port_no, p->dev->name);
 	spin_lock(&br->lock);
 	if (p->state == BR_STATE_LISTENING) {
 		p->state = BR_STATE_LEARNING;
@@ -107,7 +103,7 @@ static void br_tcn_timer_expired(unsigned long arg)
 {
 	struct net_bridge *br = (struct net_bridge *) arg;
 
-	pr_debug("%s: tcn timer expired\n", br->dev->name);
+	br_debug(br, "tcn timer expired\n");
 	spin_lock(&br->lock);
 	if (br->dev->flags & IFF_UP) {
 		br_transmit_tcn(br);
@@ -121,7 +117,7 @@ static void br_topology_change_timer_expired(unsigned long arg)
 {
 	struct net_bridge *br = (struct net_bridge *) arg;
 
-	pr_debug("%s: topo change timer expired\n", br->dev->name);
+	br_debug(br, "topo change timer expired\n");
 	spin_lock(&br->lock);
 	br->topology_change_detected = 0;
 	br->topology_change = 0;
@@ -132,8 +128,8 @@ static void br_hold_timer_expired(unsigned long arg)
 {
 	struct net_bridge_port *p = (struct net_bridge_port *) arg;
 
-	pr_debug("%s: %d(%s) hold timer expired\n",
-		 p->br->dev->name, p->port_no, p->dev->name);
+	br_debug(p->br, "port %u(%s) hold timer expired\n",
+		 (unsigned) p->port_no, p->dev->name);
 
 	spin_lock(&p->br->lock);
 	if (p->config_pending)
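
Note: besides switching to br_info()/br_debug(), the message-age handler above replaces six %.2x conversions with the kernel's %pM printk extension, which prints a 6-byte MAC address from a pointer. A sketch of the equivalence (illustrative function, not from the patch):

    /* Both lines print the same aa:bb:cc:dd:ee:ff string; %pM takes
     * the address by pointer instead of six separate byte arguments. */
    static void show_mac(const unsigned char mac[6])
    {
            printk(KERN_INFO "mac %pM\n", mac);
            printk(KERN_INFO "mac %.2x:%.2x:%.2x:%.2x:%.2x:%.2x\n",
                   mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
    }
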
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index dd321e39e621..5c1e5559ebba 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -611,6 +611,73 @@ static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR,
 		   show_multicast_startup_query_interval,
 		   store_multicast_startup_query_interval);
 #endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+static ssize_t show_nf_call_iptables(
+	struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%u\n", br->nf_call_iptables);
+}
+
+static int set_nf_call_iptables(struct net_bridge *br, unsigned long val)
+{
+	br->nf_call_iptables = val ? true : false;
+	return 0;
+}
+
+static ssize_t store_nf_call_iptables(
+	struct device *d, struct device_attribute *attr, const char *buf,
+	size_t len)
+{
+	return store_bridge_parm(d, buf, len, set_nf_call_iptables);
+}
+static DEVICE_ATTR(nf_call_iptables, S_IRUGO | S_IWUSR,
+		   show_nf_call_iptables, store_nf_call_iptables);
+
+static ssize_t show_nf_call_ip6tables(
+	struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%u\n", br->nf_call_ip6tables);
+}
+
+static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val)
+{
+	br->nf_call_ip6tables = val ? true : false;
+	return 0;
+}
+
+static ssize_t store_nf_call_ip6tables(
+	struct device *d, struct device_attribute *attr, const char *buf,
+	size_t len)
+{
+	return store_bridge_parm(d, buf, len, set_nf_call_ip6tables);
+}
+static DEVICE_ATTR(nf_call_ip6tables, S_IRUGO | S_IWUSR,
+		   show_nf_call_ip6tables, store_nf_call_ip6tables);
+
+static ssize_t show_nf_call_arptables(
+	struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%u\n", br->nf_call_arptables);
+}
+
+static int set_nf_call_arptables(struct net_bridge *br, unsigned long val)
+{
+	br->nf_call_arptables = val ? true : false;
+	return 0;
+}
+
+static ssize_t store_nf_call_arptables(
+	struct device *d, struct device_attribute *attr, const char *buf,
+	size_t len)
+{
+	return store_bridge_parm(d, buf, len, set_nf_call_arptables);
+}
+static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR,
+		   show_nf_call_arptables, store_nf_call_arptables);
+#endif
 
 static struct attribute *bridge_attrs[] = {
 	&dev_attr_forward_delay.attr,
@@ -645,6 +712,11 @@ static struct attribute *bridge_attrs[] = {
 	&dev_attr_multicast_query_response_interval.attr,
 	&dev_attr_multicast_startup_query_interval.attr,
 #endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	&dev_attr_nf_call_iptables.attr,
+	&dev_attr_nf_call_ip6tables.attr,
+	&dev_attr_nf_call_arptables.attr,
+#endif
 	NULL
 };
 
@@ -659,7 +731,7 @@ static struct attribute_group bridge_group = {
  *
  * Returns the number of bytes read.
  */
-static ssize_t brforward_read(struct kobject *kobj,
+static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t off, size_t count)
 {
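
Note: all three nf_call_* attributes reuse this file's existing store_bridge_parm() convention: the store callback parses the user string and forwards the value to a small setter. Once registered, each toggle appears as /sys/class/net/<bridge>/bridge/nf_call_iptables (and the ip6tables/arptables variants). A condensed sketch of the show/set pair, with to_bridge() and store_bridge_parm() assumed from this file:

    /* show: print the current flag. */
    static ssize_t show_flag(struct device *d,
                             struct device_attribute *attr, char *buf)
    {
            struct net_bridge *br = to_bridge(d);
            return sprintf(buf, "%u\n", br->nf_call_iptables);
    }

    /* set: called by store_bridge_parm() with the parsed value;
     * any nonzero write enables the flag. */
    static int set_flag(struct net_bridge *br, unsigned long val)
    {
            br->nf_call_iptables = val ? true : false;
            return 0;
    }
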
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 0b9916489d6b..fd5799c9bc8d 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -246,7 +246,7 @@ const struct sysfs_ops brport_sysfs_ops = {
 /*
  * Add sysfs entries to ethernet device added to a bridge.
  * Creates a brport subdirectory with bridge attributes.
- * Puts symlink in bridge's brport subdirectory
+ * Puts symlink in bridge's brif subdirectory
  */
 int br_sysfs_addif(struct net_bridge_port *p)
 {
@@ -257,15 +257,37 @@ int br_sysfs_addif(struct net_bridge_port *p)
 	err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj,
 				SYSFS_BRIDGE_PORT_LINK);
 	if (err)
-		goto out2;
+		return err;
 
 	for (a = brport_attrs; *a; ++a) {
 		err = sysfs_create_file(&p->kobj, &((*a)->attr));
 		if (err)
-			goto out2;
+			return err;
 	}
 
-	err = sysfs_create_link(br->ifobj, &p->kobj, p->dev->name);
-out2:
+	strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
+	return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name);
+}
+
+/* Rename bridge's brif symlink */
+int br_sysfs_renameif(struct net_bridge_port *p)
+{
+	struct net_bridge *br = p->br;
+	int err;
+
+	/* If a rename fails, the rollback will cause another
+	 * rename call with the existing name.
+	 */
+	if (!strncmp(p->sysfs_name, p->dev->name, IFNAMSIZ))
+		return 0;
+
+	err = sysfs_rename_link(br->ifobj, &p->kobj,
+				p->sysfs_name, p->dev->name);
+	if (err)
+		netdev_notice(br->dev, "unable to rename link %s to %s",
+			      p->sysfs_name, p->dev->name);
+	else
+		strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
+
 	return err;
 }
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 5d1176758ca5..2a449b7ab8fa 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_802_3.h>
 
 static bool
-ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_802_3_info *info = par->matchinfo;
 	const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb);
@@ -36,14 +36,14 @@ ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_802_3_mt_check(const struct xt_mtchk_param *par)
+static int ebt_802_3_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_802_3_info *info = par->matchinfo;
 
 	if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK)
-		return false;
+		return -EINVAL;
 
-	return true;
+	return 0;
 }
 
 static struct xt_match ebt_802_3_mt_reg __read_mostly = {
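
Note: this and the remaining ebt_* hunks below are one mechanical conversion: xtables checkentry callbacks move from returning bool (true = OK) to returning int (0 = OK, negative errno on failure), so the core can report a precise error instead of a blanket EINVAL, and match/target callbacks take the unified struct xt_action_param. The return-value mapping, as a one-line sketch:

    /* Old convention -> new convention. */
    static int check_result(bool old_style_ok)
    {
            return old_style_ok ? 0 : -EINVAL;
    }
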
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index b595f091f35b..8b84c581be30 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -7,6 +7,7 @@
  * August, 2003
  *
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/ip.h>
 #include <linux/if_arp.h>
 #include <linux/module.h>
@@ -128,7 +129,7 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr)
 }
 
 static bool
-ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_among_info *info = par->matchinfo;
 	const char *dmac, *smac;
@@ -171,7 +172,7 @@ ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_among_mt_check(const struct xt_mtchk_param *par)
+static int ebt_among_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_among_info *info = par->matchinfo;
 	const struct ebt_entry_match *em =
@@ -186,24 +187,20 @@ static bool ebt_among_mt_check(const struct xt_mtchk_param *par)
 		expected_length += ebt_mac_wormhash_size(wh_src);
 
 	if (em->match_size != EBT_ALIGN(expected_length)) {
-		printk(KERN_WARNING
-		       "ebtables: among: wrong size: %d "
-		       "against expected %d, rounded to %Zd\n",
-		       em->match_size, expected_length,
-		       EBT_ALIGN(expected_length));
-		return false;
+		pr_info("wrong size: %d against expected %d, rounded to %Zd\n",
+			em->match_size, expected_length,
+			EBT_ALIGN(expected_length));
+		return -EINVAL;
 	}
 	if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) {
-		printk(KERN_WARNING
-		       "ebtables: among: dst integrity fail: %x\n", -err);
-		return false;
+		pr_info("dst integrity fail: %x\n", -err);
+		return -EINVAL;
 	}
 	if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) {
-		printk(KERN_WARNING
-		       "ebtables: among: src integrity fail: %x\n", -err);
-		return false;
+		pr_info("src integrity fail: %x\n", -err);
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_match ebt_among_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index e727697c5847..cd457b891b27 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebt_arp.h>
 
 static bool
-ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_arp_info *info = par->matchinfo;
 	const struct arphdr *ah;
@@ -100,7 +100,7 @@ ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_arp_mt_check(const struct xt_mtchk_param *par)
+static int ebt_arp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_arp_info *info = par->matchinfo;
 	const struct ebt_entry *e = par->entryinfo;
@@ -108,10 +108,10 @@ static bool ebt_arp_mt_check(const struct xt_mtchk_param *par)
 	if ((e->ethproto != htons(ETH_P_ARP) &&
 	     e->ethproto != htons(ETH_P_RARP)) ||
 	    e->invflags & EBT_IPROTO)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_match ebt_arp_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index f392e9d93f53..070cf134a22f 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebt_arpreply.h>
 
 static unsigned int
-ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_arpreply_info *info = par->targinfo;
 	const __be32 *siptr, *diptr;
@@ -57,17 +57,17 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
+static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_arpreply_info *info = par->targinfo;
 	const struct ebt_entry *e = par->entryinfo;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
-		return false;
+		return -EINVAL;
 	if (e->ethproto != htons(ETH_P_ARP) ||
 	    e->invflags & EBT_IPROTO)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_target ebt_arpreply_tg_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 2bb40d728a35..c59f7bfae6e2 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -15,7 +15,7 @@
 #include <linux/netfilter_bridge/ebt_nat.h>
 
 static unsigned int
-ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
 
@@ -26,13 +26,13 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par)
+static int ebt_dnat_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
 	unsigned int hook_mask;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
-		return false;
+		return -EINVAL;
 
 	hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS);
 	if ((strcmp(par->table, "nat") != 0 ||
@@ -40,10 +40,10 @@ static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par)
 	      (1 << NF_BR_LOCAL_OUT)))) &&
 	    (strcmp(par->table, "broute") != 0 ||
 	     hook_mask & ~(1 << NF_BR_BROUTING)))
-		return false;
+		return -EINVAL;
 	if (INVALID_TARGET)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_target ebt_dnat_tg_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 5de6df6f86b8..23bca62d58d2 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -25,7 +25,7 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_ip_info *info = par->matchinfo;
 	const struct iphdr *ih;
@@ -77,31 +77,31 @@ ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_ip_mt_check(const struct xt_mtchk_param *par)
+static int ebt_ip_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_ip_info *info = par->matchinfo;
 	const struct ebt_entry *e = par->entryinfo;
 
 	if (e->ethproto != htons(ETH_P_IP) ||
 	    e->invflags & EBT_IPROTO)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) {
 		if (info->invflags & EBT_IP_PROTO)
-			return false;
+			return -EINVAL;
 		if (info->protocol != IPPROTO_TCP &&
 		    info->protocol != IPPROTO_UDP &&
 		    info->protocol != IPPROTO_UDPLITE &&
 		    info->protocol != IPPROTO_SCTP &&
 		    info->protocol != IPPROTO_DCCP)
-			return false;
+			return -EINVAL;
 	}
 	if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1])
-		return false;
+		return -EINVAL;
 	if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1])
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_match ebt_ip_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index bbf2534ef026..50a46afc2bcc 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -4,7 +4,7 @@
  * Authors:
  * Manohar Castelino <manohar.r.castelino@intel.com>
  * Kuo-Lang Tseng <kuo-lang.tseng@intel.com>
- * Jan Engelhardt <jengelh@computergmbh.de>
+ * Jan Engelhardt <jengelh@medozas.de>
  *
  * Summary:
  * This is just a modification of the IPv4 code written by
@@ -28,15 +28,13 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_ip6_info *info = par->matchinfo;
 	const struct ipv6hdr *ih6;
 	struct ipv6hdr _ip6h;
 	const struct tcpudphdr *pptr;
 	struct tcpudphdr _ports;
-	struct in6_addr tmp_addr;
-	int i;
 
 	ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
 	if (ih6 == NULL)
@@ -44,18 +42,10 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->bitmask & EBT_IP6_TCLASS &&
 	   FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS))
 		return false;
-	for (i = 0; i < 4; i++)
-		tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] &
-			info->smsk.in6_u.u6_addr32[i];
-	if (info->bitmask & EBT_IP6_SOURCE &&
-	   FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0),
-	   EBT_IP6_SOURCE))
-		return false;
-	for (i = 0; i < 4; i++)
-		tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] &
-			info->dmsk.in6_u.u6_addr32[i];
-	if (info->bitmask & EBT_IP6_DEST &&
-	   FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST))
+	if (FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk,
+				       &info->saddr), EBT_IP6_SOURCE) ||
+	    FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk,
+				       &info->daddr), EBT_IP6_DEST))
 		return false;
 	if (info->bitmask & EBT_IP6_PROTO) {
 		uint8_t nexthdr = ih6->nexthdr;
@@ -90,30 +80,30 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_ip6_mt_check(const struct xt_mtchk_param *par)
+static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_entry *e = par->entryinfo;
 	struct ebt_ip6_info *info = par->matchinfo;
 
 	if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) {
 		if (info->invflags & EBT_IP6_PROTO)
-			return false;
+			return -EINVAL;
 		if (info->protocol != IPPROTO_TCP &&
 		    info->protocol != IPPROTO_UDP &&
 		    info->protocol != IPPROTO_UDPLITE &&
 		    info->protocol != IPPROTO_SCTP &&
 		    info->protocol != IPPROTO_DCCP)
-			return false;
+			return -EINVAL;
 	}
 	if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1])
-		return false;
+		return -EINVAL;
 	if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_match ebt_ip6_mt_reg __read_mostly = {
@@ -139,4 +129,5 @@ static void __exit ebt_ip6_fini(void)
 module_init(ebt_ip6_init);
 module_exit(ebt_ip6_fini);
 MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match");
+MODULE_AUTHOR("Kuo-Lang Tseng <kuo-lang.tseng@intel.com>");
 MODULE_LICENSE("GPL");
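
Note: the open-coded per-word AND-and-compare loops removed above are folded into ipv6_masked_addr_cmp(addr, mask, expected), which is nonzero iff the two addresses differ in any bit covered by the mask. A stand-alone reference version of that comparison, for illustration:

    #include <stdint.h>

    /* XOR exposes differing bits; the mask keeps only the bits that
     * matter. Returns nonzero iff (a & mask) != (b & mask). */
    static int masked_addr_cmp(const uint32_t a[4], const uint32_t mask[4],
                               const uint32_t b[4])
    {
            uint32_t diff = 0;
            int i;

            for (i = 0; i < 4; i++)
                    diff |= (a[i] ^ b[i]) & mask[i];
            return diff != 0;
    }
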
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 7a8182710eb3..517e78befcb2 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -10,6 +10,7 @@
  * September, 2003
  *
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/spinlock.h>
@@ -31,7 +32,7 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-ebt_limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ebt_limit_info *info = (void *)par->matchinfo;
 	unsigned long now = jiffies;
@@ -64,16 +65,16 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE;
 }
 
-static bool ebt_limit_mt_check(const struct xt_mtchk_param *par)
+static int ebt_limit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct ebt_limit_info *info = par->matchinfo;
 
 	/* Check for overflow. */
 	if (info->burst == 0 ||
 	    user2credits(info->avg * info->burst) < user2credits(info->avg)) {
-		printk("Overflow in ebt_limit, try lower: %u/%u\n",
-		       info->avg, info->burst);
-		return false;
+		pr_info("overflow, try lower: %u/%u\n",
+			info->avg, info->burst);
+		return -EINVAL;
 	}
 
 	/* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. */
@@ -81,7 +82,7 @@ static bool ebt_limit_mt_check(const struct xt_mtchk_param *par)
 	info->credit = user2credits(info->avg * info->burst);
 	info->credit_cap = user2credits(info->avg * info->burst);
 	info->cost = user2credits(info->avg);
-	return true;
+	return 0;
 }
 
 
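
Note: the overflow test in ebt_limit_mt_check() protects the token-bucket setup: user2credits() scales a user-supplied rate by HZ * CREDITS_PER_JIFFY in 32-bit arithmetic, so a large avg * burst product can wrap and yield a scaled burst smaller than the scaled average, which the check now rejects with -EINVAL. A sketch with illustrative constants (the kernel derives CREDITS_PER_JIFFY from MAX_CPJ):

    #include <stdint.h>

    #define HZ                100            /* illustrative */
    #define CREDITS_PER_JIFFY (1u << 16)     /* illustrative */
    #define EBT_LIMIT_SCALE   10000          /* rate unit: 1/10000 s */

    static uint32_t user2credits(uint32_t user)
    {
            return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE;
    }

    /* A wrapped avg * burst makes the scaled burst compare below the
     * scaled average, which signals the overflow. */
    static int limit_params_ok(uint32_t avg, uint32_t burst)
    {
            return burst != 0 &&
                   user2credits(avg * burst) >= user2credits(avg);
    }
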
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index e873924ddb5d..6e5a8bb9b940 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -24,16 +24,16 @@
 
 static DEFINE_SPINLOCK(ebt_log_lock);
 
-static bool ebt_log_tg_check(const struct xt_tgchk_param *par)
+static int ebt_log_tg_check(const struct xt_tgchk_param *par)
 {
 	struct ebt_log_info *info = par->targinfo;
 
 	if (info->bitmask & ~EBT_LOG_MASK)
-		return false;
+		return -EINVAL;
 	if (info->loglevel >= 8)
-		return false;
+		return -EINVAL;
 	info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0';
-	return true;
+	return 0;
 }
 
 struct tcpudphdr
@@ -171,7 +171,7 @@ out:
 }
 
 static unsigned int
-ebt_log_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_log_info *info = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 2b5ce533d6b9..66697cbd0a8b 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -19,7 +19,7 @@
 #include <linux/netfilter_bridge/ebt_mark_t.h>
 
 static unsigned int
-ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_mark_t_info *info = par->targinfo;
 	int action = info->target & -16;
@@ -36,21 +36,21 @@ ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static bool ebt_mark_tg_check(const struct xt_tgchk_param *par)
+static int ebt_mark_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_mark_t_info *info = par->targinfo;
 	int tmp;
 
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
-		return false;
+		return -EINVAL;
 	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
-		return false;
+		return -EINVAL;
 	tmp = info->target & ~EBT_VERDICT_BITS;
 	if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
 	    tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 #ifdef CONFIG_COMPAT
 struct compat_ebt_mark_t_info {
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index 8de8c396d913..d98baefc4c7e 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_mark_m.h>
 
 static bool
-ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_mark_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_mark_m_info *info = par->matchinfo;
 
@@ -22,17 +22,17 @@ ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool ebt_mark_mt_check(const struct xt_mtchk_param *par)
+static int ebt_mark_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_mark_m_info *info = par->matchinfo;
 
 	if (info->bitmask & ~EBT_MARK_MASK)
-		return false;
+		return -EINVAL;
 	if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND))
-		return false;
+		return -EINVAL;
 	if (!info->bitmask)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 40dbd248b9ae..5be68bbcc341 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -20,7 +20,7 @@
20#include <net/netfilter/nf_log.h> 20#include <net/netfilter/nf_log.h>
21 21
22static unsigned int 22static unsigned int
23ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) 23ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
24{ 24{
25 const struct ebt_nflog_info *info = par->targinfo; 25 const struct ebt_nflog_info *info = par->targinfo;
26 struct nf_loginfo li; 26 struct nf_loginfo li;
@@ -35,14 +35,14 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
35 return EBT_CONTINUE; 35 return EBT_CONTINUE;
36} 36}
37 37
38static bool ebt_nflog_tg_check(const struct xt_tgchk_param *par) 38static int ebt_nflog_tg_check(const struct xt_tgchk_param *par)
39{ 39{
40 struct ebt_nflog_info *info = par->targinfo; 40 struct ebt_nflog_info *info = par->targinfo;
41 41
42 if (info->flags & ~EBT_NFLOG_MASK) 42 if (info->flags & ~EBT_NFLOG_MASK)
43 return false; 43 return -EINVAL;
44 info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0'; 44 info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0';
45 return true; 45 return 0;
46} 46}
47 47
48static struct xt_target ebt_nflog_tg_reg __read_mostly = { 48static struct xt_target ebt_nflog_tg_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index e2a07e6cbef3..496a56515307 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -13,21 +13,21 @@
13#include <linux/netfilter_bridge/ebt_pkttype.h> 13#include <linux/netfilter_bridge/ebt_pkttype.h>
14 14
15static bool 15static bool
16ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) 16ebt_pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
17{ 17{
18 const struct ebt_pkttype_info *info = par->matchinfo; 18 const struct ebt_pkttype_info *info = par->matchinfo;
19 19
20 return (skb->pkt_type == info->pkt_type) ^ info->invert; 20 return (skb->pkt_type == info->pkt_type) ^ info->invert;
21} 21}
22 22
23static bool ebt_pkttype_mt_check(const struct xt_mtchk_param *par) 23static int ebt_pkttype_mt_check(const struct xt_mtchk_param *par)
24{ 24{
25 const struct ebt_pkttype_info *info = par->matchinfo; 25 const struct ebt_pkttype_info *info = par->matchinfo;
26 26
27 if (info->invert != 0 && info->invert != 1) 27 if (info->invert != 0 && info->invert != 1)
28 return false; 28 return -EINVAL;
29 /* Allow any pkt_type value */ 29 /* Allow any pkt_type value */
30 return true; 30 return 0;
31} 31}
32 32
33static struct xt_match ebt_pkttype_mt_reg __read_mostly = { 33static struct xt_match ebt_pkttype_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9be8fbcd370b..46624bb6d9be 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -16,7 +16,7 @@
16#include <linux/netfilter_bridge/ebt_redirect.h> 16#include <linux/netfilter_bridge/ebt_redirect.h>
17 17
18static unsigned int 18static unsigned int
19ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) 19ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
20{ 20{
21 const struct ebt_redirect_info *info = par->targinfo; 21 const struct ebt_redirect_info *info = par->targinfo;
22 22
@@ -24,31 +24,32 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
24 return EBT_DROP; 24 return EBT_DROP;
25 25
26 if (par->hooknum != NF_BR_BROUTING) 26 if (par->hooknum != NF_BR_BROUTING)
27 /* rcu_read_lock()ed by nf_hook_slow */
27 memcpy(eth_hdr(skb)->h_dest, 28 memcpy(eth_hdr(skb)->h_dest,
28 par->in->br_port->br->dev->dev_addr, ETH_ALEN); 29 br_port_get_rcu(par->in)->br->dev->dev_addr, ETH_ALEN);
29 else 30 else
30 memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN); 31 memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN);
31 skb->pkt_type = PACKET_HOST; 32 skb->pkt_type = PACKET_HOST;
32 return info->target; 33 return info->target;
33} 34}
34 35
35static bool ebt_redirect_tg_check(const struct xt_tgchk_param *par) 36static int ebt_redirect_tg_check(const struct xt_tgchk_param *par)
36{ 37{
37 const struct ebt_redirect_info *info = par->targinfo; 38 const struct ebt_redirect_info *info = par->targinfo;
38 unsigned int hook_mask; 39 unsigned int hook_mask;
39 40
40 if (BASE_CHAIN && info->target == EBT_RETURN) 41 if (BASE_CHAIN && info->target == EBT_RETURN)
41 return false; 42 return -EINVAL;
42 43
43 hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); 44 hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS);
44 if ((strcmp(par->table, "nat") != 0 || 45 if ((strcmp(par->table, "nat") != 0 ||
45 hook_mask & ~(1 << NF_BR_PRE_ROUTING)) && 46 hook_mask & ~(1 << NF_BR_PRE_ROUTING)) &&
46 (strcmp(par->table, "broute") != 0 || 47 (strcmp(par->table, "broute") != 0 ||
47 hook_mask & ~(1 << NF_BR_BROUTING))) 48 hook_mask & ~(1 << NF_BR_BROUTING)))
48 return false; 49 return -EINVAL;
49 if (INVALID_TARGET) 50 if (INVALID_TARGET)
50 return false; 51 return -EINVAL;
51 return true; 52 return 0;
52} 53}
53 54
54static struct xt_target ebt_redirect_tg_reg __read_mostly = { 55static struct xt_target ebt_redirect_tg_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 9c7b520765a2..f8f0bd1a1d51 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -17,7 +17,7 @@
17#include <linux/netfilter_bridge/ebt_nat.h> 17#include <linux/netfilter_bridge/ebt_nat.h>
18 18
19static unsigned int 19static unsigned int
20ebt_snat_tg(struct sk_buff *skb, const struct xt_target_param *par) 20ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par)
21{ 21{
22 const struct ebt_nat_info *info = par->targinfo; 22 const struct ebt_nat_info *info = par->targinfo;
23 23
@@ -42,21 +42,21 @@ out:
42 return info->target | ~EBT_VERDICT_BITS; 42 return info->target | ~EBT_VERDICT_BITS;
43} 43}
44 44
45static bool ebt_snat_tg_check(const struct xt_tgchk_param *par) 45static int ebt_snat_tg_check(const struct xt_tgchk_param *par)
46{ 46{
47 const struct ebt_nat_info *info = par->targinfo; 47 const struct ebt_nat_info *info = par->targinfo;
48 int tmp; 48 int tmp;
49 49
50 tmp = info->target | ~EBT_VERDICT_BITS; 50 tmp = info->target | ~EBT_VERDICT_BITS;
51 if (BASE_CHAIN && tmp == EBT_RETURN) 51 if (BASE_CHAIN && tmp == EBT_RETURN)
52 return false; 52 return -EINVAL;
53 53
54 if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) 54 if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
55 return false; 55 return -EINVAL;
56 tmp = info->target | EBT_VERDICT_BITS; 56 tmp = info->target | EBT_VERDICT_BITS;
57 if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) 57 if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT)
58 return false; 58 return -EINVAL;
59 return true; 59 return 0;
60} 60}
61 61
62static struct xt_target ebt_snat_tg_reg __read_mostly = { 62static struct xt_target ebt_snat_tg_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 92a93d363765..5b33a2e634a6 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -120,7 +120,7 @@ static bool ebt_filter_config(const struct ebt_stp_info *info,
120} 120}
121 121
122static bool 122static bool
123ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par) 123ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
124{ 124{
125 const struct ebt_stp_info *info = par->matchinfo; 125 const struct ebt_stp_info *info = par->matchinfo;
126 const struct stp_header *sp; 126 const struct stp_header *sp;
@@ -153,7 +153,7 @@ ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
153 return true; 153 return true;
154} 154}
155 155
156static bool ebt_stp_mt_check(const struct xt_mtchk_param *par) 156static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
157{ 157{
158 const struct ebt_stp_info *info = par->matchinfo; 158 const struct ebt_stp_info *info = par->matchinfo;
159 const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; 159 const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
@@ -162,13 +162,13 @@ static bool ebt_stp_mt_check(const struct xt_mtchk_param *par)
162 162
163 if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || 163 if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
164 !(info->bitmask & EBT_STP_MASK)) 164 !(info->bitmask & EBT_STP_MASK))
165 return false; 165 return -EINVAL;
166 /* Make sure the match only receives stp frames */ 166 /* Make sure the match only receives stp frames */
167 if (compare_ether_addr(e->destmac, bridge_ula) || 167 if (compare_ether_addr(e->destmac, bridge_ula) ||
168 compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) 168 compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC))
169 return false; 169 return -EINVAL;
170 170
171 return true; 171 return 0;
172} 172}
173 173
174static struct xt_match ebt_stp_mt_reg __read_mostly = { 174static struct xt_match ebt_stp_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index f9560f3dbdc7..26377e96fa1c 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -27,7 +27,7 @@
27 * flushed even if it is not full yet. 27 * flushed even if it is not full yet.
28 * 28 *
29 */ 29 */
30 30#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
31#include <linux/module.h> 31#include <linux/module.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/spinlock.h> 33#include <linux/spinlock.h>
@@ -44,9 +44,6 @@
44#include <net/sock.h> 44#include <net/sock.h>
45#include "../br_private.h" 45#include "../br_private.h"
46 46
47#define PRINTR(format, args...) do { if (net_ratelimit()) \
48 printk(format , ## args); } while (0)
49
50static unsigned int nlbufsiz = NLMSG_GOODSIZE; 47static unsigned int nlbufsiz = NLMSG_GOODSIZE;
51module_param(nlbufsiz, uint, 0600); 48module_param(nlbufsiz, uint, 0600);
52MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) " 49MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) "
@@ -107,15 +104,14 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
107 n = max(size, nlbufsiz); 104 n = max(size, nlbufsiz);
108 skb = alloc_skb(n, GFP_ATOMIC); 105 skb = alloc_skb(n, GFP_ATOMIC);
109 if (!skb) { 106 if (!skb) {
110 PRINTR(KERN_ERR "ebt_ulog: can't alloc whole buffer " 107 pr_debug("cannot alloc whole buffer of size %ub!\n", n);
111 "of size %ub!\n", n);
112 if (n > size) { 108 if (n > size) {
113 /* try to allocate only as much as we need for 109 /* try to allocate only as much as we need for
114 * current packet */ 110 * current packet */
115 skb = alloc_skb(size, GFP_ATOMIC); 111 skb = alloc_skb(size, GFP_ATOMIC);
116 if (!skb) 112 if (!skb)
117 PRINTR(KERN_ERR "ebt_ulog: can't even allocate " 113 pr_debug("cannot even allocate "
118 "buffer of size %ub\n", size); 114 "buffer of size %ub\n", size);
119 } 115 }
120 } 116 }
121 117
@@ -142,8 +138,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
142 138
143 size = NLMSG_SPACE(sizeof(*pm) + copy_len); 139 size = NLMSG_SPACE(sizeof(*pm) + copy_len);
144 if (size > nlbufsiz) { 140 if (size > nlbufsiz) {
145 PRINTR("ebt_ulog: Size %Zd needed, but nlbufsiz=%d\n", 141 pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz);
146 size, nlbufsiz);
147 return; 142 return;
148 } 143 }
149 144
@@ -182,8 +177,9 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
182 if (in) { 177 if (in) {
183 strcpy(pm->physindev, in->name); 178 strcpy(pm->physindev, in->name);
184 /* If in isn't a bridge, then physindev==indev */ 179 /* If in isn't a bridge, then physindev==indev */
185 if (in->br_port) 180 if (br_port_exists(in))
186 strcpy(pm->indev, in->br_port->br->dev->name); 181 /* rcu_read_lock()ed by nf_hook_slow */
182 strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name);
187 else 183 else
188 strcpy(pm->indev, in->name); 184 strcpy(pm->indev, in->name);
189 } else 185 } else
@@ -192,7 +188,8 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
192 if (out) { 188 if (out) {
193 /* If out exists, then out is a bridge port */ 189 /* If out exists, then out is a bridge port */
194 strcpy(pm->physoutdev, out->name); 190 strcpy(pm->physoutdev, out->name);
195 strcpy(pm->outdev, out->br_port->br->dev->name); 191 /* rcu_read_lock()ed by nf_hook_slow */
192 strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name);
196 } else 193 } else
197 pm->outdev[0] = pm->physoutdev[0] = '\0'; 194 pm->outdev[0] = pm->physoutdev[0] = '\0';
198 195
@@ -217,8 +214,8 @@ unlock:
217 return; 214 return;
218 215
219nlmsg_failure: 216nlmsg_failure:
220 printk(KERN_CRIT "ebt_ulog: error during NLMSG_PUT. This should " 217 pr_debug("error during NLMSG_PUT. This should "
221 "not happen, please report to author.\n"); 218 "not happen, please report to author.\n");
222 goto unlock; 219 goto unlock;
223alloc_failure: 220alloc_failure:
224 goto unlock; 221 goto unlock;
@@ -248,26 +245,26 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
248} 245}
249 246
250static unsigned int 247static unsigned int
251ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) 248ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
252{ 249{
253 ebt_ulog_packet(par->hooknum, skb, par->in, par->out, 250 ebt_ulog_packet(par->hooknum, skb, par->in, par->out,
254 par->targinfo, NULL); 251 par->targinfo, NULL);
255 return EBT_CONTINUE; 252 return EBT_CONTINUE;
256} 253}
257 254
258static bool ebt_ulog_tg_check(const struct xt_tgchk_param *par) 255static int ebt_ulog_tg_check(const struct xt_tgchk_param *par)
259{ 256{
260 struct ebt_ulog_info *uloginfo = par->targinfo; 257 struct ebt_ulog_info *uloginfo = par->targinfo;
261 258
262 if (uloginfo->nlgroup > 31) 259 if (uloginfo->nlgroup > 31)
263 return false; 260 return -EINVAL;
264 261
265 uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; 262 uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0';
266 263
267 if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN) 264 if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN)
268 uloginfo->qthreshold = EBT_ULOG_MAX_QLEN; 265 uloginfo->qthreshold = EBT_ULOG_MAX_QLEN;
269 266
270 return true; 267 return 0;
271} 268}
272 269
273static struct xt_target ebt_ulog_tg_reg __read_mostly = { 270static struct xt_target ebt_ulog_tg_reg __read_mostly = {
@@ -292,8 +289,8 @@ static int __init ebt_ulog_init(void)
292 int i; 289 int i;
293 290
294 if (nlbufsiz >= 128*1024) { 291 if (nlbufsiz >= 128*1024) {
295 printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," 292 pr_warning("Netlink buffer has to be <= 128kB,"
296 " please try a smaller nlbufsiz parameter.\n"); 293 " please try a smaller nlbufsiz parameter.\n");
297 return -EINVAL; 294 return -EINVAL;
298 } 295 }
299 296
@@ -306,13 +303,10 @@ static int __init ebt_ulog_init(void)
306 ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, 303 ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG,
307 EBT_ULOG_MAXNLGROUPS, NULL, NULL, 304 EBT_ULOG_MAXNLGROUPS, NULL, NULL,
308 THIS_MODULE); 305 THIS_MODULE);
309 if (!ebtulognl) { 306 if (!ebtulognl)
310 printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to "
311 "call netlink_kernel_create\n");
312 ret = -ENOMEM; 307 ret = -ENOMEM;
313 } else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) { 308 else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0)
314 netlink_kernel_release(ebtulognl); 309 netlink_kernel_release(ebtulognl);
315 }
316 310
317 if (ret == 0) 311 if (ret == 0)
318 nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); 312 nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index be1dd2e1f615..87b53b3a921d 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -26,22 +26,17 @@
26#include <linux/netfilter_bridge/ebtables.h> 26#include <linux/netfilter_bridge/ebtables.h>
27#include <linux/netfilter_bridge/ebt_vlan.h> 27#include <linux/netfilter_bridge/ebt_vlan.h>
28 28
29static int debug;
30#define MODULE_VERS "0.6" 29#define MODULE_VERS "0.6"
31 30
32module_param(debug, int, 0);
33MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages");
34MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>"); 31MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>");
35MODULE_DESCRIPTION("Ebtables: 802.1Q VLAN tag match"); 32MODULE_DESCRIPTION("Ebtables: 802.1Q VLAN tag match");
36MODULE_LICENSE("GPL"); 33MODULE_LICENSE("GPL");
37 34
38
39#define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args)
40#define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_ 35#define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_
41#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; } 36#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; }
42 37
43static bool 38static bool
44ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par) 39ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
45{ 40{
46 const struct ebt_vlan_info *info = par->matchinfo; 41 const struct ebt_vlan_info *info = par->matchinfo;
47 const struct vlan_hdr *fp; 42 const struct vlan_hdr *fp;
@@ -84,32 +79,31 @@ ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par)
84 return true; 79 return true;
85} 80}
86 81
87static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) 82static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
88{ 83{
89 struct ebt_vlan_info *info = par->matchinfo; 84 struct ebt_vlan_info *info = par->matchinfo;
90 const struct ebt_entry *e = par->entryinfo; 85 const struct ebt_entry *e = par->entryinfo;
91 86
 92	/* Is the frame being checked an 802.1Q frame? */ 87	/* Is the frame being checked an 802.1Q frame? */
93 if (e->ethproto != htons(ETH_P_8021Q)) { 88 if (e->ethproto != htons(ETH_P_8021Q)) {
94 DEBUG_MSG 89 pr_debug("passed entry proto %2.4X is not 802.1Q (8100)\n",
95 ("passed entry proto %2.4X is not 802.1Q (8100)\n", 90 ntohs(e->ethproto));
96 (unsigned short) ntohs(e->ethproto)); 91 return -EINVAL;
97 return false;
98 } 92 }
99 93
100 /* Check for bitmask range 94 /* Check for bitmask range
101 * True if even one bit is out of mask */ 95 * True if even one bit is out of mask */
102 if (info->bitmask & ~EBT_VLAN_MASK) { 96 if (info->bitmask & ~EBT_VLAN_MASK) {
103 DEBUG_MSG("bitmask %2X is out of mask (%2X)\n", 97 pr_debug("bitmask %2X is out of mask (%2X)\n",
104 info->bitmask, EBT_VLAN_MASK); 98 info->bitmask, EBT_VLAN_MASK);
105 return false; 99 return -EINVAL;
106 } 100 }
107 101
108 /* Check for inversion flags range */ 102 /* Check for inversion flags range */
109 if (info->invflags & ~EBT_VLAN_MASK) { 103 if (info->invflags & ~EBT_VLAN_MASK) {
110 DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n", 104 pr_debug("inversion flags %2X is out of mask (%2X)\n",
111 info->invflags, EBT_VLAN_MASK); 105 info->invflags, EBT_VLAN_MASK);
112 return false; 106 return -EINVAL;
113 } 107 }
114 108
115 /* Reserved VLAN ID (VID) values 109 /* Reserved VLAN ID (VID) values
@@ -121,10 +115,9 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par)
121 if (GET_BITMASK(EBT_VLAN_ID)) { 115 if (GET_BITMASK(EBT_VLAN_ID)) {
122 if (!!info->id) { /* if id!=0 => check vid range */ 116 if (!!info->id) { /* if id!=0 => check vid range */
123 if (info->id > VLAN_GROUP_ARRAY_LEN) { 117 if (info->id > VLAN_GROUP_ARRAY_LEN) {
124 DEBUG_MSG 118 pr_debug("id %d is out of range (1-4096)\n",
125 ("id %d is out of range (1-4096)\n", 119 info->id);
126 info->id); 120 return -EINVAL;
127 return false;
128 } 121 }
129 /* Note: This is valid VLAN-tagged frame point. 122 /* Note: This is valid VLAN-tagged frame point.
130 * Any value of user_priority are acceptable, 123 * Any value of user_priority are acceptable,
@@ -137,9 +130,9 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par)
137 130
138 if (GET_BITMASK(EBT_VLAN_PRIO)) { 131 if (GET_BITMASK(EBT_VLAN_PRIO)) {
139 if ((unsigned char) info->prio > 7) { 132 if ((unsigned char) info->prio > 7) {
140 DEBUG_MSG("prio %d is out of range (0-7)\n", 133 pr_debug("prio %d is out of range (0-7)\n",
141 info->prio); 134 info->prio);
142 return false; 135 return -EINVAL;
143 } 136 }
144 } 137 }
145 /* Check for encapsulated proto range - it is possible to be 138 /* Check for encapsulated proto range - it is possible to be
@@ -147,14 +140,13 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par)
147 * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS */ 140 * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS */
148 if (GET_BITMASK(EBT_VLAN_ENCAP)) { 141 if (GET_BITMASK(EBT_VLAN_ENCAP)) {
149 if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) { 142 if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) {
150 DEBUG_MSG 143 pr_debug("encap frame length %d is less than "
151 ("encap frame length %d is less than minimal\n", 144 "minimal\n", ntohs(info->encap));
152 ntohs(info->encap)); 145 return -EINVAL;
153 return false;
154 } 146 }
155 } 147 }
156 148
157 return true; 149 return 0;
158} 150}
159 151
160static struct xt_match ebt_vlan_mt_reg __read_mostly = { 152static struct xt_match ebt_vlan_mt_reg __read_mostly = {
@@ -169,9 +161,7 @@ static struct xt_match ebt_vlan_mt_reg __read_mostly = {
169 161
170static int __init ebt_vlan_init(void) 162static int __init ebt_vlan_init(void)
171{ 163{
172 DEBUG_MSG("ebtables 802.1Q extension module v" 164 pr_debug("ebtables 802.1Q extension module v" MODULE_VERS "\n");
173 MODULE_VERS "\n");
174 DEBUG_MSG("module debug=%d\n", !!debug);
175 return xt_register_match(&ebt_vlan_mt_reg); 165 return xt_register_match(&ebt_vlan_mt_reg);
176} 166}
177 167
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f0865fd1e3ec..bcc102e3be4d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -14,8 +14,7 @@
14 * as published by the Free Software Foundation; either version 14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version. 15 * 2 of the License, or (at your option) any later version.
16 */ 16 */
17 17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18
19#include <linux/kmod.h> 18#include <linux/kmod.h>
20#include <linux/module.h> 19#include <linux/module.h>
21#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
@@ -87,7 +86,7 @@ static struct xt_target ebt_standard_target = {
87 86
88static inline int 87static inline int
89ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, 88ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb,
90 struct xt_target_param *par) 89 struct xt_action_param *par)
91{ 90{
92 par->target = w->u.watcher; 91 par->target = w->u.watcher;
93 par->targinfo = w->data; 92 par->targinfo = w->data;
@@ -96,8 +95,9 @@ ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb,
96 return 0; 95 return 0;
97} 96}
98 97
99static inline int ebt_do_match (struct ebt_entry_match *m, 98static inline int
100 const struct sk_buff *skb, struct xt_match_param *par) 99ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb,
100 struct xt_action_param *par)
101{ 101{
102 par->match = m->u.match; 102 par->match = m->u.match;
103 par->matchinfo = m->data; 103 par->matchinfo = m->data;
@@ -140,11 +140,14 @@ ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h,
140 return 1; 140 return 1;
141 if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) 141 if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
142 return 1; 142 return 1;
143 if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check( 143 /* rcu_read_lock()ed by nf_hook_slow */
144 e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN)) 144 if (in && br_port_exists(in) &&
145 FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev),
146 EBT_ILOGICALIN))
145 return 1; 147 return 1;
146 if ((!out || !out->br_port) ? 0 : FWINV2(ebt_dev_check( 148 if (out && br_port_exists(out) &&
147 e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT)) 149 FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev),
150 EBT_ILOGICALOUT))
148 return 1; 151 return 1;
149 152
150 if (e->bitmask & EBT_SOURCEMAC) { 153 if (e->bitmask & EBT_SOURCEMAC) {
@@ -186,15 +189,13 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
186 struct ebt_entries *chaininfo; 189 struct ebt_entries *chaininfo;
187 const char *base; 190 const char *base;
188 const struct ebt_table_info *private; 191 const struct ebt_table_info *private;
189 bool hotdrop = false; 192 struct xt_action_param acpar;
190 struct xt_match_param mtpar;
191 struct xt_target_param tgpar;
192 193
193 mtpar.family = tgpar.family = NFPROTO_BRIDGE; 194 acpar.family = NFPROTO_BRIDGE;
194 mtpar.in = tgpar.in = in; 195 acpar.in = in;
195 mtpar.out = tgpar.out = out; 196 acpar.out = out;
196 mtpar.hotdrop = &hotdrop; 197 acpar.hotdrop = false;
197 mtpar.hooknum = tgpar.hooknum = hook; 198 acpar.hooknum = hook;
198 199
199 read_lock_bh(&table->lock); 200 read_lock_bh(&table->lock);
200 private = table->private; 201 private = table->private;
@@ -215,9 +216,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
215 if (ebt_basic_match(point, eth_hdr(skb), in, out)) 216 if (ebt_basic_match(point, eth_hdr(skb), in, out))
216 goto letscontinue; 217 goto letscontinue;
217 218
218 if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &mtpar) != 0) 219 if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
219 goto letscontinue; 220 goto letscontinue;
220 if (hotdrop) { 221 if (acpar.hotdrop) {
221 read_unlock_bh(&table->lock); 222 read_unlock_bh(&table->lock);
222 return NF_DROP; 223 return NF_DROP;
223 } 224 }
@@ -228,7 +229,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
228 229
229 /* these should only watch: not modify, nor tell us 230 /* these should only watch: not modify, nor tell us
230 what to do with the packet */ 231 what to do with the packet */
231 EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &tgpar); 232 EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar);
232 233
233 t = (struct ebt_entry_target *) 234 t = (struct ebt_entry_target *)
234 (((char *)point) + point->target_offset); 235 (((char *)point) + point->target_offset);
@@ -236,9 +237,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
236 if (!t->u.target->target) 237 if (!t->u.target->target)
237 verdict = ((struct ebt_standard_target *)t)->verdict; 238 verdict = ((struct ebt_standard_target *)t)->verdict;
238 else { 239 else {
239 tgpar.target = t->u.target; 240 acpar.target = t->u.target;
240 tgpar.targinfo = t->data; 241 acpar.targinfo = t->data;
241 verdict = t->u.target->target(skb, &tgpar); 242 verdict = t->u.target->target(skb, &acpar);
242 } 243 }
243 if (verdict == EBT_ACCEPT) { 244 if (verdict == EBT_ACCEPT) {
244 read_unlock_bh(&table->lock); 245 read_unlock_bh(&table->lock);
@@ -363,12 +364,9 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
363 left - sizeof(struct ebt_entry_match) < m->match_size) 364 left - sizeof(struct ebt_entry_match) < m->match_size)
364 return -EINVAL; 365 return -EINVAL;
365 366
366 match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE, 367 match = xt_request_find_match(NFPROTO_BRIDGE, m->u.name, 0);
367 m->u.name, 0), "ebt_%s", m->u.name);
368 if (IS_ERR(match)) 368 if (IS_ERR(match))
369 return PTR_ERR(match); 369 return PTR_ERR(match);
370 if (match == NULL)
371 return -ENOENT;
372 m->u.match = match; 370 m->u.match = match;
373 371
374 par->match = match; 372 par->match = match;
@@ -397,13 +395,9 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par,
397 left - sizeof(struct ebt_entry_watcher) < w->watcher_size) 395 left - sizeof(struct ebt_entry_watcher) < w->watcher_size)
398 return -EINVAL; 396 return -EINVAL;
399 397
400 watcher = try_then_request_module( 398 watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0);
401 xt_find_target(NFPROTO_BRIDGE, w->u.name, 0),
402 "ebt_%s", w->u.name);
403 if (IS_ERR(watcher)) 399 if (IS_ERR(watcher))
404 return PTR_ERR(watcher); 400 return PTR_ERR(watcher);
405 if (watcher == NULL)
406 return -ENOENT;
407 w->u.watcher = watcher; 401 w->u.watcher = watcher;
408 402
409 par->target = watcher; 403 par->target = watcher;
@@ -716,15 +710,10 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
716 t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); 710 t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
717 gap = e->next_offset - e->target_offset; 711 gap = e->next_offset - e->target_offset;
718 712
719 target = try_then_request_module( 713 target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0);
720 xt_find_target(NFPROTO_BRIDGE, t->u.name, 0),
721 "ebt_%s", t->u.name);
722 if (IS_ERR(target)) { 714 if (IS_ERR(target)) {
723 ret = PTR_ERR(target); 715 ret = PTR_ERR(target);
724 goto cleanup_watchers; 716 goto cleanup_watchers;
725 } else if (target == NULL) {
726 ret = -ENOENT;
727 goto cleanup_watchers;
728 } 717 }
729 718
730 t->u.target = target; 719 t->u.target = target;
@@ -2128,7 +2117,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
2128 return ret; 2117 return ret;
2129 new_offset += ret; 2118 new_offset += ret;
2130 if (offsets_update && new_offset) { 2119 if (offsets_update && new_offset) {
2131 pr_debug("ebtables: change offset %d to %d\n", 2120 pr_debug("change offset %d to %d\n",
2132 offsets_update[i], offsets[j] + new_offset); 2121 offsets_update[i], offsets[j] + new_offset);
2133 offsets_update[i] = offsets[j] + new_offset; 2122 offsets_update[i] = offsets[j] + new_offset;
2134 } 2123 }
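
The ebtables changes above all reduce to two mechanical conversions introduced in this series: struct xt_match_param and struct xt_target_param are folded into the single struct xt_action_param (with hotdrop now a plain member rather than a pointer), and the checkentry hooks return 0 or a negative errno instead of true/false. A minimal sketch of a match written directly against the converted API; everything named ebt_example_* is hypothetical and not part of this diff:

/* Hypothetical ebtables match using the converted x_tables API. */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>

struct ebt_example_info {
	__u32 mark;
};

static bool
ebt_example_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct ebt_example_info *info = par->matchinfo;

	return skb->mark == info->mark;		/* match on exact skb->mark */
}

static int ebt_example_mt_check(const struct xt_mtchk_param *par)
{
	const struct ebt_example_info *info = par->matchinfo;

	if (info->mark == 0)			/* old API: return false; */
		return -EINVAL;
	return 0;				/* old API: return true; */
}

static struct xt_match ebt_example_mt_reg __read_mostly = {
	.name		= "example",
	.revision	= 0,
	.family		= NFPROTO_BRIDGE,
	.match		= ebt_example_mt,
	.checkentry	= ebt_example_mt_check,
	.matchsize	= sizeof(struct ebt_example_info),
	.me		= THIS_MODULE,
};

static int __init ebt_example_init(void)
{
	return xt_register_match(&ebt_example_mt_reg);
}

static void __exit ebt_example_exit(void)
{
	xt_unregister_match(&ebt_example_mt_reg);
}

module_init(ebt_example_init);
module_exit(ebt_example_exit);
MODULE_LICENSE("GPL");
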
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
new file mode 100644
index 000000000000..529750da9624
--- /dev/null
+++ b/net/caif/Kconfig
@@ -0,0 +1,42 @@
1#
2# CAIF net configurations
3#
4
5menuconfig CAIF
6 tristate "CAIF support"
7 select CRC_CCITT
8 default n
9 ---help---
10 The "Communication CPU to Application CPU Interface" (CAIF) is a packet
11 based connection-oriented MUX protocol developed by ST-Ericsson for use
12 with its modems. It is accessed from user space as sockets (PF_CAIF).
13
 14	  Say Y (or M) here if you build for a phone product (e.g. Android or
 15	  MeeGo) that uses CAIF as transport. If unsure, say N.
 16
 17	  If you select to build it as a module then CAIF_NETDEV also needs to
 18	  be built as a module. You will also need to say yes to any CAIF
 19	  physical devices that your platform requires.
 20
 21	  See Documentation/networking/caif for further explanation of how to
 22	  use and configure CAIF.
23
24config CAIF_DEBUG
25 bool "Enable Debug"
26 depends on CAIF
27 default n
 28	---help---
29 Enable the inclusion of debug code in the CAIF stack.
30 Be aware that doing this will impact performance.
31 If unsure say N.
32
33config CAIF_NETDEV
34 tristate "CAIF GPRS Network device"
35 depends on CAIF
36 default CAIF
37 ---help---
38 Say Y if you will be using a CAIF based GPRS network device.
 39	  This can be either built-in or a loadable module.
 40	  If you select to build it as a built-in then the main CAIF support
 41	  must also be built-in.
 42	  If unsure, say Y.
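
As the help text notes, CAIF channels are exposed to user space as PF_CAIF sockets. A hedged user-space sketch (not part of this diff), assuming the sockaddr_caif layout used by net/caif/caif_socket.c below; the connection id 42 is a made-up example value:

/* Hypothetical user-space example: open and connect a CAIF datagram socket. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/caif/caif_socket.h>

#ifndef AF_CAIF
#define AF_CAIF 37	/* matches the kernel's linux/socket.h value */
#endif

int main(void)
{
	struct sockaddr_caif addr;
	int fd;

	fd = socket(AF_CAIF, SOCK_SEQPACKET, CAIFPROTO_DATAGRAM);
	if (fd < 0) {
		perror("socket(AF_CAIF)");
		return 1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.family = AF_CAIF;
	addr.u.dgm.connection_id = 42;	/* channel id, example only */

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		perror("connect");
		close(fd);
		return 1;
	}
	/* send()/recv() now carry CAIF datagrams on this channel */
	close(fd);
	return 0;
}
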
diff --git a/net/caif/Makefile b/net/caif/Makefile
new file mode 100644
index 000000000000..f87481fb0e65
--- /dev/null
+++ b/net/caif/Makefile
@@ -0,0 +1,16 @@
1ifeq ($(CONFIG_CAIF_DEBUG),y)
2EXTRA_CFLAGS += -DDEBUG
3endif
4
5caif-objs := caif_dev.o \
6 cfcnfg.o cfmuxl.o cfctrl.o \
7 cffrml.o cfveil.o cfdbgl.o\
8 cfserl.o cfdgml.o \
9 cfrfml.o cfvidl.o cfutill.o \
10 cfsrvl.o cfpkt_skbuff.o caif_config_util.o
11
12obj-$(CONFIG_CAIF) += caif.o
13obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
14obj-$(CONFIG_CAIF) += caif_socket.o
15
16export-objs := caif.o
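
A note on the EXTRA_CFLAGS branch above: CONFIG_CAIF_DEBUG=y adds -DDEBUG, which is what turns the pr_debug() calls in the CAIF files into real KERN_DEBUG output; without DEBUG (or CONFIG_DYNAMIC_DEBUG) they compile to nothing. A one-line illustration, where demo_dbg is hypothetical:

#include <linux/kernel.h>

static void demo_dbg(int phyid)
{
	/* emitted only when DEBUG is defined or dynamic debug enables it */
	pr_debug("adding phy layer %d\n", phyid);
}
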
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
new file mode 100644
index 000000000000..76ae68303d3a
--- /dev/null
+++ b/net/caif/caif_config_util.c
@@ -0,0 +1,92 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/module.h>
8#include <linux/spinlock.h>
9#include <net/caif/cfctrl.h>
10#include <net/caif/cfcnfg.h>
11#include <net/caif/caif_dev.h>
12
13int connect_req_to_link_param(struct cfcnfg *cnfg,
14 struct caif_connect_request *s,
15 struct cfctrl_link_param *l)
16{
17 struct dev_info *dev_info;
18 enum cfcnfg_phy_preference pref;
19 memset(l, 0, sizeof(*l));
20 l->priority = s->priority;
21
22 if (s->link_name[0] != '\0')
23 l->phyid = cfcnfg_get_named(cnfg, s->link_name);
24 else {
25 switch (s->link_selector) {
26 case CAIF_LINK_HIGH_BANDW:
27 pref = CFPHYPREF_HIGH_BW;
28 break;
29 case CAIF_LINK_LOW_LATENCY:
30 pref = CFPHYPREF_LOW_LAT;
31 break;
32 default:
33 return -EINVAL;
34 }
35 dev_info = cfcnfg_get_phyid(cnfg, pref);
36 if (dev_info == NULL)
37 return -ENODEV;
38 l->phyid = dev_info->id;
39 }
40 switch (s->protocol) {
41 case CAIFPROTO_AT:
42 l->linktype = CFCTRL_SRV_VEI;
43 if (s->sockaddr.u.at.type == CAIF_ATTYPE_PLAIN)
44 l->chtype = 0x02;
45 else
46 l->chtype = s->sockaddr.u.at.type;
47 l->endpoint = 0x00;
48 break;
49 case CAIFPROTO_DATAGRAM:
50 l->linktype = CFCTRL_SRV_DATAGRAM;
51 l->chtype = 0x00;
52 l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
53 break;
54 case CAIFPROTO_DATAGRAM_LOOP:
55 l->linktype = CFCTRL_SRV_DATAGRAM;
56 l->chtype = 0x03;
57 l->endpoint = 0x00;
58 l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
59 break;
60 case CAIFPROTO_RFM:
61 l->linktype = CFCTRL_SRV_RFM;
62 l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
63 strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
64 sizeof(l->u.rfm.volume)-1);
65 l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
66 break;
67 case CAIFPROTO_UTIL:
68 l->linktype = CFCTRL_SRV_UTIL;
69 l->endpoint = 0x00;
70 l->chtype = 0x00;
71 strncpy(l->u.utility.name, s->sockaddr.u.util.service,
72 sizeof(l->u.utility.name)-1);
73 l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
74 caif_assert(sizeof(l->u.utility.name) > 10);
75 l->u.utility.paramlen = s->param.size;
76 if (l->u.utility.paramlen > sizeof(l->u.utility.params))
77 l->u.utility.paramlen = sizeof(l->u.utility.params);
78
79 memcpy(l->u.utility.params, s->param.data,
80 l->u.utility.paramlen);
81
82 break;
83 case CAIFPROTO_DEBUG:
84 l->linktype = CFCTRL_SRV_DBG;
85 l->endpoint = s->sockaddr.u.dbg.service;
86 l->chtype = s->sockaddr.u.dbg.type;
87 break;
88 default:
89 return -EINVAL;
90 }
91 return 0;
92}
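
For reference, an illustrative kernel-side sketch (not part of this diff) of the input this helper consumes: a caif_connect_request for a datagram channel. The field names follow the switch above; the values are examples only.

#include <linux/string.h>
#include <linux/caif/caif_socket.h>
#include <net/caif/cfctrl.h>
#include <net/caif/caif_dev.h>

static int demo_build_param(struct cfcnfg *cnfg,
			    struct cfctrl_link_param *param)
{
	struct caif_connect_request req;

	memset(&req, 0, sizeof(req));
	req.protocol = CAIFPROTO_DATAGRAM;
	req.priority = CAIF_PRIO_NORMAL;	   /* copied to param->priority */
	req.link_selector = CAIF_LINK_LOW_LATENCY; /* picks the CFPHYPREF_LOW_LAT phy */
	req.sockaddr.u.dgm.connection_id = 42;	   /* example id only */

	/* on success, param->linktype == CFCTRL_SRV_DATAGRAM */
	return connect_req_to_link_param(cnfg, &req, param);
}
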
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
new file mode 100644
index 000000000000..0b586e9d1378
--- /dev/null
+++ b/net/caif/caif_dev.c
@@ -0,0 +1,421 @@
1/*
2 * CAIF Interface registration.
3 * Copyright (C) ST-Ericsson AB 2010
4 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
5 * License terms: GNU General Public License (GPL) version 2
6 *
7 * Borrowed heavily from file: pn_dev.c. Thanks to
8 * Remi Denis-Courmont <remi.denis-courmont@nokia.com>
9 * and Sakari Ailus <sakari.ailus@nokia.com>
10 */
11
12#include <linux/version.h>
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/if_arp.h>
16#include <linux/net.h>
17#include <linux/netdevice.h>
18#include <linux/skbuff.h>
19#include <linux/sched.h>
20#include <linux/wait.h>
21#include <net/netns/generic.h>
22#include <net/net_namespace.h>
23#include <net/pkt_sched.h>
24#include <net/caif/caif_device.h>
25#include <net/caif/caif_dev.h>
26#include <net/caif/caif_layer.h>
27#include <net/caif/cfpkt.h>
28#include <net/caif/cfcnfg.h>
29
30MODULE_LICENSE("GPL");
31#define TIMEOUT (HZ*5)
32
33/* Used for local tracking of the CAIF net devices */
34struct caif_device_entry {
35 struct cflayer layer;
36 struct list_head list;
37 atomic_t in_use;
38 atomic_t state;
39 u16 phyid;
40 struct net_device *netdev;
41 wait_queue_head_t event;
42};
43
44struct caif_device_entry_list {
45 struct list_head list;
 46	/* Protects simultaneous deletes in list */
47 spinlock_t lock;
48};
49
50struct caif_net {
51 struct caif_device_entry_list caifdevs;
52};
53
54static int caif_net_id;
55static struct cfcnfg *cfg;
56
57static struct caif_device_entry_list *caif_device_list(struct net *net)
58{
59 struct caif_net *caifn;
60 BUG_ON(!net);
61 caifn = net_generic(net, caif_net_id);
62 BUG_ON(!caifn);
63 return &caifn->caifdevs;
64}
65
66/* Allocate new CAIF device. */
67static struct caif_device_entry *caif_device_alloc(struct net_device *dev)
68{
69 struct caif_device_entry_list *caifdevs;
70 struct caif_device_entry *caifd;
71 caifdevs = caif_device_list(dev_net(dev));
72 BUG_ON(!caifdevs);
73 caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC);
74 if (!caifd)
75 return NULL;
76 caifd->netdev = dev;
77 list_add(&caifd->list, &caifdevs->list);
78 init_waitqueue_head(&caifd->event);
79 return caifd;
80}
81
82static struct caif_device_entry *caif_get(struct net_device *dev)
83{
84 struct caif_device_entry_list *caifdevs =
85 caif_device_list(dev_net(dev));
86 struct caif_device_entry *caifd;
87 BUG_ON(!caifdevs);
88 list_for_each_entry(caifd, &caifdevs->list, list) {
89 if (caifd->netdev == dev)
90 return caifd;
91 }
92 return NULL;
93}
94
95static void caif_device_destroy(struct net_device *dev)
96{
97 struct caif_device_entry_list *caifdevs =
98 caif_device_list(dev_net(dev));
99 struct caif_device_entry *caifd;
100 ASSERT_RTNL();
101 if (dev->type != ARPHRD_CAIF)
102 return;
103
104 spin_lock_bh(&caifdevs->lock);
105 caifd = caif_get(dev);
106 if (caifd == NULL) {
107 spin_unlock_bh(&caifdevs->lock);
108 return;
109 }
110
111 list_del(&caifd->list);
112 spin_unlock_bh(&caifdevs->lock);
113
114 kfree(caifd);
115}
116
117static int transmit(struct cflayer *layer, struct cfpkt *pkt)
118{
119 struct caif_device_entry *caifd =
120 container_of(layer, struct caif_device_entry, layer);
121 struct sk_buff *skb, *skb2;
122 int ret = -EINVAL;
123 skb = cfpkt_tonative(pkt);
124 skb->dev = caifd->netdev;
125 /*
126 * Don't allow SKB to be destroyed upon error, but signal resend
127 * notification to clients. We can't rely on the return value as
 128	 * congestion (NET_XMIT_CN) sometimes drops the packet, sometimes doesn't.
129 */
130 if (netif_queue_stopped(caifd->netdev))
131 return -EAGAIN;
132 skb2 = skb_get(skb);
133
134 ret = dev_queue_xmit(skb2);
135
136 if (!ret)
137 kfree_skb(skb);
138 else
139 return -EAGAIN;
140
141 return 0;
142}
143
144static int modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
145{
146 struct caif_device_entry *caifd;
147 struct caif_dev_common *caifdev;
148 caifd = container_of(layr, struct caif_device_entry, layer);
149 caifdev = netdev_priv(caifd->netdev);
150 if (ctrl == _CAIF_MODEMCMD_PHYIF_USEFULL) {
151 atomic_set(&caifd->in_use, 1);
152 wake_up_interruptible(&caifd->event);
153
154 } else if (ctrl == _CAIF_MODEMCMD_PHYIF_USELESS) {
155 atomic_set(&caifd->in_use, 0);
156 wake_up_interruptible(&caifd->event);
157 }
158 return 0;
159}
160
161/*
 162	 * Stuff received packets into the associated sockets.
163 * On error, returns non-zero and releases the skb.
164 */
165static int receive(struct sk_buff *skb, struct net_device *dev,
166 struct packet_type *pkttype, struct net_device *orig_dev)
167{
168 struct net *net;
169 struct cfpkt *pkt;
170 struct caif_device_entry *caifd;
171 net = dev_net(dev);
172 pkt = cfpkt_fromnative(CAIF_DIR_IN, skb);
173 caifd = caif_get(dev);
174 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd)
175 return NET_RX_DROP;
176
177 if (caifd->layer.up->receive(caifd->layer.up, pkt))
178 return NET_RX_DROP;
179
180 return 0;
181}
182
183static struct packet_type caif_packet_type __read_mostly = {
184 .type = cpu_to_be16(ETH_P_CAIF),
185 .func = receive,
186};
187
188static void dev_flowctrl(struct net_device *dev, int on)
189{
190 struct caif_device_entry *caifd = caif_get(dev);
191 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd)
192 return;
193
194 caifd->layer.up->ctrlcmd(caifd->layer.up,
195 on ?
196 _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND :
197 _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
198 caifd->layer.id);
199}
200
201/* notify Caif of device events */
202static int caif_device_notify(struct notifier_block *me, unsigned long what,
203 void *arg)
204{
205 struct net_device *dev = arg;
206 struct caif_device_entry *caifd = NULL;
207 struct caif_dev_common *caifdev;
208 enum cfcnfg_phy_preference pref;
209 int res = -EINVAL;
210 enum cfcnfg_phy_type phy_type;
211
212 if (dev->type != ARPHRD_CAIF)
213 return 0;
214
215 switch (what) {
216 case NETDEV_REGISTER:
217 pr_info("CAIF: %s():register %s\n", __func__, dev->name);
218 caifd = caif_device_alloc(dev);
219 if (caifd == NULL)
220 break;
221 caifdev = netdev_priv(dev);
222 caifdev->flowctrl = dev_flowctrl;
223 atomic_set(&caifd->state, what);
224 res = 0;
225 break;
226
227 case NETDEV_UP:
228 pr_info("CAIF: %s(): up %s\n", __func__, dev->name);
229 caifd = caif_get(dev);
230 if (caifd == NULL)
231 break;
232 caifdev = netdev_priv(dev);
233 if (atomic_read(&caifd->state) == NETDEV_UP) {
234 pr_info("CAIF: %s():%s already up\n",
235 __func__, dev->name);
236 break;
237 }
238 atomic_set(&caifd->state, what);
239 caifd->layer.transmit = transmit;
240 caifd->layer.modemcmd = modemcmd;
241
242 if (caifdev->use_frag)
243 phy_type = CFPHYTYPE_FRAG;
244 else
245 phy_type = CFPHYTYPE_CAIF;
246
247 switch (caifdev->link_select) {
248 case CAIF_LINK_HIGH_BANDW:
249 pref = CFPHYPREF_HIGH_BW;
250 break;
251 case CAIF_LINK_LOW_LATENCY:
252 pref = CFPHYPREF_LOW_LAT;
253 break;
254 default:
255 pref = CFPHYPREF_HIGH_BW;
256 break;
257 }
258 dev_hold(dev);
259 cfcnfg_add_phy_layer(get_caif_conf(),
260 phy_type,
261 dev,
262 &caifd->layer,
263 &caifd->phyid,
264 pref,
265 caifdev->use_fcs,
266 caifdev->use_stx);
267 strncpy(caifd->layer.name, dev->name,
268 sizeof(caifd->layer.name) - 1);
269 caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
270 break;
271
272 case NETDEV_GOING_DOWN:
273 caifd = caif_get(dev);
274 if (caifd == NULL)
275 break;
276 pr_info("CAIF: %s():going down %s\n", __func__, dev->name);
277
278 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN ||
279 atomic_read(&caifd->state) == NETDEV_DOWN)
280 break;
281
282 atomic_set(&caifd->state, what);
283 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd)
284 return -EINVAL;
285 caifd->layer.up->ctrlcmd(caifd->layer.up,
286 _CAIF_CTRLCMD_PHYIF_DOWN_IND,
287 caifd->layer.id);
288 might_sleep();
289 res = wait_event_interruptible_timeout(caifd->event,
290 atomic_read(&caifd->in_use) == 0,
291 TIMEOUT);
292 break;
293
294 case NETDEV_DOWN:
295 caifd = caif_get(dev);
296 if (caifd == NULL)
297 break;
298 pr_info("CAIF: %s(): down %s\n", __func__, dev->name);
299 if (atomic_read(&caifd->in_use))
300 pr_warning("CAIF: %s(): "
301 "Unregistering an active CAIF device: %s\n",
302 __func__, dev->name);
303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
304 dev_put(dev);
305 atomic_set(&caifd->state, what);
306 break;
307
308 case NETDEV_UNREGISTER:
309 caifd = caif_get(dev);
310 pr_info("CAIF: %s(): unregister %s\n", __func__, dev->name);
311 atomic_set(&caifd->state, what);
312 caif_device_destroy(dev);
313 break;
314 }
315 return 0;
316}
317
318static struct notifier_block caif_device_notifier = {
319 .notifier_call = caif_device_notify,
320 .priority = 0,
321};
322
323
324struct cfcnfg *get_caif_conf(void)
325{
326 return cfg;
327}
328EXPORT_SYMBOL(get_caif_conf);
329
330int caif_connect_client(struct caif_connect_request *conn_req,
331 struct cflayer *client_layer, int *ifindex,
332 int *headroom, int *tailroom)
333{
334 struct cfctrl_link_param param;
335 int ret;
336 ret = connect_req_to_link_param(get_caif_conf(), conn_req, &param);
337 if (ret)
338 return ret;
339 /* Hook up the adaptation layer. */
340 return cfcnfg_add_adaptation_layer(get_caif_conf(), &param,
341 client_layer, ifindex,
342 headroom, tailroom);
343}
344EXPORT_SYMBOL(caif_connect_client);
345
346int caif_disconnect_client(struct cflayer *adap_layer)
347{
348 return cfcnfg_disconn_adapt_layer(get_caif_conf(), adap_layer);
349}
350EXPORT_SYMBOL(caif_disconnect_client);
351
352void caif_release_client(struct cflayer *adap_layer)
353{
354 cfcnfg_release_adap_layer(adap_layer);
355}
356EXPORT_SYMBOL(caif_release_client);
357
358/* Per-namespace Caif devices handling */
359static int caif_init_net(struct net *net)
360{
361 struct caif_net *caifn = net_generic(net, caif_net_id);
362 INIT_LIST_HEAD(&caifn->caifdevs.list);
363 spin_lock_init(&caifn->caifdevs.lock);
364 return 0;
365}
366
367static void caif_exit_net(struct net *net)
368{
369 struct net_device *dev;
370 int res;
371 rtnl_lock();
372 for_each_netdev(net, dev) {
373 if (dev->type != ARPHRD_CAIF)
374 continue;
375 res = dev_close(dev);
376 caif_device_destroy(dev);
377 }
378 rtnl_unlock();
379}
380
381static struct pernet_operations caif_net_ops = {
382 .init = caif_init_net,
383 .exit = caif_exit_net,
384 .id = &caif_net_id,
385 .size = sizeof(struct caif_net),
386};
387
388/* Initialize Caif devices list */
389static int __init caif_device_init(void)
390{
391 int result;
392 cfg = cfcnfg_create();
393 if (!cfg) {
394 pr_warning("CAIF: %s(): can't create cfcnfg.\n", __func__);
395 goto err_cfcnfg_create_failed;
396 }
397 result = register_pernet_device(&caif_net_ops);
398
399 if (result) {
400 kfree(cfg);
401 cfg = NULL;
402 return result;
403 }
404 dev_add_pack(&caif_packet_type);
405 register_netdevice_notifier(&caif_device_notifier);
406
407 return result;
408err_cfcnfg_create_failed:
409 return -ENODEV;
410}
411
412static void __exit caif_device_exit(void)
413{
414 dev_remove_pack(&caif_packet_type);
415 unregister_pernet_device(&caif_net_ops);
416 unregister_netdevice_notifier(&caif_device_notifier);
417 cfcnfg_remove(cfg);
418}
419
420module_init(caif_device_init);
421module_exit(caif_device_exit);
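
A hedged sketch (not part of this diff) of the contract caif_device_notify() expects from a CAIF physical-interface driver: a netdev of type ARPHRD_CAIF whose private area starts with struct caif_dev_common. Everything named demo_* is hypothetical, and error unwinding is omitted for brevity.

#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <net/caif/caif_device.h>

struct demo_caif_priv {
	struct caif_dev_common common;	/* must be the first member */
	/* driver-private state would follow */
};

static void demo_setup(struct net_device *dev)
{
	struct demo_caif_priv *priv = netdev_priv(dev);

	dev->type = ARPHRD_CAIF;	/* makes the notifier above take notice */
	priv->common.link_select = CAIF_LINK_LOW_LATENCY;
	priv->common.use_frag = 0;	/* CFPHYTYPE_CAIF: no serial fragmentation */
	priv->common.use_stx = 0;
	priv->common.use_fcs = 0;
	/* priv->common.flowctrl is filled in on NETDEV_REGISTER above */
}

static int __init demo_init(void)
{
	struct net_device *dev = alloc_netdev(sizeof(struct demo_caif_priv),
					      "demo%d", demo_setup);
	return dev ? register_netdev(dev) : -ENOMEM;
}
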
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
new file mode 100644
index 000000000000..8ce904786116
--- /dev/null
+++ b/net/caif/caif_socket.c
@@ -0,0 +1,1244 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/fs.h>
8#include <linux/init.h>
9#include <linux/module.h>
10#include <linux/sched.h>
11#include <linux/spinlock.h>
12#include <linux/mutex.h>
13#include <linux/list.h>
14#include <linux/wait.h>
15#include <linux/poll.h>
16#include <linux/tcp.h>
17#include <linux/uaccess.h>
18#include <linux/mutex.h>
19#include <linux/debugfs.h>
20#include <linux/caif/caif_socket.h>
21#include <asm/atomic.h>
22#include <net/sock.h>
23#include <net/tcp_states.h>
24#include <net/caif/caif_layer.h>
25#include <net/caif/caif_dev.h>
26#include <net/caif/cfpkt.h>
27
28MODULE_LICENSE("GPL");
29MODULE_ALIAS_NETPROTO(AF_CAIF);
30
31#define CAIF_DEF_SNDBUF (4096*10)
32#define CAIF_DEF_RCVBUF (4096*100)
33
34/*
 35 * CAIF re-uses the TCP socket states.
 36 * The caif_states value stored in sk_state reflects the state reported
 37 * by the CAIF stack, while sk_socket->state is the state of the socket.
38 */
39enum caif_states {
40 CAIF_CONNECTED = TCP_ESTABLISHED,
41 CAIF_CONNECTING = TCP_SYN_SENT,
42 CAIF_DISCONNECTED = TCP_CLOSE
43};
44
45#define TX_FLOW_ON_BIT 1
46#define RX_FLOW_ON_BIT 2
47
48static struct dentry *debugfsdir;
49
50#ifdef CONFIG_DEBUG_FS
51struct debug_fs_counter {
52 atomic_t caif_nr_socks;
53 atomic_t num_connect_req;
54 atomic_t num_connect_resp;
55 atomic_t num_connect_fail_resp;
56 atomic_t num_disconnect;
57 atomic_t num_remote_shutdown_ind;
58 atomic_t num_tx_flow_off_ind;
59 atomic_t num_tx_flow_on_ind;
60 atomic_t num_rx_flow_off;
61 atomic_t num_rx_flow_on;
62};
63static struct debug_fs_counter cnt;
64#define dbfs_atomic_inc(v) atomic_inc(v)
65#define dbfs_atomic_dec(v) atomic_dec(v)
66#else
67#define dbfs_atomic_inc(v)
68#define dbfs_atomic_dec(v)
69#endif
70
71struct caifsock {
72 struct sock sk; /* must be first member */
73 struct cflayer layer;
74 char name[CAIF_LAYER_NAME_SZ]; /* Used for debugging */
75 u32 flow_state;
76 struct caif_connect_request conn_req;
77 struct mutex readlock;
78 struct dentry *debugfs_socket_dir;
79 int headroom, tailroom, maxframe;
80};
81
82static int rx_flow_is_on(struct caifsock *cf_sk)
83{
84 return test_bit(RX_FLOW_ON_BIT,
85 (void *) &cf_sk->flow_state);
86}
87
88static int tx_flow_is_on(struct caifsock *cf_sk)
89{
90 return test_bit(TX_FLOW_ON_BIT,
91 (void *) &cf_sk->flow_state);
92}
93
94static void set_rx_flow_off(struct caifsock *cf_sk)
95{
96 clear_bit(RX_FLOW_ON_BIT,
97 (void *) &cf_sk->flow_state);
98}
99
100static void set_rx_flow_on(struct caifsock *cf_sk)
101{
102 set_bit(RX_FLOW_ON_BIT,
103 (void *) &cf_sk->flow_state);
104}
105
106static void set_tx_flow_off(struct caifsock *cf_sk)
107{
108 clear_bit(TX_FLOW_ON_BIT,
109 (void *) &cf_sk->flow_state);
110}
111
112static void set_tx_flow_on(struct caifsock *cf_sk)
113{
114 set_bit(TX_FLOW_ON_BIT,
115 (void *) &cf_sk->flow_state);
116}
117
118static void caif_read_lock(struct sock *sk)
119{
120 struct caifsock *cf_sk;
121 cf_sk = container_of(sk, struct caifsock, sk);
122 mutex_lock(&cf_sk->readlock);
123}
124
125static void caif_read_unlock(struct sock *sk)
126{
127 struct caifsock *cf_sk;
128 cf_sk = container_of(sk, struct caifsock, sk);
129 mutex_unlock(&cf_sk->readlock);
130}
131
132static int sk_rcvbuf_lowwater(struct caifsock *cf_sk)
133{
 134	/* A quarter of the full buffer is used as the low water mark */
135 return cf_sk->sk.sk_rcvbuf / 4;
136}
137
138static void caif_flow_ctrl(struct sock *sk, int mode)
139{
140 struct caifsock *cf_sk;
141 cf_sk = container_of(sk, struct caifsock, sk);
142 if (cf_sk->layer.dn && cf_sk->layer.dn->modemcmd)
143 cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode);
144}
145
146/*
 147 * Copied from sock.c:sock_queue_rcv_skb(), but changed so that packets
 148 * are not dropped; instead, CAIF sends flow off.
149 */
150static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
151{
152 int err;
153 int skb_len;
154 unsigned long flags;
155 struct sk_buff_head *list = &sk->sk_receive_queue;
156 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
157
158 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
159 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
160 trace_printk("CAIF: %s():"
161 " sending flow OFF (queue len = %d %d)\n",
162 __func__,
163 atomic_read(&cf_sk->sk.sk_rmem_alloc),
164 sk_rcvbuf_lowwater(cf_sk));
165 set_rx_flow_off(cf_sk);
166 dbfs_atomic_inc(&cnt.num_rx_flow_off);
167 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
168 }
169
170 err = sk_filter(sk, skb);
171 if (err)
172 return err;
173 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
174 set_rx_flow_off(cf_sk);
175 trace_printk("CAIF: %s():"
176 " sending flow OFF due to rmem_schedule\n",
177 __func__);
178 dbfs_atomic_inc(&cnt.num_rx_flow_off);
179 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
180 }
181 skb->dev = NULL;
182 skb_set_owner_r(skb, sk);
183 /* Cache the SKB length before we tack it onto the receive
184 * queue. Once it is added it no longer belongs to us and
185 * may be freed by other threads of control pulling packets
186 * from the queue.
187 */
188 skb_len = skb->len;
189 spin_lock_irqsave(&list->lock, flags);
190 if (!sock_flag(sk, SOCK_DEAD))
191 __skb_queue_tail(list, skb);
192 spin_unlock_irqrestore(&list->lock, flags);
193
194 if (!sock_flag(sk, SOCK_DEAD))
195 sk->sk_data_ready(sk, skb_len);
196 else
197 kfree_skb(skb);
198 return 0;
199}
200
201/* Packet Receive Callback function called from CAIF Stack */
202static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt)
203{
204 struct caifsock *cf_sk;
205 struct sk_buff *skb;
206
207 cf_sk = container_of(layr, struct caifsock, layer);
208 skb = cfpkt_tonative(pkt);
209
210 if (unlikely(cf_sk->sk.sk_state != CAIF_CONNECTED)) {
211 cfpkt_destroy(pkt);
212 return 0;
213 }
214 caif_queue_rcv_skb(&cf_sk->sk, skb);
215 return 0;
216}
217
218/* Packet Control Callback function called from CAIF */
219static void caif_ctrl_cb(struct cflayer *layr,
220 enum caif_ctrlcmd flow,
221 int phyid)
222{
223 struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
224 switch (flow) {
225 case CAIF_CTRLCMD_FLOW_ON_IND:
226 /* OK from modem to start sending again */
227 dbfs_atomic_inc(&cnt.num_tx_flow_on_ind);
228 set_tx_flow_on(cf_sk);
229 cf_sk->sk.sk_state_change(&cf_sk->sk);
230 break;
231
232 case CAIF_CTRLCMD_FLOW_OFF_IND:
233 /* Modem asks us to shut up */
234 dbfs_atomic_inc(&cnt.num_tx_flow_off_ind);
235 set_tx_flow_off(cf_sk);
236 cf_sk->sk.sk_state_change(&cf_sk->sk);
237 break;
238
239 case CAIF_CTRLCMD_INIT_RSP:
240 /* We're now connected */
241 dbfs_atomic_inc(&cnt.num_connect_resp);
242 cf_sk->sk.sk_state = CAIF_CONNECTED;
243 set_tx_flow_on(cf_sk);
244 cf_sk->sk.sk_state_change(&cf_sk->sk);
245 break;
246
247 case CAIF_CTRLCMD_DEINIT_RSP:
248 /* We're now disconnected */
249 cf_sk->sk.sk_state = CAIF_DISCONNECTED;
250 cf_sk->sk.sk_state_change(&cf_sk->sk);
251 cfcnfg_release_adap_layer(&cf_sk->layer);
252 break;
253
254 case CAIF_CTRLCMD_INIT_FAIL_RSP:
255 /* Connect request failed */
256 dbfs_atomic_inc(&cnt.num_connect_fail_resp);
257 cf_sk->sk.sk_err = ECONNREFUSED;
258 cf_sk->sk.sk_state = CAIF_DISCONNECTED;
259 cf_sk->sk.sk_shutdown = SHUTDOWN_MASK;
260 /*
261 * Socket "standards" seems to require POLLOUT to
262 * be set at connect failure.
263 */
264 set_tx_flow_on(cf_sk);
265 cf_sk->sk.sk_state_change(&cf_sk->sk);
266 break;
267
268 case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
269 /* Modem has closed this connection, or device is down. */
270 dbfs_atomic_inc(&cnt.num_remote_shutdown_ind);
271 cf_sk->sk.sk_shutdown = SHUTDOWN_MASK;
272 cf_sk->sk.sk_err = ECONNRESET;
273 set_rx_flow_on(cf_sk);
274 cf_sk->sk.sk_error_report(&cf_sk->sk);
275 break;
276
277 default:
278 pr_debug("CAIF: %s(): Unexpected flow command %d\n",
279 __func__, flow);
280 }
281}
282
283static void caif_check_flow_release(struct sock *sk)
284{
285 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
286
287 if (rx_flow_is_on(cf_sk))
288 return;
289
290 if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) {
291 dbfs_atomic_inc(&cnt.num_rx_flow_on);
292 set_rx_flow_on(cf_sk);
293 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ);
294 }
295}
296
297/*
298 * Copied from unix_dgram_recvmsg, but removed credit checks,
 299 * changed locking and address handling, and added MSG_TRUNC.
300 */
301static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
302 struct msghdr *m, size_t len, int flags)
303
304{
305 struct sock *sk = sock->sk;
306 struct sk_buff *skb;
307 int ret;
308 int copylen;
309
310 ret = -EOPNOTSUPP;
311 if (m->msg_flags&MSG_OOB)
312 goto read_error;
313
314 skb = skb_recv_datagram(sk, flags, 0 , &ret);
315 if (!skb)
316 goto read_error;
317 copylen = skb->len;
318 if (len < copylen) {
319 m->msg_flags |= MSG_TRUNC;
320 copylen = len;
321 }
322
323 ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, copylen);
324 if (ret)
325 goto out_free;
326
327 ret = (flags & MSG_TRUNC) ? skb->len : copylen;
328out_free:
329 skb_free_datagram(sk, skb);
330 caif_check_flow_release(sk);
331 return ret;
332
333read_error:
334 return ret;
335}
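
The MSG_TRUNC handling above mirrors standard datagram semantics: with the flag set, recv() reports the full datagram length even when the buffer was too small. A user-space sketch (not part of this diff); fd is assumed to be a connected CAIF seqpacket socket, as in the sketch after the Kconfig above.

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>

static void demo_read_one(int fd)
{
	char buf[16];
	ssize_t n = recv(fd, buf, sizeof(buf), MSG_TRUNC);

	/* n is the original datagram length, not the truncated copy */
	if (n > (ssize_t)sizeof(buf))
		fprintf(stderr, "datagram was %zd bytes, only %zu copied\n",
			n, sizeof(buf));
}
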
336
337
338/* Copied from unix_stream_wait_data, identical except for lock call. */
339static long caif_stream_data_wait(struct sock *sk, long timeo)
340{
341 DEFINE_WAIT(wait);
342 lock_sock(sk);
343
344 for (;;) {
345 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
346
347 if (!skb_queue_empty(&sk->sk_receive_queue) ||
348 sk->sk_err ||
349 sk->sk_state != CAIF_CONNECTED ||
350 sock_flag(sk, SOCK_DEAD) ||
351 (sk->sk_shutdown & RCV_SHUTDOWN) ||
352 signal_pending(current) ||
353 !timeo)
354 break;
355
356 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
357 release_sock(sk);
358 timeo = schedule_timeout(timeo);
359 lock_sock(sk);
360 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
361 }
362
363 finish_wait(sk_sleep(sk), &wait);
364 release_sock(sk);
365 return timeo;
366}
367
368
369/*
370 * Copied from unix_stream_recvmsg, but removed credit checks,
371 * changed locking calls, changed address handling.
372 */
373static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
374 struct msghdr *msg, size_t size,
375 int flags)
376{
377 struct sock *sk = sock->sk;
378 int copied = 0;
379 int target;
380 int err = 0;
381 long timeo;
382
383 err = -EOPNOTSUPP;
384 if (flags&MSG_OOB)
385 goto out;
386
387 msg->msg_namelen = 0;
388
389 /*
390 * Lock the socket to prevent queue disordering
391	 * while we sleep in memcpy_toiovec
392 */
393 err = -EAGAIN;
394 if (sk->sk_state == CAIF_CONNECTING)
395 goto out;
396
397 caif_read_lock(sk);
398 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
399 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
400
401 do {
402 int chunk;
403 struct sk_buff *skb;
404
405 lock_sock(sk);
406 skb = skb_dequeue(&sk->sk_receive_queue);
407 caif_check_flow_release(sk);
408
409 if (skb == NULL) {
410 if (copied >= target)
411 goto unlock;
412 /*
413 * POSIX 1003.1g mandates this order.
414 */
415 err = sock_error(sk);
416 if (err)
417 goto unlock;
418 err = -ECONNRESET;
419 if (sk->sk_shutdown & RCV_SHUTDOWN)
420 goto unlock;
421
422 err = -EPIPE;
423 if (sk->sk_state != CAIF_CONNECTED)
424 goto unlock;
425 if (sock_flag(sk, SOCK_DEAD))
426 goto unlock;
427
428 release_sock(sk);
429
430 err = -EAGAIN;
431 if (!timeo)
432 break;
433
434 caif_read_unlock(sk);
435
436 timeo = caif_stream_data_wait(sk, timeo);
437
438 if (signal_pending(current)) {
439 err = sock_intr_errno(timeo);
440 goto out;
441 }
442 caif_read_lock(sk);
443 continue;
444unlock:
445 release_sock(sk);
446 break;
447 }
448 release_sock(sk);
449 chunk = min_t(unsigned int, skb->len, size);
450 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
451 skb_queue_head(&sk->sk_receive_queue, skb);
452 if (copied == 0)
453 copied = -EFAULT;
454 break;
455 }
456 copied += chunk;
457 size -= chunk;
458
459 /* Mark read part of skb as used */
460 if (!(flags & MSG_PEEK)) {
461 skb_pull(skb, chunk);
462
463 /* put the skb back if we didn't use it up. */
464 if (skb->len) {
465 skb_queue_head(&sk->sk_receive_queue, skb);
466 break;
467 }
468 kfree_skb(skb);
469
470 } else {
471 /*
472 * It is questionable, see note in unix_dgram_recvmsg.
473 */
474 /* put message back and return */
475 skb_queue_head(&sk->sk_receive_queue, skb);
476 break;
477 }
478 } while (size);
479 caif_read_unlock(sk);
480
481out:
482 return copied ? : err;
483}
484
485/*
486 * Copied from sock.c:sock_wait_for_wmem, but change to wait for
487 * CAIF flow-on and sock_writable.
488 */
489static long caif_wait_for_flow_on(struct caifsock *cf_sk,
490 int wait_writeable, long timeo, int *err)
491{
492 struct sock *sk = &cf_sk->sk;
493 DEFINE_WAIT(wait);
494 for (;;) {
495 *err = 0;
496 if (tx_flow_is_on(cf_sk) &&
497 (!wait_writeable || sock_writeable(&cf_sk->sk)))
498 break;
499 *err = -ETIMEDOUT;
500 if (!timeo)
501 break;
502 *err = -ERESTARTSYS;
503 if (signal_pending(current))
504 break;
505 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
506 *err = -ECONNRESET;
507 if (sk->sk_shutdown & SHUTDOWN_MASK)
508 break;
509 *err = -sk->sk_err;
510 if (sk->sk_err)
511 break;
512 *err = -EPIPE;
513 if (cf_sk->sk.sk_state != CAIF_CONNECTED)
514 break;
515 timeo = schedule_timeout(timeo);
516 }
517 finish_wait(sk_sleep(sk), &wait);
518 return timeo;
519}
520
521/*
522 * Transmit a SKB. The device may temporarily request re-transmission
523 * by returning EAGAIN.
524 */
525static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
526 int noblock, long timeo)
527{
528 struct cfpkt *pkt;
529 int ret, loopcnt = 0;
530
531 pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb);
532 memset(cfpkt_info(pkt), 0, sizeof(struct caif_payload_info));
533 do {
534
535 ret = -ETIMEDOUT;
536
537 /* Slight paranoia, probably not needed. */
538 if (unlikely(loopcnt++ > 1000)) {
539 pr_warning("CAIF: %s(): transmit retries failed,"
540 " error = %d\n", __func__, ret);
541 break;
542 }
543
544 if (cf_sk->layer.dn != NULL)
545 ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
546 if (likely(ret >= 0))
547 break;
548 /* if transmit return -EAGAIN, then retry */
549 if (noblock && ret == -EAGAIN)
550 break;
551 timeo = caif_wait_for_flow_on(cf_sk, 0, timeo, &ret);
552 if (signal_pending(current)) {
553 ret = sock_intr_errno(timeo);
554 break;
555 }
556 if (ret)
557 break;
558 if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
559 sock_flag(&cf_sk->sk, SOCK_DEAD) ||
560 (cf_sk->sk.sk_shutdown & RCV_SHUTDOWN)) {
561 ret = -EPIPE;
562 cf_sk->sk.sk_err = EPIPE;
563 break;
564 }
565 } while (ret == -EAGAIN);
566 return ret;
567}
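
transmit_skb() retries internally for blocking sockets but hands
-EAGAIN straight back to non-blocking callers. A hedged user-space
counterpart: wait for POLLOUT, which caif_poll() below reports only
while the CAIF tx flow is on, then retry the send.

	#include <errno.h>
	#include <poll.h>
	#include <sys/socket.h>

	/* Retry a non-blocking send on EAGAIN; 'fd' is an assumed
	 * connected AF_CAIF socket opened with O_NONBLOCK. */
	static ssize_t send_retry(int fd, const void *buf, size_t len)
	{
		struct pollfd pfd = { .fd = fd, .events = POLLOUT };

		for (;;) {
			ssize_t n = send(fd, buf, len, 0);

			if (n >= 0 || errno != EAGAIN)
				return n;
			if (poll(&pfd, 1, -1) < 0)
				return -1;
		}
	}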
568
569/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */
570static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
571 struct msghdr *msg, size_t len)
572{
573 struct sock *sk = sock->sk;
574 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
575 int buffer_size;
576 int ret = 0;
577 struct sk_buff *skb = NULL;
578 int noblock;
579 long timeo;
580 caif_assert(cf_sk);
581 ret = sock_error(sk);
582 if (ret)
583 goto err;
584
585 ret = -EOPNOTSUPP;
586 if (msg->msg_flags&MSG_OOB)
587 goto err;
588
589 ret = -EOPNOTSUPP;
590 if (msg->msg_namelen)
591 goto err;
592
593 ret = -EINVAL;
594 if (unlikely(msg->msg_iov->iov_base == NULL))
595 goto err;
596 noblock = msg->msg_flags & MSG_DONTWAIT;
597
598 timeo = sock_sndtimeo(sk, noblock);
599 timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
600 1, timeo, &ret);
601
602 if (ret)
603 goto err;
604 ret = -EPIPE;
605 if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
606 sock_flag(sk, SOCK_DEAD) ||
607 (sk->sk_shutdown & RCV_SHUTDOWN))
608 goto err;
609
610 /* Error if trying to write more than maximum frame size. */
611 ret = -EMSGSIZE;
612 if (len > cf_sk->maxframe && cf_sk->sk.sk_protocol != CAIFPROTO_RFM)
613 goto err;
614
615 buffer_size = len + cf_sk->headroom + cf_sk->tailroom;
616
617 ret = -ENOMEM;
618 skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret);
619
620 if (!skb || skb_tailroom(skb) < buffer_size)
621 goto err;
622
623 skb_reserve(skb, cf_sk->headroom);
624
625 ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
626
627 if (ret)
628 goto err;
629 ret = transmit_skb(skb, cf_sk, noblock, timeo);
630 if (ret < 0)
631 goto err;
632 return len;
633err:
634 kfree_skb(skb);
635 return ret;
636}
637
638/*
639 * Copied from unix_stream_sendmsg and adapted to CAIF:
640	 * removed permission handling, added waiting for flow on,
641 * and other minor adaptations.
642 */
643static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
644 struct msghdr *msg, size_t len)
645{
646 struct sock *sk = sock->sk;
647 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
648 int err, size;
649 struct sk_buff *skb;
650 int sent = 0;
651 long timeo;
652
653 err = -EOPNOTSUPP;
654 if (unlikely(msg->msg_flags&MSG_OOB))
655 goto out_err;
656
657 if (unlikely(msg->msg_namelen))
658 goto out_err;
659
660 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
661	timeo = caif_wait_for_flow_on(cf_sk, 1, timeo, &err);
662	if (err)
663		goto out_err;
664
663 if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
664 goto pipe_err;
665
666 while (sent < len) {
667
668 size = len-sent;
669
670 if (size > cf_sk->maxframe)
671 size = cf_sk->maxframe;
672
673 /* If size is more than half of sndbuf, chop up message */
674 if (size > ((sk->sk_sndbuf >> 1) - 64))
675 size = (sk->sk_sndbuf >> 1) - 64;
676
677 if (size > SKB_MAX_ALLOC)
678 size = SKB_MAX_ALLOC;
679
680 skb = sock_alloc_send_skb(sk,
681 size + cf_sk->headroom +
682 cf_sk->tailroom,
683 msg->msg_flags&MSG_DONTWAIT,
684 &err);
685 if (skb == NULL)
686 goto out_err;
687
688 skb_reserve(skb, cf_sk->headroom);
689 /*
690		 * If you pass two values to sock_alloc_send_skb
691 * it tries to grab the large buffer with GFP_NOFS
692 * (which can fail easily), and if it fails grab the
693 * fallback size buffer which is under a page and will
694 * succeed. [Alan]
695 */
696 size = min_t(int, size, skb_tailroom(skb));
697
698 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
699 if (err) {
700 kfree_skb(skb);
701 goto out_err;
702 }
703 err = transmit_skb(skb, cf_sk,
704 msg->msg_flags&MSG_DONTWAIT, timeo);
705 if (err < 0) {
706 kfree_skb(skb);
707 goto pipe_err;
708 }
709 sent += size;
710 }
711
712 return sent;
713
714pipe_err:
715 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
716 send_sig(SIGPIPE, current, 0);
717 err = -EPIPE;
718out_err:
719 return sent ? : err;
720}
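
The chunk size above is the minimum of the bytes remaining, the link
maxframe, just under half of sk_sndbuf, and SKB_MAX_ALLOC. A minimal
stand-alone restatement; all limits are parameters because their
kernel values are not visible here:

	#include <stddef.h>

	static size_t caif_chunk(size_t remaining, size_t maxframe,
				 size_t sndbuf, size_t skb_max_alloc)
	{
		size_t size = remaining;

		if (size > maxframe)
			size = maxframe;
		if (size > sndbuf / 2 - 64)	/* room for overhead */
			size = sndbuf / 2 - 64;
		if (size > skb_max_alloc)
			size = skb_max_alloc;
		return size;
	}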
721
722static int setsockopt(struct socket *sock,
723 int lvl, int opt, char __user *ov, unsigned int ol)
724{
725 struct sock *sk = sock->sk;
726 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
727 int prio, linksel;
728 struct ifreq ifreq;
729
730 if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
731 return -ENOPROTOOPT;
732
733 switch (opt) {
734 case CAIFSO_LINK_SELECT:
735 if (ol < sizeof(int))
736 return -EINVAL;
737 if (lvl != SOL_CAIF)
738 goto bad_sol;
739		if (copy_from_user(&linksel, ov, sizeof(int)))
740			return -EFAULT;
741 lock_sock(&(cf_sk->sk));
742 cf_sk->conn_req.link_selector = linksel;
743 release_sock(&cf_sk->sk);
744 return 0;
745
746 case SO_PRIORITY:
747 if (lvl != SOL_SOCKET)
748 goto bad_sol;
749 if (ol < sizeof(int))
750 return -EINVAL;
751		if (copy_from_user(&prio, ov, sizeof(int)))
752			return -EFAULT;
753 lock_sock(&(cf_sk->sk));
754 cf_sk->conn_req.priority = prio;
755 release_sock(&cf_sk->sk);
756 return 0;
757
758 case SO_BINDTODEVICE:
759 if (lvl != SOL_SOCKET)
760 goto bad_sol;
761 if (ol < sizeof(struct ifreq))
762 return -EINVAL;
763 if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
764 return -EFAULT;
765 lock_sock(&(cf_sk->sk));
766 strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name,
767 sizeof(cf_sk->conn_req.link_name));
768 cf_sk->conn_req.link_name
769 [sizeof(cf_sk->conn_req.link_name)-1] = 0;
770 release_sock(&cf_sk->sk);
771 return 0;
772
773 case CAIFSO_REQ_PARAM:
774 if (lvl != SOL_CAIF)
775 goto bad_sol;
776 if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL)
777 return -ENOPROTOOPT;
778 lock_sock(&(cf_sk->sk));
779		if (ol > sizeof(cf_sk->conn_req.param.data) ||
780			copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
781			release_sock(&cf_sk->sk);
782			return -EINVAL;
783		}
784		cf_sk->conn_req.param.size = ol;
785 release_sock(&cf_sk->sk);
786 return 0;
787
788 default:
789 return -ENOPROTOOPT;
790 }
791
792 return 0;
793bad_sol:
794 return -ENOPROTOOPT;
795
796}
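
User-space view of the options handled above. SOL_CAIF and the
CAIFSO_*/CAIF_LINK_* constants are assumed to come from
linux/caif/caif_socket.h, and the options must be set before connect()
since setsockopt() rejects anything but SS_UNCONNECTED sockets; the
interface name "caif0" is purely illustrative.

	#include <string.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <linux/caif/caif_socket.h>

	/* Pick a high-bandwidth link and bind to a CAIF interface
	 * before connecting; 'fd' is an unconnected AF_CAIF socket. */
	static int caif_configure(int fd)
	{
		int linksel = CAIF_LINK_HIGH_BANDW;
		struct ifreq ifr;

		if (setsockopt(fd, SOL_CAIF, CAIFSO_LINK_SELECT,
			       &linksel, sizeof(linksel)) < 0)
			return -1;

		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "caif0", sizeof(ifr.ifr_name) - 1);
		return setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
				  &ifr, sizeof(ifr));
	}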
797
798/*
799 * caif_connect() - Connect a CAIF Socket
800 * Copied and modified af_irda.c:irda_connect().
801 *
802 * Note: by consulting "errno", the user space caller may learn the cause
803 * of the failure. Most of them are visible in the function, others may come
804 * from subroutines called and are listed here :
805 * o -EAFNOSUPPORT: bad socket family or type.
806 * o -ESOCKTNOSUPPORT: bad socket type or protocol
807 * o -EINVAL: bad socket address, or CAIF link type
808 * o -ECONNREFUSED: remote end refused the connection.
809 * o -EINPROGRESS: connect request sent but timed out (or non-blocking)
810 * o -EISCONN: already connected.
811 * o -ETIMEDOUT: Connection timed out (send timeout)
812 * o -ENODEV: No link layer to send request
813 * o -ECONNRESET: Received Shutdown indication or lost link layer
814 * o -ENOMEM: Out of memory
815 *
816 * State Strategy:
817 * o sk_state: holds the CAIF_* protocol state, it's updated by
818 * caif_ctrl_cb.
819 * o sock->state: holds the SS_* socket state and is updated by connect and
820 * disconnect.
821 */
822static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
823 int addr_len, int flags)
824{
825 struct sock *sk = sock->sk;
826 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
827 long timeo;
828 int err;
829	int ifindex, headroom, tailroom, mtu;
830 struct net_device *dev;
831
832 lock_sock(sk);
833
834 err = -EAFNOSUPPORT;
835 if (uaddr->sa_family != AF_CAIF)
836 goto out;
837
838 switch (sock->state) {
839 case SS_UNCONNECTED:
840 /* Normal case, a fresh connect */
841 caif_assert(sk->sk_state == CAIF_DISCONNECTED);
842 break;
843 case SS_CONNECTING:
844 switch (sk->sk_state) {
845 case CAIF_CONNECTED:
846 sock->state = SS_CONNECTED;
847 err = -EISCONN;
848 goto out;
849 case CAIF_DISCONNECTED:
850 /* Reconnect allowed */
851 break;
852 case CAIF_CONNECTING:
853 err = -EALREADY;
854 if (flags & O_NONBLOCK)
855 goto out;
856 goto wait_connect;
857 }
858 break;
859 case SS_CONNECTED:
860 caif_assert(sk->sk_state == CAIF_CONNECTED ||
861 sk->sk_state == CAIF_DISCONNECTED);
862 if (sk->sk_shutdown & SHUTDOWN_MASK) {
863 /* Allow re-connect after SHUTDOWN_IND */
864 caif_disconnect_client(&cf_sk->layer);
865 break;
866 }
867 /* No reconnect on a seqpacket socket */
868 err = -EISCONN;
869 goto out;
870 case SS_DISCONNECTING:
871 case SS_FREE:
872		caif_assert(0); /* Should never happen */
873 break;
874 }
875 sk->sk_state = CAIF_DISCONNECTED;
876 sock->state = SS_UNCONNECTED;
877 sk_stream_kill_queues(&cf_sk->sk);
878
879 err = -EINVAL;
880 if (addr_len != sizeof(struct sockaddr_caif))
881 goto out;
882
883 memcpy(&cf_sk->conn_req.sockaddr, uaddr,
884 sizeof(struct sockaddr_caif));
885
886 /* Move to connecting socket, start sending Connect Requests */
887 sock->state = SS_CONNECTING;
888 sk->sk_state = CAIF_CONNECTING;
889
890 dbfs_atomic_inc(&cnt.num_connect_req);
891 cf_sk->layer.receive = caif_sktrecv_cb;
892 err = caif_connect_client(&cf_sk->conn_req,
893 &cf_sk->layer, &ifindex, &headroom, &tailroom);
894 if (err < 0) {
895 cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
896 cf_sk->sk.sk_state = CAIF_DISCONNECTED;
897 goto out;
898 }
899	err = -ENODEV;
900	dev = dev_get_by_index(sock_net(sk), ifindex);
901	if (!dev)
902		goto out;
903	cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom);
904	mtu = dev->mtu;
905	dev_put(dev);
906	cf_sk->tailroom = tailroom;
907	cf_sk->maxframe = mtu - (headroom + tailroom);
908	if (cf_sk->maxframe < 1) {
909		pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n",
910			__func__, mtu);
911		goto out;
912	}
910
911 err = -EINPROGRESS;
912wait_connect:
913
914 if (sk->sk_state != CAIF_CONNECTED && (flags & O_NONBLOCK))
915 goto out;
916
917 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
918
919 release_sock(sk);
920 err = -ERESTARTSYS;
921 timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
922 sk->sk_state != CAIF_CONNECTING,
923 timeo);
924 lock_sock(sk);
925 if (timeo < 0)
926 goto out; /* -ERESTARTSYS */
927
928 err = -ETIMEDOUT;
929 if (timeo == 0 && sk->sk_state != CAIF_CONNECTED)
930 goto out;
931 if (sk->sk_state != CAIF_CONNECTED) {
932 sock->state = SS_UNCONNECTED;
933 err = sock_error(sk);
934 if (!err)
935 err = -ECONNREFUSED;
936 goto out;
937 }
938 sock->state = SS_CONNECTED;
939 err = 0;
940out:
941 release_sock(sk);
942 return err;
943}
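
A hedged user-space counterpart to caif_connect(): fill in struct
sockaddr_caif and connect. The address layout (u.dgm.connection_id) is
assumed from linux/caif/caif_socket.h of this series, and connection
id 1 is purely illustrative; caif_create() below also requires
CAP_SYS_ADMIN or CAP_NET_ADMIN.

	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <unistd.h>
	#include <linux/caif/caif_socket.h>

	int main(void)
	{
		struct sockaddr_caif addr;
		int fd = socket(AF_CAIF, SOCK_SEQPACKET,
				CAIFPROTO_DATAGRAM);

		if (fd < 0) {
			perror("socket");
			return 1;
		}
		memset(&addr, 0, sizeof(addr));
		addr.family = AF_CAIF;
		addr.u.dgm.connection_id = 1;	/* illustrative id */

		if (connect(fd, (struct sockaddr *)&addr,
			    sizeof(addr)) < 0)
			perror("connect");	/* see error list above */
		close(fd);
		return 0;
	}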
944
945/*
946 * caif_release() - Disconnect a CAIF Socket
947 * Copied and modified af_irda.c:irda_release().
948 */
949static int caif_release(struct socket *sock)
950{
951 struct sock *sk = sock->sk;
952 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
953 int res = 0;
954
955 if (!sk)
956 return 0;
957
958 set_tx_flow_off(cf_sk);
959
960 /*
961 * Ensure that packets are not queued after this point in time.
962	 * caif_queue_rcv_skb checks SOCK_DEAD while holding the queue
963	 * lock, so no packets are queued once the sock is dead.
964 */
965 spin_lock(&sk->sk_receive_queue.lock);
966 sock_set_flag(sk, SOCK_DEAD);
967 spin_unlock(&sk->sk_receive_queue.lock);
968 sock->sk = NULL;
969
970 dbfs_atomic_inc(&cnt.num_disconnect);
971
972 if (cf_sk->debugfs_socket_dir != NULL)
973 debugfs_remove_recursive(cf_sk->debugfs_socket_dir);
974
975 lock_sock(&(cf_sk->sk));
976 sk->sk_state = CAIF_DISCONNECTED;
977 sk->sk_shutdown = SHUTDOWN_MASK;
978
979 if (cf_sk->sk.sk_socket->state == SS_CONNECTED ||
980 cf_sk->sk.sk_socket->state == SS_CONNECTING)
981 res = caif_disconnect_client(&cf_sk->layer);
982
983 cf_sk->sk.sk_socket->state = SS_DISCONNECTING;
984 wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP);
985
986 sock_orphan(sk);
987 cf_sk->layer.dn = NULL;
988 sk_stream_kill_queues(&cf_sk->sk);
989 release_sock(sk);
990 sock_put(sk);
991 return res;
992}
993
994/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
995static unsigned int caif_poll(struct file *file,
996 struct socket *sock, poll_table *wait)
997{
998 struct sock *sk = sock->sk;
999 unsigned int mask;
1000 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
1001
1002 sock_poll_wait(file, sk_sleep(sk), wait);
1003 mask = 0;
1004
1005 /* exceptional events? */
1006 if (sk->sk_err)
1007 mask |= POLLERR;
1008 if (sk->sk_shutdown == SHUTDOWN_MASK)
1009 mask |= POLLHUP;
1010 if (sk->sk_shutdown & RCV_SHUTDOWN)
1011 mask |= POLLRDHUP;
1012
1013 /* readable? */
1014 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1015 (sk->sk_shutdown & RCV_SHUTDOWN))
1016 mask |= POLLIN | POLLRDNORM;
1017
1018 /*
1019	 * We set writable also when the other side has shut down the
1020 * connection. This prevents stuck sockets.
1021 */
1022 if (sock_writeable(sk) && tx_flow_is_on(cf_sk))
1023 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1024
1025 return mask;
1026}
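
caif_poll() maps CAIF state onto the usual poll bits. A small sketch
showing how user space would tell the shutdown cases apart; POLLRDHUP
needs _GNU_SOURCE, and 'fd' is again an assumed AF_CAIF socket.

	#define _GNU_SOURCE
	#include <poll.h>
	#include <stdio.h>

	static void report_state(int fd)
	{
		struct pollfd pfd = { .fd = fd,
				      .events = POLLIN | POLLRDHUP };

		if (poll(&pfd, 1, 0) <= 0)
			return;
		if (pfd.revents & POLLHUP)
			printf("both directions shut down\n");
		else if (pfd.revents & POLLRDHUP)
			printf("remote shutdown (RCV_SHUTDOWN)\n");
		if (pfd.revents & POLLIN)
			printf("data readable\n");
	}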
1027
1028static const struct proto_ops caif_seqpacket_ops = {
1029 .family = PF_CAIF,
1030 .owner = THIS_MODULE,
1031 .release = caif_release,
1032 .bind = sock_no_bind,
1033 .connect = caif_connect,
1034 .socketpair = sock_no_socketpair,
1035 .accept = sock_no_accept,
1036 .getname = sock_no_getname,
1037 .poll = caif_poll,
1038 .ioctl = sock_no_ioctl,
1039 .listen = sock_no_listen,
1040 .shutdown = sock_no_shutdown,
1041 .setsockopt = setsockopt,
1042 .getsockopt = sock_no_getsockopt,
1043 .sendmsg = caif_seqpkt_sendmsg,
1044 .recvmsg = caif_seqpkt_recvmsg,
1045 .mmap = sock_no_mmap,
1046 .sendpage = sock_no_sendpage,
1047};
1048
1049static const struct proto_ops caif_stream_ops = {
1050 .family = PF_CAIF,
1051 .owner = THIS_MODULE,
1052 .release = caif_release,
1053 .bind = sock_no_bind,
1054 .connect = caif_connect,
1055 .socketpair = sock_no_socketpair,
1056 .accept = sock_no_accept,
1057 .getname = sock_no_getname,
1058 .poll = caif_poll,
1059 .ioctl = sock_no_ioctl,
1060 .listen = sock_no_listen,
1061 .shutdown = sock_no_shutdown,
1062 .setsockopt = setsockopt,
1063 .getsockopt = sock_no_getsockopt,
1064 .sendmsg = caif_stream_sendmsg,
1065 .recvmsg = caif_stream_recvmsg,
1066 .mmap = sock_no_mmap,
1067 .sendpage = sock_no_sendpage,
1068};
1069
1070/* This function is called when a socket is finally destroyed. */
1071static void caif_sock_destructor(struct sock *sk)
1072{
1073 struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
1074 caif_assert(!atomic_read(&sk->sk_wmem_alloc));
1075 caif_assert(sk_unhashed(sk));
1076 caif_assert(!sk->sk_socket);
1077 if (!sock_flag(sk, SOCK_DEAD)) {
1078 pr_info("Attempt to release alive CAIF socket: %p\n", sk);
1079 return;
1080 }
1081 sk_stream_kill_queues(&cf_sk->sk);
1082 dbfs_atomic_dec(&cnt.caif_nr_socks);
1083}
1084
1085static int caif_create(struct net *net, struct socket *sock, int protocol,
1086 int kern)
1087{
1088 struct sock *sk = NULL;
1089 struct caifsock *cf_sk = NULL;
1090 static struct proto prot = {.name = "PF_CAIF",
1091 .owner = THIS_MODULE,
1092 .obj_size = sizeof(struct caifsock),
1093 };
1094
1095 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
1096 return -EPERM;
1097 /*
1098 * The sock->type specifies the socket type to use.
1099 * The CAIF socket is a packet stream in the sense
1100 * that it is packet based. CAIF trusts the reliability
1101 * of the link, no resending is implemented.
1102 */
1103 if (sock->type == SOCK_SEQPACKET)
1104 sock->ops = &caif_seqpacket_ops;
1105 else if (sock->type == SOCK_STREAM)
1106 sock->ops = &caif_stream_ops;
1107 else
1108 return -ESOCKTNOSUPPORT;
1109
1110 if (protocol < 0 || protocol >= CAIFPROTO_MAX)
1111 return -EPROTONOSUPPORT;
1112 /*
1113 * Set the socket state to unconnected. The socket state
1114 * is really not used at all in the net/core or socket.c but the
1115 * initialization makes sure that sock->state is not uninitialized.
1116 */
1117 sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot);
1118 if (!sk)
1119 return -ENOMEM;
1120
1121 cf_sk = container_of(sk, struct caifsock, sk);
1122
1123 /* Store the protocol */
1124 sk->sk_protocol = (unsigned char) protocol;
1125
1126 /* Sendbuf dictates the amount of outbound packets not yet sent */
1127 sk->sk_sndbuf = CAIF_DEF_SNDBUF;
1128 sk->sk_rcvbuf = CAIF_DEF_RCVBUF;
1129
1130 /*
1131 * Lock in order to try to stop someone from opening the socket
1132 * too early.
1133 */
1134 lock_sock(&(cf_sk->sk));
1135
1136	/* Initialize the non-zero default sock structure data. */
1137 sock_init_data(sock, sk);
1138 sk->sk_destruct = caif_sock_destructor;
1139
1140 mutex_init(&cf_sk->readlock); /* single task reading lock */
1141 cf_sk->layer.ctrlcmd = caif_ctrl_cb;
1142 cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
1143 cf_sk->sk.sk_state = CAIF_DISCONNECTED;
1144
1145 set_tx_flow_off(cf_sk);
1146 set_rx_flow_on(cf_sk);
1147
1148 /* Set default options on configuration */
1149 cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
1150 cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
1151 cf_sk->conn_req.protocol = protocol;
1152 /* Increase the number of sockets created. */
1153 dbfs_atomic_inc(&cnt.caif_nr_socks);
1154#ifdef CONFIG_DEBUG_FS
1155 if (!IS_ERR(debugfsdir)) {
1156		/* Fill in some debug information concerning this socket. */
1157 snprintf(cf_sk->name, sizeof(cf_sk->name), "cfsk%d",
1158 atomic_read(&cnt.caif_nr_socks));
1159
1160 cf_sk->debugfs_socket_dir =
1161 debugfs_create_dir(cf_sk->name, debugfsdir);
1162 debugfs_create_u32("sk_state", S_IRUSR | S_IWUSR,
1163 cf_sk->debugfs_socket_dir,
1164 (u32 *) &cf_sk->sk.sk_state);
1165 debugfs_create_u32("flow_state", S_IRUSR | S_IWUSR,
1166 cf_sk->debugfs_socket_dir, &cf_sk->flow_state);
1167 debugfs_create_u32("sk_rmem_alloc", S_IRUSR | S_IWUSR,
1168 cf_sk->debugfs_socket_dir,
1169 (u32 *) &cf_sk->sk.sk_rmem_alloc);
1170 debugfs_create_u32("sk_wmem_alloc", S_IRUSR | S_IWUSR,
1171 cf_sk->debugfs_socket_dir,
1172 (u32 *) &cf_sk->sk.sk_wmem_alloc);
1173 debugfs_create_u32("identity", S_IRUSR | S_IWUSR,
1174 cf_sk->debugfs_socket_dir,
1175 (u32 *) &cf_sk->layer.id);
1176 }
1177#endif
1178 release_sock(&cf_sk->sk);
1179 return 0;
1180}
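
The type and protocol checks in caif_create() are easy to probe from
user space. A sketch, run as root since the capability check comes
first; the constants are assumed to come from
linux/caif/caif_socket.h.

	#include <errno.h>
	#include <stdio.h>
	#include <sys/socket.h>
	#include <linux/caif/caif_socket.h>

	int main(void)
	{
		/* SOCK_DGRAM is neither seqpacket nor stream. */
		if (socket(AF_CAIF, SOCK_DGRAM, CAIFPROTO_AT) < 0)
			printf("SOCK_DGRAM: errno=%d (ESOCKTNOSUPPORT=%d)\n",
			       errno, ESOCKTNOSUPPORT);
		/* A protocol outside 0..CAIFPROTO_MAX-1 is rejected. */
		if (socket(AF_CAIF, SOCK_SEQPACKET, 999) < 0)
			printf("bad proto: errno=%d (EPROTONOSUPPORT=%d)\n",
			       errno, EPROTONOSUPPORT);
		return 0;
	}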
1181
1182
1183static struct net_proto_family caif_family_ops = {
1184 .family = PF_CAIF,
1185 .create = caif_create,
1186 .owner = THIS_MODULE,
1187};
1188
1189static int af_caif_init(void)
1190{
1191 int err = sock_register(&caif_family_ops);
1192	if (err)
1193 return err;
1194 return 0;
1195}
1196
1197static int __init caif_sktinit_module(void)
1198{
1199#ifdef CONFIG_DEBUG_FS
1200 debugfsdir = debugfs_create_dir("caif_sk", NULL);
1201 if (!IS_ERR(debugfsdir)) {
1202 debugfs_create_u32("num_sockets", S_IRUSR | S_IWUSR,
1203 debugfsdir,
1204 (u32 *) &cnt.caif_nr_socks);
1205 debugfs_create_u32("num_connect_req", S_IRUSR | S_IWUSR,
1206 debugfsdir,
1207 (u32 *) &cnt.num_connect_req);
1208 debugfs_create_u32("num_connect_resp", S_IRUSR | S_IWUSR,
1209 debugfsdir,
1210 (u32 *) &cnt.num_connect_resp);
1211 debugfs_create_u32("num_connect_fail_resp", S_IRUSR | S_IWUSR,
1212 debugfsdir,
1213 (u32 *) &cnt.num_connect_fail_resp);
1214 debugfs_create_u32("num_disconnect", S_IRUSR | S_IWUSR,
1215 debugfsdir,
1216 (u32 *) &cnt.num_disconnect);
1217 debugfs_create_u32("num_remote_shutdown_ind",
1218 S_IRUSR | S_IWUSR, debugfsdir,
1219 (u32 *) &cnt.num_remote_shutdown_ind);
1220 debugfs_create_u32("num_tx_flow_off_ind", S_IRUSR | S_IWUSR,
1221 debugfsdir,
1222 (u32 *) &cnt.num_tx_flow_off_ind);
1223 debugfs_create_u32("num_tx_flow_on_ind", S_IRUSR | S_IWUSR,
1224 debugfsdir,
1225 (u32 *) &cnt.num_tx_flow_on_ind);
1226 debugfs_create_u32("num_rx_flow_off", S_IRUSR | S_IWUSR,
1227 debugfsdir,
1228 (u32 *) &cnt.num_rx_flow_off);
1229 debugfs_create_u32("num_rx_flow_on", S_IRUSR | S_IWUSR,
1230 debugfsdir,
1231 (u32 *) &cnt.num_rx_flow_on);
1232 }
1233#endif
1234 return af_caif_init();
1235}
1236
1237static void __exit caif_sktexit_module(void)
1238{
1239 sock_unregister(PF_CAIF);
1240 if (debugfsdir != NULL)
1241 debugfs_remove_recursive(debugfsdir);
1242}
1243module_init(caif_sktinit_module);
1244module_exit(caif_sktexit_module);
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
new file mode 100644
index 000000000000..1c29189b344d
--- /dev/null
+++ b/net/caif/cfcnfg.c
@@ -0,0 +1,504 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6#include <linux/kernel.h>
7#include <linux/stddef.h>
8#include <linux/slab.h>
9#include <linux/netdevice.h>
10#include <net/caif/caif_layer.h>
11#include <net/caif/cfpkt.h>
12#include <net/caif/cfcnfg.h>
13#include <net/caif/cfctrl.h>
14#include <net/caif/cfmuxl.h>
15#include <net/caif/cffrml.h>
16#include <net/caif/cfserl.h>
17#include <net/caif/cfsrvl.h>
18
19#include <linux/module.h>
20#include <asm/atomic.h>
21
22#define MAX_PHY_LAYERS 7
23#define PHY_NAME_LEN 20
24
25#define container_obj(layr) container_of(layr, struct cfcnfg, layer)
26#define RFM_FRAGMENT_SIZE 4030
27
28/* Information about CAIF physical interfaces held by Config Module in order
29 * to manage physical interfaces
30 */
31struct cfcnfg_phyinfo {
32 /* Pointer to the layer below the MUX (framing layer) */
33 struct cflayer *frm_layer;
34 /* Pointer to the lowest actual physical layer */
35 struct cflayer *phy_layer;
36 /* Unique identifier of the physical interface */
37 unsigned int id;
38	/* Preference of the physical interface */
39 enum cfcnfg_phy_preference pref;
40
41 /* Reference count, number of channels using the device */
42 int phy_ref_count;
43
44 /* Information about the physical device */
45 struct dev_info dev_info;
46
47 /* Interface index */
48 int ifindex;
49
50 /* Use Start of frame extension */
51 bool use_stx;
52
53 /* Use Start of frame checksum */
54 bool use_fcs;
55};
56
57struct cfcnfg {
58 struct cflayer layer;
59 struct cflayer *ctrl;
60 struct cflayer *mux;
61 u8 last_phyid;
62 struct cfcnfg_phyinfo phy_layers[MAX_PHY_LAYERS];
63};
64
65static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id,
66 enum cfctrl_srv serv, u8 phyid,
67 struct cflayer *adapt_layer);
68static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id);
69static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
70 struct cflayer *adapt_layer);
71static void cfctrl_resp_func(void);
72static void cfctrl_enum_resp(void);
73
74struct cfcnfg *cfcnfg_create(void)
75{
76 struct cfcnfg *this;
77 struct cfctrl_rsp *resp;
78	/* Initialize this layer */
79 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
80 if (!this) {
81 pr_warning("CAIF: %s(): Out of memory\n", __func__);
82 return NULL;
83 }
84 this->mux = cfmuxl_create();
85 if (!this->mux)
86 goto out_of_mem;
87 this->ctrl = cfctrl_create();
88 if (!this->ctrl)
89 goto out_of_mem;
90	/* Initialize response functions */
91 resp = cfctrl_get_respfuncs(this->ctrl);
92 resp->enum_rsp = cfctrl_enum_resp;
93 resp->linkerror_ind = cfctrl_resp_func;
94 resp->linkdestroy_rsp = cfcnfg_linkdestroy_rsp;
95 resp->sleep_rsp = cfctrl_resp_func;
96 resp->wake_rsp = cfctrl_resp_func;
97 resp->restart_rsp = cfctrl_resp_func;
98 resp->radioset_rsp = cfctrl_resp_func;
99 resp->linksetup_rsp = cfcnfg_linkup_rsp;
100 resp->reject_rsp = cfcnfg_reject_rsp;
101
102 this->last_phyid = 1;
103
104 cfmuxl_set_uplayer(this->mux, this->ctrl, 0);
105 layer_set_dn(this->ctrl, this->mux);
106 layer_set_up(this->ctrl, this);
107 return this;
108out_of_mem:
109 pr_warning("CAIF: %s(): Out of memory\n", __func__);
110 kfree(this->mux);
111 kfree(this->ctrl);
112 kfree(this);
113 return NULL;
114}
115EXPORT_SYMBOL(cfcnfg_create);
116
117void cfcnfg_remove(struct cfcnfg *cfg)
118{
119 if (cfg) {
120 kfree(cfg->mux);
121 kfree(cfg->ctrl);
122 kfree(cfg);
123 }
124}
125
126static void cfctrl_resp_func(void)
127{
128}
129
130static void cfctrl_enum_resp(void)
131{
132}
133
134struct dev_info *cfcnfg_get_phyid(struct cfcnfg *cnfg,
135 enum cfcnfg_phy_preference phy_pref)
136{
137 u16 i;
138
139 /* Try to match with specified preference */
140 for (i = 1; i < MAX_PHY_LAYERS; i++) {
141 if (cnfg->phy_layers[i].id == i &&
142 cnfg->phy_layers[i].pref == phy_pref &&
143 cnfg->phy_layers[i].frm_layer != NULL) {
144 caif_assert(cnfg->phy_layers != NULL);
145 caif_assert(cnfg->phy_layers[i].id == i);
146 return &cnfg->phy_layers[i].dev_info;
147 }
148 }
149	/* Otherwise return the first interface that is registered */
150 for (i = 1; i < MAX_PHY_LAYERS; i++) {
151 if (cnfg->phy_layers[i].id == i) {
152 caif_assert(cnfg->phy_layers != NULL);
153 caif_assert(cnfg->phy_layers[i].id == i);
154 return &cnfg->phy_layers[i].dev_info;
155 }
156 }
157
158 return NULL;
159}
160
161static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg,
162 u8 phyid)
163{
164 int i;
165 /* Try to match with specified preference */
166 for (i = 0; i < MAX_PHY_LAYERS; i++)
167 if (cnfg->phy_layers[i].frm_layer != NULL &&
168 cnfg->phy_layers[i].id == phyid)
169 return &cnfg->phy_layers[i];
170 return NULL;
171}
172
173int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
174{
175 int i;
176
177 /* Try to match with specified name */
178 for (i = 0; i < MAX_PHY_LAYERS; i++) {
179 if (cnfg->phy_layers[i].frm_layer != NULL
180 && strcmp(cnfg->phy_layers[i].phy_layer->name,
181 name) == 0)
182 return cnfg->phy_layers[i].frm_layer->id;
183 }
184 return 0;
185}
186
187int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
188{
189 u8 channel_id = 0;
190 int ret = 0;
191 struct cflayer *servl = NULL;
192 struct cfcnfg_phyinfo *phyinfo = NULL;
193 u8 phyid = 0;
194 caif_assert(adap_layer != NULL);
195 channel_id = adap_layer->id;
196 if (adap_layer->dn == NULL || channel_id == 0) {
197		pr_err("CAIF: %s(): adap_layer->id is 0\n", __func__);
198 ret = -ENOTCONN;
199 goto end;
200 }
201	servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id);
202	if (servl == NULL) {
203		pr_err("CAIF: %s(): PROTOCOL ERROR "
204			"- Error removing service_layer Channel_Id(%d)",
205			__func__, channel_id);
206		ret = -EINVAL;
207		goto end;
208	}
209	layer_set_up(servl, NULL);
210	ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
213 caif_assert(channel_id == servl->id);
214 if (adap_layer->dn != NULL) {
215 phyid = cfsrvl_getphyid(adap_layer->dn);
216
217 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
218 if (phyinfo == NULL) {
219 pr_warning("CAIF: %s(): "
220 "No interface to send disconnect to\n",
221 __func__);
222 ret = -ENODEV;
223 goto end;
224 }
225 if (phyinfo->id != phyid ||
226 phyinfo->phy_layer->id != phyid ||
227 phyinfo->frm_layer->id != phyid) {
228 pr_err("CAIF: %s(): "
229 "Inconsistency in phy registration\n",
230 __func__);
231 ret = -EINVAL;
232 goto end;
233 }
234 }
235 if (phyinfo != NULL && --phyinfo->phy_ref_count == 0 &&
236 phyinfo->phy_layer != NULL &&
237 phyinfo->phy_layer->modemcmd != NULL) {
238 phyinfo->phy_layer->modemcmd(phyinfo->phy_layer,
239 _CAIF_MODEMCMD_PHYIF_USELESS);
240 }
241end:
242	if (servl != NULL)
243		cfsrvl_put(servl);
243 cfctrl_cancel_req(cnfg->ctrl, adap_layer);
244 if (adap_layer->ctrlcmd != NULL)
245 adap_layer->ctrlcmd(adap_layer, CAIF_CTRLCMD_DEINIT_RSP, 0);
246 return ret;
247
248}
249EXPORT_SYMBOL(cfcnfg_disconn_adapt_layer);
250
251void cfcnfg_release_adap_layer(struct cflayer *adap_layer)
252{
253 if (adap_layer->dn)
254 cfsrvl_put(adap_layer->dn);
255}
256EXPORT_SYMBOL(cfcnfg_release_adap_layer);
257
258static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id)
259{
260}
261
262int protohead[CFCTRL_SRV_MASK] = {
263 [CFCTRL_SRV_VEI] = 4,
264 [CFCTRL_SRV_DATAGRAM] = 7,
265 [CFCTRL_SRV_UTIL] = 4,
266 [CFCTRL_SRV_RFM] = 3,
267 [CFCTRL_SRV_DBG] = 3,
268};
269
270int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
271 struct cfctrl_link_param *param,
272 struct cflayer *adap_layer,
273 int *ifindex,
274 int *proto_head,
275 int *proto_tail)
276{
277 struct cflayer *frml;
278 if (adap_layer == NULL) {
279		pr_err("CAIF: %s(): adap_layer is NULL\n", __func__);
280 return -EINVAL;
281 }
282 if (adap_layer->receive == NULL) {
283 pr_err("CAIF: %s(): adap_layer->receive is NULL", __func__);
284 return -EINVAL;
285 }
286 if (adap_layer->ctrlcmd == NULL) {
287 pr_err("CAIF: %s(): adap_layer->ctrlcmd == NULL", __func__);
288 return -EINVAL;
289 }
290 frml = cnfg->phy_layers[param->phyid].frm_layer;
291 if (frml == NULL) {
292 pr_err("CAIF: %s(): Specified PHY type does not exist!",
293 __func__);
294 return -ENODEV;
295 }
296 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id);
297 caif_assert(cnfg->phy_layers[param->phyid].frm_layer->id ==
298 param->phyid);
299 caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id ==
300 param->phyid);
301
302 *ifindex = cnfg->phy_layers[param->phyid].ifindex;
303 *proto_head =
304 protohead[param->linktype]+
305 (cnfg->phy_layers[param->phyid].use_stx ? 1 : 0);
306
307 *proto_tail = 2;
308
309 /* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */
310 cfctrl_enum_req(cnfg->ctrl, param->phyid);
311 return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer);
312}
313EXPORT_SYMBOL(cfcnfg_add_adaptation_layer);
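
A worked example of the headroom arithmetic above: a datagram link
(protohead 7) on a PHY with the start-of-frame extension enabled
reserves 7 + 1 = 8 bytes of protocol head and 2 bytes of tail.

	#include <stdio.h>

	int main(void)
	{
		/* Values copied from protohead[] and the code above. */
		int head_datagram = 7;	/* protohead[CFCTRL_SRV_DATAGRAM] */
		int use_stx = 1;	/* start-of-frame extension on */
		int proto_tail = 2;

		printf("proto_head=%d proto_tail=%d\n",
		       head_datagram + use_stx, proto_tail);
		return 0;
	}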
314
315static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
316 struct cflayer *adapt_layer)
317{
318 if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL)
319 adapt_layer->ctrlcmd(adapt_layer,
320 CAIF_CTRLCMD_INIT_FAIL_RSP, 0);
321}
322
323static void
324cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
325 u8 phyid, struct cflayer *adapt_layer)
326{
327 struct cfcnfg *cnfg = container_obj(layer);
328 struct cflayer *servicel = NULL;
329 struct cfcnfg_phyinfo *phyinfo;
330 struct net_device *netdev;
331
332 if (adapt_layer == NULL) {
333 pr_debug("CAIF: %s(): link setup response "
334 "but no client exist, send linkdown back\n",
335 __func__);
336 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
337 return;
338 }
339
340 caif_assert(cnfg != NULL);
341 caif_assert(phyid != 0);
342 phyinfo = &cnfg->phy_layers[phyid];
343 caif_assert(phyinfo->id == phyid);
344 caif_assert(phyinfo->phy_layer != NULL);
345 caif_assert(phyinfo->phy_layer->id == phyid);
346
347 phyinfo->phy_ref_count++;
348 if (phyinfo->phy_ref_count == 1 &&
349 phyinfo->phy_layer->modemcmd != NULL) {
350 phyinfo->phy_layer->modemcmd(phyinfo->phy_layer,
351 _CAIF_MODEMCMD_PHYIF_USEFULL);
352 }
353 adapt_layer->id = channel_id;
354
355 switch (serv) {
356 case CFCTRL_SRV_VEI:
357 servicel = cfvei_create(channel_id, &phyinfo->dev_info);
358 break;
359 case CFCTRL_SRV_DATAGRAM:
360 servicel = cfdgml_create(channel_id, &phyinfo->dev_info);
361 break;
362 case CFCTRL_SRV_RFM:
363 netdev = phyinfo->dev_info.dev;
364 servicel = cfrfml_create(channel_id, &phyinfo->dev_info,
365 netdev->mtu);
366 break;
367 case CFCTRL_SRV_UTIL:
368 servicel = cfutill_create(channel_id, &phyinfo->dev_info);
369 break;
370 case CFCTRL_SRV_VIDEO:
371 servicel = cfvidl_create(channel_id, &phyinfo->dev_info);
372 break;
373 case CFCTRL_SRV_DBG:
374 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
375 break;
376 default:
377 pr_err("CAIF: %s(): Protocol error. "
378 "Link setup response - unknown channel type\n",
379 __func__);
380 return;
381 }
382 if (!servicel) {
383 pr_warning("CAIF: %s(): Out of memory\n", __func__);
384 return;
385 }
386 layer_set_dn(servicel, cnfg->mux);
387 cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id);
388 layer_set_up(servicel, adapt_layer);
389 layer_set_dn(adapt_layer, servicel);
390 cfsrvl_get(servicel);
391 servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0);
392}
393
394void
395cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
396 struct net_device *dev, struct cflayer *phy_layer,
397 u16 *phyid, enum cfcnfg_phy_preference pref,
398 bool fcs, bool stx)
399{
400 struct cflayer *frml;
401 struct cflayer *phy_driver = NULL;
402 int i;
403
404
405 if (cnfg->phy_layers[cnfg->last_phyid].frm_layer == NULL) {
406 *phyid = cnfg->last_phyid;
407
408		/* range: 1..(MAX_PHY_LAYERS-1) */
409 cnfg->last_phyid =
410 (cnfg->last_phyid % (MAX_PHY_LAYERS - 1)) + 1;
411 } else {
412 *phyid = 0;
413 for (i = 1; i < MAX_PHY_LAYERS; i++) {
414 if (cnfg->phy_layers[i].frm_layer == NULL) {
415 *phyid = i;
416 break;
417 }
418 }
419 }
420 if (*phyid == 0) {
421 pr_err("CAIF: %s(): No Available PHY ID\n", __func__);
422 return;
423 }
424
425 switch (phy_type) {
426 case CFPHYTYPE_FRAG:
427 phy_driver =
428 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx);
429 if (!phy_driver) {
430 pr_warning("CAIF: %s(): Out of memory\n", __func__);
431 return;
432 }
433
434 break;
435 case CFPHYTYPE_CAIF:
436 phy_driver = NULL;
437 break;
438 default:
439		pr_err("CAIF: %s(): Unknown PHY type %d\n",
440			__func__, phy_type);
441		return;
442 }
443
444 phy_layer->id = *phyid;
445 cnfg->phy_layers[*phyid].pref = pref;
446 cnfg->phy_layers[*phyid].id = *phyid;
447 cnfg->phy_layers[*phyid].dev_info.id = *phyid;
448 cnfg->phy_layers[*phyid].dev_info.dev = dev;
449 cnfg->phy_layers[*phyid].phy_layer = phy_layer;
450 cnfg->phy_layers[*phyid].phy_ref_count = 0;
451 cnfg->phy_layers[*phyid].ifindex = dev->ifindex;
452 cnfg->phy_layers[*phyid].use_stx = stx;
453 cnfg->phy_layers[*phyid].use_fcs = fcs;
454
455 phy_layer->type = phy_type;
456 frml = cffrml_create(*phyid, fcs);
457 if (!frml) {
458 pr_warning("CAIF: %s(): Out of memory\n", __func__);
459 return;
460 }
461 cnfg->phy_layers[*phyid].frm_layer = frml;
462 cfmuxl_set_dnlayer(cnfg->mux, frml, *phyid);
463 layer_set_up(frml, cnfg->mux);
464
465 if (phy_driver != NULL) {
466 phy_driver->id = *phyid;
467 layer_set_dn(frml, phy_driver);
468 layer_set_up(phy_driver, frml);
469 layer_set_dn(phy_driver, phy_layer);
470 layer_set_up(phy_layer, phy_driver);
471 } else {
472 layer_set_dn(frml, phy_layer);
473 layer_set_up(phy_layer, frml);
474 }
475}
476EXPORT_SYMBOL(cfcnfg_add_phy_layer);
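
For orientation, the layer_set_up()/layer_set_dn() calls above wire up
the following stack for a fragmenting PHY (CFPHYTYPE_FRAG); with
CFPHYTYPE_CAIF the cfserl layer is simply absent:

	mux
	 |
	frml      (cffrml: framing, optional FCS)
	 |
	cfserl    (phy_driver: fragmentation / start-of-frame ext.)
	 |
	phy_layer (device)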
477
478int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer)
479{
480 struct cflayer *frml, *frml_dn;
481 u16 phyid;
482 phyid = phy_layer->id;
483 caif_assert(phyid == cnfg->phy_layers[phyid].id);
484 caif_assert(phy_layer == cnfg->phy_layers[phyid].phy_layer);
485 caif_assert(phy_layer->id == phyid);
486 caif_assert(cnfg->phy_layers[phyid].frm_layer->id == phyid);
487
488 memset(&cnfg->phy_layers[phy_layer->id], 0,
489 sizeof(struct cfcnfg_phyinfo));
490 frml = cfmuxl_remove_dnlayer(cnfg->mux, phy_layer->id);
491 frml_dn = frml->dn;
492 cffrml_set_uplayer(frml, NULL);
493 cffrml_set_dnlayer(frml, NULL);
494 kfree(frml);
495
496 if (phy_layer != frml_dn) {
497 layer_set_up(frml_dn, NULL);
498 layer_set_dn(frml_dn, NULL);
499 kfree(frml_dn);
500 }
501 layer_set_up(phy_layer, NULL);
502 return 0;
503}
504EXPORT_SYMBOL(cfcnfg_del_phy_layer);
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
new file mode 100644
index 000000000000..563145fdc4c3
--- /dev/null
+++ b/net/caif/cfctrl.c
@@ -0,0 +1,652 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/spinlock.h>
9#include <linux/slab.h>
10#include <net/caif/caif_layer.h>
11#include <net/caif/cfpkt.h>
12#include <net/caif/cfctrl.h>
13
14#define container_obj(layr) container_of(layr, struct cfctrl, serv.layer)
15#define UTILITY_NAME_LENGTH 16
16#define CFPKT_CTRL_PKT_LEN 20
17
18
19#ifdef CAIF_NO_LOOP
20static int handle_loop(struct cfctrl *ctrl,
21			int cmd, struct cfpkt *pkt)
22{
23	return -1;
24}
24#else
25static int handle_loop(struct cfctrl *ctrl,
26 int cmd, struct cfpkt *pkt);
27#endif
28static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt);
29static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
30 int phyid);
31
32
33struct cflayer *cfctrl_create(void)
34{
35 struct dev_info dev_info;
36 struct cfctrl *this =
37 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
38 if (!this) {
39 pr_warning("CAIF: %s(): Out of memory\n", __func__);
40 return NULL;
41 }
42 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
43 memset(&dev_info, 0, sizeof(dev_info));
44 dev_info.id = 0xff;
45 memset(this, 0, sizeof(*this));
46 cfsrvl_init(&this->serv, 0, &dev_info, false);
47 atomic_set(&this->req_seq_no, 1);
48 atomic_set(&this->rsp_seq_no, 1);
49 this->serv.layer.receive = cfctrl_recv;
50 sprintf(this->serv.layer.name, "ctrl");
51 this->serv.layer.ctrlcmd = cfctrl_ctrlcmd;
52 spin_lock_init(&this->loop_linkid_lock);
53 spin_lock_init(&this->info_list_lock);
54 INIT_LIST_HEAD(&this->list);
55 this->loop_linkid = 1;
56 return &this->serv.layer;
57}
58
59static bool param_eq(struct cfctrl_link_param *p1, struct cfctrl_link_param *p2)
60{
61 bool eq =
62 p1->linktype == p2->linktype &&
63 p1->priority == p2->priority &&
64 p1->phyid == p2->phyid &&
65 p1->endpoint == p2->endpoint && p1->chtype == p2->chtype;
66
67 if (!eq)
68 return false;
69
70 switch (p1->linktype) {
71 case CFCTRL_SRV_VEI:
72 return true;
73 case CFCTRL_SRV_DATAGRAM:
74 return p1->u.datagram.connid == p2->u.datagram.connid;
75 case CFCTRL_SRV_RFM:
76 return
77 p1->u.rfm.connid == p2->u.rfm.connid &&
78 strcmp(p1->u.rfm.volume, p2->u.rfm.volume) == 0;
79 case CFCTRL_SRV_UTIL:
80 return
81 p1->u.utility.fifosize_kb == p2->u.utility.fifosize_kb
82 && p1->u.utility.fifosize_bufs ==
83 p2->u.utility.fifosize_bufs
84 && strcmp(p1->u.utility.name, p2->u.utility.name) == 0
85 && p1->u.utility.paramlen == p2->u.utility.paramlen
86 && memcmp(p1->u.utility.params, p2->u.utility.params,
87 p1->u.utility.paramlen) == 0;
88
89 case CFCTRL_SRV_VIDEO:
90 return p1->u.video.connid == p2->u.video.connid;
91 case CFCTRL_SRV_DBG:
92 return true;
93 case CFCTRL_SRV_DECM:
94 return false;
95 default:
96 return false;
97 }
98 return false;
99}
100
101bool cfctrl_req_eq(struct cfctrl_request_info *r1,
102 struct cfctrl_request_info *r2)
103{
104 if (r1->cmd != r2->cmd)
105 return false;
106 if (r1->cmd == CFCTRL_CMD_LINK_SETUP)
107 return param_eq(&r1->param, &r2->param);
108 else
109 return r1->channel_id == r2->channel_id;
110}
111
112/* Insert request at the end */
113void cfctrl_insert_req(struct cfctrl *ctrl,
114 struct cfctrl_request_info *req)
115{
116 spin_lock(&ctrl->info_list_lock);
117 atomic_inc(&ctrl->req_seq_no);
118 req->sequence_no = atomic_read(&ctrl->req_seq_no);
119 list_add_tail(&req->list, &ctrl->list);
120 spin_unlock(&ctrl->info_list_lock);
121}
122
123/* Compare and remove request */
124struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
125 struct cfctrl_request_info *req)
126{
127 struct cfctrl_request_info *p, *tmp, *first;
128
129 spin_lock(&ctrl->info_list_lock);
130 first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list);
131
132 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
133 if (cfctrl_req_eq(req, p)) {
134 if (p != first)
135 pr_warning("CAIF: %s(): Requests are not "
136 "received in order\n",
137 __func__);
138
139 atomic_set(&ctrl->rsp_seq_no,
140 p->sequence_no);
141 list_del(&p->list);
142 goto out;
143 }
144 }
145 p = NULL;
146out:
147 spin_unlock(&ctrl->info_list_lock);
148 return p;
149}
150
151struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer)
152{
153 struct cfctrl *this = container_obj(layer);
154 return &this->res;
155}
156
157void cfctrl_set_dnlayer(struct cflayer *this, struct cflayer *dn)
158{
159 this->dn = dn;
160}
161
162void cfctrl_set_uplayer(struct cflayer *this, struct cflayer *up)
163{
164 this->up = up;
165}
166
167static void init_info(struct caif_payload_info *info, struct cfctrl *cfctrl)
168{
169 info->hdr_len = 0;
170 info->channel_id = cfctrl->serv.layer.id;
171 info->dev_info = &cfctrl->serv.dev_info;
172}
173
174void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
175{
176 struct cfctrl *cfctrl = container_obj(layer);
177 int ret;
178 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
179 if (!pkt) {
180 pr_warning("CAIF: %s(): Out of memory\n", __func__);
181 return;
182 }
183 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
184 init_info(cfpkt_info(pkt), cfctrl);
185 cfpkt_info(pkt)->dev_info->id = physlinkid;
186 cfctrl->serv.dev_info.id = physlinkid;
187 cfpkt_addbdy(pkt, CFCTRL_CMD_ENUM);
188 cfpkt_addbdy(pkt, physlinkid);
189 ret =
190 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
191 if (ret < 0) {
192 pr_err("CAIF: %s(): Could not transmit enum message\n",
193 __func__);
194 cfpkt_destroy(pkt);
195 }
196}
197
198int cfctrl_linkup_request(struct cflayer *layer,
199 struct cfctrl_link_param *param,
200 struct cflayer *user_layer)
201{
202 struct cfctrl *cfctrl = container_obj(layer);
203 u32 tmp32;
204 u16 tmp16;
205 u8 tmp8;
206 struct cfctrl_request_info *req;
207 int ret;
208 char utility_name[16];
209 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
210 if (!pkt) {
211 pr_warning("CAIF: %s(): Out of memory\n", __func__);
212 return -ENOMEM;
213 }
214 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
215 cfpkt_addbdy(pkt, (param->chtype << 4) + param->linktype);
216 cfpkt_addbdy(pkt, (param->priority << 3) + param->phyid);
217 cfpkt_addbdy(pkt, param->endpoint & 0x03);
218
219 switch (param->linktype) {
220 case CFCTRL_SRV_VEI:
221 break;
222 case CFCTRL_SRV_VIDEO:
223 cfpkt_addbdy(pkt, (u8) param->u.video.connid);
224 break;
225 case CFCTRL_SRV_DBG:
226 break;
227 case CFCTRL_SRV_DATAGRAM:
228 tmp32 = cpu_to_le32(param->u.datagram.connid);
229 cfpkt_add_body(pkt, &tmp32, 4);
230 break;
231 case CFCTRL_SRV_RFM:
232		/* Construct a frame, convert DatagramConnectionID to
233		 * little-endian format and copy it out...
234 */
235 tmp32 = cpu_to_le32(param->u.rfm.connid);
236 cfpkt_add_body(pkt, &tmp32, 4);
237 /* Add volume name, including zero termination... */
238 cfpkt_add_body(pkt, param->u.rfm.volume,
239 strlen(param->u.rfm.volume) + 1);
240 break;
241 case CFCTRL_SRV_UTIL:
242 tmp16 = cpu_to_le16(param->u.utility.fifosize_kb);
243 cfpkt_add_body(pkt, &tmp16, 2);
244 tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
245 cfpkt_add_body(pkt, &tmp16, 2);
246 memset(utility_name, 0, sizeof(utility_name));
247 strncpy(utility_name, param->u.utility.name,
248 UTILITY_NAME_LENGTH - 1);
249 cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
250 tmp8 = param->u.utility.paramlen;
251 cfpkt_add_body(pkt, &tmp8, 1);
252 cfpkt_add_body(pkt, param->u.utility.params,
253 param->u.utility.paramlen);
254 break;
255	default:
256		pr_warning("CAIF: %s(): Request setup of bad link type = %d\n",
257			__func__, param->linktype);
258		cfpkt_destroy(pkt);
259		return -EINVAL;
260	}
261	req = kzalloc(sizeof(*req), GFP_KERNEL);
262	if (!req) {
263		pr_warning("CAIF: %s(): Out of memory\n", __func__);
264		cfpkt_destroy(pkt);
265		return -ENOMEM;
266	}
265 req->client_layer = user_layer;
266 req->cmd = CFCTRL_CMD_LINK_SETUP;
267 req->param = *param;
268 cfctrl_insert_req(cfctrl, req);
269 init_info(cfpkt_info(pkt), cfctrl);
270 /*
271 * NOTE:Always send linkup and linkdown request on the same
272 * device as the payload. Otherwise old queued up payload
273 * might arrive with the newly allocated channel ID.
274 */
275 cfpkt_info(pkt)->dev_info->id = param->phyid;
276 ret =
277 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
278 if (ret < 0) {
279 pr_err("CAIF: %s(): Could not transmit linksetup request\n",
280 __func__);
281 cfpkt_destroy(pkt);
282 return -ENODEV;
283 }
284 return 0;
285}
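
A hedged user-space reconstruction of the bytes
cfctrl_linkup_request() emits for a CFCTRL_SRV_UTIL link, mirroring
the cfpkt_addbdy() and cfpkt_add_body() sequence above. The command
byte is a parameter because the numeric value of CFCTRL_CMD_LINK_SETUP
is not visible in this file; treat this as a sketch, not a protocol
reference.

	#include <stdint.h>
	#include <string.h>

	#define UTILITY_NAME_LENGTH 16	/* matches the define above */

	/* Returns the number of bytes written to 'buf'. */
	static size_t build_util_linkup(uint8_t *buf, uint8_t cmd,
					uint8_t chtype, uint8_t linktype,
					uint8_t prio, uint8_t phyid,
					uint8_t endpoint,
					uint16_t fifosz_kb,
					uint16_t fifosz_bufs,
					const char *name,
					const uint8_t *params,
					uint8_t paramlen)
	{
		uint8_t *p = buf;

		*p++ = cmd;			/* CFCTRL_CMD_LINK_SETUP */
		*p++ = (chtype << 4) | linktype;
		*p++ = (prio << 3) | phyid;
		*p++ = endpoint & 0x03;
		*p++ = fifosz_kb & 0xff;	/* little-endian u16 */
		*p++ = fifosz_kb >> 8;
		*p++ = fifosz_bufs & 0xff;
		*p++ = fifosz_bufs >> 8;
		memset(p, 0, UTILITY_NAME_LENGTH);
		strncpy((char *)p, name, UTILITY_NAME_LENGTH - 1);
		p += UTILITY_NAME_LENGTH;
		*p++ = paramlen;
		memcpy(p, params, paramlen);
		return (size_t)(p + paramlen - buf);
	}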
286
287int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
288 struct cflayer *client)
289{
290 int ret;
291 struct cfctrl *cfctrl = container_obj(layer);
292 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
293 if (!pkt) {
294 pr_warning("CAIF: %s(): Out of memory\n", __func__);
295 return -ENOMEM;
296 }
297 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
298 cfpkt_addbdy(pkt, channelid);
299 init_info(cfpkt_info(pkt), cfctrl);
300 ret =
301 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
302 if (ret < 0) {
303 pr_err("CAIF: %s(): Could not transmit link-down request\n",
304 __func__);
305 cfpkt_destroy(pkt);
306 }
307 return ret;
308}
309
310void cfctrl_sleep_req(struct cflayer *layer)
311{
312 int ret;
313 struct cfctrl *cfctrl = container_obj(layer);
314 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
315 if (!pkt) {
316 pr_warning("CAIF: %s(): Out of memory\n", __func__);
317 return;
318 }
319 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP);
320 init_info(cfpkt_info(pkt), cfctrl);
321 ret =
322 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
323 if (ret < 0)
324 cfpkt_destroy(pkt);
325}
326
327void cfctrl_wake_req(struct cflayer *layer)
328{
329 int ret;
330 struct cfctrl *cfctrl = container_obj(layer);
331 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
332 if (!pkt) {
333 pr_warning("CAIF: %s(): Out of memory\n", __func__);
334 return;
335 }
336 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE);
337 init_info(cfpkt_info(pkt), cfctrl);
338 ret =
339 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
340 if (ret < 0)
341 cfpkt_destroy(pkt);
342}
343
344void cfctrl_getstartreason_req(struct cflayer *layer)
345{
346 int ret;
347 struct cfctrl *cfctrl = container_obj(layer);
348 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
349 if (!pkt) {
350 pr_warning("CAIF: %s(): Out of memory\n", __func__);
351 return;
352 }
353 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON);
354 init_info(cfpkt_info(pkt), cfctrl);
355 ret =
356 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
357 if (ret < 0)
358 cfpkt_destroy(pkt);
359}
360
361
362void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
363{
364 struct cfctrl_request_info *p, *tmp;
365 struct cfctrl *ctrl = container_obj(layr);
366 spin_lock(&ctrl->info_list_lock);
367 pr_warning("CAIF: %s(): enter\n", __func__);
368
369 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
370 if (p->client_layer == adap_layer) {
371 pr_warning("CAIF: %s(): cancel req :%d\n", __func__,
372 p->sequence_no);
373 list_del(&p->list);
374 kfree(p);
375 }
376 }
377
378 spin_unlock(&ctrl->info_list_lock);
379}
380
381static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
382{
383 u8 cmdrsp;
384 u8 cmd;
385 int ret = -1;
386 u16 tmp16;
387 u8 len;
388 u8 param[255];
389 u8 linkid;
390 struct cfctrl *cfctrl = container_obj(layer);
391 struct cfctrl_request_info rsp, *req;
392
393
394 cfpkt_extr_head(pkt, &cmdrsp, 1);
395 cmd = cmdrsp & CFCTRL_CMD_MASK;
396 if (cmd != CFCTRL_CMD_LINK_ERR
397 && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) {
398 if (handle_loop(cfctrl, cmd, pkt) != 0)
399 cmdrsp |= CFCTRL_ERR_BIT;
400 }
401
402 switch (cmd) {
403 case CFCTRL_CMD_LINK_SETUP:
404 {
405 enum cfctrl_srv serv;
406 enum cfctrl_srv servtype;
407 u8 endpoint;
408 u8 physlinkid;
409 u8 prio;
410 u8 tmp;
411 u32 tmp32;
412 u8 *cp;
413 int i;
414 struct cfctrl_link_param linkparam;
415 memset(&linkparam, 0, sizeof(linkparam));
416
417 cfpkt_extr_head(pkt, &tmp, 1);
418
419 serv = tmp & CFCTRL_SRV_MASK;
420 linkparam.linktype = serv;
421
422 servtype = tmp >> 4;
423 linkparam.chtype = servtype;
424
425 cfpkt_extr_head(pkt, &tmp, 1);
426 physlinkid = tmp & 0x07;
427 prio = tmp >> 3;
428
429 linkparam.priority = prio;
430 linkparam.phyid = physlinkid;
431 cfpkt_extr_head(pkt, &endpoint, 1);
432 linkparam.endpoint = endpoint & 0x03;
433
434 switch (serv) {
435 case CFCTRL_SRV_VEI:
436 case CFCTRL_SRV_DBG:
437 if (CFCTRL_ERR_BIT & cmdrsp)
438 break;
439 /* Link ID */
440 cfpkt_extr_head(pkt, &linkid, 1);
441 break;
442 case CFCTRL_SRV_VIDEO:
443 cfpkt_extr_head(pkt, &tmp, 1);
444 linkparam.u.video.connid = tmp;
445 if (CFCTRL_ERR_BIT & cmdrsp)
446 break;
447 /* Link ID */
448 cfpkt_extr_head(pkt, &linkid, 1);
449 break;
450
451 case CFCTRL_SRV_DATAGRAM:
452 cfpkt_extr_head(pkt, &tmp32, 4);
453 linkparam.u.datagram.connid =
454 le32_to_cpu(tmp32);
455 if (CFCTRL_ERR_BIT & cmdrsp)
456 break;
457 /* Link ID */
458 cfpkt_extr_head(pkt, &linkid, 1);
459 break;
460 case CFCTRL_SRV_RFM:
461 /* Construct a frame, convert
462 * DatagramConnectionID
463			 * to little-endian format and copy it out...
464 */
465 cfpkt_extr_head(pkt, &tmp32, 4);
466 linkparam.u.rfm.connid =
467 le32_to_cpu(tmp32);
468 cp = (u8 *) linkparam.u.rfm.volume;
469 for (cfpkt_extr_head(pkt, &tmp, 1);
470 cfpkt_more(pkt) && tmp != '\0';
471 cfpkt_extr_head(pkt, &tmp, 1))
472 *cp++ = tmp;
473 *cp = '\0';
474
475 if (CFCTRL_ERR_BIT & cmdrsp)
476 break;
477 /* Link ID */
478 cfpkt_extr_head(pkt, &linkid, 1);
479
480 break;
481 case CFCTRL_SRV_UTIL:
482			/* Extract the utility-service parameters:
483			 * fifo sizes, name and parameter data.
484			 */
486 /* Fifosize KB */
487 cfpkt_extr_head(pkt, &tmp16, 2);
488 linkparam.u.utility.fifosize_kb =
489 le16_to_cpu(tmp16);
490 /* Fifosize bufs */
491 cfpkt_extr_head(pkt, &tmp16, 2);
492 linkparam.u.utility.fifosize_bufs =
493 le16_to_cpu(tmp16);
494 /* name */
495 cp = (u8 *) linkparam.u.utility.name;
496 caif_assert(sizeof(linkparam.u.utility.name)
497 >= UTILITY_NAME_LENGTH);
498 for (i = 0;
499 i < UTILITY_NAME_LENGTH
500 && cfpkt_more(pkt); i++) {
501 cfpkt_extr_head(pkt, &tmp, 1);
502 *cp++ = tmp;
503 }
504 /* Length */
505 cfpkt_extr_head(pkt, &len, 1);
506 linkparam.u.utility.paramlen = len;
507 /* Param Data */
508 cp = linkparam.u.utility.params;
509 while (cfpkt_more(pkt) && len--) {
510 cfpkt_extr_head(pkt, &tmp, 1);
511 *cp++ = tmp;
512 }
513 if (CFCTRL_ERR_BIT & cmdrsp)
514 break;
515 /* Link ID */
516 cfpkt_extr_head(pkt, &linkid, 1);
517 /* Length */
518 cfpkt_extr_head(pkt, &len, 1);
519 /* Param Data */
520 cfpkt_extr_head(pkt, &param, len);
521 break;
522 default:
523 pr_warning("CAIF: %s(): Request setup "
524 "- invalid link type (%d)",
525 __func__, serv);
526 goto error;
527 }
528
529 rsp.cmd = cmd;
530 rsp.param = linkparam;
531 req = cfctrl_remove_req(cfctrl, &rsp);
532
533 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
534 cfpkt_erroneous(pkt)) {
535 pr_err("CAIF: %s(): Invalid O/E bit or parse "
536 "error on CAIF control channel",
537 __func__);
538 cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
539 0,
540 req ? req->client_layer
541 : NULL);
542 } else {
543 cfctrl->res.linksetup_rsp(cfctrl->serv.
544 layer.up, linkid,
545 serv, physlinkid,
546 req ? req->
547 client_layer : NULL);
548 }
549
550			kfree(req);	/* kfree(NULL) is a no-op */
552 }
553 break;
554 case CFCTRL_CMD_LINK_DESTROY:
555 cfpkt_extr_head(pkt, &linkid, 1);
556 cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
557 break;
558 case CFCTRL_CMD_LINK_ERR:
559 pr_err("CAIF: %s(): Frame Error Indication received\n",
560 __func__);
561 cfctrl->res.linkerror_ind();
562 break;
563 case CFCTRL_CMD_ENUM:
564 cfctrl->res.enum_rsp();
565 break;
566 case CFCTRL_CMD_SLEEP:
567 cfctrl->res.sleep_rsp();
568 break;
569 case CFCTRL_CMD_WAKE:
570 cfctrl->res.wake_rsp();
571 break;
572 case CFCTRL_CMD_LINK_RECONF:
573 cfctrl->res.restart_rsp();
574 break;
575 case CFCTRL_CMD_RADIO_SET:
576 cfctrl->res.radioset_rsp();
577 break;
578 default:
579 pr_err("CAIF: %s(): Unrecognized Control Frame\n", __func__);
580		goto error;
582 }
583 ret = 0;
584error:
585 cfpkt_destroy(pkt);
586 return ret;
587}
588
589static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
590 int phyid)
591{
592 struct cfctrl *this = container_obj(layr);
593 switch (ctrl) {
594 case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
595 case CAIF_CTRLCMD_FLOW_OFF_IND:
596 spin_lock(&this->info_list_lock);
597 if (!list_empty(&this->list)) {
598 pr_debug("CAIF: %s(): Received flow off in "
599 "control layer", __func__);
600 }
601 spin_unlock(&this->info_list_lock);
602 break;
603 default:
604 break;
605 }
606}
607
608#ifndef CAIF_NO_LOOP
609static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
610{
611 static int last_linkid;
612 u8 linkid, linktype, tmp;
613 switch (cmd) {
614 case CFCTRL_CMD_LINK_SETUP:
615 spin_lock(&ctrl->loop_linkid_lock);
616 for (linkid = last_linkid + 1; linkid < 255; linkid++)
617 if (!ctrl->loop_linkused[linkid])
618 goto found;
619 for (linkid = last_linkid - 1; linkid > 0; linkid--)
620 if (!ctrl->loop_linkused[linkid])
621 goto found;
622 spin_unlock(&ctrl->loop_linkid_lock);
623 pr_err("CAIF: %s(): Out of link-ids\n", __func__);
624 return -EINVAL;
625found:
626		ctrl->loop_linkused[linkid] = 1;
628
629 last_linkid = linkid;
630
631 cfpkt_add_trail(pkt, &linkid, 1);
632 spin_unlock(&ctrl->loop_linkid_lock);
633 cfpkt_peek_head(pkt, &linktype, 1);
634 if (linktype == CFCTRL_SRV_UTIL) {
635 tmp = 0x01;
636 cfpkt_add_trail(pkt, &tmp, 1);
637 cfpkt_add_trail(pkt, &tmp, 1);
638 }
639 break;
640
641 case CFCTRL_CMD_LINK_DESTROY:
642 spin_lock(&ctrl->loop_linkid_lock);
643 cfpkt_peek_head(pkt, &linkid, 1);
644 ctrl->loop_linkused[linkid] = 0;
645 spin_unlock(&ctrl->loop_linkid_lock);
646 break;
647 default:
648 break;
649 }
650 return 0;
651}
652#endif
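
The loopback path hands out link ids with an up-then-down scan around
the last id used, so ids are recycled round-robin rather than
lowest-first. A stand-alone restatement of the scan (the 1..254 range
matches the loops above):

	#include <stdbool.h>

	/* Mirror of the id scan in handle_loop(); id 0 is never used. */
	static int alloc_linkid(const bool used[256], int last)
	{
		int id;

		for (id = last + 1; id < 255; id++)
			if (!used[id])
				return id;
		for (id = last - 1; id > 0; id--)
			if (!used[id])
				return id;
		return -1;	/* out of link ids */
	}
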
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
new file mode 100644
index 000000000000..676648cac8dd
--- /dev/null
+++ b/net/caif/cfdbgl.c
@@ -0,0 +1,40 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/slab.h>
9#include <net/caif/caif_layer.h>
10#include <net/caif/cfsrvl.h>
11#include <net/caif/cfpkt.h>
12
13static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
14static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
15
16struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
17{
18 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
19 if (!dbg) {
20 pr_warning("CAIF: %s(): Out of memory\n", __func__);
21 return NULL;
22 }
23 caif_assert(offsetof(struct cfsrvl, layer) == 0);
24 memset(dbg, 0, sizeof(struct cfsrvl));
25 cfsrvl_init(dbg, channel_id, dev_info, false);
26 dbg->layer.receive = cfdbgl_receive;
27 dbg->layer.transmit = cfdbgl_transmit;
28 snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id);
29 return &dbg->layer;
30}
31
32static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
33{
34 return layr->up->receive(layr->up, pkt);
35}
36
37static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
38{
39 return layr->dn->transmit(layr->dn, pkt);
40}
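
cfdbgl is the smallest possible CAIF service layer: receive() forwards straight up the stack and transmit() straight down. The up/dn layering pattern it relies on, restated as a self-contained sketch (simplified stand-in types, not the kernel's struct cflayer):

	#include <stdio.h>

	/* Simplified stand-in for struct cflayer: a stack of layers where
	 * receive() climbs via ->up and transmit() descends via ->dn. */
	struct layer {
		struct layer *up, *dn;
		int (*receive)(struct layer *l, const char *pkt);
		int (*transmit)(struct layer *l, const char *pkt);
	};

	/* Pass-through pair, like cfdbgl_receive()/cfdbgl_transmit(). */
	static int dbg_receive(struct layer *l, const char *pkt)
	{
		return l->up->receive(l->up, pkt);
	}

	static int dbg_transmit(struct layer *l, const char *pkt)
	{
		return l->dn->transmit(l->dn, pkt);
	}

	static int top_receive(struct layer *l, const char *pkt)
	{
		printf("delivered: %s\n", pkt);
		return 0;
	}

	static int bottom_transmit(struct layer *l, const char *pkt)
	{
		printf("on the wire: %s\n", pkt);
		return 0;
	}

	int main(void)
	{
		struct layer top = { .receive = top_receive };
		struct layer bot = { .transmit = bottom_transmit };
		struct layer dbg = { .up = &top, .dn = &bot,
				     .receive = dbg_receive,
				     .transmit = dbg_transmit };

		dbg.receive(&dbg, "inbound");	/* climbs to top */
		dbg.transmit(&dbg, "outbound");	/* descends to bottom */
		return 0;
	}
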
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
new file mode 100644
index 000000000000..ed9d53aff280
--- /dev/null
+++ b/net/caif/cfdgml.c
@@ -0,0 +1,113 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/spinlock.h>
9#include <linux/slab.h>
10#include <net/caif/caif_layer.h>
11#include <net/caif/cfsrvl.h>
12#include <net/caif/cfpkt.h>
13
14#define container_obj(layr) ((struct cfsrvl *) layr)
15
16#define DGM_CMD_BIT 0x80
17#define DGM_FLOW_OFF 0x81
18#define DGM_FLOW_ON 0x80
19#define DGM_CTRL_PKT_SIZE 1
20#define DGM_MTU 1500
21
22static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
23static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt);
24
25struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
26{
27 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!dgm) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__);
30 return NULL;
31 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0);
33 memset(dgm, 0, sizeof(struct cfsrvl));
34 cfsrvl_init(dgm, channel_id, dev_info, true);
35 dgm->layer.receive = cfdgml_receive;
36 dgm->layer.transmit = cfdgml_transmit;
37 snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id);
38 dgm->layer.name[CAIF_LAYER_NAME_SZ - 1] = '\0';
39 return &dgm->layer;
40}
41
42static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
43{
44 u8 cmd = -1;
45 u8 dgmhdr[3];
46 int ret;
47 caif_assert(layr->up != NULL);
48 caif_assert(layr->receive != NULL);
49 caif_assert(layr->ctrlcmd != NULL);
50
51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
52 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
53 cfpkt_destroy(pkt);
54 return -EPROTO;
55 }
56
57 if ((cmd & DGM_CMD_BIT) == 0) {
58 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) {
59 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
60 cfpkt_destroy(pkt);
61 return -EPROTO;
62 }
63 ret = layr->up->receive(layr->up, pkt);
64 return ret;
65 }
66
67 switch (cmd) {
68 case DGM_FLOW_OFF: /* FLOW OFF */
69 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
70 cfpkt_destroy(pkt);
71 return 0;
72 case DGM_FLOW_ON: /* FLOW ON */
73 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
74 cfpkt_destroy(pkt);
75 return 0;
76 default:
77 cfpkt_destroy(pkt);
78 pr_info("CAIF: %s(): Unknown datagram control %d (0x%x)\n",
79 __func__, cmd, cmd);
80 return -EPROTO;
81 }
82}
83
84static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
85{
86 u32 zero = 0;
87 struct caif_payload_info *info;
88 struct cfsrvl *service = container_obj(layr);
89 int ret;
90 if (!cfsrvl_ready(service, &ret))
91 return ret;
92
93 /* STE Modem cannot handle more than 1500 bytes datagrams */
94 if (cfpkt_getlen(pkt) > DGM_MTU)
95 return -EMSGSIZE;
96
97 cfpkt_add_head(pkt, &zero, 4);
98
99 /* Add info for MUX-layer to route the packet out. */
100 info = cfpkt_info(pkt);
101 info->channel_id = service->layer.id;
102 /* To optimize alignment, we add up the size of CAIF header
103 * before payload.
104 */
105 info->hdr_len = 4;
106 info->dev_info = &service->dev_info;
107 ret = layr->dn->transmit(layr->dn, pkt);
108 if (ret < 0) {
109 u32 tmp32;
110 cfpkt_extr_head(pkt, &tmp32, 4);
111 }
112 return ret;
113}
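
On the wire a datagram frame is classified by its first byte: if DGM_CMD_BIT (0x80) is set it is a one-byte control message (flow on/off), otherwise the byte begins the 4-byte payload header that cfdgml_transmit() writes as four zeros. A short sketch of that classification using the same constants (dgm_classify is a made-up helper):

	#include <stdio.h>

	#define DGM_CMD_BIT  0x80
	#define DGM_FLOW_OFF 0x81
	#define DGM_FLOW_ON  0x80

	/* Classify the first byte of a datagram frame the way
	 * cfdgml_receive() does. */
	static const char *dgm_classify(unsigned char first)
	{
		if ((first & DGM_CMD_BIT) == 0)
			return "payload (3 more header bytes follow)";
		switch (first) {
		case DGM_FLOW_OFF:
			return "flow-off control";
		case DGM_FLOW_ON:
			return "flow-on control";
		default:
			return "unknown control";
		}
	}

	int main(void)
	{
		unsigned char frames[] = { 0x00, 0x80, 0x81, 0x9f };
		for (unsigned i = 0; i < sizeof(frames); i++)
			printf("0x%02x -> %s\n", frames[i],
			       dgm_classify(frames[i]));
		return 0;
	}
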
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
new file mode 100644
index 000000000000..e86a4ca3b217
--- /dev/null
+++ b/net/caif/cffrml.c
@@ -0,0 +1,151 @@
1/*
2 * CAIF Framing Layer.
3 *
4 * Copyright (C) ST-Ericsson AB 2010
5 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
6 * License terms: GNU General Public License (GPL) version 2
7 */
8
9#include <linux/stddef.h>
10#include <linux/spinlock.h>
11#include <linux/slab.h>
12#include <linux/crc-ccitt.h>
13#include <net/caif/caif_layer.h>
14#include <net/caif/cfpkt.h>
15#include <net/caif/cffrml.h>
16
17#define container_obj(layr) container_of(layr, struct cffrml, layer)
18
19struct cffrml {
20 struct cflayer layer;
21 bool dofcs; /* FCS active */
22};
23
24static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt);
25static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt);
26static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
27 int phyid);
28
29static u32 cffrml_rcv_error;
30static u32 cffrml_rcv_checksum_error;
31struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
32{
33 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC);
34 if (!this) {
35 pr_warning("CAIF: %s(): Out of memory\n", __func__);
36 return NULL;
37 }
38 caif_assert(offsetof(struct cffrml, layer) == 0);
39
40 memset(this, 0, sizeof(struct cffrml));
41 this->layer.receive = cffrml_receive;
42 this->layer.transmit = cffrml_transmit;
43 this->layer.ctrlcmd = cffrml_ctrlcmd;
44 snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "frm%d", phyid);
45 this->dofcs = use_fcs;
46 this->layer.id = phyid;
47 return (struct cflayer *) this;
48}
49
50void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up)
51{
52 this->up = up;
53}
54
55void cffrml_set_dnlayer(struct cflayer *this, struct cflayer *dn)
56{
57 this->dn = dn;
58}
59
60static u16 cffrml_checksum(u16 chks, void *buf, u16 len)
61{
62 /* FIXME: FCS should be moved to glue in order to use OS-Specific
63 * solutions
64 */
65 return crc_ccitt(chks, buf, len);
66}
67
68static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
69{
70 u16 tmp;
71 u16 len;
72 u16 hdrchks;
73 u16 pktchks;
74 struct cffrml *this;
75 this = container_obj(layr);
76
77 cfpkt_extr_head(pkt, &tmp, 2);
78 len = le16_to_cpu(tmp);
79
80 /* Subtract for FCS on length if FCS is not used. */
81 if (!this->dofcs)
82 len -= 2;
83
84 if (cfpkt_setlen(pkt, len) < 0) {
85 ++cffrml_rcv_error;
86 pr_err("CAIF: %s():Framing length error (%d)\n", __func__, len);
87 cfpkt_destroy(pkt);
88 return -EPROTO;
89 }
90 /*
91 * If FCS is disabled, skip the trailer extraction; the setlen
92 * above already trimmed it, and this avoids a cache miss.
93 */
94 if (this->dofcs) {
95 cfpkt_extr_trail(pkt, &tmp, 2);
96 hdrchks = le16_to_cpu(tmp);
97 pktchks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
98 if (pktchks != hdrchks) {
99 cfpkt_add_trail(pkt, &tmp, 2);
100 ++cffrml_rcv_error;
101 ++cffrml_rcv_checksum_error;
102 pr_info("CAIF: %s(): Frame checksum error "
103 "(0x%x != 0x%x)\n", __func__, hdrchks, pktchks);
104 return -EILSEQ;
105 }
106 }
107 if (cfpkt_erroneous(pkt)) {
108 ++cffrml_rcv_error;
109 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
110 cfpkt_destroy(pkt);
111 return -EPROTO;
112 }
113 return layr->up->receive(layr->up, pkt);
114}
115
116static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
117{
118 __le16 tmp;
119 u16 chks;
120 u16 len;
121 int ret;
122 struct cffrml *this = container_obj(layr);
123 if (this->dofcs) {
124 chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
125 tmp = cpu_to_le16(chks);
126 cfpkt_add_trail(pkt, &tmp, 2);
127 } else {
128 cfpkt_pad_trail(pkt, 2);
129 }
130 len = cfpkt_getlen(pkt);
131 tmp = cpu_to_le16(len);
132 cfpkt_add_head(pkt, &tmp, 2);
133 cfpkt_info(pkt)->hdr_len += 2;
134 if (cfpkt_erroneous(pkt)) {
135 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
136 return -EPROTO;
137 }
138 ret = layr->dn->transmit(layr->dn, pkt);
139 if (ret < 0) {
140 /* Remove header on faulty packet. */
141 cfpkt_extr_head(pkt, &tmp, 2);
142 }
143 return ret;
144}
145
146static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
147 int phyid)
148{
149 if (layr->up->ctrlcmd)
150 layr->up->ctrlcmd(layr->up, ctrl, layr->id);
151}
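
The framing layer's wire format is [little-endian u16 length][payload][2 trailer bytes], where the length covers payload plus trailer and the trailer is either a CRC-CCITT FCS over the payload (seed 0xffff) or two pad bytes when FCS is off. A standalone sketch that builds such a frame, reimplementing the kernel's lib/crc-ccitt.c bit by bit (reflected polynomial 0x8408); frame() is a hypothetical helper:

	#include <stdio.h>
	#include <string.h>
	#include <stdint.h>

	/* Bitwise equivalent of the kernel's crc_ccitt() (reflected 0x8408). */
	static uint16_t crc_ccitt(uint16_t crc, const uint8_t *buf, size_t len)
	{
		while (len--) {
			crc ^= *buf++;
			for (int bit = 0; bit < 8; bit++)
				crc = (crc & 1) ? (crc >> 1) ^ 0x8408
						: crc >> 1;
		}
		return crc;
	}

	/* Build a CAIF frame as cffrml_transmit() does with FCS enabled:
	 * [le16 len = payload + 2][payload][le16 fcs]. Returns frame size. */
	static size_t frame(uint8_t *out, const uint8_t *payload, uint16_t plen)
	{
		uint16_t len = plen + 2;		/* payload + FCS trailer */
		uint16_t fcs = crc_ccitt(0xffff, payload, plen);

		out[0] = len & 0xff;			/* little-endian length */
		out[1] = len >> 8;
		memcpy(out + 2, payload, plen);
		out[2 + plen] = fcs & 0xff;		/* little-endian FCS */
		out[3 + plen] = fcs >> 8;
		return 2 + plen + 2;
	}

	int main(void)
	{
		uint8_t buf[32];
		size_t n = frame(buf, (const uint8_t *)"hi", 2);

		for (size_t i = 0; i < n; i++)
			printf("%02x ", buf[i]);
		printf("\n");
		return 0;
	}
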
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
new file mode 100644
index 000000000000..80c8d332b258
--- /dev/null
+++ b/net/caif/cfmuxl.c
@@ -0,0 +1,252 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6#include <linux/stddef.h>
7#include <linux/spinlock.h>
8#include <linux/slab.h>
9#include <net/caif/cfpkt.h>
10#include <net/caif/cfmuxl.h>
11#include <net/caif/cfsrvl.h>
12#include <net/caif/cffrml.h>
13
14#define container_obj(layr) container_of(layr, struct cfmuxl, layer)
15
16#define CAIF_CTRL_CHANNEL 0
17#define UP_CACHE_SIZE 8
18#define DN_CACHE_SIZE 8
19
20struct cfmuxl {
21 struct cflayer layer;
22 struct list_head srvl_list;
23 struct list_head frml_list;
24 struct cflayer *up_cache[UP_CACHE_SIZE];
25 struct cflayer *dn_cache[DN_CACHE_SIZE];
26 /*
27 * Set when inserting or removing downwards layers.
28 */
29 spinlock_t transmit_lock;
30
31 /*
32 * Set when inserting or removing upwards layers.
33 */
34 spinlock_t receive_lock;
35
36};
37
38static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt);
39static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt);
40static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
41 int phyid);
42static struct cflayer *get_up(struct cfmuxl *muxl, u16 id);
43
44struct cflayer *cfmuxl_create(void)
45{
46 struct cfmuxl *this = kmalloc(sizeof(struct cfmuxl), GFP_ATOMIC);
47 if (!this)
48 return NULL;
49 memset(this, 0, sizeof(*this));
50 this->layer.receive = cfmuxl_receive;
51 this->layer.transmit = cfmuxl_transmit;
52 this->layer.ctrlcmd = cfmuxl_ctrlcmd;
53 INIT_LIST_HEAD(&this->srvl_list);
54 INIT_LIST_HEAD(&this->frml_list);
55 spin_lock_init(&this->transmit_lock);
56 spin_lock_init(&this->receive_lock);
57 snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "mux");
58 return &this->layer;
59}
60
61int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid)
62{
63 struct cfmuxl *muxl = container_obj(layr);
64 spin_lock(&muxl->receive_lock);
65 cfsrvl_get(up);
66 list_add(&up->node, &muxl->srvl_list);
67 spin_unlock(&muxl->receive_lock);
68 return 0;
69}
70
71bool cfmuxl_is_phy_inuse(struct cflayer *layr, u8 phyid)
72{
73 struct list_head *node;
74 struct cflayer *layer;
75 struct cfmuxl *muxl = container_obj(layr);
76 bool match = false;
77 spin_lock(&muxl->receive_lock);
78
79 list_for_each(node, &muxl->srvl_list) {
80 layer = list_entry(node, struct cflayer, node);
81 if (cfsrvl_phyid_match(layer, phyid)) {
82 match = true;
83 break;
84 }
85
86 }
87 spin_unlock(&muxl->receive_lock);
88 return match;
89}
90
91u8 cfmuxl_get_phyid(struct cflayer *layr, u8 channel_id)
92{
93 struct cflayer *up;
94 int phyid;
95 struct cfmuxl *muxl = container_obj(layr);
96 spin_lock(&muxl->receive_lock);
97 up = get_up(muxl, channel_id);
98 if (up != NULL)
99 phyid = cfsrvl_getphyid(up);
100 else
101 phyid = 0;
102 spin_unlock(&muxl->receive_lock);
103 return phyid;
104}
105
106int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *dn, u8 phyid)
107{
108 struct cfmuxl *muxl = (struct cfmuxl *) layr;
109 spin_lock(&muxl->transmit_lock);
110 list_add(&dn->node, &muxl->frml_list);
111 spin_unlock(&muxl->transmit_lock);
112 return 0;
113}
114
115static struct cflayer *get_from_id(struct list_head *list, u16 id)
116{
117 struct list_head *node;
118 struct cflayer *layer;
119 list_for_each(node, list) {
120 layer = list_entry(node, struct cflayer, node);
121 if (layer->id == id)
122 return layer;
123 }
124 return NULL;
125}
126
127struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid)
128{
129 struct cfmuxl *muxl = container_obj(layr);
130 struct cflayer *dn;
131 spin_lock(&muxl->transmit_lock);
132 memset(muxl->dn_cache, 0, sizeof(muxl->dn_cache));
133 dn = get_from_id(&muxl->frml_list, phyid);
134 if (dn == NULL) {
135 spin_unlock(&muxl->transmit_lock);
136 return NULL;
137 }
138 list_del(&dn->node);
139 caif_assert(dn != NULL);
140 spin_unlock(&muxl->transmit_lock);
141 return dn;
142}
143
144/* Invariant: lock is taken */
145static struct cflayer *get_up(struct cfmuxl *muxl, u16 id)
146{
147 struct cflayer *up;
148 int idx = id % UP_CACHE_SIZE;
149 up = muxl->up_cache[idx];
150 if (up == NULL || up->id != id) {
151 up = get_from_id(&muxl->srvl_list, id);
152 muxl->up_cache[idx] = up;
153 }
154 return up;
155}
156
157/* Invariant: lock is taken */
158static struct cflayer *get_dn(struct cfmuxl *muxl, struct dev_info *dev_info)
159{
160 struct cflayer *dn;
161 int idx = dev_info->id % DN_CACHE_SIZE;
162 dn = muxl->dn_cache[idx];
163 if (dn == NULL || dn->id != dev_info->id) {
164 dn = get_from_id(&muxl->frml_list, dev_info->id);
165 muxl->dn_cache[idx] = dn;
166 }
167 return dn;
168}
169
170struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id)
171{
172 struct cflayer *up;
173 struct cfmuxl *muxl = container_obj(layr);
174 spin_lock(&muxl->receive_lock);
175 up = get_up(muxl, id);
176 if (up == NULL)
177 goto out;
178 memset(muxl->up_cache, 0, sizeof(muxl->up_cache));
179 list_del(&up->node);
180 cfsrvl_put(up);
181out:
182 spin_unlock(&muxl->receive_lock);
183 return up;
184}
185
186static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
187{
188 int ret;
189 struct cfmuxl *muxl = container_obj(layr);
190 u8 id;
191 struct cflayer *up;
192 if (cfpkt_extr_head(pkt, &id, 1) < 0) {
193 pr_err("CAIF: %s(): erroneous Caif Packet\n", __func__);
194 cfpkt_destroy(pkt);
195 return -EPROTO;
196 }
197
198 spin_lock(&muxl->receive_lock);
199 up = get_up(muxl, id);
200 spin_unlock(&muxl->receive_lock);
201 if (up == NULL) {
202 pr_info("CAIF: %s():Received data on unknown link ID = %d "
203 "(0x%x) up == NULL", __func__, id, id);
204 cfpkt_destroy(pkt);
205 /*
206 * Don't return ERROR, since modem misbehaves and sends out
207 * flow on before linksetup response.
208 */
209 return 0;
210 }
211 cfsrvl_get(up);
212 ret = up->receive(up, pkt);
213 cfsrvl_put(up);
214 return ret;
215}
216
217static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
218{
219 int ret;
220 struct cfmuxl *muxl = container_obj(layr);
221 u8 linkid;
222 struct cflayer *dn;
223 struct caif_payload_info *info = cfpkt_info(pkt);
224 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info);
225 if (dn == NULL) {
226 pr_warning("CAIF: %s(): Send data on unknown phy "
227 "ID = %d (0x%x)\n",
228 __func__, info->dev_info->id, info->dev_info->id);
229 return -ENOTCONN;
230 }
231 info->hdr_len += 1;
232 linkid = info->channel_id;
233 cfpkt_add_head(pkt, &linkid, 1);
234 ret = dn->transmit(dn, pkt);
235 /* Remove MUX protocol header upon error. */
236 if (ret < 0)
237 cfpkt_extr_head(pkt, &linkid, 1);
238 return ret;
239}
240
241static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
242 int phyid)
243{
244 struct cfmuxl *muxl = container_obj(layr);
245 struct list_head *node;
246 struct cflayer *layer;
247 list_for_each(node, &muxl->srvl_list) {
248 layer = list_entry(node, struct cflayer, node);
249 if (cfsrvl_phyid_match(layer, phyid))
250 layer->ctrlcmd(layer, ctrl, phyid);
251 }
252}
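
get_up() and get_dn() put a direct-mapped cache in front of the linear list walk: the slot is the id modulo the cache size, a hit is validated by comparing the stored layer's id, and the whole cache is flushed whenever a layer is added or removed. The same pattern as a self-contained sketch (cached_lookup/slow_lookup are made-up names):

	#include <stdio.h>
	#include <stddef.h>

	#define CACHE_SIZE 8

	struct item {
		int id;
		struct item *next;
	};

	static struct item *cache[CACHE_SIZE];

	/* Linear O(n) lookup; this stands in for get_from_id(). */
	static struct item *slow_lookup(struct item *head, int id)
	{
		for (; head; head = head->next)
			if (head->id == id)
				return head;
		return NULL;
	}

	/* Direct-mapped front cache, as in get_up()/get_dn(): index by
	 * id % CACHE_SIZE and validate the hit by comparing ids. */
	static struct item *cached_lookup(struct item *head, int id)
	{
		int idx = id % CACHE_SIZE;
		struct item *it = cache[idx];

		if (it == NULL || it->id != id) {
			it = slow_lookup(head, id);
			cache[idx] = it;	/* may cache NULL for a miss */
		}
		return it;
	}

	int main(void)
	{
		struct item b = { 10, NULL }, a = { 2, &b };

		printf("%p\n", (void *)cached_lookup(&a, 10));	/* fills slot 2 */
		printf("%p\n", (void *)cached_lookup(&a, 10));	/* cache hit */
		return 0;
	}
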
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
new file mode 100644
index 000000000000..01f238ff2346
--- /dev/null
+++ b/net/caif/cfpkt_skbuff.c
@@ -0,0 +1,579 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/string.h>
8#include <linux/skbuff.h>
9#include <linux/hardirq.h>
10#include <net/caif/cfpkt.h>
11
12#define PKT_PREFIX 16
13#define PKT_POSTFIX 2
14#define PKT_LEN_WHEN_EXTENDING 128
15#define PKT_ERROR(pkt, errmsg) do { \
16 cfpkt_priv(pkt)->erronous = true; \
17 skb_reset_tail_pointer(&pkt->skb); \
18 pr_warning("CAIF: " errmsg);\
19 } while (0)
20
21struct cfpktq {
22 struct sk_buff_head head;
23 atomic_t count;
24 /* Lock protects count updates */
25 spinlock_t lock;
26};
27
28/*
29 * net/caif/ is generic and does not
30 * understand SKB, so we do this typecast
31 */
32struct cfpkt {
33 struct sk_buff skb;
34};
35
36/* Private data inside SKB */
37struct cfpkt_priv_data {
38 struct dev_info dev_info;
39 bool erronous;
40};
41
42inline struct cfpkt_priv_data *cfpkt_priv(struct cfpkt *pkt)
43{
44 return (struct cfpkt_priv_data *) pkt->skb.cb;
45}
46
47inline bool is_erronous(struct cfpkt *pkt)
48{
49 return cfpkt_priv(pkt)->erronous;
50}
51
52inline struct sk_buff *pkt_to_skb(struct cfpkt *pkt)
53{
54 return &pkt->skb;
55}
56
57inline struct cfpkt *skb_to_pkt(struct sk_buff *skb)
58{
59 return (struct cfpkt *) skb;
60}
61
62
63struct cfpkt *cfpkt_fromnative(enum caif_direction dir, void *nativepkt)
64{
65 struct cfpkt *pkt = skb_to_pkt(nativepkt);
66 cfpkt_priv(pkt)->erronous = false;
67 return pkt;
68}
69EXPORT_SYMBOL(cfpkt_fromnative);
70
71void *cfpkt_tonative(struct cfpkt *pkt)
72{
73 return (void *) pkt;
74}
75EXPORT_SYMBOL(cfpkt_tonative);
76
77static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx)
78{
79 struct sk_buff *skb;
80
81 if (likely(in_interrupt()))
82 skb = alloc_skb(len + pfx, GFP_ATOMIC);
83 else
84 skb = alloc_skb(len + pfx, GFP_KERNEL);
85
86 if (unlikely(skb == NULL))
87 return NULL;
88
89 skb_reserve(skb, pfx);
90 return skb_to_pkt(skb);
91}
92
93inline struct cfpkt *cfpkt_create(u16 len)
94{
95 return cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
96}
97EXPORT_SYMBOL(cfpkt_create);
98
99void cfpkt_destroy(struct cfpkt *pkt)
100{
101 struct sk_buff *skb = pkt_to_skb(pkt);
102 kfree_skb(skb);
103}
104EXPORT_SYMBOL(cfpkt_destroy);
105
106inline bool cfpkt_more(struct cfpkt *pkt)
107{
108 struct sk_buff *skb = pkt_to_skb(pkt);
109 return skb->len > 0;
110}
111EXPORT_SYMBOL(cfpkt_more);
112
113int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len)
114{
115 struct sk_buff *skb = pkt_to_skb(pkt);
116 if (skb_headlen(skb) >= len) {
117 memcpy(data, skb->data, len);
118 return 0;
119 }
120 return !cfpkt_extr_head(pkt, data, len) &&
121 !cfpkt_add_head(pkt, data, len);
122}
123EXPORT_SYMBOL(cfpkt_peek_head);
124
125int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
126{
127 struct sk_buff *skb = pkt_to_skb(pkt);
128 u8 *from;
129 if (unlikely(is_erronous(pkt)))
130 return -EPROTO;
131
132 if (unlikely(len > skb->len)) {
133 PKT_ERROR(pkt, "cfpkt_extr_head read beyond end of packet\n");
134 return -EPROTO;
135 }
136
137 if (unlikely(len > skb_headlen(skb))) {
138 if (unlikely(skb_linearize(skb) != 0)) {
139 PKT_ERROR(pkt, "cfpkt_extr_head linearize failed\n");
140 return -EPROTO;
141 }
142 }
143 from = skb_pull(skb, len);
144 from -= len;
145 memcpy(data, from, len);
146 return 0;
147}
148EXPORT_SYMBOL(cfpkt_extr_head);
149
150int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
151{
152 struct sk_buff *skb = pkt_to_skb(pkt);
153 u8 *data = dta;
154 u8 *from;
155 if (unlikely(is_erronous(pkt)))
156 return -EPROTO;
157
158 if (unlikely(skb_linearize(skb) != 0)) {
159 PKT_ERROR(pkt, "cfpkt_extr_trail linearize failed\n");
160 return -EPROTO;
161 }
162 if (unlikely(skb->data + len > skb_tail_pointer(skb))) {
163 PKT_ERROR(pkt, "cfpkt_extr_trail read beyond end of packet\n");
164 return -EPROTO;
165 }
166 from = skb_tail_pointer(skb) - len;
167 skb_trim(skb, skb->len - len);
168 memcpy(data, from, len);
169 return 0;
170}
171EXPORT_SYMBOL(cfpkt_extr_trail);
172
173int cfpkt_pad_trail(struct cfpkt *pkt, u16 len)
174{
175 return cfpkt_add_body(pkt, NULL, len);
176}
177EXPORT_SYMBOL(cfpkt_pad_trail);
178
179int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
180{
181 struct sk_buff *skb = pkt_to_skb(pkt);
182 struct sk_buff *lastskb;
183 u8 *to;
184 u16 addlen = 0;
185
186
187 if (unlikely(is_erronous(pkt)))
188 return -EPROTO;
189
190 lastskb = skb;
191
192 /* Check whether we need to add space at the tail */
193 if (unlikely(skb_tailroom(skb) < len)) {
194 if (likely(len < PKT_LEN_WHEN_EXTENDING))
195 addlen = PKT_LEN_WHEN_EXTENDING;
196 else
197 addlen = len;
198 }
199
200 /* Check whether we need to change the SKB before writing to the tail */
201 if (unlikely((addlen > 0) || skb_cloned(skb) || skb_shared(skb))) {
202
203 /* Make sure data is writable */
204 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) {
205 PKT_ERROR(pkt, "cfpkt_add_body: cow failed\n");
206 return -EPROTO;
207 }
208 /*
209 * Is the SKB non-linear after skb_cow_data()? If so, we are
210 * going to add data to the last SKB, so we need to adjust
211 * lengths of the top SKB.
212 */
213 if (lastskb != skb) {
214 pr_warning("CAIF: %s(): Packet is non-linear\n",
215 __func__);
216 skb->len += len;
217 skb->data_len += len;
218 }
219 }
220
221 /* All set; skb_put() the last SKB and optionally copy data there. */
222 to = skb_put(lastskb, len);
223 if (likely(data))
224 memcpy(to, data, len);
225 return 0;
226}
227EXPORT_SYMBOL(cfpkt_add_body);
228
229inline int cfpkt_addbdy(struct cfpkt *pkt, u8 data)
230{
231 return cfpkt_add_body(pkt, &data, 1);
232}
233EXPORT_SYMBOL(cfpkt_addbdy);
234
235int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
236{
237 struct sk_buff *skb = pkt_to_skb(pkt);
238 struct sk_buff *lastskb;
239 u8 *to;
240 const u8 *data = data2;
241 int ret;
242 if (unlikely(is_erronous(pkt)))
243 return -EPROTO;
244 if (unlikely(skb_headroom(skb) < len)) {
245 PKT_ERROR(pkt, "cfpkt_add_head: no headroom\n");
246 return -EPROTO;
247 }
248
249 /* Make sure data is writable */
250 ret = skb_cow_data(skb, 0, &lastskb);
251 if (unlikely(ret < 0)) {
252 PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n");
253 return ret;
254 }
255
256 to = skb_push(skb, len);
257 memcpy(to, data, len);
258 return 0;
259}
260EXPORT_SYMBOL(cfpkt_add_head);
261
262inline int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len)
263{
264 return cfpkt_add_body(pkt, data, len);
265}
266EXPORT_SYMBOL(cfpkt_add_trail);
267
268inline u16 cfpkt_getlen(struct cfpkt *pkt)
269{
270 struct sk_buff *skb = pkt_to_skb(pkt);
271 return skb->len;
272}
273EXPORT_SYMBOL(cfpkt_getlen);
274
275inline u16 cfpkt_iterate(struct cfpkt *pkt,
276 u16 (*iter_func)(u16, void *, u16),
277 u16 data)
278{
279 /*
280 * Don't care about the performance hit of linearizing;
281 * checksums should not be used on high-speed interfaces anyway.
282 */
283 if (unlikely(is_erronous(pkt)))
284 return -EPROTO;
285 if (unlikely(skb_linearize(&pkt->skb) != 0)) {
286 PKT_ERROR(pkt, "cfpkt_iterate: linearize failed\n");
287 return -EPROTO;
288 }
289 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
290}
291EXPORT_SYMBOL(cfpkt_iterate);
292
293int cfpkt_setlen(struct cfpkt *pkt, u16 len)
294{
295 struct sk_buff *skb = pkt_to_skb(pkt);
296
297
298 if (unlikely(is_erronous(pkt)))
299 return -EPROTO;
300
301 if (likely(len <= skb->len)) {
302 if (unlikely(skb->data_len))
303 ___pskb_trim(skb, len);
304 else
305 skb_trim(skb, len);
306
307 return cfpkt_getlen(pkt);
308 }
309
310 /* Need to expand SKB */
311 if (unlikely(cfpkt_pad_trail(pkt, len - skb->len) != 0))
312 PKT_ERROR(pkt, "cfpkt_setlen: skb_pad_trail failed\n");
313
314 return cfpkt_getlen(pkt);
315}
316EXPORT_SYMBOL(cfpkt_setlen);
317
318struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len)
319{
320 struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
321 if (!pkt)
322 return NULL;
323 if (unlikely(data != NULL))
324 cfpkt_add_body(pkt, data, len);
325 return pkt;
326}
327EXPORT_SYMBOL(cfpkt_create_uplink);
328
329struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
330 struct cfpkt *addpkt,
331 u16 expectlen)
332{
333 struct sk_buff *dst = pkt_to_skb(dstpkt);
334 struct sk_buff *add = pkt_to_skb(addpkt);
335 u16 addlen = skb_headlen(add);
336 u16 neededtailspace;
337 struct sk_buff *tmp;
338 u16 dstlen;
339 u16 createlen;
340 if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
341 return dstpkt;
342 }
343 if (expectlen > addlen)
344 neededtailspace = expectlen;
345 else
346 neededtailspace = addlen;
347
348 if (dst->tail + neededtailspace > dst->end) {
349 /* Create a duplicate of 'dst' with more tail space */
350 struct cfpkt *tmppkt;
351 dstlen = skb_headlen(dst);
352 createlen = dstlen + neededtailspace;
353 tmppkt = cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX);
354 if (tmppkt == NULL)
355 return NULL;
356 tmp = pkt_to_skb(tmppkt);
357 skb_set_tail_pointer(tmp, dstlen);
358 tmp->len = dstlen;
359 memcpy(tmp->data, dst->data, dstlen);
360 cfpkt_destroy(dstpkt);
361 dst = tmp;
362 }
363 memcpy(skb_tail_pointer(dst), add->data, skb_headlen(add));
364 cfpkt_destroy(addpkt);
365 dst->tail += addlen;
366 dst->len += addlen;
367 return skb_to_pkt(dst);
368}
369EXPORT_SYMBOL(cfpkt_append);
370
371struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
372{
373 struct sk_buff *skb2;
374 struct sk_buff *skb = pkt_to_skb(pkt);
375 struct cfpkt *tmppkt;
376 u8 *split = skb->data + pos;
377 u16 len2nd = skb_tail_pointer(skb) - split;
378
379 if (unlikely(is_erronous(pkt)))
380 return NULL;
381
382 if (skb->data + pos > skb_tail_pointer(skb)) {
383 PKT_ERROR(pkt,
384 "cfpkt_split: trying to split beyond end of packet");
385 return NULL;
386 }
387
388 /* Create a new packet for the second part of the data */
389 tmppkt = cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX,
390 PKT_PREFIX);
391 if (tmppkt == NULL)
392 return NULL;
393 skb2 = pkt_to_skb(tmppkt);
394
395
396 if (skb2 == NULL)
397 return NULL;
398
399 /* Reduce the length of the original packet */
400 skb_set_tail_pointer(skb, pos);
401 skb->len = pos;
402
403 memcpy(skb2->data, split, len2nd);
404 skb2->tail += len2nd;
405 skb2->len += len2nd;
406 return skb_to_pkt(skb2);
407}
408EXPORT_SYMBOL(cfpkt_split);
409
410char *cfpkt_log_pkt(struct cfpkt *pkt, char *buf, int buflen)
411{
412 struct sk_buff *skb = pkt_to_skb(pkt);
413 char *p = buf;
414 int i;
415
416 /*
417 * Sanity check buffer length, it needs to be at least as large as
418 * the header info: ~=50+ bytes
419 */
420 if (buflen < 50)
421 return NULL;
422
423 snprintf(buf, buflen, "%s: pkt:%p len:%ld(%ld+%ld) {%ld,%ld} data: [",
424 is_erronous(pkt) ? "ERRONOUS-SKB" :
425 (skb->data_len != 0 ? "COMPLEX-SKB" : "SKB"),
426 skb,
427 (long) skb->len,
428 (long) (skb_tail_pointer(skb) - skb->data),
429 (long) skb->data_len,
430 (long) (skb->data - skb->head),
431 (long) (skb_tail_pointer(skb) - skb->head));
432 p = buf + strlen(buf);
433
434 for (i = 0; i < skb_tail_pointer(skb) - skb->data && i < 300; i++) {
435 if (p > buf + buflen - 10) {
436 sprintf(p, "...");
437 p = buf + strlen(buf);
438 break;
439 }
440 sprintf(p, "%02x,", skb->data[i]);
441 p = buf + strlen(buf);
442 }
443 sprintf(p, "]\n");
444 return buf;
445}
446EXPORT_SYMBOL(cfpkt_log_pkt);
447
448int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen)
449{
450 struct sk_buff *skb = pkt_to_skb(pkt);
451 struct sk_buff *lastskb;
452
453 caif_assert(buf != NULL);
454 if (unlikely(is_erronous(pkt)))
455 return -EPROTO;
456 /* Make sure SKB is writable */
457 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
458 PKT_ERROR(pkt, "cfpkt_raw_append: skb_cow_data failed\n");
459 return -EPROTO;
460 }
461
462 if (unlikely(skb_linearize(skb) != 0)) {
463 PKT_ERROR(pkt, "cfpkt_raw_append: linearize failed\n");
464 return -EPROTO;
465 }
466
467 if (unlikely(skb_tailroom(skb) < buflen)) {
468 PKT_ERROR(pkt, "cfpkt_raw_append: buffer too short - failed\n");
469 return -EPROTO;
470 }
471
472 *buf = skb_put(skb, buflen);
473 return 1;
474}
475EXPORT_SYMBOL(cfpkt_raw_append);
476
477int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen)
478{
479 struct sk_buff *skb = pkt_to_skb(pkt);
480
481 caif_assert(buf != NULL);
482 if (unlikely(is_erronous(pkt)))
483 return -EPROTO;
484
485 if (unlikely(buflen > skb->len)) {
486 PKT_ERROR(pkt, "cfpkt_raw_extract: buflen too large "
487 "- failed\n");
488 return -EPROTO;
489 }
490
491 if (unlikely(buflen > skb_headlen(skb))) {
492 if (unlikely(skb_linearize(skb) != 0)) {
493 PKT_ERROR(pkt, "cfpkt_raw_extract: linearize failed\n");
494 return -EPROTO;
495 }
496 }
497
498 *buf = skb->data;
499 skb_pull(skb, buflen);
500
501 return 1;
502}
503EXPORT_SYMBOL(cfpkt_raw_extract);
504
505inline bool cfpkt_erroneous(struct cfpkt *pkt)
506{
507 return cfpkt_priv(pkt)->erronous;
508}
509EXPORT_SYMBOL(cfpkt_erroneous);
510
511struct cfpktq *cfpktq_create(void)
512{
513 struct cfpktq *q = kmalloc(sizeof(struct cfpktq), GFP_ATOMIC);
514 if (!q)
515 return NULL;
516 skb_queue_head_init(&q->head);
517 atomic_set(&q->count, 0);
518 spin_lock_init(&q->lock);
519 return q;
520}
521EXPORT_SYMBOL(cfpktq_create);
522
523void cfpkt_queue(struct cfpktq *pktq, struct cfpkt *pkt, unsigned short prio)
524{
525 atomic_inc(&pktq->count);
526 spin_lock(&pktq->lock);
527 skb_queue_tail(&pktq->head, pkt_to_skb(pkt));
528 spin_unlock(&pktq->lock);
529
530}
531EXPORT_SYMBOL(cfpkt_queue);
532
533struct cfpkt *cfpkt_qpeek(struct cfpktq *pktq)
534{
535 struct cfpkt *tmp;
536 spin_lock(&pktq->lock);
537 tmp = skb_to_pkt(skb_peek(&pktq->head));
538 spin_unlock(&pktq->lock);
539 return tmp;
540}
541EXPORT_SYMBOL(cfpkt_qpeek);
542
543struct cfpkt *cfpkt_dequeue(struct cfpktq *pktq)
544{
545 struct cfpkt *pkt;
546 spin_lock(&pktq->lock);
547 pkt = skb_to_pkt(skb_dequeue(&pktq->head));
548 if (pkt) {
549 atomic_dec(&pktq->count);
550 caif_assert(atomic_read(&pktq->count) >= 0);
551 }
552 spin_unlock(&pktq->lock);
553 return pkt;
554}
555EXPORT_SYMBOL(cfpkt_dequeue);
556
557int cfpkt_qcount(struct cfpktq *pktq)
558{
559 return atomic_read(&pktq->count);
560}
561EXPORT_SYMBOL(cfpkt_qcount);
562
563struct cfpkt *cfpkt_clone_release(struct cfpkt *pkt)
564{
565 struct cfpkt *clone;
566 clone = skb_to_pkt(skb_clone(pkt_to_skb(pkt), GFP_ATOMIC));
567 /* Free original packet. */
568 cfpkt_destroy(pkt);
569 if (!clone)
570 return NULL;
571 return clone;
572}
573EXPORT_SYMBOL(cfpkt_clone_release);
574
575struct caif_payload_info *cfpkt_info(struct cfpkt *pkt)
576{
577 return (struct caif_payload_info *)&pkt_to_skb(pkt)->cb;
578}
579EXPORT_SYMBOL(cfpkt_info);
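
The service layers above all depend on one discipline from this API: push your header with cfpkt_add_head(), and if the layer below fails, pop the same bytes with cfpkt_extr_head() so the packet leaves exactly as it arrived. An illustrative kernel-style use of the API (example_transmit and its header byte are hypothetical, not part of this patch):

	#include <linux/types.h>
	#include <linux/errno.h>
	#include <net/caif/caif_layer.h>
	#include <net/caif/cfpkt.h>

	/* Sketch of the push/transmit/pop pattern used by cfdgml, cfveil,
	 * cfmuxl and cffrml above; example_hdr is a made-up header byte. */
	static int example_transmit(struct cflayer *dn, struct cfpkt *pkt)
	{
		u8 example_hdr = 0x05;
		int ret;

		if (cfpkt_add_head(pkt, &example_hdr, 1) < 0)
			return -EPROTO;	/* packet already marked erroneous */

		ret = dn->transmit(dn, pkt);
		if (ret < 0)
			/* undo our header so the caller sees the original */
			cfpkt_extr_head(pkt, &example_hdr, 1);
		return ret;
	}
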
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
new file mode 100644
index 000000000000..eb1602022ac0
--- /dev/null
+++ b/net/caif/cfrfml.c
@@ -0,0 +1,310 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/spinlock.h>
9#include <linux/slab.h>
10#include <asm/unaligned.h>
11#include <net/caif/caif_layer.h>
12#include <net/caif/cfsrvl.h>
13#include <net/caif/cfpkt.h>
14
15#define container_obj(layr) container_of(layr, struct cfrfml, serv.layer)
16#define RFM_SEGMENTATION_BIT 0x01
17#define RFM_HEAD_SIZE 7
18
19static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
20static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
21
22struct cfrfml {
23 struct cfsrvl serv;
24 struct cfpkt *incomplete_frm;
25 int fragment_size;
26 u8 seghead[6];
27 u16 pdu_size;
28 /* Protects serialized processing of packets */
29 spinlock_t sync;
30};
31
32static void cfrfml_release(struct kref *kref)
33{
34 struct cfsrvl *srvl = container_of(kref, struct cfsrvl, ref);
35 struct cfrfml *rfml = container_obj(&srvl->layer);
36
37 if (rfml->incomplete_frm)
38 cfpkt_destroy(rfml->incomplete_frm);
39
40 kfree(srvl);
41}
42
43struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
44 int mtu_size)
45{
46 int tmp;
47 struct cfrfml *this =
48 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
49
50 if (!this) {
51 pr_warning("CAIF: %s(): Out of memory\n", __func__);
52 return NULL;
53 }
54
55 cfsrvl_init(&this->serv, channel_id, dev_info, false);
56 this->serv.release = cfrfml_release;
57 this->serv.layer.receive = cfrfml_receive;
58 this->serv.layer.transmit = cfrfml_transmit;
59
60 /* Round down to closest multiple of 16 */
61 tmp = (mtu_size - RFM_HEAD_SIZE - 6) / 16;
62 tmp *= 16;
63
64 this->fragment_size = tmp;
65 spin_lock_init(&this->sync);
66 snprintf(this->serv.layer.name, CAIF_LAYER_NAME_SZ,
67 "rfm%d", channel_id);
68
69 return &this->serv.layer;
70}
71
72static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead,
73 struct cfpkt *pkt, int *err)
74{
75 struct cfpkt *tmppkt;
76 *err = -EPROTO;
77 /* n-th but not last segment */
78
79 if (cfpkt_extr_head(pkt, seghead, 6) < 0)
80 return NULL;
81
82 /* Verify correct header */
83 if (memcmp(seghead, rfml->seghead, 6) != 0)
84 return NULL;
85
86 tmppkt = cfpkt_append(rfml->incomplete_frm, pkt,
87 rfml->pdu_size + RFM_HEAD_SIZE);
88
89 /* If cfpkt_append fails, the input packets are not freed */
90 *err = -ENOMEM;
91 if (tmppkt == NULL)
92 return NULL;
93
94 *err = 0;
95 return tmppkt;
96}
97
98static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
99{
100 u8 tmp;
101 bool segmented;
102 int err;
103 u8 seghead[6];
104 struct cfrfml *rfml;
105 struct cfpkt *tmppkt = NULL;
106
107 caif_assert(layr->up != NULL);
108 caif_assert(layr->receive != NULL);
109 rfml = container_obj(layr);
110 spin_lock(&rfml->sync);
111
112 err = -EPROTO;
113 if (cfpkt_extr_head(pkt, &tmp, 1) < 0)
114 goto out;
115 segmented = tmp & RFM_SEGMENTATION_BIT;
116
117 if (segmented) {
118 if (rfml->incomplete_frm == NULL) {
119 /* Initial Segment */
120 if (cfpkt_peek_head(pkt, rfml->seghead, 6) < 0)
121 goto out;
122
123 rfml->pdu_size = get_unaligned_le16(rfml->seghead+4);
124
125 if (cfpkt_erroneous(pkt))
126 goto out;
127 rfml->incomplete_frm = pkt;
128 pkt = NULL;
129 } else {
130
131 tmppkt = rfm_append(rfml, seghead, pkt, &err);
132 if (tmppkt == NULL)
133 goto out;
134
135 if (cfpkt_erroneous(tmppkt))
136 goto out;
137
138 rfml->incomplete_frm = tmppkt;
139
140
141 if (cfpkt_erroneous(tmppkt))
142 goto out;
143 }
144 err = 0;
145 goto out;
146 }
147
148 if (rfml->incomplete_frm) {
149
150 /* Last Segment */
151 tmppkt = rfm_append(rfml, seghead, pkt, &err);
152 if (tmppkt == NULL)
153 goto out;
154
155 if (cfpkt_erroneous(tmppkt))
156 goto out;
157
158 rfml->incomplete_frm = NULL;
159 pkt = tmppkt;
160 tmppkt = NULL;
161
162 /* Verify that length is correct */
163 err = -EPROTO;
164 if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
165 goto out;
166 }
167
168 err = rfml->serv.layer.up->receive(rfml->serv.layer.up, pkt);
169
170out:
171
172 if (err != 0) {
173 if (tmppkt)
174 cfpkt_destroy(tmppkt);
175 if (pkt)
176 cfpkt_destroy(pkt);
177 if (rfml->incomplete_frm)
178 cfpkt_destroy(rfml->incomplete_frm);
179 rfml->incomplete_frm = NULL;
180
181 pr_info("CAIF: %s(): "
182 "Connection error %d triggered on RFM link\n",
183 __func__, err);
184
185 /* Trigger connection error upon failure.*/
186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
187 rfml->serv.dev_info.id);
188 }
189 spin_unlock(&rfml->sync);
190 return err;
191}
192
193
194static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
195{
196 caif_assert(cfpkt_getlen(pkt) >= rfml->fragment_size);
197
198 /* Add info for MUX-layer to route the packet out. */
199 cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
200
201 /*
202 * To optimize alignment, we add up the size of CAIF header before
203 * payload.
204 */
205 cfpkt_info(pkt)->hdr_len = RFM_HEAD_SIZE;
206 cfpkt_info(pkt)->dev_info = &rfml->serv.dev_info;
207
208 return rfml->serv.layer.dn->transmit(rfml->serv.layer.dn, pkt);
209}
210
211static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
212{
213 int err;
214 u8 seg;
215 u8 head[6];
216 struct cfpkt *rearpkt = NULL;
217 struct cfpkt *frontpkt = pkt;
218 struct cfrfml *rfml = container_obj(layr);
219
220 caif_assert(layr->dn != NULL);
221 caif_assert(layr->dn->transmit != NULL);
222
223 if (!cfsrvl_ready(&rfml->serv, &err))
224 return err;
225
226 err = -EPROTO;
227 if (cfpkt_getlen(pkt) <= RFM_HEAD_SIZE-1)
228 goto out;
229
230 err = 0;
231 if (cfpkt_getlen(pkt) > rfml->fragment_size + RFM_HEAD_SIZE)
232 err = cfpkt_peek_head(pkt, head, 6);
233
234 if (err < 0)
235 goto out;
236
237 while (cfpkt_getlen(frontpkt) > rfml->fragment_size + RFM_HEAD_SIZE) {
238
239 seg = 1;
240 err = -EPROTO;
241
242 if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
243 goto out;
244 /*
245 * On OOM error cfpkt_split returns NULL.
246 *
247 * NOTE: Segmented pdu is not correctly aligned.
248 * This has negative performance impact.
249 */
250
251 rearpkt = cfpkt_split(frontpkt, rfml->fragment_size);
252 if (rearpkt == NULL)
253 goto out;
254
255 err = cfrfml_transmit_segment(rfml, frontpkt);
256
257 if (err != 0)
258 goto out;
259 frontpkt = rearpkt;
260 rearpkt = NULL;
261
262 err = -ENOMEM;
263 if (frontpkt == NULL)
264 goto out;
265 err = -EPROTO;
266 if (cfpkt_add_head(frontpkt, head, 6) < 0)
267 goto out;
268
269 }
270
271 seg = 0;
272 err = -EPROTO;
273
274 if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
275 goto out;
276
277 err = cfrfml_transmit_segment(rfml, frontpkt);
278
279 frontpkt = NULL;
280out:
281
282 if (err != 0) {
283 pr_info("CAIF: %s(): "
284 "Connection error %d triggered on RFM link\n",
285 __func__, err);
286 /* Trigger connection error upon failure.*/
287
288 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
289 rfml->serv.dev_info.id);
290
291 if (rearpkt)
292 cfpkt_destroy(rearpkt);
293
294 if (frontpkt && frontpkt != pkt) {
295
296 cfpkt_destroy(frontpkt);
297 /*
298 * The socket layer will free the original packet,
299 * but this packet may already have been sent and
300 * freed. So we have to return 0 in this case
301 * to prevent the socket layer from re-freeing it.
302 * The remote shutdown indication issued above
303 * will cause the connection to be invalidated anyhow.
304 */
305 err = 0;
306 }
307 }
308
309 return err;
310}
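
The fragment size picked in cfrfml_create() is the link MTU minus the 7-byte RFM header and the 6-byte segmentation head, rounded down to a multiple of 16; for the STE modem's 1500-byte MTU that is (1500 - 7 - 6) / 16 = 92, so fragment_size = 92 * 16 = 1472 bytes per segment. A one-line check of the arithmetic:

	#include <stdio.h>

	#define RFM_HEAD_SIZE 7

	/* Same rounding as cfrfml_create(): strip the RFM header (7) and the
	 * segmentation head (6), then round down to a multiple of 16. */
	static int fragment_size(int mtu)
	{
		return (mtu - RFM_HEAD_SIZE - 6) / 16 * 16;
	}

	int main(void)
	{
		printf("%d\n", fragment_size(1500));	/* prints 1472 */
		return 0;
	}
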
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
new file mode 100644
index 000000000000..a11fbd68a13d
--- /dev/null
+++ b/net/caif/cfserl.c
@@ -0,0 +1,196 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/spinlock.h>
9#include <linux/slab.h>
10#include <net/caif/caif_layer.h>
11#include <net/caif/cfpkt.h>
12#include <net/caif/cfserl.h>
13
14#define container_obj(layr) ((struct cfserl *) layr)
15
16#define CFSERL_STX 0x02
17#define SERIAL_MINIMUM_PACKET_SIZE 4
18#define SERIAL_MAX_FRAMESIZE 4096
19struct cfserl {
20 struct cflayer layer;
21 struct cfpkt *incomplete_frm;
22 /* Protects parallel processing of incoming packets */
23 spinlock_t sync;
24 bool usestx;
25};
26#define STXLEN(layr) (layr->usestx ? 1 : 0)
27
28static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
29static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
30static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
31 int phyid);
32
33struct cflayer *cfserl_create(int type, int instance, bool use_stx)
34{
35 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC);
36 if (!this) {
37 pr_warning("CAIF: %s(): Out of memory\n", __func__);
38 return NULL;
39 }
40 caif_assert(offsetof(struct cfserl, layer) == 0);
41 memset(this, 0, sizeof(struct cfserl));
42 this->layer.receive = cfserl_receive;
43 this->layer.transmit = cfserl_transmit;
44 this->layer.ctrlcmd = cfserl_ctrlcmd;
45 this->layer.type = type;
46 this->usestx = use_stx;
47 spin_lock_init(&this->sync);
48 snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "ser1");
49 return &this->layer;
50}
51
52static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
53{
54 struct cfserl *layr = container_obj(l);
55 u16 pkt_len;
56 struct cfpkt *pkt = NULL;
57 struct cfpkt *tail_pkt = NULL;
58 u8 tmp8;
59 u16 tmp;
60 u8 stx = CFSERL_STX;
61 int ret;
62 u16 expectlen = 0;
63
64 caif_assert(newpkt != NULL);
65 spin_lock(&layr->sync);
66
67 if (layr->incomplete_frm != NULL) {
68 layr->incomplete_frm =
69 cfpkt_append(layr->incomplete_frm, newpkt, expectlen);
70 pkt = layr->incomplete_frm;
71 if (pkt == NULL) {
72 spin_unlock(&layr->sync);
73 return -ENOMEM;
74 }
75 } else {
76 pkt = newpkt;
77 }
78 layr->incomplete_frm = NULL;
79
80 do {
81 /* Search for STX at start of pkt if STX is used */
82 if (layr->usestx) {
83 cfpkt_extr_head(pkt, &tmp8, 1);
84 if (tmp8 != CFSERL_STX) {
85 while (cfpkt_more(pkt)
86 && tmp8 != CFSERL_STX) {
87 cfpkt_extr_head(pkt, &tmp8, 1);
88 }
89 if (!cfpkt_more(pkt)) {
90 cfpkt_destroy(pkt);
91 layr->incomplete_frm = NULL;
92 spin_unlock(&layr->sync);
93 return -EPROTO;
94 }
95 }
96 }
97
98 pkt_len = cfpkt_getlen(pkt);
99
100 /*
101 * pkt_len is the accumulated length of the packet data
102 * we have received so far.
103 * Exit if frame doesn't hold length.
104 */
105
106 if (pkt_len < 2) {
107 if (layr->usestx)
108 cfpkt_add_head(pkt, &stx, 1);
109 layr->incomplete_frm = pkt;
110 spin_unlock(&layr->sync);
111 return 0;
112 }
113
114 /*
115 * Find length of frame.
116 * expectlen is the length we need for a full frame.
117 */
118 cfpkt_peek_head(pkt, &tmp, 2);
119 expectlen = le16_to_cpu(tmp) + 2;
120 /*
121 * Frame error handling
122 */
123 if (expectlen < SERIAL_MINIMUM_PACKET_SIZE
124 || expectlen > SERIAL_MAX_FRAMESIZE) {
125 if (!layr->usestx) {
126 if (pkt != NULL)
127 cfpkt_destroy(pkt);
128 layr->incomplete_frm = NULL;
129 expectlen = 0;
130 spin_unlock(&layr->sync);
131 return -EPROTO;
132 }
133 continue;
134 }
135
136 if (pkt_len < expectlen) {
137 /* Too little received data */
138 if (layr->usestx)
139 cfpkt_add_head(pkt, &stx, 1);
140 layr->incomplete_frm = pkt;
141 spin_unlock(&layr->sync);
142 return 0;
143 }
144
145 /*
146 * Enough data for at least one frame.
147 * Split the frame, if too long
148 */
149 if (pkt_len > expectlen)
150 tail_pkt = cfpkt_split(pkt, expectlen);
151 else
152 tail_pkt = NULL;
153
154 /* Send the first part of packet upwards.*/
155 spin_unlock(&layr->sync);
156 ret = layr->layer.up->receive(layr->layer.up, pkt);
157 spin_lock(&layr->sync);
158 if (ret == -EILSEQ) {
159 if (layr->usestx) {
160 if (tail_pkt != NULL)
161 pkt = cfpkt_append(pkt, tail_pkt, 0);
162 /* Start search for next STX if frame failed */
163 continue;
164 } else {
165 cfpkt_destroy(pkt);
166 pkt = NULL;
167 }
168 }
169
170 pkt = tail_pkt;
171
172 } while (pkt != NULL);
173
174 spin_unlock(&layr->sync);
175 return 0;
176}
177
178static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt)
179{
180 struct cfserl *layr = container_obj(layer);
181 int ret;
182 u8 tmp8 = CFSERL_STX;
183 if (layr->usestx)
184 cfpkt_add_head(newpkt, &tmp8, 1);
185 ret = layer->dn->transmit(layer->dn, newpkt);
186 if (ret < 0)
187 cfpkt_extr_head(newpkt, &tmp8, 1);
188
189 return ret;
190}
191
192static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
193 int phyid)
194{
195 layr->up->ctrlcmd(layr->up, ctrl, phyid);
196}
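
A serial frame is an optional STX byte (0x02) followed by a little-endian u16 length and the frame body, so after the STX the receiver needs length + 2 buffered bytes (expectlen) before it can deliver, and any frame outside 4..4096 bytes forces a resync to the next STX. The length check as a sketch (serial_expectlen is a made-up helper):

	#include <stdio.h>
	#include <stdint.h>

	#define SERIAL_MINIMUM_PACKET_SIZE 4
	#define SERIAL_MAX_FRAMESIZE 4096

	/* Given the two length bytes that follow the STX, compute how many
	 * bytes the full frame needs (expectlen in cfserl_receive()) and
	 * apply the same sanity bounds. Returns -1 on a bad length. */
	static int serial_expectlen(uint8_t lo, uint8_t hi)
	{
		int expectlen = (lo | (hi << 8)) + 2;	/* le16 len + 2 */

		if (expectlen < SERIAL_MINIMUM_PACKET_SIZE ||
		    expectlen > SERIAL_MAX_FRAMESIZE)
			return -1;
		return expectlen;
	}

	int main(void)
	{
		printf("%d\n", serial_expectlen(0x10, 0x00));	/* 18 */
		printf("%d\n", serial_expectlen(0xff, 0xff));	/* -1, too big */
		return 0;
	}
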
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
new file mode 100644
index 000000000000..f40939a91211
--- /dev/null
+++ b/net/caif/cfsrvl.c
@@ -0,0 +1,210 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/errno.h>
10#include <linux/slab.h>
11#include <net/caif/caif_layer.h>
12#include <net/caif/cfsrvl.h>
13#include <net/caif/cfpkt.h>
14
15#define SRVL_CTRL_PKT_SIZE 1
16#define SRVL_FLOW_OFF 0x81
17#define SRVL_FLOW_ON 0x80
18#define SRVL_SET_PIN 0x82
19
20
21#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
22
23static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
24 int phyid)
25{
26 struct cfsrvl *service = container_obj(layr);
27
28 caif_assert(layr->up != NULL);
29 caif_assert(layr->up->ctrlcmd != NULL);
30
31 switch (ctrl) {
32 case CAIF_CTRLCMD_INIT_RSP:
33 service->open = true;
34 layr->up->ctrlcmd(layr->up, ctrl, phyid);
35 break;
36 case CAIF_CTRLCMD_DEINIT_RSP:
37 case CAIF_CTRLCMD_INIT_FAIL_RSP:
38 service->open = false;
39 layr->up->ctrlcmd(layr->up, ctrl, phyid);
40 break;
41 case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
42 if (phyid != service->dev_info.id)
43 break;
44 if (service->modem_flow_on)
45 layr->up->ctrlcmd(layr->up,
46 CAIF_CTRLCMD_FLOW_OFF_IND, phyid);
47 service->phy_flow_on = false;
48 break;
49 case _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND:
50 if (phyid != service->dev_info.id)
51 return;
52 if (service->modem_flow_on) {
53 layr->up->ctrlcmd(layr->up,
54 CAIF_CTRLCMD_FLOW_ON_IND,
55 phyid);
56 }
57 service->phy_flow_on = true;
58 break;
59 case CAIF_CTRLCMD_FLOW_OFF_IND:
60 if (service->phy_flow_on) {
61 layr->up->ctrlcmd(layr->up,
62 CAIF_CTRLCMD_FLOW_OFF_IND, phyid);
63 }
64 service->modem_flow_on = false;
65 break;
66 case CAIF_CTRLCMD_FLOW_ON_IND:
67 if (service->phy_flow_on) {
68 layr->up->ctrlcmd(layr->up,
69 CAIF_CTRLCMD_FLOW_ON_IND, phyid);
70 }
71 service->modem_flow_on = true;
72 break;
73 case _CAIF_CTRLCMD_PHYIF_DOWN_IND:
74 /* In case the interface is down, let's fake a remote shutdown */
75 layr->up->ctrlcmd(layr->up,
76 CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, phyid);
77 break;
78 case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
79 layr->up->ctrlcmd(layr->up, ctrl, phyid);
80 break;
81 default:
82 pr_warning("CAIF: %s(): "
83 "Unexpected ctrl in cfsrvl (%d)\n", __func__, ctrl);
84 /* We have both modem and phy flow on, send flow on */
85 layr->up->ctrlcmd(layr->up, ctrl, phyid);
86 service->phy_flow_on = true;
87 break;
88 }
89}
90
91static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
92{
93 struct cfsrvl *service = container_obj(layr);
94
95 caif_assert(layr != NULL);
96 caif_assert(layr->dn != NULL);
97 caif_assert(layr->dn->transmit != NULL);
98
99 if (!service->supports_flowctrl)
100 return 0;
101
102 switch (ctrl) {
103 case CAIF_MODEMCMD_FLOW_ON_REQ:
104 {
105 struct cfpkt *pkt;
106 struct caif_payload_info *info;
107 u8 flow_on = SRVL_FLOW_ON;
108 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
109 if (!pkt) {
110 pr_warning("CAIF: %s(): Out of memory\n",
111 __func__);
112 return -ENOMEM;
113 }
114
115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
116 pr_err("CAIF: %s(): Packet is erroneous!\n",
117 __func__);
118 cfpkt_destroy(pkt);
119 return -EPROTO;
120 }
121 info = cfpkt_info(pkt);
122 info->channel_id = service->layer.id;
123 info->hdr_len = 1;
124 info->dev_info = &service->dev_info;
125 return layr->dn->transmit(layr->dn, pkt);
126 }
127 case CAIF_MODEMCMD_FLOW_OFF_REQ:
128 {
129 struct cfpkt *pkt;
130 struct caif_payload_info *info;
131 u8 flow_off = SRVL_FLOW_OFF;
132 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
133 if (!pkt) {
134 pr_warning("CAIF: %s(): Out of memory\n",
135 __func__);
136 return -ENOMEM;
137 }
138
139 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
140 pr_err("CAIF: %s(): Packet is erroneous!\n",
141 __func__);
142 cfpkt_destroy(pkt);
143 return -EPROTO;
144 }
145 info = cfpkt_info(pkt);
146 info->channel_id = service->layer.id;
147 info->hdr_len = 1;
148 info->dev_info = &service->dev_info;
149 return layr->dn->transmit(layr->dn, pkt);
150 }
151 default:
152 break;
153 }
154 return -EINVAL;
155}
156
157void cfservl_destroy(struct cflayer *layer)
158{
159 kfree(layer);
160}
161
162void cfsrvl_release(struct kref *kref)
163{
164 struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
165 kfree(service);
166}
167
168void cfsrvl_init(struct cfsrvl *service,
169 u8 channel_id,
170 struct dev_info *dev_info,
171 bool supports_flowctrl
172 )
173{
174 caif_assert(offsetof(struct cfsrvl, layer) == 0);
175 service->open = false;
176 service->modem_flow_on = true;
177 service->phy_flow_on = true;
178 service->layer.id = channel_id;
179 service->layer.ctrlcmd = cfservl_ctrlcmd;
180 service->layer.modemcmd = cfservl_modemcmd;
181 service->dev_info = *dev_info;
182 service->supports_flowctrl = supports_flowctrl;
183 service->release = cfsrvl_release;
184 kref_init(&service->ref);
185}
186
187
188bool cfsrvl_ready(struct cfsrvl *service, int *err)
189{
190 if (service->open && service->modem_flow_on && service->phy_flow_on)
191 return true;
192 if (!service->open) {
193 *err = -ENOTCONN;
194 return false;
195 }
196 caif_assert(!(service->modem_flow_on && service->phy_flow_on));
197 *err = -EAGAIN;
198 return false;
199}
200u8 cfsrvl_getphyid(struct cflayer *layer)
201{
202 struct cfsrvl *servl = container_obj(layer);
203 return servl->dev_info.id;
204}
205
206bool cfsrvl_phyid_match(struct cflayer *layer, int phyid)
207{
208 struct cfsrvl *servl = container_obj(layer);
209 return servl->dev_info.id == phyid;
210}
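
cfsrvl_ready() is the single gate every transmit path checks: traffic flows only when the channel is open and both the modem and physical-interface flow controls are on; a closed channel yields -ENOTCONN, a flow-controlled one -EAGAIN. Its decision table restated as a standalone sketch:

	#include <stdio.h>
	#include <stdbool.h>
	#include <errno.h>

	/* Stand-alone restatement of cfsrvl_ready(): ready only when the
	 * channel is open and both flow-control gates are on. */
	static bool srvl_ready(bool open, bool modem_flow_on,
			       bool phy_flow_on, int *err)
	{
		if (open && modem_flow_on && phy_flow_on)
			return true;
		*err = open ? -EAGAIN : -ENOTCONN;
		return false;
	}

	int main(void)
	{
		int err = 0;

		if (!srvl_ready(true, false, true, &err))
			printf("blocked: err=%d\n", err);	/* -EAGAIN */
		if (!srvl_ready(false, true, true, &err))
			printf("blocked: err=%d\n", err);	/* -ENOTCONN */
		return 0;
	}
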
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
new file mode 100644
index 000000000000..02795aff57a4
--- /dev/null
+++ b/net/caif/cfutill.c
@@ -0,0 +1,109 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/slab.h>
10#include <linux/errno.h>
11#include <net/caif/caif_layer.h>
12#include <net/caif/cfsrvl.h>
13#include <net/caif/cfpkt.h>
14
15#define container_obj(layr) ((struct cfsrvl *) layr)
16#define UTIL_PAYLOAD 0x00
17#define UTIL_CMD_BIT 0x80
18#define UTIL_REMOTE_SHUTDOWN 0x82
19#define UTIL_FLOW_OFF 0x81
20#define UTIL_FLOW_ON 0x80
21#define UTIL_CTRL_PKT_SIZE 1
22static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
23static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
24
25struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
26{
27 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!util) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__);
30 return NULL;
31 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0);
33 memset(util, 0, sizeof(struct cfsrvl));
34 cfsrvl_init(util, channel_id, dev_info, true);
35 util->layer.receive = cfutill_receive;
36 util->layer.transmit = cfutill_transmit;
37 snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1");
38 return &util->layer;
39}
40
41static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
42{
43 u8 cmd = -1;
44 struct cfsrvl *service = container_obj(layr);
45 caif_assert(layr != NULL);
46 caif_assert(layr->up != NULL);
47 caif_assert(layr->up->receive != NULL);
48 caif_assert(layr->up->ctrlcmd != NULL);
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
51 cfpkt_destroy(pkt);
52 return -EPROTO;
53 }
54
55 switch (cmd) {
56 case UTIL_PAYLOAD:
57 return layr->up->receive(layr->up, pkt);
58 case UTIL_FLOW_OFF:
59 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
60 cfpkt_destroy(pkt);
61 return 0;
62 case UTIL_FLOW_ON:
63 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
64 cfpkt_destroy(pkt);
65 return 0;
66 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */
67 pr_err("CAIF: %s(): REMOTE SHUTDOWN REQUEST RECEIVED\n",
68 __func__);
69 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0);
70 service->open = false;
71 cfpkt_destroy(pkt);
72 return 0;
73 default:
74 cfpkt_destroy(pkt);
75 pr_warning("CAIF: %s(): Unknown service control %d (0x%x)\n",
76 __func__, cmd, cmd);
77 return -EPROTO;
78 }
79}
80
81static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt)
82{
83 u8 zero = 0;
84 struct caif_payload_info *info;
85 int ret;
86 struct cfsrvl *service = container_obj(layr);
87 caif_assert(layr != NULL);
88 caif_assert(layr->dn != NULL);
89 caif_assert(layr->dn->transmit != NULL);
90 if (!cfsrvl_ready(service, &ret))
91 return ret;
92
93 cfpkt_add_head(pkt, &zero, 1);
94 /* Add info for MUX-layer to route the packet out. */
95 info = cfpkt_info(pkt);
96 info->channel_id = service->layer.id;
97 /*
98 * To optimize alignment, we add up the size of CAIF header before
99 * payload.
100 */
101 info->hdr_len = 1;
102 info->dev_info = &service->dev_info;
103 ret = layr->dn->transmit(layr->dn, pkt);
104 if (ret < 0) {
105 u8 tmp8; /* only one header byte was added above */
106 cfpkt_extr_head(pkt, &tmp8, 1);
107 }
108 return ret;
109}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
new file mode 100644
index 000000000000..77cc09faac9a
--- /dev/null
+++ b/net/caif/cfveil.c
@@ -0,0 +1,102 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/stddef.h>
8#include <linux/slab.h>
9#include <net/caif/caif_layer.h>
10#include <net/caif/cfsrvl.h>
11#include <net/caif/cfpkt.h>
12
13#define VEI_PAYLOAD 0x00
14#define VEI_CMD_BIT 0x80
15#define VEI_FLOW_OFF 0x81
16#define VEI_FLOW_ON 0x80
17#define VEI_SET_PIN 0x82
18#define VEI_CTRL_PKT_SIZE 1
19#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
20
21static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
22static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt);
23
24struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
25{
26 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
27 if (!vei) {
28 pr_warning("CAIF: %s(): Out of memory\n", __func__);
29 return NULL;
30 }
31 caif_assert(offsetof(struct cfsrvl, layer) == 0);
32 memset(vei, 0, sizeof(struct cfsrvl));
33 cfsrvl_init(vei, channel_id, dev_info, true);
34 vei->layer.receive = cfvei_receive;
35 vei->layer.transmit = cfvei_transmit;
36 snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id);
37 return &vei->layer;
38}
39
40static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
41{
42 u8 cmd;
43 int ret;
44 caif_assert(layr->up != NULL);
45 caif_assert(layr->receive != NULL);
46 caif_assert(layr->ctrlcmd != NULL);
47
48
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
51 cfpkt_destroy(pkt);
52 return -EPROTO;
53 }
54 switch (cmd) {
55 case VEI_PAYLOAD:
56 ret = layr->up->receive(layr->up, pkt);
57 return ret;
58 case VEI_FLOW_OFF:
59 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
60 cfpkt_destroy(pkt);
61 return 0;
62 case VEI_FLOW_ON:
63 layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
64 cfpkt_destroy(pkt);
65 return 0;
66 case VEI_SET_PIN: /* SET RS232 PIN */
67 cfpkt_destroy(pkt);
68 return 0;
69 default:
70 pr_warning("CAIF: %s(): Unknown VEI control packet %d (0x%x)!\n",
71 __func__, cmd, cmd);
72 cfpkt_destroy(pkt);
73 return -EPROTO;
74 }
75}
76
77static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
78{
79 u8 tmp = 0;
80 struct caif_payload_info *info;
81 int ret;
82 struct cfsrvl *service = container_obj(layr);
83 if (!cfsrvl_ready(service, &ret))
84 return ret;
85 caif_assert(layr->dn != NULL);
86 caif_assert(layr->dn->transmit != NULL);
87
88 if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
89 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
90 return -EPROTO;
91 }
92
93 /* Add info-> for MUX-layer to route the packet out. */
94 info = cfpkt_info(pkt);
95 info->channel_id = service->layer.id;
96 info->hdr_len = 1;
97 info->dev_info = &service->dev_info;
98 ret = layr->dn->transmit(layr->dn, pkt);
99 if (ret < 0)
100 cfpkt_extr_head(pkt, &tmp, 1);
101 return ret;
102}
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
new file mode 100644
index 000000000000..ada6ee2d48f5
--- /dev/null
+++ b/net/caif/cfvidl.c
@@ -0,0 +1,65 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2
5 */
6
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/slab.h>
10#include <linux/errno.h>
11#include <net/caif/caif_layer.h>
12#include <net/caif/cfsrvl.h>
13#include <net/caif/cfpkt.h>
14
15#define container_obj(layr) ((struct cfsrvl *) layr)
16
17static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt);
18static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt);
19
20struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
21{
22 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
23 if (!vid) {
24 pr_warning("CAIF: %s(): Out of memory\n", __func__);
25 return NULL;
26 }
27 caif_assert(offsetof(struct cfsrvl, layer) == 0);
28
29 memset(vid, 0, sizeof(struct cfsrvl));
30 cfsrvl_init(vid, channel_id, dev_info, false);
31 vid->layer.receive = cfvidl_receive;
32 vid->layer.transmit = cfvidl_transmit;
33 snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1");
34 return &vid->layer;
35}
36
37static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt)
38{
39 u32 videoheader;
40 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) {
41 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
42 cfpkt_destroy(pkt);
43 return -EPROTO;
44 }
45 return layr->up->receive(layr->up, pkt);
46}
47
48static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt)
49{
50 struct cfsrvl *service = container_obj(layr);
51 struct caif_payload_info *info;
52 u32 videoheader = 0;
53 int ret;
54 if (!cfsrvl_ready(service, &ret))
55 return ret;
 56	if (cfpkt_add_head(pkt, &videoheader, 4) < 0) return -EPROTO;
57 /* Add info for MUX-layer to route the packet out */
58 info = cfpkt_info(pkt);
59 info->channel_id = service->layer.id;
60 info->dev_info = &service->dev_info;
61 ret = layr->dn->transmit(layr->dn, pkt);
62 if (ret < 0)
63 cfpkt_extr_head(pkt, &videoheader, 4);
64 return ret;
65}
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
new file mode 100644
index 000000000000..4293e190ec53
--- /dev/null
+++ b/net/caif/chnl_net.c
@@ -0,0 +1,514 @@
1/*
2 * Copyright (C) ST-Ericsson AB 2010
3 * Authors: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * Daniel Martensson / Daniel.Martensson@stericsson.com
5 * License terms: GNU General Public License (GPL) version 2
6 */
7
8#include <linux/version.h>
9#include <linux/fs.h>
10#include <linux/init.h>
11#include <linux/module.h>
12#include <linux/netdevice.h>
13#include <linux/if_ether.h>
14#include <linux/moduleparam.h>
15#include <linux/ip.h>
16#include <linux/sched.h>
17#include <linux/sockios.h>
18#include <linux/caif/if_caif.h>
19#include <net/rtnetlink.h>
20#include <net/caif/caif_layer.h>
21#include <net/caif/cfcnfg.h>
22#include <net/caif/cfpkt.h>
23#include <net/caif/caif_dev.h>
24
 25/* A GPRS PDP connection has an MTU of 1500 */
26#define GPRS_PDP_MTU 1500
27/* 5 sec. connect timeout */
28#define CONNECT_TIMEOUT (5 * HZ)
29#define CAIF_NET_DEFAULT_QUEUE_LEN 500
30
31#undef pr_debug
32#define pr_debug pr_warning
33
 34/* This list is protected by the RTNL lock. */
35static LIST_HEAD(chnl_net_list);
36
37MODULE_LICENSE("GPL");
38MODULE_ALIAS_RTNL_LINK("caif");
39
40enum caif_states {
41 CAIF_CONNECTED = 1,
42 CAIF_CONNECTING,
43 CAIF_DISCONNECTED,
44 CAIF_SHUTDOWN
45};
46
47struct chnl_net {
48 struct cflayer chnl;
49 struct net_device_stats stats;
50 struct caif_connect_request conn_req;
51 struct list_head list_field;
52 struct net_device *netdev;
53 char name[256];
54 wait_queue_head_t netmgmt_wq;
55 /* Flow status to remember and control the transmission. */
56 bool flowenabled;
57 enum caif_states state;
58};
59
60static void robust_list_del(struct list_head *delete_node)
61{
62 struct list_head *list_node;
63 struct list_head *n;
64 ASSERT_RTNL();
65 list_for_each_safe(list_node, n, &chnl_net_list) {
66 if (list_node == delete_node) {
67 list_del(list_node);
68 return;
69 }
70 }
71 WARN_ON(1);
72}
73
74static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
75{
76 struct sk_buff *skb;
 77	struct chnl_net *priv;
78 int pktlen;
79 int err = 0;
80
81 priv = container_of(layr, struct chnl_net, chnl);
82
83 if (!priv)
84 return -EINVAL;
85
86 /* Get length of CAIF packet. */
87 pktlen = cfpkt_getlen(pkt);
88
89 skb = (struct sk_buff *) cfpkt_tonative(pkt);
90 /* Pass some minimum information and
91 * send the packet to the net stack.
92 */
93 skb->dev = priv->netdev;
94 skb->protocol = htons(ETH_P_IP);
95
96 /* If we change the header in loop mode, the checksum is corrupted. */
97 if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
98 skb->ip_summed = CHECKSUM_UNNECESSARY;
99 else
100 skb->ip_summed = CHECKSUM_NONE;
101
102 if (in_interrupt())
103 netif_rx(skb);
104 else
105 netif_rx_ni(skb);
106
107 /* Update statistics. */
108 priv->netdev->stats.rx_packets++;
109 priv->netdev->stats.rx_bytes += pktlen;
110
111 return err;
112}
113
114static int delete_device(struct chnl_net *dev)
115{
116 ASSERT_RTNL();
117 if (dev->netdev)
118 unregister_netdevice(dev->netdev);
119 return 0;
120}
121
122static void close_work(struct work_struct *work)
123{
124 struct chnl_net *dev = NULL;
125 struct list_head *list_node;
126 struct list_head *_tmp;
127 /* May be called with or without RTNL lock held */
128 int islocked = rtnl_is_locked();
129 if (!islocked)
130 rtnl_lock();
131 list_for_each_safe(list_node, _tmp, &chnl_net_list) {
132 dev = list_entry(list_node, struct chnl_net, list_field);
133 if (dev->state == CAIF_SHUTDOWN)
134 dev_close(dev->netdev);
135 }
136 if (!islocked)
137 rtnl_unlock();
138}
139static DECLARE_WORK(close_worker, close_work);
140
141static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
142 int phyid)
143{
144 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
145 pr_debug("CAIF: %s(): NET flowctrl func called flow: %s\n",
146 __func__,
147 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
148 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
149 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
150 flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" :
151 flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" :
152 flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ?
153 "REMOTE_SHUTDOWN" : "UKNOWN CTRL COMMAND");
154
155
156
157 switch (flow) {
158 case CAIF_CTRLCMD_FLOW_OFF_IND:
159 priv->flowenabled = false;
160 netif_stop_queue(priv->netdev);
161 break;
162 case CAIF_CTRLCMD_DEINIT_RSP:
163 priv->state = CAIF_DISCONNECTED;
164 break;
165 case CAIF_CTRLCMD_INIT_FAIL_RSP:
166 priv->state = CAIF_DISCONNECTED;
167 wake_up_interruptible(&priv->netmgmt_wq);
168 break;
169 case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
170 priv->state = CAIF_SHUTDOWN;
171 netif_tx_disable(priv->netdev);
172 schedule_work(&close_worker);
173 break;
174 case CAIF_CTRLCMD_FLOW_ON_IND:
175 priv->flowenabled = true;
176 netif_wake_queue(priv->netdev);
177 break;
178 case CAIF_CTRLCMD_INIT_RSP:
179 priv->state = CAIF_CONNECTED;
180 priv->flowenabled = true;
181 netif_wake_queue(priv->netdev);
182 wake_up_interruptible(&priv->netmgmt_wq);
183 break;
184 default:
185 break;
186 }
187}
188
189static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
190{
191 struct chnl_net *priv;
192 struct cfpkt *pkt = NULL;
193 int len;
194 int result = -1;
195 /* Get our private data. */
196 priv = netdev_priv(dev);
197
198 if (skb->len > priv->netdev->mtu) {
199 pr_warning("CAIF: %s(): Size of skb exceeded MTU\n", __func__);
200 return -ENOSPC;
201 }
202
203 if (!priv->flowenabled) {
204 pr_debug("CAIF: %s(): dropping packets flow off\n", __func__);
205 return NETDEV_TX_BUSY;
206 }
207
208 if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
209 swap(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
210
211 /* Store original SKB length. */
212 len = skb->len;
213
214 pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb);
215
216 /* Send the packet down the stack. */
217 result = priv->chnl.dn->transmit(priv->chnl.dn, pkt);
218 if (result) {
219 if (result == -EAGAIN)
220 result = NETDEV_TX_BUSY;
221 return result;
222 }
223
224 /* Update statistics. */
225 dev->stats.tx_packets++;
226 dev->stats.tx_bytes += len;
227
228 return NETDEV_TX_OK;
229}
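In loopback mode the xmit routine above swaps the IPv4 source and destination in place, so the packet that comes back through the receive path looks like a reply; the receive side correspondingly marks looped packets CHECKSUM_UNNECESSARY instead of re-verifying checksums. A sketch of just the swap, with made-up addresses:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Illustrative addresses: 10.0.0.1 -> 10.0.0.2 */
	uint32_t saddr = 0x0a000001;
	uint32_t daddr = 0x0a000002;
	uint32_t tmp;

	/* The swap performed for CAIFPROTO_DATAGRAM_LOOP */
	tmp = saddr;
	saddr = daddr;
	daddr = tmp;

	printf("src=%08x dst=%08x\n", saddr, daddr);
	return 0;
}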
230
231static int chnl_net_open(struct net_device *dev)
232{
233 struct chnl_net *priv = NULL;
234 int result = -1;
235 int llifindex, headroom, tailroom, mtu;
236 struct net_device *lldev;
237 ASSERT_RTNL();
238 priv = netdev_priv(dev);
239 if (!priv) {
240		pr_debug("CAIF: %s(): no priv\n", __func__);
241 return -ENODEV;
242 }
243
244 if (priv->state != CAIF_CONNECTING) {
245 priv->state = CAIF_CONNECTING;
246 result = caif_connect_client(&priv->conn_req, &priv->chnl,
247 &llifindex, &headroom, &tailroom);
248 if (result != 0) {
249 pr_debug("CAIF: %s(): err: "
250 "Unable to register and open device,"
251 " Err:%d\n",
252 __func__,
253 result);
254 goto error;
255 }
256
257 lldev = dev_get_by_index(dev_net(dev), llifindex);
258
259 if (lldev == NULL) {
260 pr_debug("CAIF: %s(): no interface?\n", __func__);
261 result = -ENODEV;
262 goto error;
263 }
264
265 dev->needed_tailroom = tailroom + lldev->needed_tailroom;
266 dev->hard_header_len = headroom + lldev->hard_header_len +
267 lldev->needed_tailroom;
268
269		/*
270		 * MTU, head-room etc. are not known before we have a
271		 * CAIF link layer device available. MTU calculation may
272		 * override the initial RTNL configuration.
273		 * MTU is the minimum of the current mtu, the link layer mtu
274		 * minus CAIF head and tail, and the GPRS PDP context's max MTU.
275		 */
276 mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom));
277 mtu = min_t(int, GPRS_PDP_MTU, mtu);
278 dev_set_mtu(dev, mtu);
279 dev_put(lldev);
280
281 if (mtu < 100) {
282 pr_warning("CAIF: %s(): "
283 "CAIF Interface MTU too small (%d)\n",
284 __func__, mtu);
285 result = -ENODEV;
286 goto error;
287 }
288 }
289
290 rtnl_unlock(); /* Release RTNL lock during connect wait */
291
292 result = wait_event_interruptible_timeout(priv->netmgmt_wq,
293 priv->state != CAIF_CONNECTING,
294 CONNECT_TIMEOUT);
295
296 rtnl_lock();
297
298 if (result == -ERESTARTSYS) {
299 pr_debug("CAIF: %s(): wait_event_interruptible"
300 " woken by a signal\n", __func__);
301 result = -ERESTARTSYS;
302 goto error;
303 }
304
305 if (result == 0) {
306 pr_debug("CAIF: %s(): connect timeout\n", __func__);
307 caif_disconnect_client(&priv->chnl);
308 priv->state = CAIF_DISCONNECTED;
309 pr_debug("CAIF: %s(): state disconnected\n", __func__);
310 result = -ETIMEDOUT;
311 goto error;
312 }
313
314 if (priv->state != CAIF_CONNECTED) {
315 pr_debug("CAIF: %s(): connect failed\n", __func__);
316 result = -ECONNREFUSED;
317 goto error;
318 }
319 pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__);
320 return 0;
321
322error:
323 caif_disconnect_client(&priv->chnl);
324 priv->state = CAIF_DISCONNECTED;
325 pr_debug("CAIF: %s(): state disconnected\n", __func__);
326 return result;
327
328}
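The MTU clamping in chnl_net_open() is worth a worked example: the effective MTU is the minimum of the configured device MTU, the link layer MTU less the CAIF head and tail overhead, and the GPRS PDP ceiling. A standalone sketch with made-up device numbers:

#include <stdio.h>

#define GPRS_PDP_MTU 1500

static int min_int(int a, int b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* Illustrative numbers, not taken from any real device. */
	int dev_mtu = 1500, lldev_mtu = 1508, headroom = 4, tailroom = 2;
	int mtu;

	mtu = min_int(dev_mtu, lldev_mtu - (headroom + tailroom));
	mtu = min_int(GPRS_PDP_MTU, mtu);
	printf("effective MTU: %d\n", mtu);	/* min(1500, 1502, 1500) = 1500 */
	return 0;
}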
329
330static int chnl_net_stop(struct net_device *dev)
331{
332 struct chnl_net *priv;
333
334 ASSERT_RTNL();
335 priv = netdev_priv(dev);
336 priv->state = CAIF_DISCONNECTED;
337 caif_disconnect_client(&priv->chnl);
338 return 0;
339}
340
341static int chnl_net_init(struct net_device *dev)
342{
343 struct chnl_net *priv;
344 ASSERT_RTNL();
345 priv = netdev_priv(dev);
346 strncpy(priv->name, dev->name, sizeof(priv->name));
347 return 0;
348}
349
350static void chnl_net_uninit(struct net_device *dev)
351{
352 struct chnl_net *priv;
353 ASSERT_RTNL();
354 priv = netdev_priv(dev);
355 robust_list_del(&priv->list_field);
356}
357
358static const struct net_device_ops netdev_ops = {
359 .ndo_open = chnl_net_open,
360 .ndo_stop = chnl_net_stop,
361 .ndo_init = chnl_net_init,
362 .ndo_uninit = chnl_net_uninit,
363 .ndo_start_xmit = chnl_net_start_xmit,
364};
365
366static void ipcaif_net_setup(struct net_device *dev)
367{
368 struct chnl_net *priv;
369 dev->netdev_ops = &netdev_ops;
370 dev->destructor = free_netdev;
371 dev->flags |= IFF_NOARP;
372 dev->flags |= IFF_POINTOPOINT;
373 dev->mtu = GPRS_PDP_MTU;
374 dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN;
375
376 priv = netdev_priv(dev);
377 priv->chnl.receive = chnl_recv_cb;
378 priv->chnl.ctrlcmd = chnl_flowctrl_cb;
379 priv->netdev = dev;
380 priv->conn_req.protocol = CAIFPROTO_DATAGRAM;
381 priv->conn_req.link_selector = CAIF_LINK_HIGH_BANDW;
382 priv->conn_req.priority = CAIF_PRIO_LOW;
383 /* Insert illegal value */
384 priv->conn_req.sockaddr.u.dgm.connection_id = -1;
385 priv->flowenabled = false;
386
387 ASSERT_RTNL();
388 init_waitqueue_head(&priv->netmgmt_wq);
389 list_add(&priv->list_field, &chnl_net_list);
390}
391
392
393static int ipcaif_fill_info(struct sk_buff *skb, const struct net_device *dev)
394{
395 struct chnl_net *priv;
396 u8 loop;
397 priv = netdev_priv(dev);
398 NLA_PUT_U32(skb, IFLA_CAIF_IPV4_CONNID,
399 priv->conn_req.sockaddr.u.dgm.connection_id);
400 NLA_PUT_U32(skb, IFLA_CAIF_IPV6_CONNID,
401 priv->conn_req.sockaddr.u.dgm.connection_id);
402 loop = priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP;
403 NLA_PUT_U8(skb, IFLA_CAIF_LOOPBACK, loop);
404
405
406 return 0;
407nla_put_failure:
408 return -EMSGSIZE;
409
410}
411
412static void caif_netlink_parms(struct nlattr *data[],
413 struct caif_connect_request *conn_req)
414{
415 if (!data) {
416 pr_warning("CAIF: %s: no params data found\n", __func__);
417 return;
418 }
419 if (data[IFLA_CAIF_IPV4_CONNID])
420 conn_req->sockaddr.u.dgm.connection_id =
421 nla_get_u32(data[IFLA_CAIF_IPV4_CONNID]);
422 if (data[IFLA_CAIF_IPV6_CONNID])
423 conn_req->sockaddr.u.dgm.connection_id =
424 nla_get_u32(data[IFLA_CAIF_IPV6_CONNID]);
425 if (data[IFLA_CAIF_LOOPBACK]) {
426 if (nla_get_u8(data[IFLA_CAIF_LOOPBACK]))
427 conn_req->protocol = CAIFPROTO_DATAGRAM_LOOP;
428 else
429 conn_req->protocol = CAIFPROTO_DATAGRAM;
430 }
431}
432
433static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
434 struct nlattr *tb[], struct nlattr *data[])
435{
436 int ret;
437 struct chnl_net *caifdev;
438 ASSERT_RTNL();
439 caifdev = netdev_priv(dev);
440 caif_netlink_parms(data, &caifdev->conn_req);
441 dev_net_set(caifdev->netdev, src_net);
442
443 ret = register_netdevice(dev);
444 if (ret)
445		pr_warning("CAIF: %s(): device rtnl registration failed\n",
446 __func__);
447 return ret;
448}
449
450static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[],
451 struct nlattr *data[])
452{
453 struct chnl_net *caifdev;
454 ASSERT_RTNL();
455 caifdev = netdev_priv(dev);
456 caif_netlink_parms(data, &caifdev->conn_req);
457 netdev_state_change(dev);
458 return 0;
459}
460
461static size_t ipcaif_get_size(const struct net_device *dev)
462{
463 return
464 /* IFLA_CAIF_IPV4_CONNID */
465 nla_total_size(4) +
466 /* IFLA_CAIF_IPV6_CONNID */
467 nla_total_size(4) +
468 /* IFLA_CAIF_LOOPBACK */
469 nla_total_size(2) +
470 0;
471}
472
473static const struct nla_policy ipcaif_policy[IFLA_CAIF_MAX + 1] = {
474 [IFLA_CAIF_IPV4_CONNID] = { .type = NLA_U32 },
475 [IFLA_CAIF_IPV6_CONNID] = { .type = NLA_U32 },
476 [IFLA_CAIF_LOOPBACK] = { .type = NLA_U8 }
477};
478
479
480static struct rtnl_link_ops ipcaif_link_ops __read_mostly = {
481 .kind = "caif",
482 .priv_size = sizeof(struct chnl_net),
483 .setup = ipcaif_net_setup,
484 .maxtype = IFLA_CAIF_MAX,
485 .policy = ipcaif_policy,
486 .newlink = ipcaif_newlink,
487 .changelink = ipcaif_changelink,
488 .get_size = ipcaif_get_size,
489 .fill_info = ipcaif_fill_info,
490
491};
492
493static int __init chnl_init_module(void)
494{
495 return rtnl_link_register(&ipcaif_link_ops);
496}
497
498static void __exit chnl_exit_module(void)
499{
500 struct chnl_net *dev = NULL;
501 struct list_head *list_node;
502 struct list_head *_tmp;
503 rtnl_link_unregister(&ipcaif_link_ops);
504 rtnl_lock();
505 list_for_each_safe(list_node, _tmp, &chnl_net_list) {
506 dev = list_entry(list_node, struct chnl_net, list_field);
507 list_del(list_node);
508 delete_device(dev);
509 }
510 rtnl_unlock();
511}
512
513module_init(chnl_init_module);
514module_exit(chnl_exit_module);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 907dc871fac8..9c65e9deb9c3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -713,8 +713,6 @@ static void bcm_remove_op(struct bcm_op *op)
713 kfree(op->last_frames); 713 kfree(op->last_frames);
714 714
715 kfree(op); 715 kfree(op);
716
717 return;
718} 716}
719 717
720static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) 718static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
diff --git a/net/can/raw.c b/net/can/raw.c
index da99cf153b33..a10e3338f084 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -436,14 +436,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
436 436
437 if (count > 1) { 437 if (count > 1) {
438 /* filter does not fit into dfilter => alloc space */ 438 /* filter does not fit into dfilter => alloc space */
439 filter = kmalloc(optlen, GFP_KERNEL); 439 filter = memdup_user(optval, optlen);
440 if (!filter) 440 if (IS_ERR(filter))
441 return -ENOMEM; 441 return PTR_ERR(filter);
442
443 if (copy_from_user(filter, optval, optlen)) {
444 kfree(filter);
445 return -EFAULT;
446 }
447 } else if (count == 1) { 442 } else if (count == 1) {
448 if (copy_from_user(&sfilter, optval, sizeof(sfilter))) 443 if (copy_from_user(&sfilter, optval, sizeof(sfilter)))
449 return -EFAULT; 444 return -EFAULT;
@@ -655,6 +650,10 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
655 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 650 err = sock_tx_timestamp(msg, sk, skb_tx(skb));
656 if (err < 0) 651 if (err < 0)
657 goto free_skb; 652 goto free_skb;
653
654 /* to be able to check the received tx sock reference in raw_rcv() */
655 skb_tx(skb)->prevent_sk_orphan = 1;
656
658 skb->dev = dev; 657 skb->dev = dev;
659 skb->sk = sk; 658 skb->sk = sk;
660 659
diff --git a/net/compat.c b/net/compat.c
index ec24d9edb025..63d260e81472 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -81,7 +81,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
81 int tot_len; 81 int tot_len;
82 82
83 if (kern_msg->msg_namelen) { 83 if (kern_msg->msg_namelen) {
84 if (mode==VERIFY_READ) { 84 if (mode == VERIFY_READ) {
85 int err = move_addr_to_kernel(kern_msg->msg_name, 85 int err = move_addr_to_kernel(kern_msg->msg_name,
86 kern_msg->msg_namelen, 86 kern_msg->msg_namelen,
87 kern_address); 87 kern_address);
@@ -354,7 +354,7 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname,
354static int do_set_sock_timeout(struct socket *sock, int level, 354static int do_set_sock_timeout(struct socket *sock, int level,
355 int optname, char __user *optval, unsigned int optlen) 355 int optname, char __user *optval, unsigned int optlen)
356{ 356{
357 struct compat_timeval __user *up = (struct compat_timeval __user *) optval; 357 struct compat_timeval __user *up = (struct compat_timeval __user *)optval;
358 struct timeval ktime; 358 struct timeval ktime;
359 mm_segment_t old_fs; 359 mm_segment_t old_fs;
360 int err; 360 int err;
@@ -367,7 +367,7 @@ static int do_set_sock_timeout(struct socket *sock, int level,
367 return -EFAULT; 367 return -EFAULT;
368 old_fs = get_fs(); 368 old_fs = get_fs();
369 set_fs(KERNEL_DS); 369 set_fs(KERNEL_DS);
370 err = sock_setsockopt(sock, level, optname, (char *) &ktime, sizeof(ktime)); 370 err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime));
371 set_fs(old_fs); 371 set_fs(old_fs);
372 372
373 return err; 373 return err;
@@ -389,11 +389,10 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
389 char __user *optval, unsigned int optlen) 389 char __user *optval, unsigned int optlen)
390{ 390{
391 int err; 391 int err;
392 struct socket *sock; 392 struct socket *sock = sockfd_lookup(fd, &err);
393 393
394 if ((sock = sockfd_lookup(fd, &err))!=NULL) 394 if (sock) {
395 { 395 err = security_socket_setsockopt(sock, level, optname);
396 err = security_socket_setsockopt(sock,level,optname);
397 if (err) { 396 if (err) {
398 sockfd_put(sock); 397 sockfd_put(sock);
399 return err; 398 return err;
@@ -453,7 +452,7 @@ static int compat_sock_getsockopt(struct socket *sock, int level, int optname,
453int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) 452int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
454{ 453{
455 struct compat_timeval __user *ctv = 454 struct compat_timeval __user *ctv =
456 (struct compat_timeval __user*) userstamp; 455 (struct compat_timeval __user *) userstamp;
457 int err = -ENOENT; 456 int err = -ENOENT;
458 struct timeval tv; 457 struct timeval tv;
459 458
@@ -477,7 +476,7 @@ EXPORT_SYMBOL(compat_sock_get_timestamp);
477int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) 476int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
478{ 477{
479 struct compat_timespec __user *ctv = 478 struct compat_timespec __user *ctv =
480 (struct compat_timespec __user*) userstamp; 479 (struct compat_timespec __user *) userstamp;
481 int err = -ENOENT; 480 int err = -ENOENT;
482 struct timespec ts; 481 struct timespec ts;
483 482
@@ -502,12 +501,10 @@ asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
502 char __user *optval, int __user *optlen) 501 char __user *optval, int __user *optlen)
503{ 502{
504 int err; 503 int err;
505 struct socket *sock; 504 struct socket *sock = sockfd_lookup(fd, &err);
506 505
507 if ((sock = sockfd_lookup(fd, &err))!=NULL) 506 if (sock) {
508 { 507 err = security_socket_getsockopt(sock, level, optname);
509 err = security_socket_getsockopt(sock, level,
510 optname);
511 if (err) { 508 if (err) {
512 sockfd_put(sock); 509 sockfd_put(sock);
513 return err; 510 return err;
@@ -531,7 +528,7 @@ struct compat_group_req {
531 __u32 gr_interface; 528 __u32 gr_interface;
532 struct __kernel_sockaddr_storage gr_group 529 struct __kernel_sockaddr_storage gr_group
533 __attribute__ ((aligned(4))); 530 __attribute__ ((aligned(4)));
534} __attribute__ ((packed)); 531} __packed;
535 532
536struct compat_group_source_req { 533struct compat_group_source_req {
537 __u32 gsr_interface; 534 __u32 gsr_interface;
@@ -539,7 +536,7 @@ struct compat_group_source_req {
539 __attribute__ ((aligned(4))); 536 __attribute__ ((aligned(4)));
540 struct __kernel_sockaddr_storage gsr_source 537 struct __kernel_sockaddr_storage gsr_source
541 __attribute__ ((aligned(4))); 538 __attribute__ ((aligned(4)));
542} __attribute__ ((packed)); 539} __packed;
543 540
544struct compat_group_filter { 541struct compat_group_filter {
545 __u32 gf_interface; 542 __u32 gf_interface;
@@ -549,7 +546,7 @@ struct compat_group_filter {
549 __u32 gf_numsrc; 546 __u32 gf_numsrc;
550 struct __kernel_sockaddr_storage gf_slist[1] 547 struct __kernel_sockaddr_storage gf_slist[1]
551 __attribute__ ((aligned(4))); 548 __attribute__ ((aligned(4)));
552} __attribute__ ((packed)); 549} __packed;
553 550
554#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \ 551#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
555 sizeof(struct __kernel_sockaddr_storage)) 552 sizeof(struct __kernel_sockaddr_storage))
@@ -557,7 +554,7 @@ struct compat_group_filter {
557 554
558int compat_mc_setsockopt(struct sock *sock, int level, int optname, 555int compat_mc_setsockopt(struct sock *sock, int level, int optname,
559 char __user *optval, unsigned int optlen, 556 char __user *optval, unsigned int optlen,
560 int (*setsockopt)(struct sock *,int,int,char __user *,unsigned int)) 557 int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int))
561{ 558{
562 char __user *koptval = optval; 559 char __user *koptval = optval;
563 int koptlen = optlen; 560 int koptlen = optlen;
@@ -640,12 +637,11 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname,
640 } 637 }
641 return setsockopt(sock, level, optname, koptval, koptlen); 638 return setsockopt(sock, level, optname, koptval, koptlen);
642} 639}
643
644EXPORT_SYMBOL(compat_mc_setsockopt); 640EXPORT_SYMBOL(compat_mc_setsockopt);
645 641
646int compat_mc_getsockopt(struct sock *sock, int level, int optname, 642int compat_mc_getsockopt(struct sock *sock, int level, int optname,
647 char __user *optval, int __user *optlen, 643 char __user *optval, int __user *optlen,
648 int (*getsockopt)(struct sock *,int,int,char __user *,int __user *)) 644 int (*getsockopt)(struct sock *, int, int, char __user *, int __user *))
649{ 645{
650 struct compat_group_filter __user *gf32 = (void *)optval; 646 struct compat_group_filter __user *gf32 = (void *)optval;
651 struct group_filter __user *kgf; 647 struct group_filter __user *kgf;
@@ -681,7 +677,7 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname,
681 __put_user(interface, &kgf->gf_interface) || 677 __put_user(interface, &kgf->gf_interface) ||
682 __put_user(fmode, &kgf->gf_fmode) || 678 __put_user(fmode, &kgf->gf_fmode) ||
683 __put_user(numsrc, &kgf->gf_numsrc) || 679 __put_user(numsrc, &kgf->gf_numsrc) ||
684 copy_in_user(&kgf->gf_group,&gf32->gf_group,sizeof(kgf->gf_group))) 680 copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group)))
685 return -EFAULT; 681 return -EFAULT;
686 682
687 err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen); 683 err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen);
@@ -714,21 +710,22 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname,
714 copylen = numsrc * sizeof(gf32->gf_slist[0]); 710 copylen = numsrc * sizeof(gf32->gf_slist[0]);
715 if (copylen > klen) 711 if (copylen > klen)
716 copylen = klen; 712 copylen = klen;
717 if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen)) 713 if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen))
718 return -EFAULT; 714 return -EFAULT;
719 } 715 }
720 return err; 716 return err;
721} 717}
722
723EXPORT_SYMBOL(compat_mc_getsockopt); 718EXPORT_SYMBOL(compat_mc_getsockopt);
724 719
725 720
726/* Argument list sizes for compat_sys_socketcall */ 721/* Argument list sizes for compat_sys_socketcall */
727#define AL(x) ((x) * sizeof(u32)) 722#define AL(x) ((x) * sizeof(u32))
728static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 723static unsigned char nas[20] = {
729 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 724 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
730 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 725 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
731 AL(4),AL(5)}; 726 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
727 AL(4), AL(5)
728};
732#undef AL 729#undef AL
733 730
734asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) 731asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -827,7 +824,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
827 compat_ptr(a[4]), compat_ptr(a[5])); 824 compat_ptr(a[4]), compat_ptr(a[5]));
828 break; 825 break;
829 case SYS_SHUTDOWN: 826 case SYS_SHUTDOWN:
830 ret = sys_shutdown(a0,a1); 827 ret = sys_shutdown(a0, a1);
831 break; 828 break;
832 case SYS_SETSOCKOPT: 829 case SYS_SETSOCKOPT:
833 ret = compat_sys_setsockopt(a0, a1, a[2], 830 ret = compat_sys_setsockopt(a0, a1, a[2],
diff --git a/net/core/Makefile b/net/core/Makefile
index 08791ac3e05a..8a04dd22cf77 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,7 +7,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
7 7
8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o 8obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
9 9
10obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ 10obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
11 neighbour.o rtnetlink.o utils.o link_watch.o filter.o 11 neighbour.o rtnetlink.o utils.o link_watch.o filter.o
12 12
13obj-$(CONFIG_XFRM) += flow.o 13obj-$(CONFIG_XFRM) += flow.o
@@ -18,4 +18,4 @@ obj-$(CONFIG_NET_DMA) += user_dma.o
18obj-$(CONFIG_FIB_RULES) += fib_rules.o 18obj-$(CONFIG_FIB_RULES) += fib_rules.o
19obj-$(CONFIG_TRACEPOINTS) += net-traces.o 19obj-$(CONFIG_TRACEPOINTS) += net-traces.o
20obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o 20obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
21 21obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 2dccd4ee591b..251997a95483 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -86,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
86 int error; 86 int error;
87 DEFINE_WAIT_FUNC(wait, receiver_wake_function); 87 DEFINE_WAIT_FUNC(wait, receiver_wake_function);
88 88
89 prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 89 prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
90 90
91 /* Socket errors? */ 91 /* Socket errors? */
92 error = sock_error(sk); 92 error = sock_error(sk);
@@ -115,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
115 error = 0; 115 error = 0;
116 *timeo_p = schedule_timeout(*timeo_p); 116 *timeo_p = schedule_timeout(*timeo_p);
117out: 117out:
118 finish_wait(sk->sk_sleep, &wait); 118 finish_wait(sk_sleep(sk), &wait);
119 return error; 119 return error;
120interrupted: 120interrupted:
121 error = sock_intr_errno(*timeo_p); 121 error = sock_intr_errno(*timeo_p);
@@ -219,6 +219,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
219 return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 219 return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
220 &peeked, err); 220 &peeked, err);
221} 221}
222EXPORT_SYMBOL(skb_recv_datagram);
222 223
223void skb_free_datagram(struct sock *sk, struct sk_buff *skb) 224void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
224{ 225{
@@ -229,9 +230,20 @@ EXPORT_SYMBOL(skb_free_datagram);
229 230
230void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) 231void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
231{ 232{
232 lock_sock(sk); 233 bool slow;
233 skb_free_datagram(sk, skb); 234
234 release_sock(sk); 235 if (likely(atomic_read(&skb->users) == 1))
236 smp_rmb();
237 else if (likely(!atomic_dec_and_test(&skb->users)))
238 return;
239
240 slow = lock_sock_fast(sk);
241 skb_orphan(skb);
242 sk_mem_reclaim_partial(sk);
243 unlock_sock_fast(sk, slow);
244
245 /* skb is now orphaned, can be freed outside of locked section */
246 __kfree_skb(skb);
235} 247}
236EXPORT_SYMBOL(skb_free_datagram_locked); 248EXPORT_SYMBOL(skb_free_datagram_locked);
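The rewritten skb_free_datagram_locked() above takes a refcount fast path: a sole owner needs only a plain read plus a barrier, anyone else decrements and bails out unless they dropped the last reference, and only then is the (fast) socket lock taken. A toy model of that shape using C11 atomics; nothing here is the kernel's atomic API.

#include <stdatomic.h>
#include <stdio.h>

/* Toy object; 'users' mirrors skb->users in spirit only. */
struct obj {
	atomic_int users;
};

static int should_free(struct obj *o)
{
	/* Sole owner: no atomic read-modify-write needed. */
	if (atomic_load_explicit(&o->users, memory_order_acquire) == 1)
		return 1;
	/* Shared: drop our reference; free only if it was the last one. */
	if (atomic_fetch_sub(&o->users, 1) != 1)
		return 0;
	return 1;
}

int main(void)
{
	struct obj o = { .users = 1 };

	printf("free now? %d\n", should_free(&o));	/* 1 */
	return 0;
}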
237 249
@@ -277,7 +289,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
277 289
278 return err; 290 return err;
279} 291}
280
281EXPORT_SYMBOL(skb_kill_datagram); 292EXPORT_SYMBOL(skb_kill_datagram);
282 293
283/** 294/**
@@ -362,6 +373,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
362fault: 373fault:
363 return -EFAULT; 374 return -EFAULT;
364} 375}
376EXPORT_SYMBOL(skb_copy_datagram_iovec);
365 377
366/** 378/**
367 * skb_copy_datagram_const_iovec - Copy a datagram to an iovec. 379 * skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
@@ -705,6 +717,7 @@ csum_error:
705fault: 717fault:
706 return -EFAULT; 718 return -EFAULT;
707} 719}
720EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
708 721
709/** 722/**
710 * datagram_poll - generic datagram poll 723 * datagram_poll - generic datagram poll
@@ -726,7 +739,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
726 struct sock *sk = sock->sk; 739 struct sock *sk = sock->sk;
727 unsigned int mask; 740 unsigned int mask;
728 741
729 sock_poll_wait(file, sk->sk_sleep, wait); 742 sock_poll_wait(file, sk_sleep(sk), wait);
730 mask = 0; 743 mask = 0;
731 744
732 /* exceptional events? */ 745 /* exceptional events? */
@@ -759,8 +772,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
759 772
760 return mask; 773 return mask;
761} 774}
762
763EXPORT_SYMBOL(datagram_poll); 775EXPORT_SYMBOL(datagram_poll);
764EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
765EXPORT_SYMBOL(skb_copy_datagram_iovec);
766EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index f769098774b7..1ae654391442 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -101,8 +101,6 @@
101#include <linux/proc_fs.h> 101#include <linux/proc_fs.h>
102#include <linux/seq_file.h> 102#include <linux/seq_file.h>
103#include <linux/stat.h> 103#include <linux/stat.h>
104#include <linux/if_bridge.h>
105#include <linux/if_macvlan.h>
106#include <net/dst.h> 104#include <net/dst.h>
107#include <net/pkt_sched.h> 105#include <net/pkt_sched.h>
108#include <net/checksum.h> 106#include <net/checksum.h>
@@ -130,6 +128,7 @@
130#include <linux/jhash.h> 128#include <linux/jhash.h>
131#include <linux/random.h> 129#include <linux/random.h>
132#include <trace/events/napi.h> 130#include <trace/events/napi.h>
131#include <linux/pci.h>
133 132
134#include "net-sysfs.h" 133#include "net-sysfs.h"
135 134
@@ -207,6 +206,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
207 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)]; 206 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
208} 207}
209 208
209static inline void rps_lock(struct softnet_data *sd)
210{
211#ifdef CONFIG_RPS
212 spin_lock(&sd->input_pkt_queue.lock);
213#endif
214}
215
216static inline void rps_unlock(struct softnet_data *sd)
217{
218#ifdef CONFIG_RPS
219 spin_unlock(&sd->input_pkt_queue.lock);
220#endif
221}
222
210/* Device list insertion */ 223/* Device list insertion */
211static int list_netdevice(struct net_device *dev) 224static int list_netdevice(struct net_device *dev)
212{ 225{
@@ -249,7 +262,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
249 * queue in the local softnet handler. 262 * queue in the local softnet handler.
250 */ 263 */
251 264
252DEFINE_PER_CPU(struct softnet_data, softnet_data); 265DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
253EXPORT_PER_CPU_SYMBOL(softnet_data); 266EXPORT_PER_CPU_SYMBOL(softnet_data);
254 267
255#ifdef CONFIG_LOCKDEP 268#ifdef CONFIG_LOCKDEP
@@ -773,47 +786,46 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
773 786
774struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) 787struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
775{ 788{
776 struct net_device *dev; 789 struct net_device *dev, *ret = NULL;
777 790
778 rtnl_lock(); 791 rcu_read_lock();
779 dev = __dev_getfirstbyhwtype(net, type); 792 for_each_netdev_rcu(net, dev)
780 if (dev) 793 if (dev->type == type) {
781 dev_hold(dev); 794 dev_hold(dev);
782 rtnl_unlock(); 795 ret = dev;
783 return dev; 796 break;
797 }
798 rcu_read_unlock();
799 return ret;
784} 800}
785EXPORT_SYMBOL(dev_getfirstbyhwtype); 801EXPORT_SYMBOL(dev_getfirstbyhwtype);
786 802
787/** 803/**
788 * dev_get_by_flags - find any device with given flags 804 * dev_get_by_flags_rcu - find any device with given flags
789 * @net: the applicable net namespace 805 * @net: the applicable net namespace
790 * @if_flags: IFF_* values 806 * @if_flags: IFF_* values
791 * @mask: bitmask of bits in if_flags to check 807 * @mask: bitmask of bits in if_flags to check
792 * 808 *
793 * Search for any interface with the given flags. Returns NULL if a device 809 * Search for any interface with the given flags. Returns NULL if a device
794 * is not found or a pointer to the device. The device returned has 810 * is not found or a pointer to the device. Must be called inside
795 * had a reference added and the pointer is safe until the user calls 811 * rcu_read_lock(), and result refcount is unchanged.
796 * dev_put to indicate they have finished with it.
797 */ 812 */
798 813
799struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, 814struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
800 unsigned short mask) 815 unsigned short mask)
801{ 816{
802 struct net_device *dev, *ret; 817 struct net_device *dev, *ret;
803 818
804 ret = NULL; 819 ret = NULL;
805 rcu_read_lock();
806 for_each_netdev_rcu(net, dev) { 820 for_each_netdev_rcu(net, dev) {
807 if (((dev->flags ^ if_flags) & mask) == 0) { 821 if (((dev->flags ^ if_flags) & mask) == 0) {
808 dev_hold(dev);
809 ret = dev; 822 ret = dev;
810 break; 823 break;
811 } 824 }
812 } 825 }
813 rcu_read_unlock();
814 return ret; 826 return ret;
815} 827}
816EXPORT_SYMBOL(dev_get_by_flags); 828EXPORT_SYMBOL(dev_get_by_flags_rcu);
817 829
818/** 830/**
819 * dev_valid_name - check if name is okay for network device 831 * dev_valid_name - check if name is okay for network device
@@ -936,18 +948,22 @@ int dev_alloc_name(struct net_device *dev, const char *name)
936} 948}
937EXPORT_SYMBOL(dev_alloc_name); 949EXPORT_SYMBOL(dev_alloc_name);
938 950
939static int dev_get_valid_name(struct net *net, const char *name, char *buf, 951static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
940 bool fmt)
941{ 952{
953 struct net *net;
954
955 BUG_ON(!dev_net(dev));
956 net = dev_net(dev);
957
942 if (!dev_valid_name(name)) 958 if (!dev_valid_name(name))
943 return -EINVAL; 959 return -EINVAL;
944 960
945 if (fmt && strchr(name, '%')) 961 if (fmt && strchr(name, '%'))
946 return __dev_alloc_name(net, name, buf); 962 return dev_alloc_name(dev, name);
947 else if (__dev_get_by_name(net, name)) 963 else if (__dev_get_by_name(net, name))
948 return -EEXIST; 964 return -EEXIST;
949 else if (buf != name) 965 else if (dev->name != name)
950 strlcpy(buf, name, IFNAMSIZ); 966 strlcpy(dev->name, name, IFNAMSIZ);
951 967
952 return 0; 968 return 0;
953} 969}
@@ -979,20 +995,15 @@ int dev_change_name(struct net_device *dev, const char *newname)
979 995
980 memcpy(oldname, dev->name, IFNAMSIZ); 996 memcpy(oldname, dev->name, IFNAMSIZ);
981 997
982 err = dev_get_valid_name(net, newname, dev->name, 1); 998 err = dev_get_valid_name(dev, newname, 1);
983 if (err < 0) 999 if (err < 0)
984 return err; 1000 return err;
985 1001
986rollback: 1002rollback:
987 /* For now only devices in the initial network namespace 1003 ret = device_rename(&dev->dev, dev->name);
988 * are in sysfs. 1004 if (ret) {
989 */ 1005 memcpy(dev->name, oldname, IFNAMSIZ);
990 if (net_eq(net, &init_net)) { 1006 return ret;
991 ret = device_rename(&dev->dev, dev->name);
992 if (ret) {
993 memcpy(dev->name, oldname, IFNAMSIZ);
994 return ret;
995 }
996 } 1007 }
997 1008
998 write_lock_bh(&dev_base_lock); 1009 write_lock_bh(&dev_base_lock);
@@ -1085,9 +1096,9 @@ void netdev_state_change(struct net_device *dev)
1085} 1096}
1086EXPORT_SYMBOL(netdev_state_change); 1097EXPORT_SYMBOL(netdev_state_change);
1087 1098
1088void netdev_bonding_change(struct net_device *dev, unsigned long event) 1099int netdev_bonding_change(struct net_device *dev, unsigned long event)
1089{ 1100{
1090 call_netdevice_notifiers(event, dev); 1101 return call_netdevice_notifiers(event, dev);
1091} 1102}
1092EXPORT_SYMBOL(netdev_bonding_change); 1103EXPORT_SYMBOL(netdev_bonding_change);
1093 1104
@@ -1417,6 +1428,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
1417 1428
1418int call_netdevice_notifiers(unsigned long val, struct net_device *dev) 1429int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1419{ 1430{
1431 ASSERT_RTNL();
1420 return raw_notifier_call_chain(&netdev_chain, val, dev); 1432 return raw_notifier_call_chain(&netdev_chain, val, dev);
1421} 1433}
1422 1434
@@ -1435,7 +1447,7 @@ void net_disable_timestamp(void)
1435} 1447}
1436EXPORT_SYMBOL(net_disable_timestamp); 1448EXPORT_SYMBOL(net_disable_timestamp);
1437 1449
1438static inline void net_timestamp(struct sk_buff *skb) 1450static inline void net_timestamp_set(struct sk_buff *skb)
1439{ 1451{
1440 if (atomic_read(&netstamp_needed)) 1452 if (atomic_read(&netstamp_needed))
1441 __net_timestamp(skb); 1453 __net_timestamp(skb);
@@ -1443,6 +1455,12 @@ static inline void net_timestamp(struct sk_buff *skb)
1443 skb->tstamp.tv64 = 0; 1455 skb->tstamp.tv64 = 0;
1444} 1456}
1445 1457
1458static inline void net_timestamp_check(struct sk_buff *skb)
1459{
1460 if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
1461 __net_timestamp(skb);
1462}
1463
1446/** 1464/**
1447 * dev_forward_skb - loopback an skb to another netif 1465 * dev_forward_skb - loopback an skb to another netif
1448 * 1466 *
@@ -1451,7 +1469,7 @@ static inline void net_timestamp(struct sk_buff *skb)
1451 * 1469 *
1452 * return values: 1470 * return values:
1453 * NET_RX_SUCCESS (no congestion) 1471 * NET_RX_SUCCESS (no congestion)
1454 * NET_RX_DROP (packet was dropped) 1472 * NET_RX_DROP (packet was dropped, but freed)
1455 * 1473 *
1456 * dev_forward_skb can be used for injecting an skb from the 1474 * dev_forward_skb can be used for injecting an skb from the
1457 * start_xmit function of one device into the receive queue 1475 * start_xmit function of one device into the receive queue
@@ -1464,13 +1482,13 @@ static inline void net_timestamp(struct sk_buff *skb)
1464int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) 1482int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1465{ 1483{
1466 skb_orphan(skb); 1484 skb_orphan(skb);
1485 nf_reset(skb);
1467 1486
1468 if (!(dev->flags & IFF_UP)) 1487 if (!(dev->flags & IFF_UP) ||
1469 return NET_RX_DROP; 1488 (skb->len > (dev->mtu + dev->hard_header_len))) {
1470 1489 kfree_skb(skb);
1471 if (skb->len > (dev->mtu + dev->hard_header_len))
1472 return NET_RX_DROP; 1490 return NET_RX_DROP;
1473 1491 }
1474 skb_set_dev(skb, dev); 1492 skb_set_dev(skb, dev);
1475 skb->tstamp.tv64 = 0; 1493 skb->tstamp.tv64 = 0;
1476 skb->pkt_type = PACKET_HOST; 1494 skb->pkt_type = PACKET_HOST;
@@ -1490,9 +1508,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1490 1508
1491#ifdef CONFIG_NET_CLS_ACT 1509#ifdef CONFIG_NET_CLS_ACT
1492 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) 1510 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1493 net_timestamp(skb); 1511 net_timestamp_set(skb);
1494#else 1512#else
1495 net_timestamp(skb); 1513 net_timestamp_set(skb);
1496#endif 1514#endif
1497 1515
1498 rcu_read_lock(); 1516 rcu_read_lock();
@@ -1518,7 +1536,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1518 if (net_ratelimit()) 1536 if (net_ratelimit())
1519 printk(KERN_CRIT "protocol %04x is " 1537 printk(KERN_CRIT "protocol %04x is "
1520 "buggy, dev %s\n", 1538 "buggy, dev %s\n",
1521 skb2->protocol, dev->name); 1539 ntohs(skb2->protocol),
1540 dev->name);
1522 skb_reset_network_header(skb2); 1541 skb_reset_network_header(skb2);
1523 } 1542 }
1524 1543
@@ -1530,6 +1549,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1530 rcu_read_unlock(); 1549 rcu_read_unlock();
1531} 1550}
1532 1551
1552/*
1553 * Routine to help set real_num_tx_queues. When reducing the number of
1554 * in-use queues, stale skbs on qdiscs mapped to queues greater than the new value must be flushed.
1555 */
1556void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1557{
1558 unsigned int real_num = dev->real_num_tx_queues;
1559
1560 if (unlikely(txq > dev->num_tx_queues))
1561 ;
1562 else if (txq > real_num)
1563 dev->real_num_tx_queues = txq;
1564 else if (txq < real_num) {
1565 dev->real_num_tx_queues = txq;
1566 qdisc_reset_all_tx_gt(dev, txq);
1567 }
1568}
1569EXPORT_SYMBOL(netif_set_real_num_tx_queues);
1533 1570
1534static inline void __netif_reschedule(struct Qdisc *q) 1571static inline void __netif_reschedule(struct Qdisc *q)
1535{ 1572{
@@ -1538,8 +1575,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
1538 1575
1539 local_irq_save(flags); 1576 local_irq_save(flags);
1540 sd = &__get_cpu_var(softnet_data); 1577 sd = &__get_cpu_var(softnet_data);
1541 q->next_sched = sd->output_queue; 1578 q->next_sched = NULL;
1542 sd->output_queue = q; 1579 *sd->output_queue_tailp = q;
1580 sd->output_queue_tailp = &q->next_sched;
1543 raise_softirq_irqoff(NET_TX_SOFTIRQ); 1581 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1544 local_irq_restore(flags); 1582 local_irq_restore(flags);
1545} 1583}
@@ -1784,18 +1822,27 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
1784 * 2. No high memory really exists on this machine. 1822 * 2. No high memory really exists on this machine.
1785 */ 1823 */
1786 1824
1787static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) 1825static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1788{ 1826{
1789#ifdef CONFIG_HIGHMEM 1827#ifdef CONFIG_HIGHMEM
1790 int i; 1828 int i;
1829 if (!(dev->features & NETIF_F_HIGHDMA)) {
1830 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1831 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1832 return 1;
1833 }
1791 1834
1792 if (dev->features & NETIF_F_HIGHDMA) 1835 if (PCI_DMA_BUS_IS_PHYS) {
1793 return 0; 1836 struct device *pdev = dev->dev.parent;
1794
1795 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1796 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1797 return 1;
1798 1837
1838 if (!pdev)
1839 return 0;
1840 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1841 dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
1842 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
1843 return 1;
1844 }
1845 }
1799#endif 1846#endif
1800 return 0; 1847 return 0;
1801} 1848}
@@ -1853,6 +1900,41 @@ static int dev_gso_segment(struct sk_buff *skb)
1853 return 0; 1900 return 0;
1854} 1901}
1855 1902
1903/*
1904 * Try to orphan skb early, right before transmission by the device.
1905 * We cannot orphan skb if tx timestamp is requested, since
1906 * drivers need to call skb_tstamp_tx() to send the timestamp.
1907 */
1908static inline void skb_orphan_try(struct sk_buff *skb)
1909{
1910 struct sock *sk = skb->sk;
1911
1912 if (sk && !skb_tx(skb)->flags) {
1913		/* skb_tx_hash() won't be able to get sk.
1914		 * We copy sk_hash into skb->rxhash.
1915 */
1916 if (!skb->rxhash)
1917 skb->rxhash = sk->sk_hash;
1918 skb_orphan(skb);
1919 }
1920}
1921
1922/*
1923 * Returns true if either:
1924 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
1925 * 2. skb is fragmented and the device does not support SG, or if
1926 * at least one of fragments is in highmem and device does not
1927 * support DMA from it.
1928 */
1929static inline int skb_needs_linearize(struct sk_buff *skb,
1930 struct net_device *dev)
1931{
1932 return skb_is_nonlinear(skb) &&
1933 ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
1934 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
1935 illegal_highdma(dev, skb))));
1936}
1937
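skb_needs_linearize() folds three capability checks into one predicate. The sketch below mirrors its shape with invented feature flags (F_FRAGLIST and F_SG are stand-ins for the NETIF_F_* bits) so the truth table can be exercised directly:

#include <stdbool.h>
#include <stdio.h>

/* Invented feature bits standing in for NETIF_F_FRAGLIST / NETIF_F_SG. */
#define F_FRAGLIST 0x1
#define F_SG       0x2

struct toy_skb {
	bool nonlinear;
	bool has_frag_list;
	int nr_frags;
	bool frag_in_highmem;
};

static bool needs_linearize(const struct toy_skb *skb,
			    unsigned int dev_features,
			    bool dma_from_highmem_ok)
{
	return skb->nonlinear &&
	       ((skb->has_frag_list && !(dev_features & F_FRAGLIST)) ||
		(skb->nr_frags && (!(dev_features & F_SG) ||
				   (skb->frag_in_highmem &&
				    !dma_from_highmem_ok))));
}

int main(void)
{
	struct toy_skb skb = { .nonlinear = true, .nr_frags = 2 };

	/* Paged skb on a device without scatter-gather: must linearize. */
	printf("%d\n", needs_linearize(&skb, F_FRAGLIST, true));		/* 1 */
	/* Same skb on a device with SG support: can go out as-is. */
	printf("%d\n", needs_linearize(&skb, F_FRAGLIST | F_SG, true));	/* 0 */
	return 0;
}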
1856int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, 1938int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1857 struct netdev_queue *txq) 1939 struct netdev_queue *txq)
1858{ 1940{
@@ -1863,13 +1945,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1863 if (!list_empty(&ptype_all)) 1945 if (!list_empty(&ptype_all))
1864 dev_queue_xmit_nit(skb, dev); 1946 dev_queue_xmit_nit(skb, dev);
1865 1947
1866 if (netif_needs_gso(dev, skb)) {
1867 if (unlikely(dev_gso_segment(skb)))
1868 goto out_kfree_skb;
1869 if (skb->next)
1870 goto gso;
1871 }
1872
1873 /* 1948 /*
1874	 * If device doesn't need skb->dst, release it right now while 1949	 * If device doesn't need skb->dst, release it right now while
1875	 * it's hot in this cpu cache 1950	 * it's hot in this cpu cache
@@ -1877,23 +1952,34 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1877 if (dev->priv_flags & IFF_XMIT_DST_RELEASE) 1952 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1878 skb_dst_drop(skb); 1953 skb_dst_drop(skb);
1879 1954
1955 skb_orphan_try(skb);
1956
1957 if (netif_needs_gso(dev, skb)) {
1958 if (unlikely(dev_gso_segment(skb)))
1959 goto out_kfree_skb;
1960 if (skb->next)
1961 goto gso;
1962 } else {
1963 if (skb_needs_linearize(skb, dev) &&
1964 __skb_linearize(skb))
1965 goto out_kfree_skb;
1966
1967 /* If packet is not checksummed and device does not
1968 * support checksumming for this protocol, complete
1969 * checksumming here.
1970 */
1971 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1972 skb_set_transport_header(skb, skb->csum_start -
1973 skb_headroom(skb));
1974 if (!dev_can_checksum(dev, skb) &&
1975 skb_checksum_help(skb))
1976 goto out_kfree_skb;
1977 }
1978 }
1979
1880 rc = ops->ndo_start_xmit(skb, dev); 1980 rc = ops->ndo_start_xmit(skb, dev);
1881 if (rc == NETDEV_TX_OK) 1981 if (rc == NETDEV_TX_OK)
1882 txq_trans_update(txq); 1982 txq_trans_update(txq);
1883 /*
1884 * TODO: if skb_orphan() was called by
1885 * dev->hard_start_xmit() (for example, the unmodified
1886 * igb driver does that; bnx2 doesn't), then
1887 * skb_tx_software_timestamp() will be unable to send
1888 * back the time stamp.
1889 *
1890 * How can this be prevented? Always create another
1891 * reference to the socket before calling
1892 * dev->hard_start_xmit()? Prevent that skb_orphan()
1893 * does anything in dev->hard_start_xmit() by clearing
1894 * the skb destructor before the call and restoring it
1895 * afterwards, then doing the skb_orphan() ourselves?
1896 */
1897 return rc; 1983 return rc;
1898 } 1984 }
1899 1985
@@ -1932,7 +2018,7 @@ out_kfree_skb:
1932 return rc; 2018 return rc;
1933} 2019}
1934 2020
1935static u32 skb_tx_hashrnd; 2021static u32 hashrnd __read_mostly;
1936 2022
1937u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) 2023u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1938{ 2024{
@@ -1948,9 +2034,8 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1948 if (skb->sk && skb->sk->sk_hash) 2034 if (skb->sk && skb->sk->sk_hash)
1949 hash = skb->sk->sk_hash; 2035 hash = skb->sk->sk_hash;
1950 else 2036 else
1951 hash = skb->protocol; 2037 hash = (__force u16) skb->protocol ^ skb->rxhash;
1952 2038 hash = jhash_1word(hash, hashrnd);
1953 hash = jhash_1word(hash, skb_tx_hashrnd);
1954 2039
1955 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); 2040 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1956} 2041}
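The final line of skb_tx_hash() maps a 32-bit hash onto real_num_tx_queues without a modulo: multiply by the queue count and keep the top 32 bits. A self-contained illustration of that multiply-shift trick:

#include <stdint.h>
#include <stdio.h>

/* Multiply-shift mapping of a 32-bit hash onto [0, nqueues). */
static uint16_t hash_to_queue(uint32_t hash, uint16_t nqueues)
{
	return (uint16_t)(((uint64_t)hash * nqueues) >> 32);
}

int main(void)
{
	printf("%u\n", hash_to_queue(0x00000000u, 8));	/* 0 */
	printf("%u\n", hash_to_queue(0x80000000u, 8));	/* 4 */
	printf("%u\n", hash_to_queue(0xffffffffu, 8));	/* 7 */
	return 0;
}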
@@ -1960,10 +2045,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1960{ 2045{
1961 if (unlikely(queue_index >= dev->real_num_tx_queues)) { 2046 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
1962 if (net_ratelimit()) { 2047 if (net_ratelimit()) {
1963 WARN(1, "%s selects TX queue %d, but " 2048 pr_warning("%s selects TX queue %d, but "
1964 "real number of TX queues is %d\n", 2049 "real number of TX queues is %d\n",
1965 dev->name, queue_index, 2050 dev->name, queue_index, dev->real_num_tx_queues);
1966 dev->real_num_tx_queues);
1967 } 2051 }
1968 return 0; 2052 return 0;
1969 } 2053 }
@@ -1973,12 +2057,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1973static struct netdev_queue *dev_pick_tx(struct net_device *dev, 2057static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1974 struct sk_buff *skb) 2058 struct sk_buff *skb)
1975{ 2059{
1976 u16 queue_index; 2060 int queue_index;
1977 struct sock *sk = skb->sk; 2061 struct sock *sk = skb->sk;
1978 2062
1979 if (sk_tx_queue_recorded(sk)) { 2063 queue_index = sk_tx_queue_get(sk);
1980 queue_index = sk_tx_queue_get(sk); 2064 if (queue_index < 0) {
1981 } else {
1982 const struct net_device_ops *ops = dev->netdev_ops; 2065 const struct net_device_ops *ops = dev->netdev_ops;
1983 2066
1984 if (ops->ndo_select_queue) { 2067 if (ops->ndo_select_queue) {
@@ -1990,7 +2073,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1990 queue_index = skb_tx_hash(dev, skb); 2073 queue_index = skb_tx_hash(dev, skb);
1991 2074
1992 if (sk) { 2075 if (sk) {
1993 struct dst_entry *dst = rcu_dereference_bh(sk->sk_dst_cache); 2076 struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
1994 2077
1995 if (dst && skb_dst(skb) == dst) 2078 if (dst && skb_dst(skb) == dst)
1996 sk_tx_queue_set(sk, queue_index); 2079 sk_tx_queue_set(sk, queue_index);
@@ -2007,50 +2090,59 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2007 struct netdev_queue *txq) 2090 struct netdev_queue *txq)
2008{ 2091{
2009 spinlock_t *root_lock = qdisc_lock(q); 2092 spinlock_t *root_lock = qdisc_lock(q);
2093 bool contended = qdisc_is_running(q);
2010 int rc; 2094 int rc;
2011 2095
2096 /*
2097 * Heuristic to force contended enqueues to serialize on a
2098 * separate lock before trying to get qdisc main lock.
2099 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
2100 * and dequeue packets faster.
2101 */
2102 if (unlikely(contended))
2103 spin_lock(&q->busylock);
2104
2012 spin_lock(root_lock); 2105 spin_lock(root_lock);
2013 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { 2106 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2014 kfree_skb(skb); 2107 kfree_skb(skb);
2015 rc = NET_XMIT_DROP; 2108 rc = NET_XMIT_DROP;
2016 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && 2109 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2017 !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { 2110 qdisc_run_begin(q)) {
2018 /* 2111 /*
2019 * This is a work-conserving queue; there are no old skbs 2112 * This is a work-conserving queue; there are no old skbs
2020 * waiting to be sent out; and the qdisc is not running - 2113 * waiting to be sent out; and the qdisc is not running -
2021 * xmit the skb directly. 2114 * xmit the skb directly.
2022 */ 2115 */
2116 if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
2117 skb_dst_force(skb);
2023 __qdisc_update_bstats(q, skb->len); 2118 __qdisc_update_bstats(q, skb->len);
2024 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) 2119 if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
2120 if (unlikely(contended)) {
2121 spin_unlock(&q->busylock);
2122 contended = false;
2123 }
2025 __qdisc_run(q); 2124 __qdisc_run(q);
2026 else 2125 } else
2027 clear_bit(__QDISC_STATE_RUNNING, &q->state); 2126 qdisc_run_end(q);
2028 2127
2029 rc = NET_XMIT_SUCCESS; 2128 rc = NET_XMIT_SUCCESS;
2030 } else { 2129 } else {
2130 skb_dst_force(skb);
2031 rc = qdisc_enqueue_root(skb, q); 2131 rc = qdisc_enqueue_root(skb, q);
2032 qdisc_run(q); 2132 if (qdisc_run_begin(q)) {
2133 if (unlikely(contended)) {
2134 spin_unlock(&q->busylock);
2135 contended = false;
2136 }
2137 __qdisc_run(q);
2138 }
2033 } 2139 }
2034 spin_unlock(root_lock); 2140 spin_unlock(root_lock);
2035 2141 if (unlikely(contended))
2142 spin_unlock(&q->busylock);
2036 return rc; 2143 return rc;
2037} 2144}
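The busylock heuristic introduced above deserves a sketch: contended enqueuers first serialize on a secondary lock, so the CPU that owns __QDISC_STATE_RUNNING meets at most one competitor for the root lock and can dequeue faster. A toy pthread model of the two-lock pattern; this is not the kernel's spinlock API, and the racy read of 'running' stands in for qdisc_is_running():

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t root = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t busylock = PTHREAD_MUTEX_INITIALIZER;
static bool running;	/* stands in for __QDISC_STATE_RUNNING */

static void enqueue_one(int id)
{
	/* Racy read, like qdisc_is_running(): only a hint. */
	bool contended = running;

	if (contended)
		pthread_mutex_lock(&busylock);	/* queue up behind the owner */
	pthread_mutex_lock(&root);
	printf("thread %d enqueued under root lock\n", id);
	pthread_mutex_unlock(&root);
	if (contended)
		pthread_mutex_unlock(&busylock);
}

int main(void)
{
	enqueue_one(1);
	return 0;
}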
2038 2145
2039/*
2040 * Returns true if either:
2041 * 1. skb has frag_list and the device doesn't support FRAGLIST, or
2042 * 2. skb is fragmented and the device does not support SG, or if
2043 * at least one of fragments is in highmem and device does not
2044 * support DMA from it.
2045 */
2046static inline int skb_needs_linearize(struct sk_buff *skb,
2047 struct net_device *dev)
2048{
2049 return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
2050 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
2051 illegal_highdma(dev, skb)));
2052}
2053
2054/** 2146/**
2055 * dev_queue_xmit - transmit a buffer 2147 * dev_queue_xmit - transmit a buffer
2056 * @skb: buffer to transmit 2148 * @skb: buffer to transmit
@@ -2083,25 +2175,6 @@ int dev_queue_xmit(struct sk_buff *skb)
2083 struct Qdisc *q; 2175 struct Qdisc *q;
2084 int rc = -ENOMEM; 2176 int rc = -ENOMEM;
2085 2177
2086 /* GSO will handle the following emulations directly. */
2087 if (netif_needs_gso(dev, skb))
2088 goto gso;
2089
2090 /* Convert a paged skb to linear, if required */
2091 if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
2092 goto out_kfree_skb;
2093
2094 /* If packet is not checksummed and device does not support
2095 * checksumming for this protocol, complete checksumming here.
2096 */
2097 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2098 skb_set_transport_header(skb, skb->csum_start -
2099 skb_headroom(skb));
2100 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
2101 goto out_kfree_skb;
2102 }
2103
2104gso:
2105 /* Disable soft irqs for various locks below. Also 2178 /* Disable soft irqs for various locks below. Also
2106 * stops preemption for RCU. 2179 * stops preemption for RCU.
2107 */ 2180 */
@@ -2160,7 +2233,6 @@ gso:
2160 rc = -ENETDOWN; 2233 rc = -ENETDOWN;
2161 rcu_read_unlock_bh(); 2234 rcu_read_unlock_bh();
2162 2235
2163out_kfree_skb:
2164 kfree_skb(skb); 2236 kfree_skb(skb);
2165 return rc; 2237 return rc;
2166out: 2238out:
@@ -2175,11 +2247,244 @@ EXPORT_SYMBOL(dev_queue_xmit);
2175 =======================================================================*/ 2247 =======================================================================*/
2176 2248
2177int netdev_max_backlog __read_mostly = 1000; 2249int netdev_max_backlog __read_mostly = 1000;
2250int netdev_tstamp_prequeue __read_mostly = 1;
2178int netdev_budget __read_mostly = 300; 2251int netdev_budget __read_mostly = 300;
2179int weight_p __read_mostly = 64; /* old backlog weight */ 2252int weight_p __read_mostly = 64; /* old backlog weight */
2180 2253
2181DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; 2254/* Called with irq disabled */
2255static inline void ____napi_schedule(struct softnet_data *sd,
2256 struct napi_struct *napi)
2257{
2258 list_add_tail(&napi->poll_list, &sd->poll_list);
2259 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2260}
2261
2262#ifdef CONFIG_RPS
2263
2264/* One global table that all flow-based protocols share. */
2265struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
2266EXPORT_SYMBOL(rps_sock_flow_table);
2267
2268/*
2269 * get_rps_cpu is called from netif_receive_skb and returns the target
2270 * CPU from the RPS map of the receiving queue for a given skb.
2271 * rcu_read_lock must be held on entry.
2272 */
2273static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2274 struct rps_dev_flow **rflowp)
2275{
2276 struct ipv6hdr *ip6;
2277 struct iphdr *ip;
2278 struct netdev_rx_queue *rxqueue;
2279 struct rps_map *map;
2280 struct rps_dev_flow_table *flow_table;
2281 struct rps_sock_flow_table *sock_flow_table;
2282 int cpu = -1;
2283 u8 ip_proto;
2284 u16 tcpu;
2285 u32 addr1, addr2, ihl;
2286 union {
2287 u32 v32;
2288 u16 v16[2];
2289 } ports;
2290
2291 if (skb_rx_queue_recorded(skb)) {
2292 u16 index = skb_get_rx_queue(skb);
2293 if (unlikely(index >= dev->num_rx_queues)) {
2294 WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
2295 "on queue %u, but number of RX queues is %u\n",
2296 dev->name, index, dev->num_rx_queues);
2297 goto done;
2298 }
2299 rxqueue = dev->_rx + index;
2300 } else
2301 rxqueue = dev->_rx;
2302
2303 if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
2304 goto done;
2305
2306 if (skb->rxhash)
2307 goto got_hash; /* Skip hash computation on packet header */
2308
2309 switch (skb->protocol) {
2310 case __constant_htons(ETH_P_IP):
2311 if (!pskb_may_pull(skb, sizeof(*ip)))
2312 goto done;
2313
2314 ip = (struct iphdr *) skb->data;
2315 ip_proto = ip->protocol;
2316 addr1 = (__force u32) ip->saddr;
2317 addr2 = (__force u32) ip->daddr;
2318 ihl = ip->ihl;
2319 break;
2320 case __constant_htons(ETH_P_IPV6):
2321 if (!pskb_may_pull(skb, sizeof(*ip6)))
2322 goto done;
2323
2324 ip6 = (struct ipv6hdr *) skb->data;
2325 ip_proto = ip6->nexthdr;
2326 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
2327 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
2328 ihl = (40 >> 2);
2329 break;
2330 default:
2331 goto done;
2332 }
2333 switch (ip_proto) {
2334 case IPPROTO_TCP:
2335 case IPPROTO_UDP:
2336 case IPPROTO_DCCP:
2337 case IPPROTO_ESP:
2338 case IPPROTO_AH:
2339 case IPPROTO_SCTP:
2340 case IPPROTO_UDPLITE:
2341 if (pskb_may_pull(skb, (ihl * 4) + 4)) {
2342 ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
2343 if (ports.v16[1] < ports.v16[0])
2344 swap(ports.v16[0], ports.v16[1]);
2345 break;
2346 }
2347 default:
2348 ports.v32 = 0;
2349 break;
2350 }
2351
2352 /* get a consistent hash (same value on both flow directions) */
2353 if (addr2 < addr1)
2354 swap(addr1, addr2);
2355 skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
2356 if (!skb->rxhash)
2357 skb->rxhash = 1;
2358
2359got_hash:
2360 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2361 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2362 if (flow_table && sock_flow_table) {
2363 u16 next_cpu;
2364 struct rps_dev_flow *rflow;
2365
2366 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2367 tcpu = rflow->cpu;
2368
2369 next_cpu = sock_flow_table->ents[skb->rxhash &
2370 sock_flow_table->mask];
2371
2372 /*
2373 * If the desired CPU (where last recvmsg was done) is
2374 * different from current CPU (one in the rx-queue flow
2375 * table entry), switch if one of the following holds:
2376 * - Current CPU is unset (equal to RPS_NO_CPU).
2377 * - Current CPU is offline.
2378 * - The current CPU's queue tail has advanced beyond the
2379 * last packet that was enqueued using this table entry.
2380 * This guarantees that all previous packets for the flow
2381 * have been dequeued, thus preserving in-order delivery.
2382 */
2383 if (unlikely(tcpu != next_cpu) &&
2384 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2385 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2386 rflow->last_qtail)) >= 0)) {
2387 tcpu = rflow->cpu = next_cpu;
2388 if (tcpu != RPS_NO_CPU)
2389 rflow->last_qtail = per_cpu(softnet_data,
2390 tcpu).input_queue_head;
2391 }
2392 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2393 *rflowp = rflow;
2394 cpu = tcpu;
2395 goto done;
2396 }
2397 }
2398
2399 map = rcu_dereference(rxqueue->rps_map);
2400 if (map) {
2401 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2402
2403 if (cpu_online(tcpu)) {
2404 cpu = tcpu;
2405 goto done;
2406 }
2407 }
2408
2409done:
2410 return cpu;
2411}
2412
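For the per-queue lookup above to find the right rps_map, a multiqueue driver must record the hardware RX queue on each skb before it reaches the stack. A hedged sketch, with my_rx_one() and queue_index as illustrative names only:

static void my_rx_one(struct sk_buff *skb, u16 queue_index)
{
	/* skb_get_rx_queue() in get_rps_cpu() reads this index back */
	skb_record_rx_queue(skb, queue_index);
	netif_receive_skb(skb);
}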
2413/* Called from hardirq (IPI) context */
2414static void rps_trigger_softirq(void *data)
2415{
2416 struct softnet_data *sd = data;
2417
2418 ____napi_schedule(sd, &sd->backlog);
2419 sd->received_rps++;
2420}
2421
2422#endif /* CONFIG_RPS */
2423
2424/*
2425 * Check if this softnet_data structure belongs to another CPU.
2426 * If yes, queue it to our IPI list and return 1.
2427 * If no, return 0.
2428 */
2429static int rps_ipi_queued(struct softnet_data *sd)
2430{
2431#ifdef CONFIG_RPS
2432 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2433
2434 if (sd != mysd) {
2435 sd->rps_ipi_next = mysd->rps_ipi_list;
2436 mysd->rps_ipi_list = sd;
2437
2438 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2439 return 1;
2440 }
2441#endif /* CONFIG_RPS */
2442 return 0;
2443}
2444
2445/*
2446 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
2447 * queue (may be a remote CPU queue).
2448 */
2449static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2450 unsigned int *qtail)
2451{
2452 struct softnet_data *sd;
2453 unsigned long flags;
2454
2455 sd = &per_cpu(softnet_data, cpu);
2456
2457 local_irq_save(flags);
2458
2459 rps_lock(sd);
2460 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2461 if (skb_queue_len(&sd->input_pkt_queue)) {
2462enqueue:
2463 __skb_queue_tail(&sd->input_pkt_queue, skb);
2464 input_queue_tail_incr_save(sd, qtail);
2465 rps_unlock(sd);
2466 local_irq_restore(flags);
2467 return NET_RX_SUCCESS;
2468 }
2469
2470 /* Schedule NAPI for backlog device
2471 * We can use a non-atomic operation since we own the queue lock
2472 */
2473 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2474 if (!rps_ipi_queued(sd))
2475 ____napi_schedule(sd, &sd->backlog);
2476 }
2477 goto enqueue;
2478 }
2182 2479
2480 sd->dropped++;
2481 rps_unlock(sd);
2482
2483 local_irq_restore(flags);
2484
2485 kfree_skb(skb);
2486 return NET_RX_DROP;
2487}
2183 2488
2184/** 2489/**
2185 * netif_rx - post buffer to the network code 2490 * netif_rx - post buffer to the network code
@@ -2198,41 +2503,40 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2198 2503
2199int netif_rx(struct sk_buff *skb) 2504int netif_rx(struct sk_buff *skb)
2200{ 2505{
2201 struct softnet_data *queue; 2506 int ret;
2202 unsigned long flags;
2203 2507
2204 /* if netpoll wants it, pretend we never saw it */ 2508 /* if netpoll wants it, pretend we never saw it */
2205 if (netpoll_rx(skb)) 2509 if (netpoll_rx(skb))
2206 return NET_RX_DROP; 2510 return NET_RX_DROP;
2207 2511
2208 if (!skb->tstamp.tv64) 2512 if (netdev_tstamp_prequeue)
2209 net_timestamp(skb); 2513 net_timestamp_check(skb);
2210 2514
2211 /* 2515#ifdef CONFIG_RPS
2212 * The code is rearranged so that the path is 2516 {
2213 * shortest when the CPU is congested but still operating. 2517 struct rps_dev_flow voidflow, *rflow = &voidflow;
2214 */ 2518 int cpu;
2215 local_irq_save(flags);
2216 queue = &__get_cpu_var(softnet_data);
2217 2519
2218 __get_cpu_var(netdev_rx_stat).total++; 2520 preempt_disable();
2219 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { 2521 rcu_read_lock();
2220 if (queue->input_pkt_queue.qlen) {
2221enqueue:
2222 __skb_queue_tail(&queue->input_pkt_queue, skb);
2223 local_irq_restore(flags);
2224 return NET_RX_SUCCESS;
2225 }
2226 2522
2227 napi_schedule(&queue->backlog); 2523 cpu = get_rps_cpu(skb->dev, skb, &rflow);
2228 goto enqueue; 2524 if (cpu < 0)
2229 } 2525 cpu = smp_processor_id();
2230 2526
2231 __get_cpu_var(netdev_rx_stat).dropped++; 2527 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2232 local_irq_restore(flags);
2233 2528
2234 kfree_skb(skb); 2529 rcu_read_unlock();
2235 return NET_RX_DROP; 2530 preempt_enable();
2531 }
2532#else
2533 {
2534 unsigned int qtail;
2535 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
2536 put_cpu();
2537 }
2538#endif
2539 return ret;
2236} 2540}
2237EXPORT_SYMBOL(netif_rx); 2541EXPORT_SYMBOL(netif_rx);
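With RPS, netif_rx() now funnels into enqueue_to_backlog() for a possibly remote CPU, but the driver-facing contract is unchanged. A hedged sketch of a non-NAPI caller; my_isr() and my_irq_rx() are hypothetical driver helpers:

#include <linux/interrupt.h>
#include <linux/etherdevice.h>

static struct sk_buff *my_irq_rx(struct net_device *dev);	/* hypothetical */

static irqreturn_t my_isr(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;
	struct sk_buff *skb = my_irq_rx(dev);	/* pull one frame from the NIC */

	if (skb) {
		skb->protocol = eth_type_trans(skb, dev);
		netif_rx(skb);		/* enqueue to a per-CPU backlog */
	}
	return IRQ_HANDLED;
}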
2238 2542
@@ -2277,6 +2581,7 @@ static void net_tx_action(struct softirq_action *h)
2277 local_irq_disable(); 2581 local_irq_disable();
2278 head = sd->output_queue; 2582 head = sd->output_queue;
2279 sd->output_queue = NULL; 2583 sd->output_queue = NULL;
2584 sd->output_queue_tailp = &sd->output_queue;
2280 local_irq_enable(); 2585 local_irq_enable();
2281 2586
2282 while (head) { 2587 while (head) {
@@ -2314,66 +2619,14 @@ static inline int deliver_skb(struct sk_buff *skb,
2314 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); 2619 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2315} 2620}
2316 2621
2317#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) 2622#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
2318 2623 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
2319#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2320/* This hook is defined here for ATM LANE */ 2624/* This hook is defined here for ATM LANE */
2321int (*br_fdb_test_addr_hook)(struct net_device *dev, 2625int (*br_fdb_test_addr_hook)(struct net_device *dev,
2322 unsigned char *addr) __read_mostly; 2626 unsigned char *addr) __read_mostly;
2323EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook); 2627EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
2324#endif 2628#endif
2325 2629
2326/*
2327 * If the bridge module is loaded, call the bridging hook.
2328 * Returns NULL if the packet was consumed.
2329 */
2330struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2331 struct sk_buff *skb) __read_mostly;
2332EXPORT_SYMBOL_GPL(br_handle_frame_hook);
2333
2334static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2335 struct packet_type **pt_prev, int *ret,
2336 struct net_device *orig_dev)
2337{
2338 struct net_bridge_port *port;
2339
2340 if (skb->pkt_type == PACKET_LOOPBACK ||
2341 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2342 return skb;
2343
2344 if (*pt_prev) {
2345 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2346 *pt_prev = NULL;
2347 }
2348
2349 return br_handle_frame_hook(port, skb);
2350}
2351#else
2352#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
2353#endif
2354
2355#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2356struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2357EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2358
2359static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2360 struct packet_type **pt_prev,
2361 int *ret,
2362 struct net_device *orig_dev)
2363{
2364 if (skb->dev->macvlan_port == NULL)
2365 return skb;
2366
2367 if (*pt_prev) {
2368 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2369 *pt_prev = NULL;
2370 }
2371 return macvlan_handle_frame_hook(skb);
2372}
2373#else
2374#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2375#endif
2376
2377#ifdef CONFIG_NET_CLS_ACT 2630#ifdef CONFIG_NET_CLS_ACT
2378/* TODO: Maybe we should just force sch_ingress to be compiled in 2631/* TODO: Maybe we should just force sch_ingress to be compiled in
2379 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions 2632 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2391,10 +2644,10 @@ static int ing_filter(struct sk_buff *skb)
2391 int result = TC_ACT_OK; 2644 int result = TC_ACT_OK;
2392 struct Qdisc *q; 2645 struct Qdisc *q;
2393 2646
2394 if (MAX_RED_LOOP < ttl++) { 2647 if (unlikely(MAX_RED_LOOP < ttl++)) {
2395 printk(KERN_WARNING 2648 if (net_ratelimit())
2396 "Redir loop detected Dropping packet (%d->%d)\n", 2649 pr_warning( "Redir loop detected Dropping packet (%d->%d)\n",
2397 skb->skb_iif, dev->ifindex); 2650 skb->skb_iif, dev->ifindex);
2398 return TC_ACT_SHOT; 2651 return TC_ACT_SHOT;
2399 } 2652 }
2400 2653
@@ -2424,9 +2677,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2424 if (*pt_prev) { 2677 if (*pt_prev) {
2425 *ret = deliver_skb(skb, *pt_prev, orig_dev); 2678 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2426 *pt_prev = NULL; 2679 *pt_prev = NULL;
2427 } else {
2428 /* Huh? Why does turning on AF_PACKET affect this? */
2429 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2430 } 2680 }
2431 2681
2432 switch (ing_filter(skb)) { 2682 switch (ing_filter(skb)) {
@@ -2470,32 +2720,113 @@ void netif_nit_deliver(struct sk_buff *skb)
2470} 2720}
2471 2721
2472/** 2722/**
2473 * netif_receive_skb - process receive buffer from network 2723 * netdev_rx_handler_register - register receive handler
2474 * @skb: buffer to process 2724 * @dev: device to register a handler for
2725 * @rx_handler: receive handler to register
2726 * @rx_handler_data: data pointer that is used by rx handler
2475 * 2727 *
2476 * netif_receive_skb() is the main receive data processing function. 2728 * Register a receive handler for a device. This handler will then be
2477 * It always succeeds. The buffer may be dropped during processing 2729 * called from __netif_receive_skb. A negative errno code is returned
2478 * for congestion control or by the protocol layers. 2730 * on a failure.
2479 * 2731 *
2480 * This function may only be called from softirq context and interrupts 2732 * The caller must hold the rtnl_mutex.
2481 * should be enabled. 2733 */
2734int netdev_rx_handler_register(struct net_device *dev,
2735 rx_handler_func_t *rx_handler,
2736 void *rx_handler_data)
2737{
2738 ASSERT_RTNL();
2739
2740 if (dev->rx_handler)
2741 return -EBUSY;
2742
2743 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
2744 rcu_assign_pointer(dev->rx_handler, rx_handler);
2745
2746 return 0;
2747}
2748EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
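A hedged sketch of how a stacked device (bridge- or macvlan-style) might claim a port with this hook; my_handle_frame() and my_port_attach() are illustrative names. Returning NULL tells __netif_receive_skb() the frame was consumed, matching the call site below:

static struct sk_buff *my_handle_frame(struct sk_buff *skb)
{
	/* steal, rewrite, or pass the frame through here */
	return skb;		/* non-NULL: normal delivery continues */
}

static int my_port_attach(struct net_device *port, void *port_priv)
{
	int err;

	rtnl_lock();		/* netdev_rx_handler_register asserts RTNL */
	err = netdev_rx_handler_register(port, my_handle_frame, port_priv);
	rtnl_unlock();
	return err;		/* -EBUSY if a handler is already installed */
}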
2749
2750/**
2751 * netdev_rx_handler_unregister - unregister receive handler
2752 * @dev: device to unregister a handler from
2482 * 2753 *
2483 * Return values (usually ignored): 2754 * Unregister a receive hander from a device.
2484 * NET_RX_SUCCESS: no congestion 2755 *
2485 * NET_RX_DROP: packet was dropped 2756 * The caller must hold the rtnl_mutex.
2486 */ 2757 */
2487int netif_receive_skb(struct sk_buff *skb) 2758void netdev_rx_handler_unregister(struct net_device *dev)
2759{
2760
2761 ASSERT_RTNL();
2762 rcu_assign_pointer(dev->rx_handler, NULL);
2763 rcu_assign_pointer(dev->rx_handler_data, NULL);
2764}
2765EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
2766
2767static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
2768 struct net_device *master)
2769{
2770 if (skb->pkt_type == PACKET_HOST) {
2771 u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
2772
2773 memcpy(dest, master->dev_addr, ETH_ALEN);
2774 }
2775}
2776
2777/* On bonding slaves other than the currently active slave, suppress
2778 * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
2779 * ARP on active-backup slaves with arp_validate enabled.
2780 */
2781int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
2782{
2783 struct net_device *dev = skb->dev;
2784
2785 if (master->priv_flags & IFF_MASTER_ARPMON)
2786 dev->last_rx = jiffies;
2787
2788 if ((master->priv_flags & IFF_MASTER_ALB) &&
2789 (master->priv_flags & IFF_BRIDGE_PORT)) {
2790 /* Do address unmangling. The local destination address
2791 * will always be the one the master has. This provides the
2792 * right functionality in a bridge.
2793 */
2794 skb_bond_set_mac_by_master(skb, master);
2795 }
2796
2797 if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
2798 if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
2799 skb->protocol == __cpu_to_be16(ETH_P_ARP))
2800 return 0;
2801
2802 if (master->priv_flags & IFF_MASTER_ALB) {
2803 if (skb->pkt_type != PACKET_BROADCAST &&
2804 skb->pkt_type != PACKET_MULTICAST)
2805 return 0;
2806 }
2807 if (master->priv_flags & IFF_MASTER_8023AD &&
2808 skb->protocol == __cpu_to_be16(ETH_P_SLOW))
2809 return 0;
2810
2811 return 1;
2812 }
2813 return 0;
2814}
2815EXPORT_SYMBOL(__skb_bond_should_drop);
2816
2817static int __netif_receive_skb(struct sk_buff *skb)
2488{ 2818{
2489 struct packet_type *ptype, *pt_prev; 2819 struct packet_type *ptype, *pt_prev;
2820 rx_handler_func_t *rx_handler;
2490 struct net_device *orig_dev; 2821 struct net_device *orig_dev;
2491 struct net_device *master; 2822 struct net_device *master;
2492 struct net_device *null_or_orig; 2823 struct net_device *null_or_orig;
2493 struct net_device *null_or_bond; 2824 struct net_device *orig_or_bond;
2494 int ret = NET_RX_DROP; 2825 int ret = NET_RX_DROP;
2495 __be16 type; 2826 __be16 type;
2496 2827
2497 if (!skb->tstamp.tv64) 2828 if (!netdev_tstamp_prequeue)
2498 net_timestamp(skb); 2829 net_timestamp_check(skb);
2499 2830
2500 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) 2831 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
2501 return NET_RX_SUCCESS; 2832 return NET_RX_SUCCESS;
@@ -2507,18 +2838,28 @@ int netif_receive_skb(struct sk_buff *skb)
2507 if (!skb->skb_iif) 2838 if (!skb->skb_iif)
2508 skb->skb_iif = skb->dev->ifindex; 2839 skb->skb_iif = skb->dev->ifindex;
2509 2840
2841 /*
2842 * bonding note: skbs received on inactive slaves should only
2843 * be delivered to pkt handlers that are exact matches. Also
2844 * the deliver_no_wcard flag will be set. If packet handlers
2845 * are sensitive to duplicate packets, these skbs will need to
2846 * be dropped at the handler. The vlan accel path may have
2847 * already set the deliver_no_wcard flag.
2848 */
2510 null_or_orig = NULL; 2849 null_or_orig = NULL;
2511 orig_dev = skb->dev; 2850 orig_dev = skb->dev;
2512 master = ACCESS_ONCE(orig_dev->master); 2851 master = ACCESS_ONCE(orig_dev->master);
2513 if (master) { 2852 if (skb->deliver_no_wcard)
2514 if (skb_bond_should_drop(skb, master)) 2853 null_or_orig = orig_dev;
2854 else if (master) {
2855 if (skb_bond_should_drop(skb, master)) {
2856 skb->deliver_no_wcard = 1;
2515 null_or_orig = orig_dev; /* deliver only exact match */ 2857 null_or_orig = orig_dev; /* deliver only exact match */
2516 else 2858 } else
2517 skb->dev = master; 2859 skb->dev = master;
2518 } 2860 }
2519 2861
2520 __get_cpu_var(netdev_rx_stat).total++; 2862 __this_cpu_inc(softnet_data.processed);
2521
2522 skb_reset_network_header(skb); 2863 skb_reset_network_header(skb);
2523 skb_reset_transport_header(skb); 2864 skb_reset_transport_header(skb);
2524 skb->mac_len = skb->network_header - skb->mac_header; 2865 skb->mac_len = skb->network_header - skb->mac_header;
@@ -2550,12 +2891,17 @@ int netif_receive_skb(struct sk_buff *skb)
2550ncls: 2891ncls:
2551#endif 2892#endif
2552 2893
2553 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev); 2894 /* Handle special case of bridge or macvlan */
2554 if (!skb) 2895 rx_handler = rcu_dereference(skb->dev->rx_handler);
2555 goto out; 2896 if (rx_handler) {
2556 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev); 2897 if (pt_prev) {
2557 if (!skb) 2898 ret = deliver_skb(skb, pt_prev, orig_dev);
2558 goto out; 2899 pt_prev = NULL;
2900 }
2901 skb = rx_handler(skb);
2902 if (!skb)
2903 goto out;
2904 }
2559 2905
2560 /* 2906 /*
2561 * Make sure frames received on VLAN interfaces stacked on 2907 * Make sure frames received on VLAN interfaces stacked on
@@ -2563,10 +2909,10 @@ ncls:
2563 * device that may have registered for a specific ptype. The 2909 * device that may have registered for a specific ptype. The
2564 * handler may have to adjust skb->dev and orig_dev. 2910 * handler may have to adjust skb->dev and orig_dev.
2565 */ 2911 */
2566 null_or_bond = NULL; 2912 orig_or_bond = orig_dev;
2567 if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) && 2913 if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
2568 (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) { 2914 (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
2569 null_or_bond = vlan_dev_real_dev(skb->dev); 2915 orig_or_bond = vlan_dev_real_dev(skb->dev);
2570 } 2916 }
2571 2917
2572 type = skb->protocol; 2918 type = skb->protocol;
@@ -2574,7 +2920,7 @@ ncls:
2574 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 2920 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2575 if (ptype->type == type && (ptype->dev == null_or_orig || 2921 if (ptype->type == type && (ptype->dev == null_or_orig ||
2576 ptype->dev == skb->dev || ptype->dev == orig_dev || 2922 ptype->dev == skb->dev || ptype->dev == orig_dev ||
2577 ptype->dev == null_or_bond)) { 2923 ptype->dev == orig_or_bond)) {
2578 if (pt_prev) 2924 if (pt_prev)
2579 ret = deliver_skb(skb, pt_prev, orig_dev); 2925 ret = deliver_skb(skb, pt_prev, orig_dev);
2580 pt_prev = ptype; 2926 pt_prev = ptype;
@@ -2595,20 +2941,81 @@ out:
2595 rcu_read_unlock(); 2941 rcu_read_unlock();
2596 return ret; 2942 return ret;
2597} 2943}
2944
2945/**
2946 * netif_receive_skb - process receive buffer from network
2947 * @skb: buffer to process
2948 *
2949 * netif_receive_skb() is the main receive data processing function.
2950 * It always succeeds. The buffer may be dropped during processing
2951 * for congestion control or by the protocol layers.
2952 *
2953 * This function may only be called from softirq context and interrupts
2954 * should be enabled.
2955 *
2956 * Return values (usually ignored):
2957 * NET_RX_SUCCESS: no congestion
2958 * NET_RX_DROP: packet was dropped
2959 */
2960int netif_receive_skb(struct sk_buff *skb)
2961{
2962 if (netdev_tstamp_prequeue)
2963 net_timestamp_check(skb);
2964
2965 if (skb_defer_rx_timestamp(skb))
2966 return NET_RX_SUCCESS;
2967
2968#ifdef CONFIG_RPS
2969 {
2970 struct rps_dev_flow voidflow, *rflow = &voidflow;
2971 int cpu, ret;
2972
2973 rcu_read_lock();
2974
2975 cpu = get_rps_cpu(skb->dev, skb, &rflow);
2976
2977 if (cpu >= 0) {
2978 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
2979 rcu_read_unlock();
2980 } else {
2981 rcu_read_unlock();
2982 ret = __netif_receive_skb(skb);
2983 }
2984
2985 return ret;
2986 }
2987#else
2988 return __netif_receive_skb(skb);
2989#endif
2990}
2598EXPORT_SYMBOL(netif_receive_skb); 2991EXPORT_SYMBOL(netif_receive_skb);
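On the NAPI side, drivers feed frames in through their poll routine; under CONFIG_RPS the skb may then be re-queued to another CPU's backlog as shown above. A minimal sketch of the assumed shape of such a poll loop; my_priv, my_fetch_skb() and my_irq_enable() are hypothetical:

struct my_priv {
	struct napi_struct napi;
	/* ... device state ... */
};

static struct sk_buff *my_fetch_skb(struct my_priv *priv);	/* hypothetical */
static void my_irq_enable(struct my_priv *priv);		/* hypothetical */

static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_priv *priv = container_of(napi, struct my_priv, napi);
	int work = 0;

	while (work < budget) {
		struct sk_buff *skb = my_fetch_skb(priv);

		if (!skb)
			break;
		netif_receive_skb(skb);
		work++;
	}
	if (work < budget) {
		napi_complete(napi);
		my_irq_enable(priv);	/* re-arm the device IRQ */
	}
	return work;
}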
2599 2992
2600/* Network device is going away, flush any packets still pending */ 2993/* Network device is going away, flush any packets still pending
2994 * Called with irqs disabled.
2995 */
2601static void flush_backlog(void *arg) 2996static void flush_backlog(void *arg)
2602{ 2997{
2603 struct net_device *dev = arg; 2998 struct net_device *dev = arg;
2604 struct softnet_data *queue = &__get_cpu_var(softnet_data); 2999 struct softnet_data *sd = &__get_cpu_var(softnet_data);
2605 struct sk_buff *skb, *tmp; 3000 struct sk_buff *skb, *tmp;
2606 3001
2607 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp) 3002 rps_lock(sd);
3003 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3004 if (skb->dev == dev) {
3005 __skb_unlink(skb, &sd->input_pkt_queue);
3006 kfree_skb(skb);
3007 input_queue_head_incr(sd);
3008 }
3009 }
3010 rps_unlock(sd);
3011
3012 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
2608 if (skb->dev == dev) { 3013 if (skb->dev == dev) {
2609 __skb_unlink(skb, &queue->input_pkt_queue); 3014 __skb_unlink(skb, &sd->process_queue);
2610 kfree_skb(skb); 3015 kfree_skb(skb);
3016 input_queue_head_incr(sd);
2611 } 3017 }
3018 }
2612} 3019}
2613 3020
2614static int napi_gro_complete(struct sk_buff *skb) 3021static int napi_gro_complete(struct sk_buff *skb)
@@ -2667,7 +3074,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2667 int mac_len; 3074 int mac_len;
2668 enum gro_result ret; 3075 enum gro_result ret;
2669 3076
2670 if (!(skb->dev->features & NETIF_F_GRO)) 3077 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
2671 goto normal; 3078 goto normal;
2672 3079
2673 if (skb_is_gso(skb) || skb_has_frags(skb)) 3080 if (skb_is_gso(skb) || skb_has_frags(skb))
@@ -2754,9 +3161,6 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2754{ 3161{
2755 struct sk_buff *p; 3162 struct sk_buff *p;
2756 3163
2757 if (netpoll_rx_on(skb))
2758 return GRO_NORMAL;
2759
2760 for (p = napi->gro_list; p; p = p->next) { 3164 for (p = napi->gro_list; p; p = p->next) {
2761 NAPI_GRO_CB(p)->same_flow = 3165 NAPI_GRO_CB(p)->same_flow =
2762 (p->dev == skb->dev) && 3166 (p->dev == skb->dev) &&
@@ -2911,27 +3315,87 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
2911} 3315}
2912EXPORT_SYMBOL(napi_gro_frags); 3316EXPORT_SYMBOL(napi_gro_frags);
2913 3317
3318/*
3319 * net_rps_action_and_irq_enable() sends any pending IPIs for RPS.
3320 * Note: called with local irq disabled, but exits with local irq enabled.
3321 */
3322static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3323{
3324#ifdef CONFIG_RPS
3325 struct softnet_data *remsd = sd->rps_ipi_list;
3326
3327 if (remsd) {
3328 sd->rps_ipi_list = NULL;
3329
3330 local_irq_enable();
3331
3332 /* Send pending IPI's to kick RPS processing on remote cpus. */
3333 while (remsd) {
3334 struct softnet_data *next = remsd->rps_ipi_next;
3335
3336 if (cpu_online(remsd->cpu))
3337 __smp_call_function_single(remsd->cpu,
3338 &remsd->csd, 0);
3339 remsd = next;
3340 }
3341 } else
3342#endif
3343 local_irq_enable();
3344}
3345
2914static int process_backlog(struct napi_struct *napi, int quota) 3346static int process_backlog(struct napi_struct *napi, int quota)
2915{ 3347{
2916 int work = 0; 3348 int work = 0;
2917 struct softnet_data *queue = &__get_cpu_var(softnet_data); 3349 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
2918 unsigned long start_time = jiffies;
2919 3350
3351#ifdef CONFIG_RPS
3352 /* Check if we have pending IPIs; it's better to send them now
3353 * than to wait for net_rx_action() to end.
3354 */
3355 if (sd->rps_ipi_list) {
3356 local_irq_disable();
3357 net_rps_action_and_irq_enable(sd);
3358 }
3359#endif
2920 napi->weight = weight_p; 3360 napi->weight = weight_p;
2921 do { 3361 local_irq_disable();
3362 while (work < quota) {
2922 struct sk_buff *skb; 3363 struct sk_buff *skb;
3364 unsigned int qlen;
2923 3365
2924 local_irq_disable(); 3366 while ((skb = __skb_dequeue(&sd->process_queue))) {
2925 skb = __skb_dequeue(&queue->input_pkt_queue);
2926 if (!skb) {
2927 __napi_complete(napi);
2928 local_irq_enable(); 3367 local_irq_enable();
2929 break; 3368 __netif_receive_skb(skb);
3369 local_irq_disable();
3370 input_queue_head_incr(sd);
3371 if (++work >= quota) {
3372 local_irq_enable();
3373 return work;
3374 }
2930 } 3375 }
2931 local_irq_enable();
2932 3376
2933 netif_receive_skb(skb); 3377 rps_lock(sd);
2934 } while (++work < quota && jiffies == start_time); 3378 qlen = skb_queue_len(&sd->input_pkt_queue);
3379 if (qlen)
3380 skb_queue_splice_tail_init(&sd->input_pkt_queue,
3381 &sd->process_queue);
3382
3383 if (qlen < quota - work) {
3384 /*
3385 * Inline a custom version of __napi_complete().
3386 * Only the current cpu owns and manipulates this napi,
3387 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
3388 * We can use a plain write instead of clear_bit(),
3389 * and we don't need an smp_mb() memory barrier.
3390 */
3391 list_del(&napi->poll_list);
3392 napi->state = 0;
3393
3394 quota = work + qlen;
3395 }
3396 rps_unlock(sd);
3397 }
3398 local_irq_enable();
2935 3399
2936 return work; 3400 return work;
2937} 3401}
@@ -2947,8 +3411,7 @@ void __napi_schedule(struct napi_struct *n)
2947 unsigned long flags; 3411 unsigned long flags;
2948 3412
2949 local_irq_save(flags); 3413 local_irq_save(flags);
2950 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); 3414 ____napi_schedule(&__get_cpu_var(softnet_data), n);
2951 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2952 local_irq_restore(flags); 3415 local_irq_restore(flags);
2953} 3416}
2954EXPORT_SYMBOL(__napi_schedule); 3417EXPORT_SYMBOL(__napi_schedule);
@@ -3019,17 +3482,16 @@ void netif_napi_del(struct napi_struct *napi)
3019} 3482}
3020EXPORT_SYMBOL(netif_napi_del); 3483EXPORT_SYMBOL(netif_napi_del);
3021 3484
3022
3023static void net_rx_action(struct softirq_action *h) 3485static void net_rx_action(struct softirq_action *h)
3024{ 3486{
3025 struct list_head *list = &__get_cpu_var(softnet_data).poll_list; 3487 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3026 unsigned long time_limit = jiffies + 2; 3488 unsigned long time_limit = jiffies + 2;
3027 int budget = netdev_budget; 3489 int budget = netdev_budget;
3028 void *have; 3490 void *have;
3029 3491
3030 local_irq_disable(); 3492 local_irq_disable();
3031 3493
3032 while (!list_empty(list)) { 3494 while (!list_empty(&sd->poll_list)) {
3033 struct napi_struct *n; 3495 struct napi_struct *n;
3034 int work, weight; 3496 int work, weight;
3035 3497
@@ -3047,7 +3509,7 @@ static void net_rx_action(struct softirq_action *h)
3047 * entries to the tail of this list, and only ->poll() 3509 * entries to the tail of this list, and only ->poll()
3048 * calls can remove this head entry from the list. 3510 * calls can remove this head entry from the list.
3049 */ 3511 */
3050 n = list_first_entry(list, struct napi_struct, poll_list); 3512 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3051 3513
3052 have = netpoll_poll_lock(n); 3514 have = netpoll_poll_lock(n);
3053 3515
@@ -3082,13 +3544,13 @@ static void net_rx_action(struct softirq_action *h)
3082 napi_complete(n); 3544 napi_complete(n);
3083 local_irq_disable(); 3545 local_irq_disable();
3084 } else 3546 } else
3085 list_move_tail(&n->poll_list, list); 3547 list_move_tail(&n->poll_list, &sd->poll_list);
3086 } 3548 }
3087 3549
3088 netpoll_poll_unlock(have); 3550 netpoll_poll_unlock(have);
3089 } 3551 }
3090out: 3552out:
3091 local_irq_enable(); 3553 net_rps_action_and_irq_enable(sd);
3092 3554
3093#ifdef CONFIG_NET_DMA 3555#ifdef CONFIG_NET_DMA
3094 /* 3556 /*
@@ -3101,7 +3563,7 @@ out:
3101 return; 3563 return;
3102 3564
3103softnet_break: 3565softnet_break:
3104 __get_cpu_var(netdev_rx_stat).time_squeeze++; 3566 sd->time_squeeze++;
3105 __raise_softirq_irqoff(NET_RX_SOFTIRQ); 3567 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3106 goto out; 3568 goto out;
3107} 3569}
@@ -3264,10 +3726,11 @@ void dev_seq_stop(struct seq_file *seq, void *v)
3264 3726
3265static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) 3727static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3266{ 3728{
3267 const struct net_device_stats *stats = dev_get_stats(dev); 3729 struct rtnl_link_stats64 temp;
3730 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
3268 3731
3269 seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " 3732 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
3270 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", 3733 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
3271 dev->name, stats->rx_bytes, stats->rx_packets, 3734 dev->name, stats->rx_bytes, stats->rx_packets,
3272 stats->rx_errors, 3735 stats->rx_errors,
3273 stats->rx_dropped + stats->rx_missed_errors, 3736 stats->rx_dropped + stats->rx_missed_errors,
@@ -3302,17 +3765,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
3302 return 0; 3765 return 0;
3303} 3766}
3304 3767
3305static struct netif_rx_stats *softnet_get_online(loff_t *pos) 3768static struct softnet_data *softnet_get_online(loff_t *pos)
3306{ 3769{
3307 struct netif_rx_stats *rc = NULL; 3770 struct softnet_data *sd = NULL;
3308 3771
3309 while (*pos < nr_cpu_ids) 3772 while (*pos < nr_cpu_ids)
3310 if (cpu_online(*pos)) { 3773 if (cpu_online(*pos)) {
3311 rc = &per_cpu(netdev_rx_stat, *pos); 3774 sd = &per_cpu(softnet_data, *pos);
3312 break; 3775 break;
3313 } else 3776 } else
3314 ++*pos; 3777 ++*pos;
3315 return rc; 3778 return sd;
3316} 3779}
3317 3780
3318static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) 3781static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3332,12 +3795,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
3332 3795
3333static int softnet_seq_show(struct seq_file *seq, void *v) 3796static int softnet_seq_show(struct seq_file *seq, void *v)
3334{ 3797{
3335 struct netif_rx_stats *s = v; 3798 struct softnet_data *sd = v;
3336 3799
3337 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", 3800 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3338 s->total, s->dropped, s->time_squeeze, 0, 3801 sd->processed, sd->dropped, sd->time_squeeze, 0,
3339 0, 0, 0, 0, /* was fastroute */ 3802 0, 0, 0, 0, /* was fastroute */
3340 s->cpu_collision); 3803 sd->cpu_collision, sd->received_rps);
3341 return 0; 3804 return 0;
3342} 3805}
3343 3806
@@ -3560,11 +4023,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
3560 4023
3561 slave->master = master; 4024 slave->master = master;
3562 4025
3563 synchronize_net(); 4026 if (old) {
3564 4027 synchronize_net();
3565 if (old)
3566 dev_put(old); 4028 dev_put(old);
3567 4029 }
3568 if (master) 4030 if (master)
3569 slave->flags |= IFF_SLAVE; 4031 slave->flags |= IFF_SLAVE;
3570 else 4032 else
@@ -3741,562 +4203,6 @@ void dev_set_rx_mode(struct net_device *dev)
3741 netif_addr_unlock_bh(dev); 4203 netif_addr_unlock_bh(dev);
3742} 4204}
3743 4205
3744/* hw addresses list handling functions */
3745
3746static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3747 int addr_len, unsigned char addr_type)
3748{
3749 struct netdev_hw_addr *ha;
3750 int alloc_size;
3751
3752 if (addr_len > MAX_ADDR_LEN)
3753 return -EINVAL;
3754
3755 list_for_each_entry(ha, &list->list, list) {
3756 if (!memcmp(ha->addr, addr, addr_len) &&
3757 ha->type == addr_type) {
3758 ha->refcount++;
3759 return 0;
3760 }
3761 }
3762
3763
3764 alloc_size = sizeof(*ha);
3765 if (alloc_size < L1_CACHE_BYTES)
3766 alloc_size = L1_CACHE_BYTES;
3767 ha = kmalloc(alloc_size, GFP_ATOMIC);
3768 if (!ha)
3769 return -ENOMEM;
3770 memcpy(ha->addr, addr, addr_len);
3771 ha->type = addr_type;
3772 ha->refcount = 1;
3773 ha->synced = false;
3774 list_add_tail_rcu(&ha->list, &list->list);
3775 list->count++;
3776 return 0;
3777}
3778
3779static void ha_rcu_free(struct rcu_head *head)
3780{
3781 struct netdev_hw_addr *ha;
3782
3783 ha = container_of(head, struct netdev_hw_addr, rcu_head);
3784 kfree(ha);
3785}
3786
3787static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3788 int addr_len, unsigned char addr_type)
3789{
3790 struct netdev_hw_addr *ha;
3791
3792 list_for_each_entry(ha, &list->list, list) {
3793 if (!memcmp(ha->addr, addr, addr_len) &&
3794 (ha->type == addr_type || !addr_type)) {
3795 if (--ha->refcount)
3796 return 0;
3797 list_del_rcu(&ha->list);
3798 call_rcu(&ha->rcu_head, ha_rcu_free);
3799 list->count--;
3800 return 0;
3801 }
3802 }
3803 return -ENOENT;
3804}
3805
3806static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3807 struct netdev_hw_addr_list *from_list,
3808 int addr_len,
3809 unsigned char addr_type)
3810{
3811 int err;
3812 struct netdev_hw_addr *ha, *ha2;
3813 unsigned char type;
3814
3815 list_for_each_entry(ha, &from_list->list, list) {
3816 type = addr_type ? addr_type : ha->type;
3817 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
3818 if (err)
3819 goto unroll;
3820 }
3821 return 0;
3822
3823unroll:
3824 list_for_each_entry(ha2, &from_list->list, list) {
3825 if (ha2 == ha)
3826 break;
3827 type = addr_type ? addr_type : ha2->type;
3828 __hw_addr_del(to_list, ha2->addr, addr_len, type);
3829 }
3830 return err;
3831}
3832
3833static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3834 struct netdev_hw_addr_list *from_list,
3835 int addr_len,
3836 unsigned char addr_type)
3837{
3838 struct netdev_hw_addr *ha;
3839 unsigned char type;
3840
3841 list_for_each_entry(ha, &from_list->list, list) {
3842 type = addr_type ? addr_type : ha->type;
3843 __hw_addr_del(to_list, ha->addr, addr_len, type);
3844 }
3845}
3846
3847static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3848 struct netdev_hw_addr_list *from_list,
3849 int addr_len)
3850{
3851 int err = 0;
3852 struct netdev_hw_addr *ha, *tmp;
3853
3854 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3855 if (!ha->synced) {
3856 err = __hw_addr_add(to_list, ha->addr,
3857 addr_len, ha->type);
3858 if (err)
3859 break;
3860 ha->synced = true;
3861 ha->refcount++;
3862 } else if (ha->refcount == 1) {
3863 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3864 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
3865 }
3866 }
3867 return err;
3868}
3869
3870static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3871 struct netdev_hw_addr_list *from_list,
3872 int addr_len)
3873{
3874 struct netdev_hw_addr *ha, *tmp;
3875
3876 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
3877 if (ha->synced) {
3878 __hw_addr_del(to_list, ha->addr,
3879 addr_len, ha->type);
3880 ha->synced = false;
3881 __hw_addr_del(from_list, ha->addr,
3882 addr_len, ha->type);
3883 }
3884 }
3885}
3886
3887static void __hw_addr_flush(struct netdev_hw_addr_list *list)
3888{
3889 struct netdev_hw_addr *ha, *tmp;
3890
3891 list_for_each_entry_safe(ha, tmp, &list->list, list) {
3892 list_del_rcu(&ha->list);
3893 call_rcu(&ha->rcu_head, ha_rcu_free);
3894 }
3895 list->count = 0;
3896}
3897
3898static void __hw_addr_init(struct netdev_hw_addr_list *list)
3899{
3900 INIT_LIST_HEAD(&list->list);
3901 list->count = 0;
3902}
3903
3904/* Device addresses handling functions */
3905
3906static void dev_addr_flush(struct net_device *dev)
3907{
3908 /* rtnl_mutex must be held here */
3909
3910 __hw_addr_flush(&dev->dev_addrs);
3911 dev->dev_addr = NULL;
3912}
3913
3914static int dev_addr_init(struct net_device *dev)
3915{
3916 unsigned char addr[MAX_ADDR_LEN];
3917 struct netdev_hw_addr *ha;
3918 int err;
3919
3920 /* rtnl_mutex must be held here */
3921
3922 __hw_addr_init(&dev->dev_addrs);
3923 memset(addr, 0, sizeof(addr));
3924 err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
3925 NETDEV_HW_ADDR_T_LAN);
3926 if (!err) {
3927 /*
3928 * Get the first (previously created) address from the list
3929 * and set dev_addr pointer to this location.
3930 */
3931 ha = list_first_entry(&dev->dev_addrs.list,
3932 struct netdev_hw_addr, list);
3933 dev->dev_addr = ha->addr;
3934 }
3935 return err;
3936}
3937
3938/**
3939 * dev_addr_add - Add a device address
3940 * @dev: device
3941 * @addr: address to add
3942 * @addr_type: address type
3943 *
3944 * Add a device address to the device or increase the reference count if
3945 * it already exists.
3946 *
3947 * The caller must hold the rtnl_mutex.
3948 */
3949int dev_addr_add(struct net_device *dev, unsigned char *addr,
3950 unsigned char addr_type)
3951{
3952 int err;
3953
3954 ASSERT_RTNL();
3955
3956 err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
3957 if (!err)
3958 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3959 return err;
3960}
3961EXPORT_SYMBOL(dev_addr_add);
3962
3963/**
3964 * dev_addr_del - Release a device address.
3965 * @dev: device
3966 * @addr: address to delete
3967 * @addr_type: address type
3968 *
3969 * Release reference to a device address and remove it from the device
3970 * if the reference count drops to zero.
3971 *
3972 * The caller must hold the rtnl_mutex.
3973 */
3974int dev_addr_del(struct net_device *dev, unsigned char *addr,
3975 unsigned char addr_type)
3976{
3977 int err;
3978 struct netdev_hw_addr *ha;
3979
3980 ASSERT_RTNL();
3981
3982 /*
3983 * We can not remove the first address from the list because
3984 * dev->dev_addr points to that.
3985 */
3986 ha = list_first_entry(&dev->dev_addrs.list,
3987 struct netdev_hw_addr, list);
3988 if (ha->addr == dev->dev_addr && ha->refcount == 1)
3989 return -ENOENT;
3990
3991 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
3992 addr_type);
3993 if (!err)
3994 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3995 return err;
3996}
3997EXPORT_SYMBOL(dev_addr_del);
3998
3999/**
4000 * dev_addr_add_multiple - Add device addresses from another device
4001 * @to_dev: device to which addresses will be added
4002 * @from_dev: device from which addresses will be added
4003 * @addr_type: address type - 0 means the type will be taken from from_dev
4004 *
4005 * Add the device addresses of one device to another.
4006 *
4007 * The caller must hold the rtnl_mutex.
4008 */
4009int dev_addr_add_multiple(struct net_device *to_dev,
4010 struct net_device *from_dev,
4011 unsigned char addr_type)
4012{
4013 int err;
4014
4015 ASSERT_RTNL();
4016
4017 if (from_dev->addr_len != to_dev->addr_len)
4018 return -EINVAL;
4019 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4020 to_dev->addr_len, addr_type);
4021 if (!err)
4022 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4023 return err;
4024}
4025EXPORT_SYMBOL(dev_addr_add_multiple);
4026
4027/**
4028 * dev_addr_del_multiple - Delete device addresses by another device
4029 * @to_dev: device where the addresses will be deleted
4030 * @from_dev: device whose addresses will be deleted from @to_dev
4031 * @addr_type: address type - 0 means the type will be taken from from_dev
4032 *
4033 * Deletes those addresses in the to device that are listed in the from device.
4034 *
4035 * The caller must hold the rtnl_mutex.
4036 */
4037int dev_addr_del_multiple(struct net_device *to_dev,
4038 struct net_device *from_dev,
4039 unsigned char addr_type)
4040{
4041 ASSERT_RTNL();
4042
4043 if (from_dev->addr_len != to_dev->addr_len)
4044 return -EINVAL;
4045 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
4046 to_dev->addr_len, addr_type);
4047 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
4048 return 0;
4049}
4050EXPORT_SYMBOL(dev_addr_del_multiple);
4051
4052/* multicast addresses handling functions */
4053
4054int __dev_addr_delete(struct dev_addr_list **list, int *count,
4055 void *addr, int alen, int glbl)
4056{
4057 struct dev_addr_list *da;
4058
4059 for (; (da = *list) != NULL; list = &da->next) {
4060 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4061 alen == da->da_addrlen) {
4062 if (glbl) {
4063 int old_glbl = da->da_gusers;
4064 da->da_gusers = 0;
4065 if (old_glbl == 0)
4066 break;
4067 }
4068 if (--da->da_users)
4069 return 0;
4070
4071 *list = da->next;
4072 kfree(da);
4073 (*count)--;
4074 return 0;
4075 }
4076 }
4077 return -ENOENT;
4078}
4079
4080int __dev_addr_add(struct dev_addr_list **list, int *count,
4081 void *addr, int alen, int glbl)
4082{
4083 struct dev_addr_list *da;
4084
4085 for (da = *list; da != NULL; da = da->next) {
4086 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4087 da->da_addrlen == alen) {
4088 if (glbl) {
4089 int old_glbl = da->da_gusers;
4090 da->da_gusers = 1;
4091 if (old_glbl)
4092 return 0;
4093 }
4094 da->da_users++;
4095 return 0;
4096 }
4097 }
4098
4099 da = kzalloc(sizeof(*da), GFP_ATOMIC);
4100 if (da == NULL)
4101 return -ENOMEM;
4102 memcpy(da->da_addr, addr, alen);
4103 da->da_addrlen = alen;
4104 da->da_users = 1;
4105 da->da_gusers = glbl ? 1 : 0;
4106 da->next = *list;
4107 *list = da;
4108 (*count)++;
4109 return 0;
4110}
4111
4112/**
4113 * dev_unicast_delete - Release secondary unicast address.
4114 * @dev: device
4115 * @addr: address to delete
4116 *
4117 * Release reference to a secondary unicast address and remove it
4118 * from the device if the reference count drops to zero.
4119 *
4120 * The caller must hold the rtnl_mutex.
4121 */
4122int dev_unicast_delete(struct net_device *dev, void *addr)
4123{
4124 int err;
4125
4126 ASSERT_RTNL();
4127
4128 netif_addr_lock_bh(dev);
4129 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
4130 NETDEV_HW_ADDR_T_UNICAST);
4131 if (!err)
4132 __dev_set_rx_mode(dev);
4133 netif_addr_unlock_bh(dev);
4134 return err;
4135}
4136EXPORT_SYMBOL(dev_unicast_delete);
4137
4138/**
4139 * dev_unicast_add - add a secondary unicast address
4140 * @dev: device
4141 * @addr: address to add
4142 *
4143 * Add a secondary unicast address to the device or increase
4144 * the reference count if it already exists.
4145 *
4146 * The caller must hold the rtnl_mutex.
4147 */
4148int dev_unicast_add(struct net_device *dev, void *addr)
4149{
4150 int err;
4151
4152 ASSERT_RTNL();
4153
4154 netif_addr_lock_bh(dev);
4155 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
4156 NETDEV_HW_ADDR_T_UNICAST);
4157 if (!err)
4158 __dev_set_rx_mode(dev);
4159 netif_addr_unlock_bh(dev);
4160 return err;
4161}
4162EXPORT_SYMBOL(dev_unicast_add);
4163
4164int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4165 struct dev_addr_list **from, int *from_count)
4166{
4167 struct dev_addr_list *da, *next;
4168 int err = 0;
4169
4170 da = *from;
4171 while (da != NULL) {
4172 next = da->next;
4173 if (!da->da_synced) {
4174 err = __dev_addr_add(to, to_count,
4175 da->da_addr, da->da_addrlen, 0);
4176 if (err < 0)
4177 break;
4178 da->da_synced = 1;
4179 da->da_users++;
4180 } else if (da->da_users == 1) {
4181 __dev_addr_delete(to, to_count,
4182 da->da_addr, da->da_addrlen, 0);
4183 __dev_addr_delete(from, from_count,
4184 da->da_addr, da->da_addrlen, 0);
4185 }
4186 da = next;
4187 }
4188 return err;
4189}
4190EXPORT_SYMBOL_GPL(__dev_addr_sync);
4191
4192void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4193 struct dev_addr_list **from, int *from_count)
4194{
4195 struct dev_addr_list *da, *next;
4196
4197 da = *from;
4198 while (da != NULL) {
4199 next = da->next;
4200 if (da->da_synced) {
4201 __dev_addr_delete(to, to_count,
4202 da->da_addr, da->da_addrlen, 0);
4203 da->da_synced = 0;
4204 __dev_addr_delete(from, from_count,
4205 da->da_addr, da->da_addrlen, 0);
4206 }
4207 da = next;
4208 }
4209}
4210EXPORT_SYMBOL_GPL(__dev_addr_unsync);
4211
4212/**
4213 * dev_unicast_sync - Synchronize device's unicast list to another device
4214 * @to: destination device
4215 * @from: source device
4216 *
4217 * Add newly added addresses to the destination device and release
4218 * addresses that have no users left. The source device must be
4219 * locked by netif_tx_lock_bh.
4220 *
4221 * This function is intended to be called from the dev->set_rx_mode
4222 * function of layered software devices.
4223 */
4224int dev_unicast_sync(struct net_device *to, struct net_device *from)
4225{
4226 int err = 0;
4227
4228 if (to->addr_len != from->addr_len)
4229 return -EINVAL;
4230
4231 netif_addr_lock_bh(to);
4232 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
4233 if (!err)
4234 __dev_set_rx_mode(to);
4235 netif_addr_unlock_bh(to);
4236 return err;
4237}
4238EXPORT_SYMBOL(dev_unicast_sync);
4239
4240/**
4241 * dev_unicast_unsync - Remove synchronized addresses from the destination device
4242 * @to: destination device
4243 * @from: source device
4244 *
4245 * Remove all addresses that were added to the destination device by
4246 * dev_unicast_sync(). This function is intended to be called from the
4247 * dev->stop function of layered software devices.
4248 */
4249void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4250{
4251 if (to->addr_len != from->addr_len)
4252 return;
4253
4254 netif_addr_lock_bh(from);
4255 netif_addr_lock(to);
4256 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
4257 __dev_set_rx_mode(to);
4258 netif_addr_unlock(to);
4259 netif_addr_unlock_bh(from);
4260}
4261EXPORT_SYMBOL(dev_unicast_unsync);
4262
4263static void dev_unicast_flush(struct net_device *dev)
4264{
4265 netif_addr_lock_bh(dev);
4266 __hw_addr_flush(&dev->uc);
4267 netif_addr_unlock_bh(dev);
4268}
4269
4270static void dev_unicast_init(struct net_device *dev)
4271{
4272 __hw_addr_init(&dev->uc);
4273}
4274
4275
4276static void __dev_addr_discard(struct dev_addr_list **list)
4277{
4278 struct dev_addr_list *tmp;
4279
4280 while (*list != NULL) {
4281 tmp = *list;
4282 *list = tmp->next;
4283 if (tmp->da_users > tmp->da_gusers)
4284 printk("__dev_addr_discard: address leakage! "
4285 "da_users=%d\n", tmp->da_users);
4286 kfree(tmp);
4287 }
4288}
4289
4290static void dev_addr_discard(struct net_device *dev)
4291{
4292 netif_addr_lock_bh(dev);
4293
4294 __dev_addr_discard(&dev->mc_list);
4295 netdev_mc_count(dev) = 0;
4296
4297 netif_addr_unlock_bh(dev);
4298}
4299
4300/** 4206/**
4301 * dev_get_flags - get flags reported to userspace 4207 * dev_get_flags - get flags reported to userspace
4302 * @dev: device 4208 * @dev: device
@@ -4607,8 +4513,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4607 return -EINVAL; 4513 return -EINVAL;
4608 if (!netif_device_present(dev)) 4514 if (!netif_device_present(dev))
4609 return -ENODEV; 4515 return -ENODEV;
4610 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, 4516 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
4611 dev->addr_len, 1);
4612 4517
4613 case SIOCDELMULTI: 4518 case SIOCDELMULTI:
4614 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || 4519 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
@@ -4616,8 +4521,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4616 return -EINVAL; 4521 return -EINVAL;
4617 if (!netif_device_present(dev)) 4522 if (!netif_device_present(dev))
4618 return -ENODEV; 4523 return -ENODEV;
4619 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, 4524 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
4620 dev->addr_len, 1);
4621 4525
4622 case SIOCSIFTXQLEN: 4526 case SIOCSIFTXQLEN:
4623 if (ifr->ifr_qlen < 0) 4527 if (ifr->ifr_qlen < 0)
@@ -4924,8 +4828,8 @@ static void rollback_registered_many(struct list_head *head)
4924 /* 4828 /*
4925 * Flush the unicast and multicast chains 4829 * Flush the unicast and multicast chains
4926 */ 4830 */
4927 dev_unicast_flush(dev); 4831 dev_uc_flush(dev);
4928 dev_addr_discard(dev); 4832 dev_mc_flush(dev);
4929 4833
4930 if (dev->netdev_ops->ndo_uninit) 4834 if (dev->netdev_ops->ndo_uninit)
4931 dev->netdev_ops->ndo_uninit(dev); 4835 dev->netdev_ops->ndo_uninit(dev);
@@ -5074,6 +4978,24 @@ int register_netdevice(struct net_device *dev)
5074 4978
5075 dev->iflink = -1; 4979 dev->iflink = -1;
5076 4980
4981#ifdef CONFIG_RPS
4982 if (!dev->num_rx_queues) {
4983 /*
4984 * Allocate a single RX queue if driver never called
4985 * alloc_netdev_mq
4986 */
4987
4988 dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
4989 if (!dev->_rx) {
4990 ret = -ENOMEM;
4991 goto out;
4992 }
4993
4994 dev->_rx->first = dev->_rx;
4995 atomic_set(&dev->_rx->count, 1);
4996 dev->num_rx_queues = 1;
4997 }
4998#endif
5077 /* Init, if this function is available */ 4999 /* Init, if this function is available */
5078 if (dev->netdev_ops->ndo_init) { 5000 if (dev->netdev_ops->ndo_init) {
5079 ret = dev->netdev_ops->ndo_init(dev); 5001 ret = dev->netdev_ops->ndo_init(dev);
@@ -5084,7 +5006,7 @@ int register_netdevice(struct net_device *dev)
5084 } 5006 }
5085 } 5007 }
5086 5008
5087 ret = dev_get_valid_name(net, dev->name, dev->name, 0); 5009 ret = dev_get_valid_name(dev, dev->name, 0);
5088 if (ret) 5010 if (ret)
5089 goto err_uninit; 5011 goto err_uninit;
5090 5012
@@ -5113,8 +5035,6 @@ int register_netdevice(struct net_device *dev)
5113 if (dev->features & NETIF_F_SG) 5035 if (dev->features & NETIF_F_SG)
5114 dev->features |= NETIF_F_GSO; 5036 dev->features |= NETIF_F_GSO;
5115 5037
5116 netdev_initialize_kobject(dev);
5117
5118 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5038 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5119 ret = notifier_to_errno(ret); 5039 ret = notifier_to_errno(ret);
5120 if (ret) 5040 if (ret)
@@ -5359,20 +5279,22 @@ void netdev_run_todo(void)
5359/** 5279/**
5360 * dev_txq_stats_fold - fold tx_queues stats 5280 * dev_txq_stats_fold - fold tx_queues stats
5361 * @dev: device to get statistics from 5281 * @dev: device to get statistics from
5362 * @stats: struct net_device_stats to hold results 5282 * @stats: struct rtnl_link_stats64 to hold results
5363 */ 5283 */
5364void dev_txq_stats_fold(const struct net_device *dev, 5284void dev_txq_stats_fold(const struct net_device *dev,
5365 struct net_device_stats *stats) 5285 struct rtnl_link_stats64 *stats)
5366{ 5286{
5367 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0; 5287 u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5368 unsigned int i; 5288 unsigned int i;
5369 struct netdev_queue *txq; 5289 struct netdev_queue *txq;
5370 5290
5371 for (i = 0; i < dev->num_tx_queues; i++) { 5291 for (i = 0; i < dev->num_tx_queues; i++) {
5372 txq = netdev_get_tx_queue(dev, i); 5292 txq = netdev_get_tx_queue(dev, i);
5293 spin_lock_bh(&txq->_xmit_lock);
5373 tx_bytes += txq->tx_bytes; 5294 tx_bytes += txq->tx_bytes;
5374 tx_packets += txq->tx_packets; 5295 tx_packets += txq->tx_packets;
5375 tx_dropped += txq->tx_dropped; 5296 tx_dropped += txq->tx_dropped;
5297 spin_unlock_bh(&txq->_xmit_lock);
5376 } 5298 }
5377 if (tx_bytes || tx_packets || tx_dropped) { 5299 if (tx_bytes || tx_packets || tx_dropped) {
5378 stats->tx_bytes = tx_bytes; 5300 stats->tx_bytes = tx_bytes;
@@ -5382,23 +5304,53 @@ void dev_txq_stats_fold(const struct net_device *dev,
5382} 5304}
5383EXPORT_SYMBOL(dev_txq_stats_fold); 5305EXPORT_SYMBOL(dev_txq_stats_fold);
5384 5306
5307/* Convert net_device_stats to rtnl_link_stats64. They have the same
5308 * fields in the same order, with only the type differing.
5309 */
5310static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
5311 const struct net_device_stats *netdev_stats)
5312{
5313#if BITS_PER_LONG == 64
5314 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
5315 memcpy(stats64, netdev_stats, sizeof(*stats64));
5316#else
5317 size_t i, n = sizeof(*stats64) / sizeof(u64);
5318 const unsigned long *src = (const unsigned long *)netdev_stats;
5319 u64 *dst = (u64 *)stats64;
5320
5321 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
5322 sizeof(*stats64) / sizeof(u64));
5323 for (i = 0; i < n; i++)
5324 dst[i] = src[i];
5325#endif
5326}
5327
5385/** 5328/**
5386 * dev_get_stats - get network device statistics 5329 * dev_get_stats - get network device statistics
5387 * @dev: device to get statistics from 5330 * @dev: device to get statistics from
5331 * @storage: place to store stats
5388 * 5332 *
5389 * Get network statistics from device. The device driver may provide 5333 * Get network statistics from device. Return @storage.
5390 * its own method by setting dev->netdev_ops->get_stats; otherwise 5334 * The device driver may provide its own method by setting
5391 * the internal statistics structure is used. 5335 * dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
5336 * otherwise the internal statistics structure is used.
5392 */ 5337 */
5393const struct net_device_stats *dev_get_stats(struct net_device *dev) 5338struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
5339 struct rtnl_link_stats64 *storage)
5394{ 5340{
5395 const struct net_device_ops *ops = dev->netdev_ops; 5341 const struct net_device_ops *ops = dev->netdev_ops;
5396 5342
5397 if (ops->ndo_get_stats) 5343 if (ops->ndo_get_stats64) {
5398 return ops->ndo_get_stats(dev); 5344 memset(storage, 0, sizeof(*storage));
5399 5345 return ops->ndo_get_stats64(dev, storage);
5400 dev_txq_stats_fold(dev, &dev->stats); 5346 }
5401 return &dev->stats; 5347 if (ops->ndo_get_stats) {
5348 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
5349 return storage;
5350 }
5351 netdev_stats_to_stats64(storage, &dev->stats);
5352 dev_txq_stats_fold(dev, storage);
5353 return storage;
5402} 5354}
5403EXPORT_SYMBOL(dev_get_stats); 5355EXPORT_SYMBOL(dev_get_stats);
5404 5356
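A hedged sketch of a driver opting into the new 64-bit hook; struct my_priv and its counters are illustrative. dev_get_stats() zeroes *storage before the call, so a driver only fills in what it tracks:

struct my_priv {
	u64 rx_packets, rx_bytes, tx_packets, tx_bytes;	/* hypothetical counters */
};

static struct rtnl_link_stats64 *my_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *storage)
{
	struct my_priv *priv = netdev_priv(dev);

	storage->rx_packets = priv->rx_packets;
	storage->rx_bytes   = priv->rx_bytes;
	storage->tx_packets = priv->tx_packets;
	storage->tx_bytes   = priv->tx_bytes;
	return storage;
}

static const struct net_device_ops my_netdev_ops = {
	.ndo_get_stats64	= my_get_stats64,
	/* ... other ops ... */
};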
@@ -5434,6 +5386,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5434 struct net_device *dev; 5386 struct net_device *dev;
5435 size_t alloc_size; 5387 size_t alloc_size;
5436 struct net_device *p; 5388 struct net_device *p;
5389#ifdef CONFIG_RPS
5390 struct netdev_rx_queue *rx;
5391 int i;
5392#endif
5437 5393
5438 BUG_ON(strlen(name) >= sizeof(dev->name)); 5394 BUG_ON(strlen(name) >= sizeof(dev->name));
5439 5395
@@ -5459,13 +5415,32 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5459 goto free_p; 5415 goto free_p;
5460 } 5416 }
5461 5417
5418#ifdef CONFIG_RPS
5419 rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5420 if (!rx) {
5421 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5422 "rx queues.\n");
5423 goto free_tx;
5424 }
5425
5426 atomic_set(&rx->count, queue_count);
5427
5428 /*
5429 * Set a pointer to first element in the array which holds the
5430 * reference count.
5431 */
5432 for (i = 0; i < queue_count; i++)
5433 rx[i].first = rx;
5434#endif
5435
5462 dev = PTR_ALIGN(p, NETDEV_ALIGN); 5436 dev = PTR_ALIGN(p, NETDEV_ALIGN);
5463 dev->padded = (char *)dev - (char *)p; 5437 dev->padded = (char *)dev - (char *)p;
5464 5438
5465 if (dev_addr_init(dev)) 5439 if (dev_addr_init(dev))
5466 goto free_tx; 5440 goto free_rx;
5467 5441
5468 dev_unicast_init(dev); 5442 dev_mc_init(dev);
5443 dev_uc_init(dev);
5469 5444
5470 dev_net_set(dev, &init_net); 5445 dev_net_set(dev, &init_net);
5471 5446
@@ -5473,6 +5448,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5473 dev->num_tx_queues = queue_count; 5448 dev->num_tx_queues = queue_count;
5474 dev->real_num_tx_queues = queue_count; 5449 dev->real_num_tx_queues = queue_count;
5475 5450
5451#ifdef CONFIG_RPS
5452 dev->_rx = rx;
5453 dev->num_rx_queues = queue_count;
5454#endif
5455
5476 dev->gso_max_size = GSO_MAX_SIZE; 5456 dev->gso_max_size = GSO_MAX_SIZE;
5477 5457
5478 netdev_init_queues(dev); 5458 netdev_init_queues(dev);
@@ -5487,9 +5467,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5487 strcpy(dev->name, name); 5467 strcpy(dev->name, name);
5488 return dev; 5468 return dev;
5489 5469
5470free_rx:
5471#ifdef CONFIG_RPS
5472 kfree(rx);
5490free_tx: 5473free_tx:
5474#endif
5491 kfree(tx); 5475 kfree(tx);
5492
5493free_p: 5476free_p:
5494 kfree(p); 5477 kfree(p);
5495 return NULL; 5478 return NULL;
@@ -5635,15 +5618,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5635 if (dev->features & NETIF_F_NETNS_LOCAL) 5618 if (dev->features & NETIF_F_NETNS_LOCAL)
5636 goto out; 5619 goto out;
5637 5620
5638#ifdef CONFIG_SYSFS
5639 /* Don't allow real devices to be moved when sysfs
5640 * is enabled.
5641 */
5642 err = -EINVAL;
5643 if (dev->dev.parent)
5644 goto out;
5645#endif
5646
 5647 /* Ensure the device has been registered */ 5621
5648 err = -EINVAL; 5622 err = -EINVAL;
5649 if (dev->reg_state != NETREG_REGISTERED) 5623 if (dev->reg_state != NETREG_REGISTERED)
@@ -5662,7 +5636,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5662 /* We get here if we can't use the current device name */ 5636 /* We get here if we can't use the current device name */
5663 if (!pat) 5637 if (!pat)
5664 goto out; 5638 goto out;
5665 if (dev_get_valid_name(net, pat, dev->name, 1)) 5639 if (dev_get_valid_name(dev, pat, 1))
5666 goto out; 5640 goto out;
5667 } 5641 }
5668 5642
@@ -5691,10 +5665,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5691 /* 5665 /*
5692 * Flush the unicast and multicast chains 5666 * Flush the unicast and multicast chains
5693 */ 5667 */
5694 dev_unicast_flush(dev); 5668 dev_uc_flush(dev);
5695 dev_addr_discard(dev); 5669 dev_mc_flush(dev);
5696
5697 netdev_unregister_kobject(dev);
5698 5670
5699 /* Actually switch the network namespace */ 5671 /* Actually switch the network namespace */
5700 dev_net_set(dev, net); 5672 dev_net_set(dev, net);
@@ -5708,7 +5680,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
5708 } 5680 }
5709 5681
5710 /* Fixup kobjects */ 5682 /* Fixup kobjects */
5711 err = netdev_register_kobject(dev); 5683 err = device_rename(&dev->dev, dev->name);
5712 WARN_ON(err); 5684 WARN_ON(err);
5713 5685
5714 /* Add the device back in the hashes */ 5686 /* Add the device back in the hashes */
@@ -5735,7 +5707,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
5735 void *ocpu) 5707 void *ocpu)
5736{ 5708{
5737 struct sk_buff **list_skb; 5709 struct sk_buff **list_skb;
5738 struct Qdisc **list_net;
5739 struct sk_buff *skb; 5710 struct sk_buff *skb;
5740 unsigned int cpu, oldcpu = (unsigned long)ocpu; 5711 unsigned int cpu, oldcpu = (unsigned long)ocpu;
5741 struct softnet_data *sd, *oldsd; 5712 struct softnet_data *sd, *oldsd;
@@ -5756,20 +5727,26 @@ static int dev_cpu_callback(struct notifier_block *nfb,
5756 *list_skb = oldsd->completion_queue; 5727 *list_skb = oldsd->completion_queue;
5757 oldsd->completion_queue = NULL; 5728 oldsd->completion_queue = NULL;
5758 5729
5759 /* Find end of our output_queue. */
5760 list_net = &sd->output_queue;
5761 while (*list_net)
5762 list_net = &(*list_net)->next_sched;
5763 /* Append output queue from offline CPU. */ 5730 /* Append output queue from offline CPU. */
5764 *list_net = oldsd->output_queue; 5731 if (oldsd->output_queue) {
5765 oldsd->output_queue = NULL; 5732 *sd->output_queue_tailp = oldsd->output_queue;
5733 sd->output_queue_tailp = oldsd->output_queue_tailp;
5734 oldsd->output_queue = NULL;
5735 oldsd->output_queue_tailp = &oldsd->output_queue;
5736 }
5766 5737
5767 raise_softirq_irqoff(NET_TX_SOFTIRQ); 5738 raise_softirq_irqoff(NET_TX_SOFTIRQ);
5768 local_irq_enable(); 5739 local_irq_enable();
5769 5740
5770 /* Process offline CPU's input_pkt_queue */ 5741 /* Process offline CPU's input_pkt_queue */
5771 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) 5742 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
5743 netif_rx(skb);
5744 input_queue_head_incr(oldsd);
5745 }
5746 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
5772 netif_rx(skb); 5747 netif_rx(skb);
5748 input_queue_head_incr(oldsd);
5749 }
5773 5750
5774 return NOTIFY_OK; 5751 return NOTIFY_OK;
5775} 5752}
@@ -5878,6 +5855,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5878 return buffer; 5855 return buffer;
5879} 5856}
5880 5857
5858static int __netdev_printk(const char *level, const struct net_device *dev,
5859 struct va_format *vaf)
5860{
5861 int r;
5862
5863 if (dev && dev->dev.parent)
5864 r = dev_printk(level, dev->dev.parent, "%s: %pV",
5865 netdev_name(dev), vaf);
5866 else if (dev)
5867 r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
5868 else
5869 r = printk("%s(NULL net_device): %pV", level, vaf);
5870
5871 return r;
5872}
5873
5874int netdev_printk(const char *level, const struct net_device *dev,
5875 const char *format, ...)
5876{
5877 struct va_format vaf;
5878 va_list args;
5879 int r;
5880
5881 va_start(args, format);
5882
5883 vaf.fmt = format;
5884 vaf.va = &args;
5885
5886 r = __netdev_printk(level, dev, &vaf);
5887 va_end(args);
5888
5889 return r;
5890}
5891EXPORT_SYMBOL(netdev_printk);
5892
5893#define define_netdev_printk_level(func, level) \
5894int func(const struct net_device *dev, const char *fmt, ...) \
5895{ \
5896 int r; \
5897 struct va_format vaf; \
5898 va_list args; \
5899 \
5900 va_start(args, fmt); \
5901 \
5902 vaf.fmt = fmt; \
5903 vaf.va = &args; \
5904 \
5905 r = __netdev_printk(level, dev, &vaf); \
5906 va_end(args); \
5907 \
5908 return r; \
5909} \
5910EXPORT_SYMBOL(func);
5911
5912define_netdev_printk_level(netdev_emerg, KERN_EMERG);
5913define_netdev_printk_level(netdev_alert, KERN_ALERT);
5914define_netdev_printk_level(netdev_crit, KERN_CRIT);
5915define_netdev_printk_level(netdev_err, KERN_ERR);
5916define_netdev_printk_level(netdev_warn, KERN_WARNING);
5917define_netdev_printk_level(netdev_notice, KERN_NOTICE);
5918define_netdev_printk_level(netdev_info, KERN_INFO);
5919
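The generated helpers behave like leveled printk variants that prefix the
message with the device (and, where present, parent bus device) name; a
short illustrative fragment with hypothetical names:

#include <linux/netdevice.h>

static void foo_report_link(struct net_device *dev, unsigned int speed_mbps)
{
	if (speed_mbps)
		netdev_info(dev, "link up, %u Mbps\n", speed_mbps);
	else
		netdev_warn(dev, "link is down\n");
}
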
5881static void __net_exit netdev_exit(struct net *net) 5920static void __net_exit netdev_exit(struct net *net)
5882{ 5921{
5883 kfree(net->dev_name_head); 5922 kfree(net->dev_name_head);
@@ -5985,17 +6024,26 @@ static int __init net_dev_init(void)
5985 */ 6024 */
5986 6025
5987 for_each_possible_cpu(i) { 6026 for_each_possible_cpu(i) {
5988 struct softnet_data *queue; 6027 struct softnet_data *sd = &per_cpu(softnet_data, i);
5989 6028
5990 queue = &per_cpu(softnet_data, i); 6029 memset(sd, 0, sizeof(*sd));
5991 skb_queue_head_init(&queue->input_pkt_queue); 6030 skb_queue_head_init(&sd->input_pkt_queue);
5992 queue->completion_queue = NULL; 6031 skb_queue_head_init(&sd->process_queue);
5993 INIT_LIST_HEAD(&queue->poll_list); 6032 sd->completion_queue = NULL;
6033 INIT_LIST_HEAD(&sd->poll_list);
6034 sd->output_queue = NULL;
6035 sd->output_queue_tailp = &sd->output_queue;
6036#ifdef CONFIG_RPS
6037 sd->csd.func = rps_trigger_softirq;
6038 sd->csd.info = sd;
6039 sd->csd.flags = 0;
6040 sd->cpu = i;
6041#endif
5994 6042
5995 queue->backlog.poll = process_backlog; 6043 sd->backlog.poll = process_backlog;
5996 queue->backlog.weight = weight_p; 6044 sd->backlog.weight = weight_p;
5997 queue->backlog.gro_list = NULL; 6045 sd->backlog.gro_list = NULL;
5998 queue->backlog.gro_count = 0; 6046 sd->backlog.gro_count = 0;
5999 } 6047 }
6000 6048
6001 dev_boot_phase = 0; 6049 dev_boot_phase = 0;
@@ -6030,7 +6078,7 @@ subsys_initcall(net_dev_init);
6030 6078
6031static int __init initialize_hashrnd(void) 6079static int __init initialize_hashrnd(void)
6032{ 6080{
6033 get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); 6081 get_random_bytes(&hashrnd, sizeof(hashrnd));
6034 return 0; 6082 return 0;
6035} 6083}
6036 6084
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
new file mode 100644
index 000000000000..508f9c18992f
--- /dev/null
+++ b/net/core/dev_addr_lists.c
@@ -0,0 +1,741 @@
1/*
2 * net/core/dev_addr_lists.c - Functions for handling net device lists
3 * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
4 *
5 * This file contains functions for working with unicast, multicast and device
6 * addresses lists.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 */
13
14#include <linux/netdevice.h>
15#include <linux/rtnetlink.h>
16#include <linux/list.h>
17#include <linux/proc_fs.h>
18
19/*
20 * General list handling functions
21 */
22
23static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
24 unsigned char *addr, int addr_len,
25 unsigned char addr_type, bool global)
26{
27 struct netdev_hw_addr *ha;
28 int alloc_size;
29
30 if (addr_len > MAX_ADDR_LEN)
31 return -EINVAL;
32
33 list_for_each_entry(ha, &list->list, list) {
34 if (!memcmp(ha->addr, addr, addr_len) &&
35 ha->type == addr_type) {
36 if (global) {
37 /* check if addr is already used as global */
38 if (ha->global_use)
39 return 0;
40 else
41 ha->global_use = true;
42 }
43 ha->refcount++;
44 return 0;
45 }
46 }
47
48
49 alloc_size = sizeof(*ha);
50 if (alloc_size < L1_CACHE_BYTES)
51 alloc_size = L1_CACHE_BYTES;
52 ha = kmalloc(alloc_size, GFP_ATOMIC);
53 if (!ha)
54 return -ENOMEM;
55 memcpy(ha->addr, addr, addr_len);
56 ha->type = addr_type;
57 ha->refcount = 1;
58 ha->global_use = global;
59 ha->synced = false;
60 list_add_tail_rcu(&ha->list, &list->list);
61 list->count++;
62 return 0;
63}
64
65static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
66 int addr_len, unsigned char addr_type)
67{
68 return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
69}
70
71static void ha_rcu_free(struct rcu_head *head)
72{
73 struct netdev_hw_addr *ha;
74
75 ha = container_of(head, struct netdev_hw_addr, rcu_head);
76 kfree(ha);
77}
78
79static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
80 unsigned char *addr, int addr_len,
81 unsigned char addr_type, bool global)
82{
83 struct netdev_hw_addr *ha;
84
85 list_for_each_entry(ha, &list->list, list) {
86 if (!memcmp(ha->addr, addr, addr_len) &&
87 (ha->type == addr_type || !addr_type)) {
88 if (global) {
89 if (!ha->global_use)
90 break;
91 else
92 ha->global_use = false;
93 }
94 if (--ha->refcount)
95 return 0;
96 list_del_rcu(&ha->list);
97 call_rcu(&ha->rcu_head, ha_rcu_free);
98 list->count--;
99 return 0;
100 }
101 }
102 return -ENOENT;
103}
104
105static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
106 int addr_len, unsigned char addr_type)
107{
108 return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
109}
110
111int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
112 struct netdev_hw_addr_list *from_list,
113 int addr_len, unsigned char addr_type)
114{
115 int err;
116 struct netdev_hw_addr *ha, *ha2;
117 unsigned char type;
118
119 list_for_each_entry(ha, &from_list->list, list) {
120 type = addr_type ? addr_type : ha->type;
121 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
122 if (err)
123 goto unroll;
124 }
125 return 0;
126
127unroll:
128 list_for_each_entry(ha2, &from_list->list, list) {
129 if (ha2 == ha)
130 break;
131 type = addr_type ? addr_type : ha2->type;
132 __hw_addr_del(to_list, ha2->addr, addr_len, type);
133 }
134 return err;
135}
136EXPORT_SYMBOL(__hw_addr_add_multiple);
137
138void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
139 struct netdev_hw_addr_list *from_list,
140 int addr_len, unsigned char addr_type)
141{
142 struct netdev_hw_addr *ha;
143 unsigned char type;
144
145 list_for_each_entry(ha, &from_list->list, list) {
146 type = addr_type ? addr_type : ha->type;
 147 __hw_addr_del(to_list, ha->addr, addr_len, type);
148 }
149}
150EXPORT_SYMBOL(__hw_addr_del_multiple);
151
152int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
153 struct netdev_hw_addr_list *from_list,
154 int addr_len)
155{
156 int err = 0;
157 struct netdev_hw_addr *ha, *tmp;
158
159 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
160 if (!ha->synced) {
161 err = __hw_addr_add(to_list, ha->addr,
162 addr_len, ha->type);
163 if (err)
164 break;
165 ha->synced = true;
166 ha->refcount++;
167 } else if (ha->refcount == 1) {
168 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
169 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
170 }
171 }
172 return err;
173}
174EXPORT_SYMBOL(__hw_addr_sync);
175
176void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
177 struct netdev_hw_addr_list *from_list,
178 int addr_len)
179{
180 struct netdev_hw_addr *ha, *tmp;
181
182 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
183 if (ha->synced) {
184 __hw_addr_del(to_list, ha->addr,
185 addr_len, ha->type);
186 ha->synced = false;
187 __hw_addr_del(from_list, ha->addr,
188 addr_len, ha->type);
189 }
190 }
191}
192EXPORT_SYMBOL(__hw_addr_unsync);
193
194void __hw_addr_flush(struct netdev_hw_addr_list *list)
195{
196 struct netdev_hw_addr *ha, *tmp;
197
198 list_for_each_entry_safe(ha, tmp, &list->list, list) {
199 list_del_rcu(&ha->list);
200 call_rcu(&ha->rcu_head, ha_rcu_free);
201 }
202 list->count = 0;
203}
204EXPORT_SYMBOL(__hw_addr_flush);
205
206void __hw_addr_init(struct netdev_hw_addr_list *list)
207{
208 INIT_LIST_HEAD(&list->list);
209 list->count = 0;
210}
211EXPORT_SYMBOL(__hw_addr_init);
212
213/*
214 * Device addresses handling functions
215 */
216
217/**
218 * dev_addr_flush - Flush device address list
219 * @dev: device
220 *
221 * Flush device address list and reset ->dev_addr.
222 *
223 * The caller must hold the rtnl_mutex.
224 */
225void dev_addr_flush(struct net_device *dev)
226{
227 /* rtnl_mutex must be held here */
228
229 __hw_addr_flush(&dev->dev_addrs);
230 dev->dev_addr = NULL;
231}
232EXPORT_SYMBOL(dev_addr_flush);
233
234/**
235 * dev_addr_init - Init device address list
236 * @dev: device
237 *
238 * Init device address list and create the first element,
239 * used by ->dev_addr.
240 *
241 * The caller must hold the rtnl_mutex.
242 */
243int dev_addr_init(struct net_device *dev)
244{
245 unsigned char addr[MAX_ADDR_LEN];
246 struct netdev_hw_addr *ha;
247 int err;
248
249 /* rtnl_mutex must be held here */
250
251 __hw_addr_init(&dev->dev_addrs);
252 memset(addr, 0, sizeof(addr));
253 err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
254 NETDEV_HW_ADDR_T_LAN);
255 if (!err) {
256 /*
257 * Get the first (previously created) address from the list
258 * and set dev_addr pointer to this location.
259 */
260 ha = list_first_entry(&dev->dev_addrs.list,
261 struct netdev_hw_addr, list);
262 dev->dev_addr = ha->addr;
263 }
264 return err;
265}
266EXPORT_SYMBOL(dev_addr_init);
267
268/**
269 * dev_addr_add - Add a device address
270 * @dev: device
271 * @addr: address to add
272 * @addr_type: address type
273 *
274 * Add a device address to the device or increase the reference count if
275 * it already exists.
276 *
277 * The caller must hold the rtnl_mutex.
278 */
279int dev_addr_add(struct net_device *dev, unsigned char *addr,
280 unsigned char addr_type)
281{
282 int err;
283
284 ASSERT_RTNL();
285
286 err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
287 if (!err)
288 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
289 return err;
290}
291EXPORT_SYMBOL(dev_addr_add);
292
293/**
294 * dev_addr_del - Release a device address.
295 * @dev: device
296 * @addr: address to delete
297 * @addr_type: address type
298 *
299 * Release reference to a device address and remove it from the device
300 * if the reference count drops to zero.
301 *
302 * The caller must hold the rtnl_mutex.
303 */
304int dev_addr_del(struct net_device *dev, unsigned char *addr,
305 unsigned char addr_type)
306{
307 int err;
308 struct netdev_hw_addr *ha;
309
310 ASSERT_RTNL();
311
312 /*
313 * We can not remove the first address from the list because
314 * dev->dev_addr points to that.
315 */
316 ha = list_first_entry(&dev->dev_addrs.list,
317 struct netdev_hw_addr, list);
318 if (ha->addr == dev->dev_addr && ha->refcount == 1)
319 return -ENOENT;
320
321 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
322 addr_type);
323 if (!err)
324 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
325 return err;
326}
327EXPORT_SYMBOL(dev_addr_del);
328
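Both helpers must run under the RTNL lock and are reference counted: adding
an existing address only bumps its refcount, and deletion removes the entry
once the count drops to zero. A hedged sketch (the foo_* name and the
address value are illustrative only):

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int foo_toggle_secondary_addr(struct net_device *dev, bool enable)
{
	/* hypothetical locally administered secondary MAC */
	static unsigned char extra_mac[ETH_ALEN] = {
		0x02, 0x00, 0x00, 0xaa, 0xbb, 0xcc
	};
	int err;

	rtnl_lock();
	if (enable)
		err = dev_addr_add(dev, extra_mac, NETDEV_HW_ADDR_T_LAN);
	else
		err = dev_addr_del(dev, extra_mac, NETDEV_HW_ADDR_T_LAN);
	rtnl_unlock();
	return err;
}
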
329/**
330 * dev_addr_add_multiple - Add device addresses from another device
331 * @to_dev: device to which addresses will be added
332 * @from_dev: device from which addresses will be added
333 * @addr_type: address type - 0 means type will be used from from_dev
334 *
 335 * Add the device addresses of one device to another.
 336 *
337 * The caller must hold the rtnl_mutex.
338 */
339int dev_addr_add_multiple(struct net_device *to_dev,
340 struct net_device *from_dev,
341 unsigned char addr_type)
342{
343 int err;
344
345 ASSERT_RTNL();
346
347 if (from_dev->addr_len != to_dev->addr_len)
348 return -EINVAL;
349 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
350 to_dev->addr_len, addr_type);
351 if (!err)
352 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
353 return err;
354}
355EXPORT_SYMBOL(dev_addr_add_multiple);
356
357/**
358 * dev_addr_del_multiple - Delete device addresses by another device
359 * @to_dev: device where the addresses will be deleted
 360 * @from_dev: device supplying the addresses to be deleted
 361 * @addr_type: address type - 0 means type will be used from from_dev
362 *
 363 * Deletes addresses in the to device that are listed in the from device.
364 *
365 * The caller must hold the rtnl_mutex.
366 */
367int dev_addr_del_multiple(struct net_device *to_dev,
368 struct net_device *from_dev,
369 unsigned char addr_type)
370{
371 ASSERT_RTNL();
372
373 if (from_dev->addr_len != to_dev->addr_len)
374 return -EINVAL;
375 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
376 to_dev->addr_len, addr_type);
377 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
378 return 0;
379}
380EXPORT_SYMBOL(dev_addr_del_multiple);
381
382/*
383 * Unicast list handling functions
384 */
385
386/**
387 * dev_uc_add - Add a secondary unicast address
388 * @dev: device
389 * @addr: address to add
390 *
391 * Add a secondary unicast address to the device or increase
392 * the reference count if it already exists.
393 */
394int dev_uc_add(struct net_device *dev, unsigned char *addr)
395{
396 int err;
397
398 netif_addr_lock_bh(dev);
399 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
400 NETDEV_HW_ADDR_T_UNICAST);
401 if (!err)
402 __dev_set_rx_mode(dev);
403 netif_addr_unlock_bh(dev);
404 return err;
405}
406EXPORT_SYMBOL(dev_uc_add);
407
408/**
409 * dev_uc_del - Release secondary unicast address.
410 * @dev: device
411 * @addr: address to delete
412 *
413 * Release reference to a secondary unicast address and remove it
414 * from the device if the reference count drops to zero.
415 */
416int dev_uc_del(struct net_device *dev, unsigned char *addr)
417{
418 int err;
419
420 netif_addr_lock_bh(dev);
421 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
422 NETDEV_HW_ADDR_T_UNICAST);
423 if (!err)
424 __dev_set_rx_mode(dev);
425 netif_addr_unlock_bh(dev);
426 return err;
427}
428EXPORT_SYMBOL(dev_uc_del);
429
430/**
431 * dev_uc_sync - Synchronize device's unicast list to another device
432 * @to: destination device
433 * @from: source device
434 *
435 * Add newly added addresses to the destination device and release
436 * addresses that have no users left. The source device must be
437 * locked by netif_tx_lock_bh.
438 *
439 * This function is intended to be called from the dev->set_rx_mode
440 * function of layered software devices.
441 */
442int dev_uc_sync(struct net_device *to, struct net_device *from)
443{
444 int err = 0;
445
446 if (to->addr_len != from->addr_len)
447 return -EINVAL;
448
449 netif_addr_lock_bh(to);
450 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
451 if (!err)
452 __dev_set_rx_mode(to);
453 netif_addr_unlock_bh(to);
454 return err;
455}
456EXPORT_SYMBOL(dev_uc_sync);
457
458/**
459 * dev_uc_unsync - Remove synchronized addresses from the destination device
460 * @to: destination device
461 * @from: source device
462 *
463 * Remove all addresses that were added to the destination device by
464 * dev_uc_sync(). This function is intended to be called from the
465 * dev->stop function of layered software devices.
466 */
467void dev_uc_unsync(struct net_device *to, struct net_device *from)
468{
469 if (to->addr_len != from->addr_len)
470 return;
471
472 netif_addr_lock_bh(from);
473 netif_addr_lock(to);
474 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
475 __dev_set_rx_mode(to);
476 netif_addr_unlock(to);
477 netif_addr_unlock_bh(from);
478}
479EXPORT_SYMBOL(dev_uc_unsync);
480
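As the comments above note, the sync/unsync pair is meant for layered
software devices pushing their address lists down to a real device. A
minimal sketch of that pattern, assuming a hypothetical foo_priv holding
the lower device pointer:

#include <linux/netdevice.h>

struct foo_priv {
	struct net_device *lower_dev;	/* hypothetical real device below */
};

static void foo_set_rx_mode(struct net_device *dev)
{
	struct foo_priv *priv = netdev_priv(dev);

	dev_uc_sync(priv->lower_dev, dev);	/* push unicast changes down */
	dev_mc_sync(priv->lower_dev, dev);	/* ...and multicast changes */
}

static int foo_stop(struct net_device *dev)
{
	struct foo_priv *priv = netdev_priv(dev);

	dev_uc_unsync(priv->lower_dev, dev);
	dev_mc_unsync(priv->lower_dev, dev);
	return 0;
}
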
481/**
482 * dev_uc_flush - Flush unicast addresses
483 * @dev: device
484 *
485 * Flush unicast addresses.
486 */
487void dev_uc_flush(struct net_device *dev)
488{
489 netif_addr_lock_bh(dev);
490 __hw_addr_flush(&dev->uc);
491 netif_addr_unlock_bh(dev);
492}
493EXPORT_SYMBOL(dev_uc_flush);
494
495/**
 496 * dev_uc_init - Init unicast address list
497 * @dev: device
498 *
499 * Init unicast address list.
500 */
501void dev_uc_init(struct net_device *dev)
502{
503 __hw_addr_init(&dev->uc);
504}
505EXPORT_SYMBOL(dev_uc_init);
506
507/*
508 * Multicast list handling functions
509 */
510
511static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
512 bool global)
513{
514 int err;
515
516 netif_addr_lock_bh(dev);
517 err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
518 NETDEV_HW_ADDR_T_MULTICAST, global);
519 if (!err)
520 __dev_set_rx_mode(dev);
521 netif_addr_unlock_bh(dev);
522 return err;
523}
524/**
525 * dev_mc_add - Add a multicast address
526 * @dev: device
527 * @addr: address to add
528 *
529 * Add a multicast address to the device or increase
530 * the reference count if it already exists.
531 */
532int dev_mc_add(struct net_device *dev, unsigned char *addr)
533{
534 return __dev_mc_add(dev, addr, false);
535}
536EXPORT_SYMBOL(dev_mc_add);
537
538/**
539 * dev_mc_add_global - Add a global multicast address
540 * @dev: device
541 * @addr: address to add
542 *
543 * Add a global multicast address to the device.
544 */
545int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
546{
547 return __dev_mc_add(dev, addr, true);
548}
549EXPORT_SYMBOL(dev_mc_add_global);
550
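The global variant tracks a single global_use flag instead of stacking
reference counts, so repeated global adds of the same address collapse into
one entry. A small illustration (the group address is arbitrary):

#include <linux/netdevice.h>

static int foo_join_group(struct net_device *dev, bool global)
{
	/* 224.0.0.1 mapped to its Ethernet multicast address */
	static unsigned char grp[ETH_ALEN] = {
		0x01, 0x00, 0x5e, 0x00, 0x00, 0x01
	};

	if (global)
		return dev_mc_add_global(dev, grp);
	/* refcounted: each add must be balanced by dev_mc_del() */
	return dev_mc_add(dev, grp);
}
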
551static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
552 bool global)
553{
554 int err;
555
556 netif_addr_lock_bh(dev);
557 err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
558 NETDEV_HW_ADDR_T_MULTICAST, global);
559 if (!err)
560 __dev_set_rx_mode(dev);
561 netif_addr_unlock_bh(dev);
562 return err;
563}
564
565/**
566 * dev_mc_del - Delete a multicast address.
567 * @dev: device
568 * @addr: address to delete
569 *
570 * Release reference to a multicast address and remove it
571 * from the device if the reference count drops to zero.
572 */
573int dev_mc_del(struct net_device *dev, unsigned char *addr)
574{
575 return __dev_mc_del(dev, addr, false);
576}
577EXPORT_SYMBOL(dev_mc_del);
578
579/**
580 * dev_mc_del_global - Delete a global multicast address.
581 * @dev: device
582 * @addr: address to delete
583 *
584 * Release reference to a multicast address and remove it
585 * from the device if the reference count drops to zero.
586 */
587int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
588{
589 return __dev_mc_del(dev, addr, true);
590}
591EXPORT_SYMBOL(dev_mc_del_global);
592
593/**
 594 * dev_mc_sync - Synchronize device's multicast list to another device
595 * @to: destination device
596 * @from: source device
597 *
598 * Add newly added addresses to the destination device and release
599 * addresses that have no users left. The source device must be
600 * locked by netif_tx_lock_bh.
601 *
602 * This function is intended to be called from the dev->set_multicast_list
603 * or dev->set_rx_mode function of layered software devices.
604 */
605int dev_mc_sync(struct net_device *to, struct net_device *from)
606{
607 int err = 0;
608
609 if (to->addr_len != from->addr_len)
610 return -EINVAL;
611
612 netif_addr_lock_bh(to);
613 err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
614 if (!err)
615 __dev_set_rx_mode(to);
616 netif_addr_unlock_bh(to);
617 return err;
618}
619EXPORT_SYMBOL(dev_mc_sync);
620
621/**
622 * dev_mc_unsync - Remove synchronized addresses from the destination device
623 * @to: destination device
624 * @from: source device
625 *
626 * Remove all addresses that were added to the destination device by
627 * dev_mc_sync(). This function is intended to be called from the
628 * dev->stop function of layered software devices.
629 */
630void dev_mc_unsync(struct net_device *to, struct net_device *from)
631{
632 if (to->addr_len != from->addr_len)
633 return;
634
635 netif_addr_lock_bh(from);
636 netif_addr_lock(to);
637 __hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
638 __dev_set_rx_mode(to);
639 netif_addr_unlock(to);
640 netif_addr_unlock_bh(from);
641}
642EXPORT_SYMBOL(dev_mc_unsync);
643
644/**
645 * dev_mc_flush - Flush multicast addresses
646 * @dev: device
647 *
648 * Flush multicast addresses.
649 */
650void dev_mc_flush(struct net_device *dev)
651{
652 netif_addr_lock_bh(dev);
653 __hw_addr_flush(&dev->mc);
654 netif_addr_unlock_bh(dev);
655}
656EXPORT_SYMBOL(dev_mc_flush);
657
658/**
 659 * dev_mc_init - Init multicast address list
660 * @dev: device
661 *
662 * Init multicast address list.
663 */
664void dev_mc_init(struct net_device *dev)
665{
666 __hw_addr_init(&dev->mc);
667}
668EXPORT_SYMBOL(dev_mc_init);
669
670#ifdef CONFIG_PROC_FS
671#include <linux/seq_file.h>
672
673static int dev_mc_seq_show(struct seq_file *seq, void *v)
674{
675 struct netdev_hw_addr *ha;
676 struct net_device *dev = v;
677
678 if (v == SEQ_START_TOKEN)
679 return 0;
680
681 netif_addr_lock_bh(dev);
682 netdev_for_each_mc_addr(ha, dev) {
683 int i;
684
685 seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
686 dev->name, ha->refcount, ha->global_use);
687
688 for (i = 0; i < dev->addr_len; i++)
689 seq_printf(seq, "%02x", ha->addr[i]);
690
691 seq_putc(seq, '\n');
692 }
693 netif_addr_unlock_bh(dev);
694 return 0;
695}
696
697static const struct seq_operations dev_mc_seq_ops = {
698 .start = dev_seq_start,
699 .next = dev_seq_next,
700 .stop = dev_seq_stop,
701 .show = dev_mc_seq_show,
702};
703
704static int dev_mc_seq_open(struct inode *inode, struct file *file)
705{
706 return seq_open_net(inode, file, &dev_mc_seq_ops,
707 sizeof(struct seq_net_private));
708}
709
710static const struct file_operations dev_mc_seq_fops = {
711 .owner = THIS_MODULE,
712 .open = dev_mc_seq_open,
713 .read = seq_read,
714 .llseek = seq_lseek,
715 .release = seq_release_net,
716};
717
718#endif
719
720static int __net_init dev_mc_net_init(struct net *net)
721{
722 if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
723 return -ENOMEM;
724 return 0;
725}
726
727static void __net_exit dev_mc_net_exit(struct net *net)
728{
729 proc_net_remove(net, "dev_mcast");
730}
731
732static struct pernet_operations __net_initdata dev_mc_net_ops = {
733 .init = dev_mc_net_init,
734 .exit = dev_mc_net_exit,
735};
736
737void __init dev_mcast_init(void)
738{
739 register_pernet_subsys(&dev_mc_net_ops);
740}
741
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
deleted file mode 100644
index 3dc295beb483..000000000000
--- a/net/core/dev_mcast.c
+++ /dev/null
@@ -1,232 +0,0 @@
1/*
2 * Linux NET3: Multicast List maintenance.
3 *
4 * Authors:
5 * Tim Kordas <tjk@nostromo.eeap.cwru.edu>
6 * Richard Underwood <richard@wuzz.demon.co.uk>
7 *
8 * Stir fried together from the IP multicast and CAP patches above
9 * Alan Cox <alan@lxorguk.ukuu.org.uk>
10 *
11 * Fixes:
12 * Alan Cox : Update the device on a real delete
13 * rather than any time but...
14 * Alan Cox : IFF_ALLMULTI support.
15 * Alan Cox : New format set_multicast_list() calls.
16 * Gleb Natapov : Remove dev_mc_lock.
17 *
18 * This program is free software; you can redistribute it and/or
19 * modify it under the terms of the GNU General Public License
20 * as published by the Free Software Foundation; either version
21 * 2 of the License, or (at your option) any later version.
22 */
23
24#include <linux/module.h>
25#include <asm/uaccess.h>
26#include <asm/system.h>
27#include <linux/bitops.h>
28#include <linux/types.h>
29#include <linux/kernel.h>
30#include <linux/string.h>
31#include <linux/mm.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/in.h>
35#include <linux/errno.h>
36#include <linux/interrupt.h>
37#include <linux/if_ether.h>
38#include <linux/inet.h>
39#include <linux/netdevice.h>
40#include <linux/etherdevice.h>
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
43#include <linux/init.h>
44#include <net/net_namespace.h>
45#include <net/ip.h>
46#include <net/route.h>
47#include <linux/skbuff.h>
48#include <net/sock.h>
49#include <net/arp.h>
50
51
52/*
53 * Device multicast list maintenance.
54 *
55 * This is used both by IP and by the user level maintenance functions.
56 * Unlike BSD we maintain a usage count on a given multicast address so
57 * that a casual user application can add/delete multicasts used by
58 * protocols without doing damage to the protocols when it deletes the
59 * entries. It also helps IP as it tracks overlapping maps.
60 *
61 * Device mc lists are changed by bh at least if IPv6 is enabled,
62 * so that it must be bh protected.
63 *
64 * We block accesses to device mc filters with netif_tx_lock.
65 */
66
67/*
68 * Delete a device level multicast
69 */
70
71int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
72{
73 int err;
74
75 netif_addr_lock_bh(dev);
76 err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
77 addr, alen, glbl);
78 if (!err) {
79 /*
80 * We have altered the list, so the card
81 * loaded filter is now wrong. Fix it
82 */
83
84 __dev_set_rx_mode(dev);
85 }
86 netif_addr_unlock_bh(dev);
87 return err;
88}
89
90/*
91 * Add a device level multicast
92 */
93
94int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
95{
96 int err;
97
98 netif_addr_lock_bh(dev);
99 if (alen != dev->addr_len)
100 err = -EINVAL;
101 else
102 err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
103 if (!err)
104 __dev_set_rx_mode(dev);
105 netif_addr_unlock_bh(dev);
106 return err;
107}
108
109/**
110 * dev_mc_sync - Synchronize device's multicast list to another device
111 * @to: destination device
112 * @from: source device
113 *
114 * Add newly added addresses to the destination device and release
115 * addresses that have no users left. The source device must be
116 * locked by netif_tx_lock_bh.
117 *
118 * This function is intended to be called from the dev->set_multicast_list
119 * or dev->set_rx_mode function of layered software devices.
120 */
121int dev_mc_sync(struct net_device *to, struct net_device *from)
122{
123 int err = 0;
124
125 netif_addr_lock_bh(to);
126 err = __dev_addr_sync(&to->mc_list, &to->mc_count,
127 &from->mc_list, &from->mc_count);
128 if (!err)
129 __dev_set_rx_mode(to);
130 netif_addr_unlock_bh(to);
131
132 return err;
133}
134EXPORT_SYMBOL(dev_mc_sync);
135
136
137/**
138 * dev_mc_unsync - Remove synchronized addresses from the destination
139 * device
140 * @to: destination device
141 * @from: source device
142 *
143 * Remove all addresses that were added to the destination device by
144 * dev_mc_sync(). This function is intended to be called from the
145 * dev->stop function of layered software devices.
146 */
147void dev_mc_unsync(struct net_device *to, struct net_device *from)
148{
149 netif_addr_lock_bh(from);
150 netif_addr_lock(to);
151
152 __dev_addr_unsync(&to->mc_list, &to->mc_count,
153 &from->mc_list, &from->mc_count);
154 __dev_set_rx_mode(to);
155
156 netif_addr_unlock(to);
157 netif_addr_unlock_bh(from);
158}
159EXPORT_SYMBOL(dev_mc_unsync);
160
161#ifdef CONFIG_PROC_FS
162static int dev_mc_seq_show(struct seq_file *seq, void *v)
163{
164 struct dev_addr_list *m;
165 struct net_device *dev = v;
166
167 if (v == SEQ_START_TOKEN)
168 return 0;
169
170 netif_addr_lock_bh(dev);
171 for (m = dev->mc_list; m; m = m->next) {
172 int i;
173
174 seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
175 dev->name, m->dmi_users, m->dmi_gusers);
176
177 for (i = 0; i < m->dmi_addrlen; i++)
178 seq_printf(seq, "%02x", m->dmi_addr[i]);
179
180 seq_putc(seq, '\n');
181 }
182 netif_addr_unlock_bh(dev);
183 return 0;
184}
185
186static const struct seq_operations dev_mc_seq_ops = {
187 .start = dev_seq_start,
188 .next = dev_seq_next,
189 .stop = dev_seq_stop,
190 .show = dev_mc_seq_show,
191};
192
193static int dev_mc_seq_open(struct inode *inode, struct file *file)
194{
195 return seq_open_net(inode, file, &dev_mc_seq_ops,
196 sizeof(struct seq_net_private));
197}
198
199static const struct file_operations dev_mc_seq_fops = {
200 .owner = THIS_MODULE,
201 .open = dev_mc_seq_open,
202 .read = seq_read,
203 .llseek = seq_lseek,
204 .release = seq_release_net,
205};
206
207#endif
208
209static int __net_init dev_mc_net_init(struct net *net)
210{
211 if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
212 return -ENOMEM;
213 return 0;
214}
215
216static void __net_exit dev_mc_net_exit(struct net *net)
217{
218 proc_net_remove(net, "dev_mcast");
219}
220
221static struct pernet_operations __net_initdata dev_mc_net_ops = {
222 .init = dev_mc_net_init,
223 .exit = dev_mc_net_exit,
224};
225
226void __init dev_mcast_init(void)
227{
228 register_pernet_subsys(&dev_mc_net_ops);
229}
230
231EXPORT_SYMBOL(dev_mc_add);
232EXPORT_SYMBOL(dev_mc_delete);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index cf208d8042b1..36e603c78ce9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -172,12 +172,12 @@ out:
172 return; 172 return;
173} 173}
174 174
175static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) 175static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
176{ 176{
177 trace_drop_common(skb, location); 177 trace_drop_common(skb, location);
178} 178}
179 179
180static void trace_napi_poll_hit(struct napi_struct *napi) 180static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
181{ 181{
182 struct dm_hw_stat_delta *new_stat; 182 struct dm_hw_stat_delta *new_stat;
183 183
@@ -223,14 +223,19 @@ static int set_all_monitor_traces(int state)
223 223
224 spin_lock(&trace_state_lock); 224 spin_lock(&trace_state_lock);
225 225
226 if (state == trace_state) {
227 rc = -EAGAIN;
228 goto out_unlock;
229 }
230
226 switch (state) { 231 switch (state) {
227 case TRACE_ON: 232 case TRACE_ON:
228 rc |= register_trace_kfree_skb(trace_kfree_skb_hit); 233 rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
229 rc |= register_trace_napi_poll(trace_napi_poll_hit); 234 rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
230 break; 235 break;
231 case TRACE_OFF: 236 case TRACE_OFF:
232 rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); 237 rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
233 rc |= unregister_trace_napi_poll(trace_napi_poll_hit); 238 rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
234 239
235 tracepoint_synchronize_unregister(); 240 tracepoint_synchronize_unregister();
236 241
@@ -251,11 +256,12 @@ static int set_all_monitor_traces(int state)
251 256
252 if (!rc) 257 if (!rc)
253 trace_state = state; 258 trace_state = state;
259 else
260 rc = -EINPROGRESS;
254 261
262out_unlock:
255 spin_unlock(&trace_state_lock); 263 spin_unlock(&trace_state_lock);
256 264
257 if (rc)
258 return -EINPROGRESS;
259 return rc; 265 return rc;
260} 266}
261 267
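With the updated tracepoint API, every probe gains a leading caller-supplied
data pointer, passed both at registration and to the probe itself, as the
converted drop-monitor probes above show. A minimal standalone sketch
(module and probe names are hypothetical):

#include <linux/module.h>
#include <trace/events/skb.h>

static void my_kfree_skb_probe(void *ignore, struct sk_buff *skb,
			       void *location)
{
	pr_debug("skb %p freed at %pF\n", skb, location);
}

static int __init my_probe_init(void)
{
	return register_trace_kfree_skb(my_kfree_skb_probe, NULL);
}

static void __exit my_probe_exit(void)
{
	unregister_trace_kfree_skb(my_kfree_skb_probe, NULL);
	tracepoint_synchronize_unregister();
}

module_init(my_probe_init);
module_exit(my_probe_exit);
MODULE_LICENSE("GPL");
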
@@ -341,9 +347,9 @@ static struct notifier_block dropmon_net_notifier = {
341 347
342static int __init init_net_drop_monitor(void) 348static int __init init_net_drop_monitor(void)
343{ 349{
344 int cpu;
345 int rc, i, ret;
346 struct per_cpu_dm_data *data; 350 struct per_cpu_dm_data *data;
351 int cpu, rc;
352
 347 printk(KERN_INFO "Initializing network drop monitor service\n"); 353
348 354
349 if (sizeof(void *) > 8) { 355 if (sizeof(void *) > 8) {
@@ -351,21 +357,12 @@ static int __init init_net_drop_monitor(void)
351 return -ENOSPC; 357 return -ENOSPC;
352 } 358 }
353 359
354 if (genl_register_family(&net_drop_monitor_family) < 0) { 360 rc = genl_register_family_with_ops(&net_drop_monitor_family,
361 dropmon_ops,
362 ARRAY_SIZE(dropmon_ops));
363 if (rc) {
355 printk(KERN_ERR "Could not create drop monitor netlink family\n"); 364 printk(KERN_ERR "Could not create drop monitor netlink family\n");
356 return -EFAULT; 365 return rc;
357 }
358
359 rc = -EFAULT;
360
361 for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
362 ret = genl_register_ops(&net_drop_monitor_family,
363 &dropmon_ops[i]);
364 if (ret) {
365 printk(KERN_CRIT "Failed to register operation %d\n",
366 dropmon_ops[i].cmd);
367 goto out_unreg;
368 }
369 } 366 }
370 367
371 rc = register_netdevice_notifier(&dropmon_net_notifier); 368 rc = register_netdevice_notifier(&dropmon_net_notifier);
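genl_register_family_with_ops() registers the family and the whole ops
array in one call and unwinds automatically on failure, replacing the
hand-rolled loop deleted above. A hedged sketch with a hypothetical family
and command:

#include <net/genetlink.h>

static int my_doit(struct sk_buff *skb, struct genl_info *info)
{
	return 0;			/* no-op handler for the sketch */
}

static struct genl_family my_family = {
	.id      = GENL_ID_GENERATE,
	.name    = "my_family",		/* hypothetical */
	.version = 1,
	.maxattr = 0,
};

static struct genl_ops my_ops[] = {
	{
		.cmd  = 1,		/* hypothetical command number */
		.doit = my_doit,
	},
};

static int __init my_genl_init(void)
{
	return genl_register_family_with_ops(&my_family, my_ops,
					     ARRAY_SIZE(my_ops));
}
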
diff --git a/net/core/dst.c b/net/core/dst.c
index f307bc18f6a0..6c41b1fac3db 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -44,7 +44,7 @@ static atomic_t dst_total = ATOMIC_INIT(0);
44 */ 44 */
45static struct { 45static struct {
46 spinlock_t lock; 46 spinlock_t lock;
47 struct dst_entry *list; 47 struct dst_entry *list;
48 unsigned long timer_inc; 48 unsigned long timer_inc;
49 unsigned long timer_expires; 49 unsigned long timer_expires;
50} dst_garbage = { 50} dst_garbage = {
@@ -52,7 +52,7 @@ static struct {
52 .timer_inc = DST_GC_MAX, 52 .timer_inc = DST_GC_MAX,
53}; 53};
54static void dst_gc_task(struct work_struct *work); 54static void dst_gc_task(struct work_struct *work);
55static void ___dst_free(struct dst_entry * dst); 55static void ___dst_free(struct dst_entry *dst);
56 56
57static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task); 57static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
58 58
@@ -136,8 +136,8 @@ loop:
136 } 136 }
137 expires = dst_garbage.timer_expires; 137 expires = dst_garbage.timer_expires;
138 /* 138 /*
139 * if the next desired timer is more than 4 seconds in the future 139 * if the next desired timer is more than 4 seconds in the
140 * then round the timer to whole seconds 140 * future then round the timer to whole seconds
141 */ 141 */
142 if (expires > 4*HZ) 142 if (expires > 4*HZ)
143 expires = round_jiffies_relative(expires); 143 expires = round_jiffies_relative(expires);
@@ -152,7 +152,8 @@ loop:
152 " expires: %lu elapsed: %lu us\n", 152 " expires: %lu elapsed: %lu us\n",
153 atomic_read(&dst_total), delayed, work_performed, 153 atomic_read(&dst_total), delayed, work_performed,
154 expires, 154 expires,
155 elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC); 155 elapsed.tv_sec * USEC_PER_SEC +
156 elapsed.tv_nsec / NSEC_PER_USEC);
156#endif 157#endif
157} 158}
158 159
@@ -163,9 +164,9 @@ int dst_discard(struct sk_buff *skb)
163} 164}
164EXPORT_SYMBOL(dst_discard); 165EXPORT_SYMBOL(dst_discard);
165 166
166void * dst_alloc(struct dst_ops * ops) 167void *dst_alloc(struct dst_ops *ops)
167{ 168{
168 struct dst_entry * dst; 169 struct dst_entry *dst;
169 170
170 if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { 171 if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
171 if (ops->gc(ops)) 172 if (ops->gc(ops))
@@ -185,19 +186,19 @@ void * dst_alloc(struct dst_ops * ops)
185 atomic_inc(&ops->entries); 186 atomic_inc(&ops->entries);
186 return dst; 187 return dst;
187} 188}
189EXPORT_SYMBOL(dst_alloc);
188 190
189static void ___dst_free(struct dst_entry * dst) 191static void ___dst_free(struct dst_entry *dst)
190{ 192{
191 /* The first case (dev==NULL) is required, when 193 /* The first case (dev==NULL) is required, when
192 protocol module is unloaded. 194 protocol module is unloaded.
193 */ 195 */
194 if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { 196 if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
195 dst->input = dst->output = dst_discard; 197 dst->input = dst->output = dst_discard;
196 }
197 dst->obsolete = 2; 198 dst->obsolete = 2;
198} 199}
199 200
200void __dst_free(struct dst_entry * dst) 201void __dst_free(struct dst_entry *dst)
201{ 202{
202 spin_lock_bh(&dst_garbage.lock); 203 spin_lock_bh(&dst_garbage.lock);
203 ___dst_free(dst); 204 ___dst_free(dst);
@@ -211,6 +212,7 @@ void __dst_free(struct dst_entry * dst)
211 } 212 }
212 spin_unlock_bh(&dst_garbage.lock); 213 spin_unlock_bh(&dst_garbage.lock);
213} 214}
215EXPORT_SYMBOL(__dst_free);
214 216
215struct dst_entry *dst_destroy(struct dst_entry * dst) 217struct dst_entry *dst_destroy(struct dst_entry * dst)
216{ 218{
@@ -262,15 +264,16 @@ again:
262 } 264 }
263 return NULL; 265 return NULL;
264} 266}
267EXPORT_SYMBOL(dst_destroy);
265 268
266void dst_release(struct dst_entry *dst) 269void dst_release(struct dst_entry *dst)
267{ 270{
268 if (dst) { 271 if (dst) {
269 int newrefcnt; 272 int newrefcnt;
270 273
271 smp_mb__before_atomic_dec(); 274 smp_mb__before_atomic_dec();
272 newrefcnt = atomic_dec_return(&dst->__refcnt); 275 newrefcnt = atomic_dec_return(&dst->__refcnt);
273 WARN_ON(newrefcnt < 0); 276 WARN_ON(newrefcnt < 0);
274 } 277 }
275} 278}
276EXPORT_SYMBOL(dst_release); 279EXPORT_SYMBOL(dst_release);
@@ -283,8 +286,8 @@ EXPORT_SYMBOL(dst_release);
283 * 286 *
284 * Commented and originally written by Alexey. 287 * Commented and originally written by Alexey.
285 */ 288 */
286static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev, 289static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
287 int unregister) 290 int unregister)
288{ 291{
289 if (dst->ops->ifdown) 292 if (dst->ops->ifdown)
290 dst->ops->ifdown(dst, dev, unregister); 293 dst->ops->ifdown(dst, dev, unregister);
@@ -306,7 +309,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
306 } 309 }
307} 310}
308 311
309static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) 312static int dst_dev_event(struct notifier_block *this, unsigned long event,
313 void *ptr)
310{ 314{
311 struct net_device *dev = ptr; 315 struct net_device *dev = ptr;
312 struct dst_entry *dst, *last = NULL; 316 struct dst_entry *dst, *last = NULL;
@@ -329,9 +333,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void
329 last->next = dst; 333 last->next = dst;
330 else 334 else
331 dst_busy_list = dst; 335 dst_busy_list = dst;
332 for (; dst; dst = dst->next) { 336 for (; dst; dst = dst->next)
333 dst_ifdown(dst, dev, event != NETDEV_DOWN); 337 dst_ifdown(dst, dev, event != NETDEV_DOWN);
334 }
335 mutex_unlock(&dst_gc_mutex); 338 mutex_unlock(&dst_gc_mutex);
336 break; 339 break;
337 } 340 }
@@ -346,7 +349,3 @@ void __init dst_init(void)
346{ 349{
347 register_netdevice_notifier(&dst_dev_notifier); 350 register_netdevice_notifier(&dst_dev_notifier);
348} 351}
349
350EXPORT_SYMBOL(__dst_free);
351EXPORT_SYMBOL(dst_alloc);
352EXPORT_SYMBOL(dst_destroy);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9d55c57f318a..7a85367b3c2f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -18,8 +18,8 @@
18#include <linux/ethtool.h> 18#include <linux/ethtool.h>
19#include <linux/netdevice.h> 19#include <linux/netdevice.h>
20#include <linux/bitops.h> 20#include <linux/bitops.h>
21#include <linux/uaccess.h>
21#include <linux/slab.h> 22#include <linux/slab.h>
22#include <asm/uaccess.h>
23 23
24/* 24/*
25 * Some useful ethtool_ops methods that're device independent. 25 * Some useful ethtool_ops methods that're device independent.
@@ -31,6 +31,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
31{ 31{
32 return netif_carrier_ok(dev) ? 1 : 0; 32 return netif_carrier_ok(dev) ? 1 : 0;
33} 33}
34EXPORT_SYMBOL(ethtool_op_get_link);
34 35
35u32 ethtool_op_get_rx_csum(struct net_device *dev) 36u32 ethtool_op_get_rx_csum(struct net_device *dev)
36{ 37{
@@ -63,6 +64,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
63 64
64 return 0; 65 return 0;
65} 66}
67EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
66 68
67int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data) 69int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
68{ 70{
@@ -73,11 +75,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
73 75
74 return 0; 76 return 0;
75} 77}
78EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
76 79
77u32 ethtool_op_get_sg(struct net_device *dev) 80u32 ethtool_op_get_sg(struct net_device *dev)
78{ 81{
79 return (dev->features & NETIF_F_SG) != 0; 82 return (dev->features & NETIF_F_SG) != 0;
80} 83}
84EXPORT_SYMBOL(ethtool_op_get_sg);
81 85
82int ethtool_op_set_sg(struct net_device *dev, u32 data) 86int ethtool_op_set_sg(struct net_device *dev, u32 data)
83{ 87{
@@ -88,11 +92,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data)
88 92
89 return 0; 93 return 0;
90} 94}
95EXPORT_SYMBOL(ethtool_op_set_sg);
91 96
92u32 ethtool_op_get_tso(struct net_device *dev) 97u32 ethtool_op_get_tso(struct net_device *dev)
93{ 98{
94 return (dev->features & NETIF_F_TSO) != 0; 99 return (dev->features & NETIF_F_TSO) != 0;
95} 100}
101EXPORT_SYMBOL(ethtool_op_get_tso);
96 102
97int ethtool_op_set_tso(struct net_device *dev, u32 data) 103int ethtool_op_set_tso(struct net_device *dev, u32 data)
98{ 104{
@@ -103,11 +109,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
103 109
104 return 0; 110 return 0;
105} 111}
112EXPORT_SYMBOL(ethtool_op_set_tso);
106 113
107u32 ethtool_op_get_ufo(struct net_device *dev) 114u32 ethtool_op_get_ufo(struct net_device *dev)
108{ 115{
109 return (dev->features & NETIF_F_UFO) != 0; 116 return (dev->features & NETIF_F_UFO) != 0;
110} 117}
118EXPORT_SYMBOL(ethtool_op_get_ufo);
111 119
112int ethtool_op_set_ufo(struct net_device *dev, u32 data) 120int ethtool_op_set_ufo(struct net_device *dev, u32 data)
113{ 121{
@@ -117,12 +125,13 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
117 dev->features &= ~NETIF_F_UFO; 125 dev->features &= ~NETIF_F_UFO;
118 return 0; 126 return 0;
119} 127}
128EXPORT_SYMBOL(ethtool_op_set_ufo);
120 129
121/* the following list of flags are the same as their associated 130/* the following list of flags are the same as their associated
122 * NETIF_F_xxx values in include/linux/netdevice.h 131 * NETIF_F_xxx values in include/linux/netdevice.h
123 */ 132 */
124static const u32 flags_dup_features = 133static const u32 flags_dup_features =
125 (ETH_FLAG_LRO | ETH_FLAG_NTUPLE); 134 (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
126 135
127u32 ethtool_op_get_flags(struct net_device *dev) 136u32 ethtool_op_get_flags(struct net_device *dev)
128{ 137{
@@ -133,29 +142,18 @@ u32 ethtool_op_get_flags(struct net_device *dev)
133 142
134 return dev->features & flags_dup_features; 143 return dev->features & flags_dup_features;
135} 144}
145EXPORT_SYMBOL(ethtool_op_get_flags);
136 146
137int ethtool_op_set_flags(struct net_device *dev, u32 data) 147int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
138{ 148{
139 const struct ethtool_ops *ops = dev->ethtool_ops; 149 if (data & ~supported)
140 unsigned long features = dev->features; 150 return -EINVAL;
141
142 if (data & ETH_FLAG_LRO)
143 features |= NETIF_F_LRO;
144 else
145 features &= ~NETIF_F_LRO;
146
147 if (data & ETH_FLAG_NTUPLE) {
148 if (!ops->set_rx_ntuple)
149 return -EOPNOTSUPP;
150 features |= NETIF_F_NTUPLE;
151 } else {
152 /* safe to clear regardless */
153 features &= ~NETIF_F_NTUPLE;
154 }
155 151
156 dev->features = features; 152 dev->features = ((dev->features & ~flags_dup_features) |
153 (data & flags_dup_features));
157 return 0; 154 return 0;
158} 155}
156EXPORT_SYMBOL(ethtool_op_set_flags);
159 157
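With the supported-flags mask in the new signature, a driver can delegate
ETH_FLAG handling entirely to the core helper. A sketch of a hypothetical
driver accepting only the LRO and RXHASH toggles:

#include <linux/ethtool.h>
#include <linux/netdevice.h>

static int foo_set_flags(struct net_device *dev, u32 data)
{
	return ethtool_op_set_flags(dev, data,
				    ETH_FLAG_LRO | ETH_FLAG_RXHASH);
}

static const struct ethtool_ops foo_ethtool_ops = {
	.get_flags = ethtool_op_get_flags,
	.set_flags = foo_set_flags,
};
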
160void ethtool_ntuple_flush(struct net_device *dev) 158void ethtool_ntuple_flush(struct net_device *dev)
161{ 159{
@@ -201,7 +199,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
201 return dev->ethtool_ops->set_settings(dev, &cmd); 199 return dev->ethtool_ops->set_settings(dev, &cmd);
202} 200}
203 201
204static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) 202static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
203 void __user *useraddr)
205{ 204{
206 struct ethtool_drvinfo info; 205 struct ethtool_drvinfo info;
207 const struct ethtool_ops *ops = dev->ethtool_ops; 206 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -241,7 +240,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void _
241} 240}
242 241
243static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev, 242static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
244 void __user *useraddr) 243 void __user *useraddr)
245{ 244{
246 struct ethtool_sset_info info; 245 struct ethtool_sset_info info;
247 const struct ethtool_ops *ops = dev->ethtool_ops; 246 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -300,22 +299,34 @@ out:
300 return ret; 299 return ret;
301} 300}
302 301
303static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) 302static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
303 u32 cmd, void __user *useraddr)
304{ 304{
305 struct ethtool_rxnfc cmd; 305 struct ethtool_rxnfc info;
306 size_t info_size = sizeof(info);
306 307
307 if (!dev->ethtool_ops->set_rxnfc) 308 if (!dev->ethtool_ops->set_rxnfc)
308 return -EOPNOTSUPP; 309 return -EOPNOTSUPP;
309 310
310 if (copy_from_user(&cmd, useraddr, sizeof(cmd))) 311 /* struct ethtool_rxnfc was originally defined for
312 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
313 * members. User-space might still be using that
314 * definition. */
315 if (cmd == ETHTOOL_SRXFH)
316 info_size = (offsetof(struct ethtool_rxnfc, data) +
317 sizeof(info.data));
318
319 if (copy_from_user(&info, useraddr, info_size))
311 return -EFAULT; 320 return -EFAULT;
312 321
313 return dev->ethtool_ops->set_rxnfc(dev, &cmd); 322 return dev->ethtool_ops->set_rxnfc(dev, &info);
314} 323}
315 324
316static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) 325static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
326 u32 cmd, void __user *useraddr)
317{ 327{
318 struct ethtool_rxnfc info; 328 struct ethtool_rxnfc info;
329 size_t info_size = sizeof(info);
319 const struct ethtool_ops *ops = dev->ethtool_ops; 330 const struct ethtool_ops *ops = dev->ethtool_ops;
320 int ret; 331 int ret;
321 void *rule_buf = NULL; 332 void *rule_buf = NULL;
@@ -323,13 +334,22 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __u
323 if (!ops->get_rxnfc) 334 if (!ops->get_rxnfc)
324 return -EOPNOTSUPP; 335 return -EOPNOTSUPP;
325 336
326 if (copy_from_user(&info, useraddr, sizeof(info))) 337 /* struct ethtool_rxnfc was originally defined for
338 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
339 * members. User-space might still be using that
340 * definition. */
341 if (cmd == ETHTOOL_GRXFH)
342 info_size = (offsetof(struct ethtool_rxnfc, data) +
343 sizeof(info.data));
344
345 if (copy_from_user(&info, useraddr, info_size))
327 return -EFAULT; 346 return -EFAULT;
328 347
329 if (info.cmd == ETHTOOL_GRXCLSRLALL) { 348 if (info.cmd == ETHTOOL_GRXCLSRLALL) {
330 if (info.rule_cnt > 0) { 349 if (info.rule_cnt > 0) {
331 rule_buf = kmalloc(info.rule_cnt * sizeof(u32), 350 if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
332 GFP_USER); 351 rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
352 GFP_USER);
333 if (!rule_buf) 353 if (!rule_buf)
334 return -ENOMEM; 354 return -ENOMEM;
335 } 355 }
@@ -340,7 +360,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __u
340 goto err_out; 360 goto err_out;
341 361
342 ret = -EFAULT; 362 ret = -EFAULT;
343 if (copy_to_user(useraddr, &info, sizeof(info))) 363 if (copy_to_user(useraddr, &info, info_size))
344 goto err_out; 364 goto err_out;
345 365
346 if (rule_buf) { 366 if (rule_buf) {
@@ -357,9 +377,83 @@ err_out:
357 return ret; 377 return ret;
358} 378}
359 379
380static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
381 void __user *useraddr)
382{
383 struct ethtool_rxfh_indir *indir;
384 u32 table_size;
385 size_t full_size;
386 int ret;
387
388 if (!dev->ethtool_ops->get_rxfh_indir)
389 return -EOPNOTSUPP;
390
391 if (copy_from_user(&table_size,
392 useraddr + offsetof(struct ethtool_rxfh_indir, size),
393 sizeof(table_size)))
394 return -EFAULT;
395
396 if (table_size >
397 (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
398 return -ENOMEM;
399 full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
400 indir = kmalloc(full_size, GFP_USER);
401 if (!indir)
402 return -ENOMEM;
403
404 indir->cmd = ETHTOOL_GRXFHINDIR;
405 indir->size = table_size;
406 ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
407 if (ret)
408 goto out;
409
410 if (copy_to_user(useraddr, indir, full_size))
411 ret = -EFAULT;
412
413out:
414 kfree(indir);
415 return ret;
416}
417
418static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
419 void __user *useraddr)
420{
421 struct ethtool_rxfh_indir *indir;
422 u32 table_size;
423 size_t full_size;
424 int ret;
425
426 if (!dev->ethtool_ops->set_rxfh_indir)
427 return -EOPNOTSUPP;
428
429 if (copy_from_user(&table_size,
430 useraddr + offsetof(struct ethtool_rxfh_indir, size),
431 sizeof(table_size)))
432 return -EFAULT;
433
434 if (table_size >
435 (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
436 return -ENOMEM;
437 full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
438 indir = kmalloc(full_size, GFP_USER);
439 if (!indir)
440 return -ENOMEM;
441
442 if (copy_from_user(indir, useraddr, full_size)) {
443 ret = -EFAULT;
444 goto out;
445 }
446
447 ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
448
449out:
450 kfree(indir);
451 return ret;
452}
453
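The ioctl handlers above size the variable-length indirection table from its
size member before calling into the driver; on the driver side,
get_rxfh_indir is then expected to validate that size and fill ring_index[].
A hedged sketch under those assumptions (FOO_INDIR_TABLE_SIZE and foo_priv
are hypothetical):

#include <linux/ethtool.h>
#include <linux/netdevice.h>

#define FOO_INDIR_TABLE_SIZE	128	/* hypothetical table size */

struct foo_priv {
	u32 indir_table[FOO_INDIR_TABLE_SIZE];
};

static int foo_get_rxfh_indir(struct net_device *dev,
			      struct ethtool_rxfh_indir *indir)
{
	struct foo_priv *priv = netdev_priv(dev);
	u32 i;

	if (indir->size != FOO_INDIR_TABLE_SIZE)
		return -EINVAL;

	for (i = 0; i < indir->size; i++)
		indir->ring_index[i] = priv->indir_table[i];
	return 0;
}
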
360static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list, 454static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
361 struct ethtool_rx_ntuple_flow_spec *spec, 455 struct ethtool_rx_ntuple_flow_spec *spec,
362 struct ethtool_rx_ntuple_flow_spec_container *fsc) 456 struct ethtool_rx_ntuple_flow_spec_container *fsc)
363{ 457{
364 458
365 /* don't add filters forever */ 459 /* don't add filters forever */
@@ -385,7 +479,8 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
385 list->count++; 479 list->count++;
386} 480}
387 481
388static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr) 482static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
483 void __user *useraddr)
389{ 484{
390 struct ethtool_rx_ntuple cmd; 485 struct ethtool_rx_ntuple cmd;
391 const struct ethtool_ops *ops = dev->ethtool_ops; 486 const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -502,7 +597,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
502 p += ETH_GSTRING_LEN; 597 p += ETH_GSTRING_LEN;
503 num_strings++; 598 num_strings++;
504 goto unknown_filter; 599 goto unknown_filter;
505 }; 600 }
506 601
507 /* now the rest of the filters */ 602 /* now the rest of the filters */
508 switch (fsc->fs.flow_type) { 603 switch (fsc->fs.flow_type) {
@@ -510,125 +605,125 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
510 case UDP_V4_FLOW: 605 case UDP_V4_FLOW:
511 case SCTP_V4_FLOW: 606 case SCTP_V4_FLOW:
512 sprintf(p, "\tSrc IP addr: 0x%x\n", 607 sprintf(p, "\tSrc IP addr: 0x%x\n",
513 fsc->fs.h_u.tcp_ip4_spec.ip4src); 608 fsc->fs.h_u.tcp_ip4_spec.ip4src);
514 p += ETH_GSTRING_LEN; 609 p += ETH_GSTRING_LEN;
515 num_strings++; 610 num_strings++;
516 sprintf(p, "\tSrc IP mask: 0x%x\n", 611 sprintf(p, "\tSrc IP mask: 0x%x\n",
517 fsc->fs.m_u.tcp_ip4_spec.ip4src); 612 fsc->fs.m_u.tcp_ip4_spec.ip4src);
518 p += ETH_GSTRING_LEN; 613 p += ETH_GSTRING_LEN;
519 num_strings++; 614 num_strings++;
520 sprintf(p, "\tDest IP addr: 0x%x\n", 615 sprintf(p, "\tDest IP addr: 0x%x\n",
521 fsc->fs.h_u.tcp_ip4_spec.ip4dst); 616 fsc->fs.h_u.tcp_ip4_spec.ip4dst);
522 p += ETH_GSTRING_LEN; 617 p += ETH_GSTRING_LEN;
523 num_strings++; 618 num_strings++;
524 sprintf(p, "\tDest IP mask: 0x%x\n", 619 sprintf(p, "\tDest IP mask: 0x%x\n",
525 fsc->fs.m_u.tcp_ip4_spec.ip4dst); 620 fsc->fs.m_u.tcp_ip4_spec.ip4dst);
526 p += ETH_GSTRING_LEN; 621 p += ETH_GSTRING_LEN;
527 num_strings++; 622 num_strings++;
528 sprintf(p, "\tSrc Port: %d, mask: 0x%x\n", 623 sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
529 fsc->fs.h_u.tcp_ip4_spec.psrc, 624 fsc->fs.h_u.tcp_ip4_spec.psrc,
530 fsc->fs.m_u.tcp_ip4_spec.psrc); 625 fsc->fs.m_u.tcp_ip4_spec.psrc);
531 p += ETH_GSTRING_LEN; 626 p += ETH_GSTRING_LEN;
532 num_strings++; 627 num_strings++;
533 sprintf(p, "\tDest Port: %d, mask: 0x%x\n", 628 sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
534 fsc->fs.h_u.tcp_ip4_spec.pdst, 629 fsc->fs.h_u.tcp_ip4_spec.pdst,
535 fsc->fs.m_u.tcp_ip4_spec.pdst); 630 fsc->fs.m_u.tcp_ip4_spec.pdst);
536 p += ETH_GSTRING_LEN; 631 p += ETH_GSTRING_LEN;
537 num_strings++; 632 num_strings++;
538 sprintf(p, "\tTOS: %d, mask: 0x%x\n", 633 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
539 fsc->fs.h_u.tcp_ip4_spec.tos, 634 fsc->fs.h_u.tcp_ip4_spec.tos,
540 fsc->fs.m_u.tcp_ip4_spec.tos); 635 fsc->fs.m_u.tcp_ip4_spec.tos);
541 p += ETH_GSTRING_LEN; 636 p += ETH_GSTRING_LEN;
542 num_strings++; 637 num_strings++;
543 break; 638 break;
544 case AH_ESP_V4_FLOW: 639 case AH_ESP_V4_FLOW:
545 case ESP_V4_FLOW: 640 case ESP_V4_FLOW:
546 sprintf(p, "\tSrc IP addr: 0x%x\n", 641 sprintf(p, "\tSrc IP addr: 0x%x\n",
547 fsc->fs.h_u.ah_ip4_spec.ip4src); 642 fsc->fs.h_u.ah_ip4_spec.ip4src);
548 p += ETH_GSTRING_LEN; 643 p += ETH_GSTRING_LEN;
549 num_strings++; 644 num_strings++;
550 sprintf(p, "\tSrc IP mask: 0x%x\n", 645 sprintf(p, "\tSrc IP mask: 0x%x\n",
551 fsc->fs.m_u.ah_ip4_spec.ip4src); 646 fsc->fs.m_u.ah_ip4_spec.ip4src);
552 p += ETH_GSTRING_LEN; 647 p += ETH_GSTRING_LEN;
553 num_strings++; 648 num_strings++;
554 sprintf(p, "\tDest IP addr: 0x%x\n", 649 sprintf(p, "\tDest IP addr: 0x%x\n",
555 fsc->fs.h_u.ah_ip4_spec.ip4dst); 650 fsc->fs.h_u.ah_ip4_spec.ip4dst);
556 p += ETH_GSTRING_LEN; 651 p += ETH_GSTRING_LEN;
557 num_strings++; 652 num_strings++;
558 sprintf(p, "\tDest IP mask: 0x%x\n", 653 sprintf(p, "\tDest IP mask: 0x%x\n",
559 fsc->fs.m_u.ah_ip4_spec.ip4dst); 654 fsc->fs.m_u.ah_ip4_spec.ip4dst);
560 p += ETH_GSTRING_LEN; 655 p += ETH_GSTRING_LEN;
561 num_strings++; 656 num_strings++;
562 sprintf(p, "\tSPI: %d, mask: 0x%x\n", 657 sprintf(p, "\tSPI: %d, mask: 0x%x\n",
563 fsc->fs.h_u.ah_ip4_spec.spi, 658 fsc->fs.h_u.ah_ip4_spec.spi,
564 fsc->fs.m_u.ah_ip4_spec.spi); 659 fsc->fs.m_u.ah_ip4_spec.spi);
565 p += ETH_GSTRING_LEN; 660 p += ETH_GSTRING_LEN;
566 num_strings++; 661 num_strings++;
567 sprintf(p, "\tTOS: %d, mask: 0x%x\n", 662 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
568 fsc->fs.h_u.ah_ip4_spec.tos, 663 fsc->fs.h_u.ah_ip4_spec.tos,
569 fsc->fs.m_u.ah_ip4_spec.tos); 664 fsc->fs.m_u.ah_ip4_spec.tos);
570 p += ETH_GSTRING_LEN; 665 p += ETH_GSTRING_LEN;
571 num_strings++; 666 num_strings++;
572 break; 667 break;
573 case IP_USER_FLOW: 668 case IP_USER_FLOW:
574 sprintf(p, "\tSrc IP addr: 0x%x\n", 669 sprintf(p, "\tSrc IP addr: 0x%x\n",
575 fsc->fs.h_u.raw_ip4_spec.ip4src); 670 fsc->fs.h_u.raw_ip4_spec.ip4src);
576 p += ETH_GSTRING_LEN; 671 p += ETH_GSTRING_LEN;
577 num_strings++; 672 num_strings++;
578 sprintf(p, "\tSrc IP mask: 0x%x\n", 673 sprintf(p, "\tSrc IP mask: 0x%x\n",
579 fsc->fs.m_u.raw_ip4_spec.ip4src); 674 fsc->fs.m_u.raw_ip4_spec.ip4src);
580 p += ETH_GSTRING_LEN; 675 p += ETH_GSTRING_LEN;
581 num_strings++; 676 num_strings++;
582 sprintf(p, "\tDest IP addr: 0x%x\n", 677 sprintf(p, "\tDest IP addr: 0x%x\n",
583 fsc->fs.h_u.raw_ip4_spec.ip4dst); 678 fsc->fs.h_u.raw_ip4_spec.ip4dst);
584 p += ETH_GSTRING_LEN; 679 p += ETH_GSTRING_LEN;
585 num_strings++; 680 num_strings++;
586 sprintf(p, "\tDest IP mask: 0x%x\n", 681 sprintf(p, "\tDest IP mask: 0x%x\n",
587 fsc->fs.m_u.raw_ip4_spec.ip4dst); 682 fsc->fs.m_u.raw_ip4_spec.ip4dst);
588 p += ETH_GSTRING_LEN; 683 p += ETH_GSTRING_LEN;
589 num_strings++; 684 num_strings++;
590 break; 685 break;
591 case IPV4_FLOW: 686 case IPV4_FLOW:
592 sprintf(p, "\tSrc IP addr: 0x%x\n", 687 sprintf(p, "\tSrc IP addr: 0x%x\n",
593 fsc->fs.h_u.usr_ip4_spec.ip4src); 688 fsc->fs.h_u.usr_ip4_spec.ip4src);
594 p += ETH_GSTRING_LEN; 689 p += ETH_GSTRING_LEN;
595 num_strings++; 690 num_strings++;
596 sprintf(p, "\tSrc IP mask: 0x%x\n", 691 sprintf(p, "\tSrc IP mask: 0x%x\n",
597 fsc->fs.m_u.usr_ip4_spec.ip4src); 692 fsc->fs.m_u.usr_ip4_spec.ip4src);
598 p += ETH_GSTRING_LEN; 693 p += ETH_GSTRING_LEN;
599 num_strings++; 694 num_strings++;
600 sprintf(p, "\tDest IP addr: 0x%x\n", 695 sprintf(p, "\tDest IP addr: 0x%x\n",
601 fsc->fs.h_u.usr_ip4_spec.ip4dst); 696 fsc->fs.h_u.usr_ip4_spec.ip4dst);
602 p += ETH_GSTRING_LEN; 697 p += ETH_GSTRING_LEN;
603 num_strings++; 698 num_strings++;
604 sprintf(p, "\tDest IP mask: 0x%x\n", 699 sprintf(p, "\tDest IP mask: 0x%x\n",
605 fsc->fs.m_u.usr_ip4_spec.ip4dst); 700 fsc->fs.m_u.usr_ip4_spec.ip4dst);
606 p += ETH_GSTRING_LEN; 701 p += ETH_GSTRING_LEN;
607 num_strings++; 702 num_strings++;
608 sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n", 703 sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
609 fsc->fs.h_u.usr_ip4_spec.l4_4_bytes, 704 fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
610 fsc->fs.m_u.usr_ip4_spec.l4_4_bytes); 705 fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
611 p += ETH_GSTRING_LEN; 706 p += ETH_GSTRING_LEN;
612 num_strings++; 707 num_strings++;
613 sprintf(p, "\tTOS: %d, mask: 0x%x\n", 708 sprintf(p, "\tTOS: %d, mask: 0x%x\n",
614 fsc->fs.h_u.usr_ip4_spec.tos, 709 fsc->fs.h_u.usr_ip4_spec.tos,
615 fsc->fs.m_u.usr_ip4_spec.tos); 710 fsc->fs.m_u.usr_ip4_spec.tos);
616 p += ETH_GSTRING_LEN; 711 p += ETH_GSTRING_LEN;
617 num_strings++; 712 num_strings++;
618 sprintf(p, "\tIP Version: %d, mask: 0x%x\n", 713 sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
619 fsc->fs.h_u.usr_ip4_spec.ip_ver, 714 fsc->fs.h_u.usr_ip4_spec.ip_ver,
620 fsc->fs.m_u.usr_ip4_spec.ip_ver); 715 fsc->fs.m_u.usr_ip4_spec.ip_ver);
621 p += ETH_GSTRING_LEN; 716 p += ETH_GSTRING_LEN;
622 num_strings++; 717 num_strings++;
623 sprintf(p, "\tProtocol: %d, mask: 0x%x\n", 718 sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
624 fsc->fs.h_u.usr_ip4_spec.proto, 719 fsc->fs.h_u.usr_ip4_spec.proto,
625 fsc->fs.m_u.usr_ip4_spec.proto); 720 fsc->fs.m_u.usr_ip4_spec.proto);
626 p += ETH_GSTRING_LEN; 721 p += ETH_GSTRING_LEN;
627 num_strings++; 722 num_strings++;
628 break; 723 break;
629 }; 724 }
630 sprintf(p, "\tVLAN: %d, mask: 0x%x\n", 725 sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
631 fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask); 726 fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
632 p += ETH_GSTRING_LEN; 727 p += ETH_GSTRING_LEN;
633 num_strings++; 728 num_strings++;
634 sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data); 729 sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
@@ -641,7 +736,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
641 sprintf(p, "\tAction: Drop\n"); 736 sprintf(p, "\tAction: Drop\n");
642 else 737 else
643 sprintf(p, "\tAction: Direct to queue %d\n", 738 sprintf(p, "\tAction: Direct to queue %d\n",
644 fsc->fs.action); 739 fsc->fs.action);
645 p += ETH_GSTRING_LEN; 740 p += ETH_GSTRING_LEN;
646 num_strings++; 741 num_strings++;
647unknown_filter: 742unknown_filter:
@@ -853,7 +948,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
853 return ret; 948 return ret;
854} 949}
855 950
856static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr) 951static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
952 void __user *useraddr)
857{ 953{
858 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE }; 954 struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
859 955
@@ -867,7 +963,8 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void
867 return 0; 963 return 0;
868} 964}
869 965
870static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) 966static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
967 void __user *useraddr)
871{ 968{
872 struct ethtool_coalesce coalesce; 969 struct ethtool_coalesce coalesce;
873 970
@@ -971,6 +1068,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
971 1068
972 return dev->ethtool_ops->set_tx_csum(dev, edata.data); 1069 return dev->ethtool_ops->set_tx_csum(dev, edata.data);
973} 1070}
1071EXPORT_SYMBOL(ethtool_op_set_tx_csum);
974 1072
975static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) 1073static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
976{ 1074{
@@ -1042,7 +1140,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
1042 1140
1043 edata.data = dev->features & NETIF_F_GSO; 1141 edata.data = dev->features & NETIF_F_GSO;
1044 if (copy_to_user(useraddr, &edata, sizeof(edata))) 1142 if (copy_to_user(useraddr, &edata, sizeof(edata)))
1045 return -EFAULT; 1143 return -EFAULT;
1046 return 0; 1144 return 0;
1047} 1145}
1048 1146
@@ -1065,7 +1163,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
1065 1163
1066 edata.data = dev->features & NETIF_F_GRO; 1164 edata.data = dev->features & NETIF_F_GRO;
1067 if (copy_to_user(useraddr, &edata, sizeof(edata))) 1165 if (copy_to_user(useraddr, &edata, sizeof(edata)))
1068 return -EFAULT; 1166 return -EFAULT;
1069 return 0; 1167 return 0;
1070} 1168}
1071 1169
@@ -1277,7 +1375,8 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
1277 return actor(dev, edata.data); 1375 return actor(dev, edata.data);
1278} 1376}
1279 1377
1280static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr) 1378static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
1379 char __user *useraddr)
1281{ 1380{
1282 struct ethtool_flash efl; 1381 struct ethtool_flash efl;
1283 1382
@@ -1306,11 +1405,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1306 if (!dev->ethtool_ops) 1405 if (!dev->ethtool_ops)
1307 return -EOPNOTSUPP; 1406 return -EOPNOTSUPP;
1308 1407
1309 if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd))) 1408 if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
1310 return -EFAULT; 1409 return -EFAULT;
1311 1410
1312 /* Allow some commands to be done by anyone */ 1411 /* Allow some commands to be done by anyone */
1313 switch(ethcmd) { 1412 switch (ethcmd) {
1314 case ETHTOOL_GDRVINFO: 1413 case ETHTOOL_GDRVINFO:
1315 case ETHTOOL_GMSGLVL: 1414 case ETHTOOL_GMSGLVL:
1316 case ETHTOOL_GCOALESCE: 1415 case ETHTOOL_GCOALESCE:
@@ -1338,10 +1437,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1338 return -EPERM; 1437 return -EPERM;
1339 } 1438 }
1340 1439
1341 if (dev->ethtool_ops->begin) 1440 if (dev->ethtool_ops->begin) {
1342 if ((rc = dev->ethtool_ops->begin(dev)) < 0) 1441 rc = dev->ethtool_ops->begin(dev);
1442 if (rc < 0)
1343 return rc; 1443 return rc;
1344 1444 }
1345 old_features = dev->features; 1445 old_features = dev->features;
1346 1446
1347 switch (ethcmd) { 1447 switch (ethcmd) {
@@ -1491,12 +1591,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1491 case ETHTOOL_GRXCLSRLCNT: 1591 case ETHTOOL_GRXCLSRLCNT:
1492 case ETHTOOL_GRXCLSRULE: 1592 case ETHTOOL_GRXCLSRULE:
1493 case ETHTOOL_GRXCLSRLALL: 1593 case ETHTOOL_GRXCLSRLALL:
1494 rc = ethtool_get_rxnfc(dev, useraddr); 1594 rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
1495 break; 1595 break;
1496 case ETHTOOL_SRXFH: 1596 case ETHTOOL_SRXFH:
1497 case ETHTOOL_SRXCLSRLDEL: 1597 case ETHTOOL_SRXCLSRLDEL:
1498 case ETHTOOL_SRXCLSRLINS: 1598 case ETHTOOL_SRXCLSRLINS:
1499 rc = ethtool_set_rxnfc(dev, useraddr); 1599 rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
1500 break; 1600 break;
1501 case ETHTOOL_GGRO: 1601 case ETHTOOL_GGRO:
1502 rc = ethtool_get_gro(dev, useraddr); 1602 rc = ethtool_get_gro(dev, useraddr);
@@ -1519,6 +1619,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1519 case ETHTOOL_GSSET_INFO: 1619 case ETHTOOL_GSSET_INFO:
1520 rc = ethtool_get_sset_info(dev, useraddr); 1620 rc = ethtool_get_sset_info(dev, useraddr);
1521 break; 1621 break;
1622 case ETHTOOL_GRXFHINDIR:
1623 rc = ethtool_get_rxfh_indir(dev, useraddr);
1624 break;
1625 case ETHTOOL_SRXFHINDIR:
1626 rc = ethtool_set_rxfh_indir(dev, useraddr);
1627 break;
1522 default: 1628 default:
1523 rc = -EOPNOTSUPP; 1629 rc = -EOPNOTSUPP;
1524 } 1630 }
@@ -1531,16 +1637,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1531 1637
1532 return rc; 1638 return rc;
1533} 1639}
1534
1535EXPORT_SYMBOL(ethtool_op_get_link);
1536EXPORT_SYMBOL(ethtool_op_get_sg);
1537EXPORT_SYMBOL(ethtool_op_get_tso);
1538EXPORT_SYMBOL(ethtool_op_set_sg);
1539EXPORT_SYMBOL(ethtool_op_set_tso);
1540EXPORT_SYMBOL(ethtool_op_set_tx_csum);
1541EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
1542EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
1543EXPORT_SYMBOL(ethtool_op_set_ufo);
1544EXPORT_SYMBOL(ethtool_op_get_ufo);
1545EXPORT_SYMBOL(ethtool_op_set_flags);
1546EXPORT_SYMBOL(ethtool_op_get_flags);
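
The two ETHTOOL_{G,S}RXFHINDIR commands wired into dev_ethtool() above are reached from userspace through the usual SIOCETHTOOL ioctl. A minimal caller sketch follows, assuming kernel headers that already carry struct ethtool_rxfh_indir from this patch; the fixed table size and the thin error handling are simplifications, not part of the kernel change:

/* Sketch: read back the RX flow hash indirection table via
 * ETHTOOL_GRXFHINDIR.  The table size used here is a guess for the
 * example; the get path above writes cmd and size back to userspace. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(int argc, char **argv)
{
	struct ethtool_rxfh_indir *indir;
	struct ifreq ifr;
	__u32 i, size = 128;	/* assumed; a real caller would probe first */
	int fd;

	if (argc < 2)
		return 1;
	fd = socket(AF_INET, SOCK_DGRAM, 0);
	indir = calloc(1, sizeof(*indir) + size * sizeof(indir->ring_index[0]));
	indir->cmd = ETHTOOL_GRXFHINDIR;
	indir->size = size;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);
	ifr.ifr_data = (void *)indir;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_GRXFHINDIR");
		return 1;
	}
	for (i = 0; i < indir->size && i < size; i++)
		printf("%u: ring %u\n", i, indir->ring_index[i]);
	return 0;
}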
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index d2c3e7dc2e5f..42e84e08a1be 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -39,6 +39,24 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 }
 EXPORT_SYMBOL(fib_default_rule_add);
 
+u32 fib_default_rule_pref(struct fib_rules_ops *ops)
+{
+	struct list_head *pos;
+	struct fib_rule *rule;
+
+	if (!list_empty(&ops->rules_list)) {
+		pos = ops->rules_list.next;
+		if (pos->next != &ops->rules_list) {
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(fib_default_rule_pref);
+
 static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
 			       u32 pid);
@@ -104,12 +122,12 @@ errout:
 }
 
 struct fib_rules_ops *
-fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
+fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
 {
 	struct fib_rules_ops *ops;
 	int err;
 
-	ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL);
+	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
 	if (ops == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -124,7 +142,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
 
 	return ops;
 }
-
 EXPORT_SYMBOL_GPL(fib_rules_register);
 
 void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
@@ -158,7 +175,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
 
 	call_rcu(&ops->rcu, fib_rules_put_rcu);
 }
-
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
 
 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
@@ -221,7 +237,6 @@ out:
 
 	return err;
 }
-
 EXPORT_SYMBOL_GPL(fib_rules_lookup);
 
 static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
@@ -520,6 +535,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 		return -EMSGSIZE;
 
 	frh = nlmsg_data(nlh);
+	frh->family = ops->family;
 	frh->table = rule->table;
 	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
 	frh->res1 = 0;
@@ -614,7 +630,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 			break;
 
 		cb->args[1] = 0;
-	skip:
+skip:
 		idx++;
 	}
 	rcu_read_unlock();
@@ -686,7 +702,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
 	struct fib_rules_ops *ops;
 
 	ASSERT_RTNL();
-	rcu_read_lock();
 
 	switch (event) {
 	case NETDEV_REGISTER:
@@ -700,8 +715,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
 		break;
 	}
 
-	rcu_read_unlock();
-
 	return NOTIFY_DONE;
 }
 
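
The new fib_default_rule_pref() helper picks a preference one below the rule that follows the first (default) rule, so a newly added rule slots in just ahead of the existing user rules. A hedged sketch of how an address family might consume it; the .default_pref hookup and the template fields are assumptions from context, not shown in the hunks above:

/* Illustrative only: an AF template pointing its default-preference
 * callback at the common helper introduced above. */
static const struct fib_rules_ops fib4_rules_ops_template = {
	.family		= AF_INET,
	.rule_size	= sizeof(struct fib_rule),	/* placeholder size */
	.default_pref	= fib_default_rule_pref,
	/* remaining callbacks elided */
};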
diff --git a/net/core/filter.c b/net/core/filter.c
index ff943bed21af..52b051f82a01 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 		fentry = &filter[pc];
 
 		switch (fentry->code) {
-		case BPF_ALU|BPF_ADD|BPF_X:
+		case BPF_S_ALU_ADD_X:
 			A += X;
 			continue;
-		case BPF_ALU|BPF_ADD|BPF_K:
+		case BPF_S_ALU_ADD_K:
 			A += fentry->k;
 			continue;
-		case BPF_ALU|BPF_SUB|BPF_X:
+		case BPF_S_ALU_SUB_X:
 			A -= X;
 			continue;
-		case BPF_ALU|BPF_SUB|BPF_K:
+		case BPF_S_ALU_SUB_K:
 			A -= fentry->k;
 			continue;
-		case BPF_ALU|BPF_MUL|BPF_X:
+		case BPF_S_ALU_MUL_X:
 			A *= X;
 			continue;
-		case BPF_ALU|BPF_MUL|BPF_K:
+		case BPF_S_ALU_MUL_K:
 			A *= fentry->k;
 			continue;
-		case BPF_ALU|BPF_DIV|BPF_X:
+		case BPF_S_ALU_DIV_X:
 			if (X == 0)
 				return 0;
 			A /= X;
 			continue;
-		case BPF_ALU|BPF_DIV|BPF_K:
+		case BPF_S_ALU_DIV_K:
 			A /= fentry->k;
 			continue;
-		case BPF_ALU|BPF_AND|BPF_X:
+		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
-		case BPF_ALU|BPF_AND|BPF_K:
+		case BPF_S_ALU_AND_K:
 			A &= fentry->k;
 			continue;
-		case BPF_ALU|BPF_OR|BPF_X:
+		case BPF_S_ALU_OR_X:
 			A |= X;
 			continue;
-		case BPF_ALU|BPF_OR|BPF_K:
+		case BPF_S_ALU_OR_K:
 			A |= fentry->k;
 			continue;
-		case BPF_ALU|BPF_LSH|BPF_X:
+		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
-		case BPF_ALU|BPF_LSH|BPF_K:
+		case BPF_S_ALU_LSH_K:
 			A <<= fentry->k;
 			continue;
-		case BPF_ALU|BPF_RSH|BPF_X:
+		case BPF_S_ALU_RSH_X:
 			A >>= X;
 			continue;
-		case BPF_ALU|BPF_RSH|BPF_K:
+		case BPF_S_ALU_RSH_K:
 			A >>= fentry->k;
 			continue;
-		case BPF_ALU|BPF_NEG:
+		case BPF_S_ALU_NEG:
 			A = -A;
 			continue;
-		case BPF_JMP|BPF_JA:
+		case BPF_S_JMP_JA:
 			pc += fentry->k;
 			continue;
-		case BPF_JMP|BPF_JGT|BPF_K:
+		case BPF_S_JMP_JGT_K:
 			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGE|BPF_K:
+		case BPF_S_JMP_JGE_K:
 			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JEQ|BPF_K:
+		case BPF_S_JMP_JEQ_K:
 			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JSET|BPF_K:
+		case BPF_S_JMP_JSET_K:
 			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGT|BPF_X:
+		case BPF_S_JMP_JGT_X:
 			pc += (A > X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGE|BPF_X:
+		case BPF_S_JMP_JGE_X:
 			pc += (A >= X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JEQ|BPF_X:
+		case BPF_S_JMP_JEQ_X:
 			pc += (A == X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JSET|BPF_X:
+		case BPF_S_JMP_JSET_X:
 			pc += (A & X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_LD|BPF_W|BPF_ABS:
+		case BPF_S_LD_W_ABS:
 			k = fentry->k;
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
@@ -217,7 +217,7 @@ load_w:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_H|BPF_ABS:
+		case BPF_S_LD_H_ABS:
 			k = fentry->k;
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
@@ -226,7 +226,7 @@ load_h:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_B|BPF_ABS:
+		case BPF_S_LD_B_ABS:
 			k = fentry->k;
 load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
@@ -235,54 +235,54 @@ load_b:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_W|BPF_LEN:
+		case BPF_S_LD_W_LEN:
 			A = skb->len;
 			continue;
-		case BPF_LDX|BPF_W|BPF_LEN:
+		case BPF_S_LDX_W_LEN:
 			X = skb->len;
 			continue;
-		case BPF_LD|BPF_W|BPF_IND:
+		case BPF_S_LD_W_IND:
 			k = X + fentry->k;
 			goto load_w;
-		case BPF_LD|BPF_H|BPF_IND:
+		case BPF_S_LD_H_IND:
 			k = X + fentry->k;
 			goto load_h;
-		case BPF_LD|BPF_B|BPF_IND:
+		case BPF_S_LD_B_IND:
 			k = X + fentry->k;
 			goto load_b;
-		case BPF_LDX|BPF_B|BPF_MSH:
+		case BPF_S_LDX_B_MSH:
 			ptr = load_pointer(skb, fentry->k, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
-		case BPF_LD|BPF_IMM:
+		case BPF_S_LD_IMM:
 			A = fentry->k;
 			continue;
-		case BPF_LDX|BPF_IMM:
+		case BPF_S_LDX_IMM:
 			X = fentry->k;
 			continue;
-		case BPF_LD|BPF_MEM:
+		case BPF_S_LD_MEM:
 			A = mem[fentry->k];
 			continue;
-		case BPF_LDX|BPF_MEM:
+		case BPF_S_LDX_MEM:
 			X = mem[fentry->k];
 			continue;
-		case BPF_MISC|BPF_TAX:
+		case BPF_S_MISC_TAX:
 			X = A;
 			continue;
-		case BPF_MISC|BPF_TXA:
+		case BPF_S_MISC_TXA:
 			A = X;
 			continue;
-		case BPF_RET|BPF_K:
+		case BPF_S_RET_K:
 			return fentry->k;
-		case BPF_RET|BPF_A:
+		case BPF_S_RET_A:
 			return A;
-		case BPF_ST:
+		case BPF_S_ST:
 			mem[fentry->k] = A;
 			continue;
-		case BPF_STX:
+		case BPF_S_STX:
 			mem[fentry->k] = X;
 			continue;
 		default:
@@ -302,6 +302,8 @@ load_b:
 			A = skb->pkt_type;
 			continue;
 		case SKF_AD_IFINDEX:
+			if (!skb->dev)
+				return 0;
 			A = skb->dev->ifindex;
 			continue;
 		case SKF_AD_MARK:
@@ -310,6 +312,11 @@ load_b:
 		case SKF_AD_QUEUE:
 			A = skb->queue_mapping;
 			continue;
+		case SKF_AD_HATYPE:
+			if (!skb->dev)
+				return 0;
+			A = skb->dev->type;
+			continue;
 		case SKF_AD_NLATTR: {
 			struct nlattr *nla;
 
@@ -383,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 		/* Only allow valid instructions */
 		switch (ftest->code) {
 		case BPF_ALU|BPF_ADD|BPF_K:
+			ftest->code = BPF_S_ALU_ADD_K;
+			break;
 		case BPF_ALU|BPF_ADD|BPF_X:
+			ftest->code = BPF_S_ALU_ADD_X;
+			break;
 		case BPF_ALU|BPF_SUB|BPF_K:
+			ftest->code = BPF_S_ALU_SUB_K;
+			break;
 		case BPF_ALU|BPF_SUB|BPF_X:
+			ftest->code = BPF_S_ALU_SUB_X;
+			break;
 		case BPF_ALU|BPF_MUL|BPF_K:
+			ftest->code = BPF_S_ALU_MUL_K;
+			break;
 		case BPF_ALU|BPF_MUL|BPF_X:
+			ftest->code = BPF_S_ALU_MUL_X;
+			break;
 		case BPF_ALU|BPF_DIV|BPF_X:
+			ftest->code = BPF_S_ALU_DIV_X;
+			break;
 		case BPF_ALU|BPF_AND|BPF_K:
+			ftest->code = BPF_S_ALU_AND_K;
+			break;
 		case BPF_ALU|BPF_AND|BPF_X:
+			ftest->code = BPF_S_ALU_AND_X;
+			break;
 		case BPF_ALU|BPF_OR|BPF_K:
+			ftest->code = BPF_S_ALU_OR_K;
+			break;
 		case BPF_ALU|BPF_OR|BPF_X:
+			ftest->code = BPF_S_ALU_OR_X;
+			break;
 		case BPF_ALU|BPF_LSH|BPF_K:
+			ftest->code = BPF_S_ALU_LSH_K;
+			break;
 		case BPF_ALU|BPF_LSH|BPF_X:
+			ftest->code = BPF_S_ALU_LSH_X;
+			break;
 		case BPF_ALU|BPF_RSH|BPF_K:
+			ftest->code = BPF_S_ALU_RSH_K;
+			break;
 		case BPF_ALU|BPF_RSH|BPF_X:
+			ftest->code = BPF_S_ALU_RSH_X;
+			break;
 		case BPF_ALU|BPF_NEG:
+			ftest->code = BPF_S_ALU_NEG;
+			break;
 		case BPF_LD|BPF_W|BPF_ABS:
+			ftest->code = BPF_S_LD_W_ABS;
+			break;
 		case BPF_LD|BPF_H|BPF_ABS:
+			ftest->code = BPF_S_LD_H_ABS;
+			break;
 		case BPF_LD|BPF_B|BPF_ABS:
+			ftest->code = BPF_S_LD_B_ABS;
+			break;
 		case BPF_LD|BPF_W|BPF_LEN:
+			ftest->code = BPF_S_LD_W_LEN;
+			break;
 		case BPF_LD|BPF_W|BPF_IND:
+			ftest->code = BPF_S_LD_W_IND;
+			break;
 		case BPF_LD|BPF_H|BPF_IND:
+			ftest->code = BPF_S_LD_H_IND;
+			break;
 		case BPF_LD|BPF_B|BPF_IND:
+			ftest->code = BPF_S_LD_B_IND;
+			break;
 		case BPF_LD|BPF_IMM:
+			ftest->code = BPF_S_LD_IMM;
+			break;
 		case BPF_LDX|BPF_W|BPF_LEN:
+			ftest->code = BPF_S_LDX_W_LEN;
+			break;
 		case BPF_LDX|BPF_B|BPF_MSH:
+			ftest->code = BPF_S_LDX_B_MSH;
+			break;
 		case BPF_LDX|BPF_IMM:
+			ftest->code = BPF_S_LDX_IMM;
+			break;
 		case BPF_MISC|BPF_TAX:
+			ftest->code = BPF_S_MISC_TAX;
+			break;
 		case BPF_MISC|BPF_TXA:
+			ftest->code = BPF_S_MISC_TXA;
+			break;
 		case BPF_RET|BPF_K:
+			ftest->code = BPF_S_RET_K;
+			break;
 		case BPF_RET|BPF_A:
+			ftest->code = BPF_S_RET_A;
 			break;
 
 		/* Some instructions need special checks */
 
-		case BPF_ALU|BPF_DIV|BPF_K:
 			/* check for division by zero */
+		case BPF_ALU|BPF_DIV|BPF_K:
 			if (ftest->k == 0)
 				return -EINVAL;
+			ftest->code = BPF_S_ALU_DIV_K;
 			break;
 
+		/* check for invalid memory addresses */
 		case BPF_LD|BPF_MEM:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_LD_MEM;
+			break;
 		case BPF_LDX|BPF_MEM:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_LDX_MEM;
+			break;
 		case BPF_ST:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_ST;
+			break;
 		case BPF_STX:
-			/* check for invalid memory addresses */
 			if (ftest->k >= BPF_MEMWORDS)
 				return -EINVAL;
+			ftest->code = BPF_S_STX;
 			break;
 
 		case BPF_JMP|BPF_JA:
@@ -440,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 			 */
 			if (ftest->k >= (unsigned)(flen-pc-1))
 				return -EINVAL;
+			ftest->code = BPF_S_JMP_JA;
 			break;
 
 		case BPF_JMP|BPF_JEQ|BPF_K:
+			ftest->code = BPF_S_JMP_JEQ_K;
+			break;
 		case BPF_JMP|BPF_JEQ|BPF_X:
+			ftest->code = BPF_S_JMP_JEQ_X;
+			break;
 		case BPF_JMP|BPF_JGE|BPF_K:
+			ftest->code = BPF_S_JMP_JGE_K;
+			break;
 		case BPF_JMP|BPF_JGE|BPF_X:
+			ftest->code = BPF_S_JMP_JGE_X;
+			break;
 		case BPF_JMP|BPF_JGT|BPF_K:
+			ftest->code = BPF_S_JMP_JGT_K;
+			break;
 		case BPF_JMP|BPF_JGT|BPF_X:
+			ftest->code = BPF_S_JMP_JGT_X;
+			break;
 		case BPF_JMP|BPF_JSET|BPF_K:
+			ftest->code = BPF_S_JMP_JSET_K;
+			break;
 		case BPF_JMP|BPF_JSET|BPF_X:
+			ftest->code = BPF_S_JMP_JSET_X;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
 		/* for conditionals both must be safe */
+		switch (ftest->code) {
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+		case BPF_S_JMP_JSET_X:
+		case BPF_S_JMP_JSET_K:
 			if (pc + ftest->jt + 1 >= flen ||
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
-			break;
+		}
+	}
 
+	/* last instruction must be a RET code */
+	switch (filter[flen - 1].code) {
+	case BPF_S_RET_K:
+	case BPF_S_RET_A:
+		return 0;
+		break;
 	default:
 		return -EINVAL;
 	}
-	}
-
-	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);
 
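The filter.c change above is a decode-once optimization: sk_chk_filter() validates each opcode and rewrites it into a dense BPF_S_* enum at attach time, so the hot loop in sk_run_filter() can switch on small consecutive values instead of sparse OR-ed constants. A toy, self-contained illustration of the same idea; the names and opcode subset here are illustrative, not kernel API:

/* Decode-once sketch: translate raw opcodes to a dense enum during
 * validation, then dispatch on the enum at run time. */
#include <stdio.h>

enum { OP_ADD_K, OP_RET_A };

struct insn { int code; int k; };

static int check(struct insn *prog, int len)
{
	for (int pc = 0; pc < len; pc++) {
		switch (prog[pc].code) {
		case 0x04: prog[pc].code = OP_ADD_K; break; /* BPF_ALU|BPF_ADD|BPF_K */
		case 0x16: prog[pc].code = OP_RET_A; break; /* BPF_RET|BPF_A */
		default: return -1;
		}
	}
	/* last instruction must be a RET, as in the patch above */
	return prog[len - 1].code == OP_RET_A ? 0 : -1;
}

static int run(const struct insn *prog)
{
	int A = 0;

	for (int pc = 0; ; pc++) {
		switch (prog[pc].code) {
		case OP_ADD_K: A += prog[pc].k; continue;
		case OP_RET_A: return A;
		}
	}
}

int main(void)
{
	struct insn prog[] = { { 0x04, 40 }, { 0x04, 2 }, { 0x16, 0 } };

	if (check(prog, 3) == 0)
		printf("%d\n", run(prog));	/* prints 42 */
	return 0;
}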
diff --git a/net/core/flow.c b/net/core/flow.c
index 96015871ecea..f67dcbfe54ef 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,113 +26,159 @@
 #include <linux/security.h>
 
 struct flow_cache_entry {
-	struct flow_cache_entry	*next;
-	u16			family;
-	u8			dir;
-	u32			genid;
-	struct flowi		key;
-	void			*object;
-	atomic_t		*object_ref;
+	union {
+		struct hlist_node	hlist;
+		struct list_head	gc_list;
+	} u;
+	u16				family;
+	u8				dir;
+	u32				genid;
+	struct flowi			key;
+	struct flow_cache_object	*object;
 };
 
-atomic_t flow_cache_genid = ATOMIC_INIT(0);
-
-static u32 flow_hash_shift;
-#define flow_hash_size	(1 << flow_hash_shift)
-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
-
-#define flow_table(cpu) (per_cpu(flow_tables, cpu))
-
-static struct kmem_cache *flow_cachep __read_mostly;
-
-static int flow_lwm, flow_hwm;
-
-struct flow_percpu_info {
-	int hash_rnd_recalc;
-	u32 hash_rnd;
-	int count;
+struct flow_cache_percpu {
+	struct hlist_head		*hash_table;
+	int				hash_count;
+	u32				hash_rnd;
+	int				hash_rnd_recalc;
+	struct tasklet_struct		flush_tasklet;
 };
-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
 
-#define flow_hash_rnd_recalc(cpu) \
-	(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
-#define flow_hash_rnd(cpu) \
-	(per_cpu(flow_hash_info, cpu).hash_rnd)
-#define flow_count(cpu) \
-	(per_cpu(flow_hash_info, cpu).count)
+struct flow_flush_info {
+	struct flow_cache		*cache;
+	atomic_t			cpuleft;
+	struct completion		completion;
+};
 
-static struct timer_list flow_hash_rnd_timer;
-
-#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)
+struct flow_cache {
+	u32				hash_shift;
+	unsigned long			order;
+	struct flow_cache_percpu	*percpu;
+	struct notifier_block		hotcpu_notifier;
+	int				low_watermark;
+	int				high_watermark;
+	struct timer_list		rnd_timer;
+};
 
-struct flow_flush_info {
-	atomic_t cpuleft;
-	struct completion completion;
-};
-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
+atomic_t flow_cache_genid = ATOMIC_INIT(0);
+EXPORT_SYMBOL(flow_cache_genid);
+static struct flow_cache flow_cache_global;
+static struct kmem_cache *flow_cachep;
 
-#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
+static DEFINE_SPINLOCK(flow_cache_gc_lock);
+static LIST_HEAD(flow_cache_gc_list);
+
+#define flow_cache_hash_size(cache)	(1 << (cache)->hash_shift)
+#define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
 
 static void flow_cache_new_hashrnd(unsigned long arg)
 {
+	struct flow_cache *fc = (void *) arg;
 	int i;
 
 	for_each_possible_cpu(i)
-		flow_hash_rnd_recalc(i) = 1;
+		per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
+
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
+}
 
-	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&flow_hash_rnd_timer);
+static int flow_entry_valid(struct flow_cache_entry *fle)
+{
+	if (atomic_read(&flow_cache_genid) != fle->genid)
+		return 0;
+	if (fle->object && !fle->object->ops->check(fle->object))
+		return 0;
+	return 1;
 }
 
-static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle)
 {
 	if (fle->object)
-		atomic_dec(fle->object_ref);
+		fle->object->ops->delete(fle->object);
 	kmem_cache_free(flow_cachep, fle);
-	flow_count(cpu)--;
 }
 
-static void __flow_cache_shrink(int cpu, int shrink_to)
+static void flow_cache_gc_task(struct work_struct *work)
 {
-	struct flow_cache_entry *fle, **flp;
-	int i;
+	struct list_head gc_list;
+	struct flow_cache_entry *fce, *n;
 
-	for (i = 0; i < flow_hash_size; i++) {
-		int k = 0;
+	INIT_LIST_HEAD(&gc_list);
+	spin_lock_bh(&flow_cache_gc_lock);
+	list_splice_tail_init(&flow_cache_gc_list, &gc_list);
+	spin_unlock_bh(&flow_cache_gc_lock);
 
-		flp = &flow_table(cpu)[i];
-		while ((fle = *flp) != NULL && k < shrink_to) {
-			k++;
-			flp = &fle->next;
-		}
-		while ((fle = *flp) != NULL) {
-			*flp = fle->next;
-			flow_entry_kill(cpu, fle);
-		}
+	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
+		flow_entry_kill(fce);
+}
+static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
+
+static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
+				     int deleted, struct list_head *gc_list)
+{
+	if (deleted) {
+		fcp->hash_count -= deleted;
+		spin_lock_bh(&flow_cache_gc_lock);
+		list_splice_tail(gc_list, &flow_cache_gc_list);
+		spin_unlock_bh(&flow_cache_gc_lock);
+		schedule_work(&flow_cache_gc_work);
 	}
 }
 
-static void flow_cache_shrink(int cpu)
+static void __flow_cache_shrink(struct flow_cache *fc,
+				struct flow_cache_percpu *fcp,
+				int shrink_to)
 {
-	int shrink_to = flow_lwm / flow_hash_size;
+	struct flow_cache_entry *fle;
+	struct hlist_node *entry, *tmp;
+	LIST_HEAD(gc_list);
+	int i, deleted = 0;
+
+	for (i = 0; i < flow_cache_hash_size(fc); i++) {
+		int saved = 0;
+
+		hlist_for_each_entry_safe(fle, entry, tmp,
+					  &fcp->hash_table[i], u.hlist) {
+			if (saved < shrink_to &&
+			    flow_entry_valid(fle)) {
+				saved++;
+			} else {
+				deleted++;
+				hlist_del(&fle->u.hlist);
+				list_add_tail(&fle->u.gc_list, &gc_list);
+			}
+		}
+	}
 
-	__flow_cache_shrink(cpu, shrink_to);
+	flow_cache_queue_garbage(fcp, deleted, &gc_list);
 }
 
-static void flow_new_hash_rnd(int cpu)
+static void flow_cache_shrink(struct flow_cache *fc,
+			      struct flow_cache_percpu *fcp)
 {
-	get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
-	flow_hash_rnd_recalc(cpu) = 0;
+	int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
 
-	__flow_cache_shrink(cpu, 0);
+	__flow_cache_shrink(fc, fcp, shrink_to);
 }
 
-static u32 flow_hash_code(struct flowi *key, int cpu)
+static void flow_new_hash_rnd(struct flow_cache *fc,
+			      struct flow_cache_percpu *fcp)
+{
+	get_random_bytes(&fcp->hash_rnd, sizeof(u32));
+	fcp->hash_rnd_recalc = 0;
+	__flow_cache_shrink(fc, fcp, 0);
+}
+
+static u32 flow_hash_code(struct flow_cache *fc,
+			  struct flow_cache_percpu *fcp,
+			  struct flowi *key)
 {
 	u32 *k = (u32 *) key;
 
-	return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
-		(flow_hash_size - 1));
+	return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+		& (flow_cache_hash_size(fc) - 1));
 }
 
 #if (BITS_PER_LONG == 64)
@@ -165,114 +211,118 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 	return 0;
 }
 
-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
-			flow_resolve_t resolver)
+struct flow_cache_object *
+flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+		  flow_resolve_t resolver, void *ctx)
 {
-	struct flow_cache_entry *fle, **head;
+	struct flow_cache *fc = &flow_cache_global;
+	struct flow_cache_percpu *fcp;
+	struct flow_cache_entry *fle, *tfle;
+	struct hlist_node *entry;
+	struct flow_cache_object *flo;
 	unsigned int hash;
-	int cpu;
 
 	local_bh_disable();
-	cpu = smp_processor_id();
+	fcp = this_cpu_ptr(fc->percpu);
 
 	fle = NULL;
+	flo = NULL;
 	/* Packet really early in init? Making flow_cache_init a
 	 * pre-smp initcall would solve this.  --RR */
-	if (!flow_table(cpu))
+	if (!fcp->hash_table)
 		goto nocache;
 
-	if (flow_hash_rnd_recalc(cpu))
-		flow_new_hash_rnd(cpu);
-	hash = flow_hash_code(key, cpu);
+	if (fcp->hash_rnd_recalc)
+		flow_new_hash_rnd(fc, fcp);
 
-	head = &flow_table(cpu)[hash];
-	for (fle = *head; fle; fle = fle->next) {
-		if (fle->family == family &&
-		    fle->dir == dir &&
-		    flow_key_compare(key, &fle->key) == 0) {
-			if (fle->genid == atomic_read(&flow_cache_genid)) {
-				void *ret = fle->object;
-
-				if (ret)
-					atomic_inc(fle->object_ref);
-				local_bh_enable();
-
-				return ret;
-			}
+	hash = flow_hash_code(fc, fcp, key);
+	hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
+		if (tfle->family == family &&
+		    tfle->dir == dir &&
+		    flow_key_compare(key, &tfle->key) == 0) {
+			fle = tfle;
 			break;
 		}
 	}
 
-	if (!fle) {
-		if (flow_count(cpu) > flow_hwm)
-			flow_cache_shrink(cpu);
+	if (unlikely(!fle)) {
+		if (fcp->hash_count > fc->high_watermark)
+			flow_cache_shrink(fc, fcp);
 
 		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
 		if (fle) {
-			fle->next = *head;
-			*head = fle;
 			fle->family = family;
 			fle->dir = dir;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
-			flow_count(cpu)++;
+			hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
+			fcp->hash_count++;
 		}
+	} else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+		flo = fle->object;
+		if (!flo)
+			goto ret_object;
+		flo = flo->ops->get(flo);
+		if (flo)
+			goto ret_object;
+	} else if (fle->object) {
+		flo = fle->object;
+		flo->ops->delete(flo);
+		fle->object = NULL;
 	}
 
 nocache:
-	{
-		int err;
-		void *obj;
-		atomic_t *obj_ref;
-
-		err = resolver(net, key, family, dir, &obj, &obj_ref);
-
-		if (fle && !err) {
-			fle->genid = atomic_read(&flow_cache_genid);
-
-			if (fle->object)
-				atomic_dec(fle->object_ref);
-
-			fle->object = obj;
-			fle->object_ref = obj_ref;
-			if (obj)
-				atomic_inc(fle->object_ref);
-		}
-		local_bh_enable();
-
-		if (err)
-			obj = ERR_PTR(err);
-		return obj;
+	flo = NULL;
+	if (fle) {
+		flo = fle->object;
+		fle->object = NULL;
 	}
+	flo = resolver(net, key, family, dir, flo, ctx);
+	if (fle) {
+		fle->genid = atomic_read(&flow_cache_genid);
+		if (!IS_ERR(flo))
+			fle->object = flo;
+		else
+			fle->genid--;
+	} else {
+		if (flo && !IS_ERR(flo))
+			flo->ops->delete(flo);
+	}
+ret_object:
+	local_bh_enable();
+	return flo;
 }
+EXPORT_SYMBOL(flow_cache_lookup);
 
 static void flow_cache_flush_tasklet(unsigned long data)
 {
 	struct flow_flush_info *info = (void *)data;
-	int i;
-	int cpu;
-
-	cpu = smp_processor_id();
-	for (i = 0; i < flow_hash_size; i++) {
-		struct flow_cache_entry *fle;
-
-		fle = flow_table(cpu)[i];
-		for (; fle; fle = fle->next) {
-			unsigned genid = atomic_read(&flow_cache_genid);
-
-			if (!fle->object || fle->genid == genid)
+	struct flow_cache *fc = info->cache;
+	struct flow_cache_percpu *fcp;
+	struct flow_cache_entry *fle;
+	struct hlist_node *entry, *tmp;
+	LIST_HEAD(gc_list);
+	int i, deleted = 0;
+
+	fcp = this_cpu_ptr(fc->percpu);
+	for (i = 0; i < flow_cache_hash_size(fc); i++) {
+		hlist_for_each_entry_safe(fle, entry, tmp,
+					  &fcp->hash_table[i], u.hlist) {
+			if (flow_entry_valid(fle))
 				continue;
 
-			fle->object = NULL;
-			atomic_dec(fle->object_ref);
+			deleted++;
+			hlist_del(&fle->u.hlist);
+			list_add_tail(&fle->u.gc_list, &gc_list);
 		}
 	}
 
+	flow_cache_queue_garbage(fcp, deleted, &gc_list);
+
 	if (atomic_dec_and_test(&info->cpuleft))
 		complete(&info->completion);
 }
 
-static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
 static void flow_cache_flush_per_cpu(void *data)
 {
 	struct flow_flush_info *info = data;
@@ -280,8 +330,7 @@ static void flow_cache_flush_per_cpu(void *data)
 	struct tasklet_struct *tasklet;
 
 	cpu = smp_processor_id();
-
-	tasklet = flow_flush_tasklet(cpu);
+	tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
 	tasklet->data = (unsigned long)info;
 	tasklet_schedule(tasklet);
 }
@@ -294,6 +343,7 @@ void flow_cache_flush(void)
 	/* Don't want cpus going down or up during this. */
 	get_online_cpus();
 	mutex_lock(&flow_flush_sem);
+	info.cache = &flow_cache_global;
 	atomic_set(&info.cpuleft, num_online_cpus());
 	init_completion(&info.completion);
 
@@ -307,62 +357,72 @@ void flow_cache_flush(void)
 	put_online_cpus();
 }
 
-static void __init flow_cache_cpu_prepare(int cpu)
+static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
+					  struct flow_cache_percpu *fcp)
 {
-	struct tasklet_struct *tasklet;
-	unsigned long order;
-
-	for (order = 0;
-	     (PAGE_SIZE << order) <
-		     (sizeof(struct flow_cache_entry *)*flow_hash_size);
-	     order++)
-		/* NOTHING */;
-
-	flow_table(cpu) = (struct flow_cache_entry **)
-		__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
-	if (!flow_table(cpu))
-		panic("NET: failed to allocate flow cache order %lu\n", order);
-
-	flow_hash_rnd_recalc(cpu) = 1;
-	flow_count(cpu) = 0;
-
-	tasklet = flow_flush_tasklet(cpu);
-	tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
+	fcp->hash_table = (struct hlist_head *)
+		__get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
+	if (!fcp->hash_table)
+		panic("NET: failed to allocate flow cache order %lu\n", fc->order);
+
+	fcp->hash_rnd_recalc = 1;
+	fcp->hash_count = 0;
+	tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
 }
 
 static int flow_cache_cpu(struct notifier_block *nfb,
 			  unsigned long action,
 			  void *hcpu)
 {
+	struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+	int cpu = (unsigned long) hcpu;
+	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
-		__flow_cache_shrink((unsigned long)hcpu, 0);
+		__flow_cache_shrink(fc, fcp, 0);
 	return NOTIFY_OK;
 }
 
-static int __init flow_cache_init(void)
+static int flow_cache_init(struct flow_cache *fc)
 {
+	unsigned long order;
 	int i;
 
-	flow_cachep = kmem_cache_create("flow_cache",
-					sizeof(struct flow_cache_entry),
-					0, SLAB_PANIC,
-					NULL);
-	flow_hash_shift = 10;
-	flow_lwm = 2 * flow_hash_size;
-	flow_hwm = 4 * flow_hash_size;
+	fc->hash_shift = 10;
+	fc->low_watermark = 2 * flow_cache_hash_size(fc);
+	fc->high_watermark = 4 * flow_cache_hash_size(fc);
 
-	setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
-	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&flow_hash_rnd_timer);
+	for (order = 0;
+	     (PAGE_SIZE << order) <
+		     (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
+	     order++)
+		/* NOTHING */;
+	fc->order = order;
+	fc->percpu = alloc_percpu(struct flow_cache_percpu);
+
+	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+		    (unsigned long) fc);
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
 
 	for_each_possible_cpu(i)
-		flow_cache_cpu_prepare(i);
+		flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
+
+	fc->hotcpu_notifier = (struct notifier_block){
+		.notifier_call = flow_cache_cpu,
+	};
+	register_hotcpu_notifier(&fc->hotcpu_notifier);
 
-	hotcpu_notifier(flow_cache_cpu, 0);
 	return 0;
 }
 
-module_init(flow_cache_init);
+static int __init flow_cache_init_global(void)
+{
+	flow_cachep = kmem_cache_create("flow_cache",
+					sizeof(struct flow_cache_entry),
+					0, SLAB_PANIC, NULL);
 
-EXPORT_SYMBOL(flow_cache_genid);
-EXPORT_SYMBOL(flow_cache_lookup);
+	return flow_cache_init(&flow_cache_global);
+}
+
+module_init(flow_cache_init_global);
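
The flow.c rewrite replaces the raw object pointer plus external refcount with a small vtable: cached objects embed an ops pointer, and the resolver now receives the previous object and a context cookie. Restated as a standalone sketch; the member names follow the code in the hunks above, while the exact header layout (include/net/flow.h in the same series) is an assumption here:

/* Sketch of the object contract implied by the new flow cache:
 * get() takes a reference (or returns NULL to force re-resolution),
 * check() validates a cached object, delete() drops it. */
struct flow_cache_object;

struct flow_cache_ops {
	struct flow_cache_object *(*get)(struct flow_cache_object *flo);
	int (*check)(struct flow_cache_object *flo);
	void (*delete)(struct flow_cache_object *flo);
};

struct flow_cache_object {
	const struct flow_cache_ops *ops;
};

This is what lets flow_cache_lookup() above defer freeing to the owner (via ops->delete) and queue stale entries for the new workqueue-based garbage collector instead of dropping refcounts inline.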
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index cf8e70392fe0..9fbe7f7429b0 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -107,6 +107,7 @@ static DEFINE_RWLOCK(est_lock);
 
 /* Protects against soft lockup during large deletion */
 static struct rb_root est_root = RB_ROOT;
+static DEFINE_SPINLOCK(est_tree_lock);
 
 static void est_timer(unsigned long arg)
 {
@@ -201,7 +202,6 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
  *
  * Returns 0 on success or a negative error code.
  *
- * NOTE: Called under rtnl_mutex
  */
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_rate_est *rate_est,
@@ -232,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 	est->last_packets = bstats->packets;
 	est->avpps = rate_est->pps<<10;
 
+	spin_lock(&est_tree_lock);
 	if (!elist[idx].timer.function) {
 		INIT_LIST_HEAD(&elist[idx].list);
 		setup_timer(&elist[idx].timer, est_timer, idx);
@@ -242,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 
 	list_add_rcu(&est->list, &elist[idx].list);
 	gen_add_node(est);
+	spin_unlock(&est_tree_lock);
 
 	return 0;
 }
@@ -261,13 +263,14 @@ static void __gen_kill_estimator(struct rcu_head *head)
  *
 * Removes the rate estimator specified by &bstats and &rate_est.
  *
- * NOTE: Called under rtnl_mutex
+ * Note : Caller should respect an RCU grace period before freeing stats_lock
  */
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est *rate_est)
 {
 	struct gen_estimator *e;
 
+	spin_lock(&est_tree_lock);
 	while ((e = gen_find_node(bstats, rate_est))) {
 		rb_erase(&e->node, &est_root);
 
@@ -278,6 +281,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 		list_del_rcu(&e->list);
 		call_rcu(&e->e_rcu, __gen_kill_estimator);
 	}
+	spin_unlock(&est_tree_lock);
 }
 EXPORT_SYMBOL(gen_kill_estimator);
 
@@ -312,8 +316,14 @@ EXPORT_SYMBOL(gen_replace_estimator);
 bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
 			  const struct gnet_stats_rate_est *rate_est)
 {
+	bool res;
+
 	ASSERT_RTNL();
 
-	return gen_find_node(bstats, rate_est) != NULL;
+	spin_lock(&est_tree_lock);
+	res = gen_find_node(bstats, rate_est) != NULL;
+	spin_unlock(&est_tree_lock);
+
+	return res;
 }
 EXPORT_SYMBOL(gen_estimator_active);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 393b1d8618e2..0452eb27a272 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -73,6 +73,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_start_copy_compat);
 
 /**
  * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
@@ -93,6 +94,7 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
 {
 	return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
 }
+EXPORT_SYMBOL(gnet_stats_start_copy);
 
 /**
  * gnet_stats_copy_basic - copy basic statistics into statistic TLV
@@ -123,6 +125,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_basic);
 
 /**
  * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
@@ -154,6 +157,7 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_rate_est);
 
 /**
  * gnet_stats_copy_queue - copy queue statistics into statistics TLV
@@ -181,6 +185,7 @@ gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_queue);
 
 /**
  * gnet_stats_copy_app - copy application specific statistics into statistics TLV
@@ -208,6 +213,7 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_app);
 
 /**
  * gnet_stats_finish_copy - finish dumping procedure
@@ -241,12 +247,4 @@ gnet_stats_finish_copy(struct gnet_dump *d)
 	spin_unlock_bh(d->lock);
 	return 0;
 }
-
-
-EXPORT_SYMBOL(gnet_stats_start_copy);
-EXPORT_SYMBOL(gnet_stats_start_copy_compat);
-EXPORT_SYMBOL(gnet_stats_copy_basic);
-EXPORT_SYMBOL(gnet_stats_copy_rate_est);
-EXPORT_SYMBOL(gnet_stats_copy_queue);
-EXPORT_SYMBOL(gnet_stats_copy_app);
 EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 1e7f4e91a935..1cd98df412df 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -95,6 +95,7 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_toiovec);
 
 /*
  * Copy kernel to iovec. Returns -EFAULT on error.
@@ -120,6 +121,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_toiovecend);
 
 /*
  * Copy iovec to kernel. Returns -EFAULT on error.
@@ -144,6 +146,7 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_fromiovec);
 
 /*
  * Copy iovec from kernel. Returns -EFAULT on error.
@@ -172,6 +175,7 @@ int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_fromiovecend);
 
 /*
  * And now for the all-in-one: copy and checksum from a user iovec
@@ -256,9 +260,4 @@ out_fault:
 	err = -EFAULT;
 	goto out;
 }
-
 EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
-EXPORT_SYMBOL(memcpy_fromiovec);
-EXPORT_SYMBOL(memcpy_fromiovecend);
-EXPORT_SYMBOL(memcpy_toiovec);
-EXPORT_SYMBOL(memcpy_toiovecend);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index bdbce2f5875b..01a1101b5936 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -243,5 +243,4 @@ void linkwatch_fire_event(struct net_device *dev)
243 243
244 linkwatch_schedule_work(urgent); 244 linkwatch_schedule_work(urgent);
245} 245}
246
247EXPORT_SYMBOL(linkwatch_fire_event); 246EXPORT_SYMBOL(linkwatch_fire_event);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bff37908bd55..a4e0a7482c2b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -934,6 +934,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
934 kfree_skb(buff); 934 kfree_skb(buff);
935 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); 935 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
936 } 936 }
937 skb_dst_force(skb);
937 __skb_queue_tail(&neigh->arp_queue, skb); 938 __skb_queue_tail(&neigh->arp_queue, skb);
938 } 939 }
939 rc = 1; 940 rc = 1;
@@ -948,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh)
948{ 949{
949 struct hh_cache *hh; 950 struct hh_cache *hh;
950 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) 951 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
951 = neigh->dev->header_ops->cache_update; 952 = NULL;
953
954 if (neigh->dev->header_ops)
955 update = neigh->dev->header_ops->cache_update;
952 956
953 if (update) { 957 if (update) {
954 for (hh = neigh->hh; hh; hh = hh->hh_next) { 958 for (hh = neigh->hh; hh; hh = hh->hh_next) {
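[Note: the neigh_update_hhs() change guards the cache_update dereference because not every net_device provides header_ops (pure layer-3 devices leave it NULL). A minimal sketch of the pattern; the call site shown is hypothetical, only the NULL check is taken from the hunk:

	void (*update)(struct hh_cache *, const struct net_device *,
		       const unsigned char *) = NULL;

	if (dev->header_ops)		/* may be NULL for L3-only devices */
		update = dev->header_ops->cache_update;
	if (update)
		update(hh, dev, dev->dev_addr);	/* hypothetical call site */
]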
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 59cfc7d8fc45..af4dfbadf2a0 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -14,9 +14,12 @@
14#include <linux/netdevice.h> 14#include <linux/netdevice.h>
15#include <linux/if_arp.h> 15#include <linux/if_arp.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/nsproxy.h>
17#include <net/sock.h> 18#include <net/sock.h>
19#include <net/net_namespace.h>
18#include <linux/rtnetlink.h> 20#include <linux/rtnetlink.h>
19#include <linux/wireless.h> 21#include <linux/wireless.h>
22#include <linux/vmalloc.h>
20#include <net/wext.h> 23#include <net/wext.h>
21 24
22#include "net-sysfs.h" 25#include "net-sysfs.h"
@@ -26,6 +29,7 @@ static const char fmt_hex[] = "%#x\n";
26static const char fmt_long_hex[] = "%#lx\n"; 29static const char fmt_long_hex[] = "%#lx\n";
27static const char fmt_dec[] = "%d\n"; 30static const char fmt_dec[] = "%d\n";
28static const char fmt_ulong[] = "%lu\n"; 31static const char fmt_ulong[] = "%lu\n";
32static const char fmt_u64[] = "%llu\n";
29 33
30static inline int dev_isalive(const struct net_device *dev) 34static inline int dev_isalive(const struct net_device *dev)
31{ 35{
@@ -91,6 +95,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
91} 95}
92 96
93NETDEVICE_SHOW(dev_id, fmt_hex); 97NETDEVICE_SHOW(dev_id, fmt_hex);
98NETDEVICE_SHOW(addr_assign_type, fmt_dec);
94NETDEVICE_SHOW(addr_len, fmt_dec); 99NETDEVICE_SHOW(addr_len, fmt_dec);
95NETDEVICE_SHOW(iflink, fmt_dec); 100NETDEVICE_SHOW(iflink, fmt_dec);
96NETDEVICE_SHOW(ifindex, fmt_dec); 101NETDEVICE_SHOW(ifindex, fmt_dec);
@@ -291,6 +296,7 @@ static ssize_t show_ifalias(struct device *dev,
291} 296}
292 297
293static struct device_attribute net_class_attributes[] = { 298static struct device_attribute net_class_attributes[] = {
299 __ATTR(addr_assign_type, S_IRUGO, show_addr_assign_type, NULL),
294 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), 300 __ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
295 __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), 301 __ATTR(dev_id, S_IRUGO, show_dev_id, NULL),
296 __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), 302 __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias),
@@ -321,14 +327,15 @@ static ssize_t netstat_show(const struct device *d,
321 struct net_device *dev = to_net_dev(d); 327 struct net_device *dev = to_net_dev(d);
322 ssize_t ret = -EINVAL; 328 ssize_t ret = -EINVAL;
323 329
324 WARN_ON(offset > sizeof(struct net_device_stats) || 330 WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
325 offset % sizeof(unsigned long) != 0); 331 offset % sizeof(u64) != 0);
326 332
327 read_lock(&dev_base_lock); 333 read_lock(&dev_base_lock);
328 if (dev_isalive(dev)) { 334 if (dev_isalive(dev)) {
329 const struct net_device_stats *stats = dev_get_stats(dev); 335 struct rtnl_link_stats64 temp;
330 ret = sprintf(buf, fmt_ulong, 336 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
331 *(unsigned long *)(((u8 *) stats) + offset)); 337
338 ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
332 } 339 }
333 read_unlock(&dev_base_lock); 340 read_unlock(&dev_base_lock);
334 return ret; 341 return ret;
@@ -340,7 +347,7 @@ static ssize_t show_##name(struct device *d, \
340 struct device_attribute *attr, char *buf) \ 347 struct device_attribute *attr, char *buf) \
341{ \ 348{ \
342 return netstat_show(d, attr, buf, \ 349 return netstat_show(d, attr, buf, \
343 offsetof(struct net_device_stats, name)); \ 350 offsetof(struct rtnl_link_stats64, name)); \
344} \ 351} \
345static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) 352static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
346 353
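[Note: netstat_show() now reads each counter as a u64 out of struct rtnl_link_stats64, and dev_get_stats() takes a caller-supplied scratch buffer so drivers that only keep 32-bit net_device_stats can be widened into it. A minimal sketch of the accessor, using rx_packets as an example field:

	struct rtnl_link_stats64 temp;
	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
	u64 val = *(u64 *)((u8 *)stats +
			   offsetof(struct rtnl_link_stats64, rx_packets));
]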
@@ -466,18 +473,345 @@ static struct attribute_group wireless_group = {
466 .attrs = wireless_attrs, 473 .attrs = wireless_attrs,
467}; 474};
468#endif 475#endif
469
470#endif /* CONFIG_SYSFS */ 476#endif /* CONFIG_SYSFS */
471 477
478#ifdef CONFIG_RPS
479/*
480 * RX queue sysfs structures and functions.
481 */
482struct rx_queue_attribute {
483 struct attribute attr;
484 ssize_t (*show)(struct netdev_rx_queue *queue,
485 struct rx_queue_attribute *attr, char *buf);
486 ssize_t (*store)(struct netdev_rx_queue *queue,
487 struct rx_queue_attribute *attr, const char *buf, size_t len);
488};
489#define to_rx_queue_attr(_attr) container_of(_attr, \
490 struct rx_queue_attribute, attr)
491
492#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
493
494static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
495 char *buf)
496{
497 struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
498 struct netdev_rx_queue *queue = to_rx_queue(kobj);
499
500 if (!attribute->show)
501 return -EIO;
502
503 return attribute->show(queue, attribute, buf);
504}
505
506static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
507 const char *buf, size_t count)
508{
509 struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
510 struct netdev_rx_queue *queue = to_rx_queue(kobj);
511
512 if (!attribute->store)
513 return -EIO;
514
515 return attribute->store(queue, attribute, buf, count);
516}
517
518static struct sysfs_ops rx_queue_sysfs_ops = {
519 .show = rx_queue_attr_show,
520 .store = rx_queue_attr_store,
521};
522
523static ssize_t show_rps_map(struct netdev_rx_queue *queue,
524 struct rx_queue_attribute *attribute, char *buf)
525{
526 struct rps_map *map;
527 cpumask_var_t mask;
528 size_t len = 0;
529 int i;
530
531 if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
532 return -ENOMEM;
533
534 rcu_read_lock();
535 map = rcu_dereference(queue->rps_map);
536 if (map)
537 for (i = 0; i < map->len; i++)
538 cpumask_set_cpu(map->cpus[i], mask);
539
540 len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
541 if (PAGE_SIZE - len < 3) {
542 rcu_read_unlock();
543 free_cpumask_var(mask);
544 return -EINVAL;
545 }
546 rcu_read_unlock();
547
548 free_cpumask_var(mask);
549 len += sprintf(buf + len, "\n");
550 return len;
551}
552
553static void rps_map_release(struct rcu_head *rcu)
554{
555 struct rps_map *map = container_of(rcu, struct rps_map, rcu);
556
557 kfree(map);
558}
559
560static ssize_t store_rps_map(struct netdev_rx_queue *queue,
561 struct rx_queue_attribute *attribute,
562 const char *buf, size_t len)
563{
564 struct rps_map *old_map, *map;
565 cpumask_var_t mask;
566 int err, cpu, i;
567 static DEFINE_SPINLOCK(rps_map_lock);
568
569 if (!capable(CAP_NET_ADMIN))
570 return -EPERM;
571
572 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
573 return -ENOMEM;
574
575 err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
576 if (err) {
577 free_cpumask_var(mask);
578 return err;
579 }
580
581 map = kzalloc(max_t(unsigned,
582 RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
583 GFP_KERNEL);
584 if (!map) {
585 free_cpumask_var(mask);
586 return -ENOMEM;
587 }
588
589 i = 0;
590 for_each_cpu_and(cpu, mask, cpu_online_mask)
591 map->cpus[i++] = cpu;
592
593 if (i)
594 map->len = i;
595 else {
596 kfree(map);
597 map = NULL;
598 }
599
600 spin_lock(&rps_map_lock);
601 old_map = queue->rps_map;
602 rcu_assign_pointer(queue->rps_map, map);
603 spin_unlock(&rps_map_lock);
604
605 if (old_map)
606 call_rcu(&old_map->rcu, rps_map_release);
607
608 free_cpumask_var(mask);
609 return len;
610}
611
612static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
613 struct rx_queue_attribute *attr,
614 char *buf)
615{
616 struct rps_dev_flow_table *flow_table;
617 unsigned int val = 0;
618
619 rcu_read_lock();
620 flow_table = rcu_dereference(queue->rps_flow_table);
621 if (flow_table)
622 val = flow_table->mask + 1;
623 rcu_read_unlock();
624
625 return sprintf(buf, "%u\n", val);
626}
627
628static void rps_dev_flow_table_release_work(struct work_struct *work)
629{
630 struct rps_dev_flow_table *table = container_of(work,
631 struct rps_dev_flow_table, free_work);
632
633 vfree(table);
634}
635
636static void rps_dev_flow_table_release(struct rcu_head *rcu)
637{
638 struct rps_dev_flow_table *table = container_of(rcu,
639 struct rps_dev_flow_table, rcu);
640
641 INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
642 schedule_work(&table->free_work);
643}
644
645static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
646 struct rx_queue_attribute *attr,
647 const char *buf, size_t len)
648{
649 unsigned int count;
650 char *endp;
651 struct rps_dev_flow_table *table, *old_table;
652 static DEFINE_SPINLOCK(rps_dev_flow_lock);
653
654 if (!capable(CAP_NET_ADMIN))
655 return -EPERM;
656
657 count = simple_strtoul(buf, &endp, 0);
658 if (endp == buf)
659 return -EINVAL;
660
661 if (count) {
662 int i;
663
664 if (count > 1<<30) {
665 /* Enforce a limit to prevent overflow */
666 return -EINVAL;
667 }
668 count = roundup_pow_of_two(count);
669 table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
670 if (!table)
671 return -ENOMEM;
672
673 table->mask = count - 1;
674 for (i = 0; i < count; i++)
675 table->flows[i].cpu = RPS_NO_CPU;
676 } else
677 table = NULL;
678
679 spin_lock(&rps_dev_flow_lock);
680 old_table = queue->rps_flow_table;
681 rcu_assign_pointer(queue->rps_flow_table, table);
682 spin_unlock(&rps_dev_flow_lock);
683
684 if (old_table)
685 call_rcu(&old_table->rcu, rps_dev_flow_table_release);
686
687 return len;
688}
689
690static struct rx_queue_attribute rps_cpus_attribute =
691 __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
692
693
694static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
695 __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
696 show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
697
698static struct attribute *rx_queue_default_attrs[] = {
699 &rps_cpus_attribute.attr,
700 &rps_dev_flow_table_cnt_attribute.attr,
701 NULL
702};
703
704static void rx_queue_release(struct kobject *kobj)
705{
706 struct netdev_rx_queue *queue = to_rx_queue(kobj);
707 struct netdev_rx_queue *first = queue->first;
708
709 if (queue->rps_map)
710 call_rcu(&queue->rps_map->rcu, rps_map_release);
711
712 if (queue->rps_flow_table)
713 call_rcu(&queue->rps_flow_table->rcu,
714 rps_dev_flow_table_release);
715
716 if (atomic_dec_and_test(&first->count))
717 kfree(first);
718}
719
720static struct kobj_type rx_queue_ktype = {
721 .sysfs_ops = &rx_queue_sysfs_ops,
722 .release = rx_queue_release,
723 .default_attrs = rx_queue_default_attrs,
724};
725
726static int rx_queue_add_kobject(struct net_device *net, int index)
727{
728 struct netdev_rx_queue *queue = net->_rx + index;
729 struct kobject *kobj = &queue->kobj;
730 int error = 0;
731
732 kobj->kset = net->queues_kset;
733 error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
734 "rx-%u", index);
735 if (error) {
736 kobject_put(kobj);
737 return error;
738 }
739
740 kobject_uevent(kobj, KOBJ_ADD);
741
742 return error;
743}
744
745static int rx_queue_register_kobjects(struct net_device *net)
746{
747 int i;
748 int error = 0;
749
750 net->queues_kset = kset_create_and_add("queues",
751 NULL, &net->dev.kobj);
752 if (!net->queues_kset)
753 return -ENOMEM;
754 for (i = 0; i < net->num_rx_queues; i++) {
755 error = rx_queue_add_kobject(net, i);
756 if (error)
757 break;
758 }
759
760 if (error)
761 while (--i >= 0)
762 kobject_put(&net->_rx[i].kobj);
763
764 return error;
765}
766
767static void rx_queue_remove_kobjects(struct net_device *net)
768{
769 int i;
770
771 for (i = 0; i < net->num_rx_queues; i++)
772 kobject_put(&net->_rx[i].kobj);
773 kset_unregister(net->queues_kset);
774}
775#endif /* CONFIG_RPS */
776
777static const void *net_current_ns(void)
778{
779 return current->nsproxy->net_ns;
780}
781
782static const void *net_initial_ns(void)
783{
784 return &init_net;
785}
786
787static const void *net_netlink_ns(struct sock *sk)
788{
789 return sock_net(sk);
790}
791
792static struct kobj_ns_type_operations net_ns_type_operations = {
793 .type = KOBJ_NS_TYPE_NET,
794 .current_ns = net_current_ns,
795 .netlink_ns = net_netlink_ns,
796 .initial_ns = net_initial_ns,
797};
798
799static void net_kobj_ns_exit(struct net *net)
800{
801 kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
802}
803
804static struct pernet_operations kobj_net_ops = {
805 .exit = net_kobj_ns_exit,
806};
807
808
472#ifdef CONFIG_HOTPLUG 809#ifdef CONFIG_HOTPLUG
473static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) 810static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
474{ 811{
475 struct net_device *dev = to_net_dev(d); 812 struct net_device *dev = to_net_dev(d);
476 int retval; 813 int retval;
477 814
478 if (!net_eq(dev_net(dev), &init_net))
479 return 0;
480
481 /* pass interface to uevent. */ 815 /* pass interface to uevent. */
482 retval = add_uevent_var(env, "INTERFACE=%s", dev->name); 816 retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
483 if (retval) 817 if (retval)
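[Note: store_rps_map() in the hunk above shows the update discipline used for all the new per-queue RPS state: build the replacement map off to the side, publish it with rcu_assign_pointer() under a small spinlock that only serializes writers, and retire the old copy with call_rcu() so lockless readers in the packet path never see a half-updated map. A minimal sketch of that publish/retire sequence, names taken from the hunk:

	spin_lock(&rps_map_lock);		/* serializes writers only */
	old_map = queue->rps_map;
	rcu_assign_pointer(queue->rps_map, map);
	spin_unlock(&rps_map_lock);

	if (old_map)
		call_rcu(&old_map->rcu, rps_map_release);	/* freed after a grace period */
]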
@@ -507,6 +841,13 @@ static void netdev_release(struct device *d)
507 kfree((char *)dev - dev->padded); 841 kfree((char *)dev - dev->padded);
508} 842}
509 843
844static const void *net_namespace(struct device *d)
845{
846 struct net_device *dev;
847 dev = container_of(d, struct net_device, dev);
848 return dev_net(dev);
849}
850
510static struct class net_class = { 851static struct class net_class = {
511 .name = "net", 852 .name = "net",
512 .dev_release = netdev_release, 853 .dev_release = netdev_release,
@@ -516,6 +857,8 @@ static struct class net_class = {
516#ifdef CONFIG_HOTPLUG 857#ifdef CONFIG_HOTPLUG
517 .dev_uevent = netdev_uevent, 858 .dev_uevent = netdev_uevent,
518#endif 859#endif
860 .ns_type = &net_ns_type_operations,
861 .namespace = net_namespace,
519}; 862};
520 863
521/* Delete sysfs entries but hold kobject reference until after all 864/* Delete sysfs entries but hold kobject reference until after all
@@ -527,8 +870,9 @@ void netdev_unregister_kobject(struct net_device * net)
527 870
528 kobject_get(&dev->kobj); 871 kobject_get(&dev->kobj);
529 872
530 if (!net_eq(dev_net(net), &init_net)) 873#ifdef CONFIG_RPS
531 return; 874 rx_queue_remove_kobjects(net);
875#endif
532 876
533 device_del(dev); 877 device_del(dev);
534} 878}
@@ -538,7 +882,9 @@ int netdev_register_kobject(struct net_device *net)
538{ 882{
539 struct device *dev = &(net->dev); 883 struct device *dev = &(net->dev);
540 const struct attribute_group **groups = net->sysfs_groups; 884 const struct attribute_group **groups = net->sysfs_groups;
885 int error = 0;
541 886
887 device_initialize(dev);
542 dev->class = &net_class; 888 dev->class = &net_class;
543 dev->platform_data = net; 889 dev->platform_data = net;
544 dev->groups = groups; 890 dev->groups = groups;
@@ -561,32 +907,36 @@ int netdev_register_kobject(struct net_device *net)
561#endif 907#endif
562#endif /* CONFIG_SYSFS */ 908#endif /* CONFIG_SYSFS */
563 909
564 if (!net_eq(dev_net(net), &init_net)) 910 error = device_add(dev);
565 return 0; 911 if (error)
912 return error;
913
914#ifdef CONFIG_RPS
915 error = rx_queue_register_kobjects(net);
916 if (error) {
917 device_del(dev);
918 return error;
919 }
920#endif
566 921
567 return device_add(dev); 922 return error;
568} 923}
569 924
570int netdev_class_create_file(struct class_attribute *class_attr) 925int netdev_class_create_file(struct class_attribute *class_attr)
571{ 926{
572 return class_create_file(&net_class, class_attr); 927 return class_create_file(&net_class, class_attr);
573} 928}
929EXPORT_SYMBOL(netdev_class_create_file);
574 930
575void netdev_class_remove_file(struct class_attribute *class_attr) 931void netdev_class_remove_file(struct class_attribute *class_attr)
576{ 932{
577 class_remove_file(&net_class, class_attr); 933 class_remove_file(&net_class, class_attr);
578} 934}
579
580EXPORT_SYMBOL(netdev_class_create_file);
581EXPORT_SYMBOL(netdev_class_remove_file); 935EXPORT_SYMBOL(netdev_class_remove_file);
582 936
583void netdev_initialize_kobject(struct net_device *net)
584{
585 struct device *device = &(net->dev);
586 device_initialize(device);
587}
588
589int netdev_kobject_init(void) 937int netdev_kobject_init(void)
590{ 938{
939 kobj_ns_type_register(&net_ns_type_operations);
940 register_pernet_subsys(&kobj_net_ops);
591 return class_register(&net_class); 941 return class_register(&net_class);
592} 942}
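[Note: netdev_register_kobject() now calls device_initialize() itself (the separate netdev_initialize_kobject() helper is removed) and unwinds in reverse order on failure, so a late rx-queue registration error cannot leave a half-registered device behind. A minimal sketch of the error unwinding, as in the hunk:

	error = device_add(dev);
	if (error)
		return error;

	error = rx_queue_register_kobjects(net);
	if (error) {
		device_del(dev);	/* undo device_add() before reporting */
		return error;
	}
]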
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 14e7524260b3..805555e8b187 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,5 +4,4 @@
4int netdev_kobject_init(void); 4int netdev_kobject_init(void);
5int netdev_register_kobject(struct net_device *); 5int netdev_register_kobject(struct net_device *);
6void netdev_unregister_kobject(struct net_device *); 6void netdev_unregister_kobject(struct net_device *);
7void netdev_initialize_kobject(struct net_device *);
8#endif 7#endif
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bd8c4712ea24..c988e685433a 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,6 +27,51 @@ EXPORT_SYMBOL(init_net);
27 27
28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 28#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
29 29
30static void net_generic_release(struct rcu_head *rcu)
31{
32 struct net_generic *ng;
33
34 ng = container_of(rcu, struct net_generic, rcu);
35 kfree(ng);
36}
37
38static int net_assign_generic(struct net *net, int id, void *data)
39{
40 struct net_generic *ng, *old_ng;
41
42 BUG_ON(!mutex_is_locked(&net_mutex));
43 BUG_ON(id == 0);
44
45 ng = old_ng = net->gen;
46 if (old_ng->len >= id)
47 goto assign;
48
49 ng = kzalloc(sizeof(struct net_generic) +
50 id * sizeof(void *), GFP_KERNEL);
51 if (ng == NULL)
52 return -ENOMEM;
53
54 /*
55 * Some synchronisation notes:
56 *
57 * The net_generic explores the net->gen array inside rcu
58 * read section. Besides once set the net->gen->ptr[x]
59 * pointer never changes (see rules in netns/generic.h).
60 *
61 * That said, we simply duplicate this array and schedule
62 * the old copy for kfree after a grace period.
63 */
64
65 ng->len = id;
66 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
67
68 rcu_assign_pointer(net->gen, ng);
69 call_rcu(&old_ng->rcu, net_generic_release);
70assign:
71 ng->ptr[id - 1] = data;
72 return 0;
73}
74
30static int ops_init(const struct pernet_operations *ops, struct net *net) 75static int ops_init(const struct pernet_operations *ops, struct net *net)
31{ 76{
32 int err; 77 int err;
@@ -469,10 +514,10 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
469 * addition run the exit method for all existing network 514 * addition run the exit method for all existing network
470 * namespaces. 515 * namespaces.
471 */ 516 */
472void unregister_pernet_subsys(struct pernet_operations *module) 517void unregister_pernet_subsys(struct pernet_operations *ops)
473{ 518{
474 mutex_lock(&net_mutex); 519 mutex_lock(&net_mutex);
475 unregister_pernet_operations(module); 520 unregister_pernet_operations(ops);
476 mutex_unlock(&net_mutex); 521 mutex_unlock(&net_mutex);
477} 522}
478EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 523EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
@@ -526,49 +571,3 @@ void unregister_pernet_device(struct pernet_operations *ops)
526 mutex_unlock(&net_mutex); 571 mutex_unlock(&net_mutex);
527} 572}
528EXPORT_SYMBOL_GPL(unregister_pernet_device); 573EXPORT_SYMBOL_GPL(unregister_pernet_device);
529
530static void net_generic_release(struct rcu_head *rcu)
531{
532 struct net_generic *ng;
533
534 ng = container_of(rcu, struct net_generic, rcu);
535 kfree(ng);
536}
537
538int net_assign_generic(struct net *net, int id, void *data)
539{
540 struct net_generic *ng, *old_ng;
541
542 BUG_ON(!mutex_is_locked(&net_mutex));
543 BUG_ON(id == 0);
544
545 ng = old_ng = net->gen;
546 if (old_ng->len >= id)
547 goto assign;
548
549 ng = kzalloc(sizeof(struct net_generic) +
550 id * sizeof(void *), GFP_KERNEL);
551 if (ng == NULL)
552 return -ENOMEM;
553
554 /*
555 * Some synchronisation notes:
556 *
557 * The net_generic explores the net->gen array inside rcu
558 * read section. Besides once set the net->gen->ptr[x]
559 * pointer never changes (see rules in netns/generic.h).
560 *
561 * That said, we simply duplicate this array and schedule
562 * the old copy for kfree after a grace period.
563 */
564
565 ng->len = id;
566 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
567
568 rcu_assign_pointer(net->gen, ng);
569 call_rcu(&old_ng->rcu, net_generic_release);
570assign:
571 ng->ptr[id - 1] = data;
572 return 0;
573}
574EXPORT_SYMBOL_GPL(net_assign_generic);
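[Note: besides un-exporting it and making it static, the move preserves net_assign_generic()'s copy-and-publish growth strategy: the ptr[] array is never resized in place; a larger copy is built, the existing slots are memcpy'd over, the new array is published with rcu_assign_pointer(), and the old one is freed only after a grace period. A minimal sketch, following the function body above:

	ng = kzalloc(sizeof(struct net_generic) + id * sizeof(void *),
		     GFP_KERNEL);
	if (!ng)
		return -ENOMEM;

	ng->len = id;
	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void *));

	rcu_assign_pointer(net->gen, ng);		/* readers switch atomically */
	call_rcu(&old_ng->rcu, net_generic_release);	/* old array freed later */
]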
diff --git a/net/core/netevent.c b/net/core/netevent.c
index 95f81de87502..865f0ceb81fb 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -35,6 +35,7 @@ int register_netevent_notifier(struct notifier_block *nb)
35 err = atomic_notifier_chain_register(&netevent_notif_chain, nb); 35 err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
36 return err; 36 return err;
37} 37}
38EXPORT_SYMBOL_GPL(register_netevent_notifier);
38 39
39/** 40/**
40 * netevent_unregister_notifier - unregister a netevent notifier block 41 * netevent_unregister_notifier - unregister a netevent notifier block
@@ -50,6 +51,7 @@ int unregister_netevent_notifier(struct notifier_block *nb)
50{ 51{
51 return atomic_notifier_chain_unregister(&netevent_notif_chain, nb); 52 return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
52} 53}
54EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
53 55
54/** 56/**
55 * call_netevent_notifiers - call all netevent notifier blocks 57 * call_netevent_notifiers - call all netevent notifier blocks
@@ -64,7 +66,4 @@ int call_netevent_notifiers(unsigned long val, void *v)
64{ 66{
65 return atomic_notifier_call_chain(&netevent_notif_chain, val, v); 67 return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
66} 68}
67
68EXPORT_SYMBOL_GPL(register_netevent_notifier);
69EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
70EXPORT_SYMBOL_GPL(call_netevent_notifiers); 69EXPORT_SYMBOL_GPL(call_netevent_notifiers);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a58f59b97597..537e01afd81b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -179,9 +179,8 @@ static void service_arp_queue(struct netpoll_info *npi)
179 } 179 }
180} 180}
181 181
182void netpoll_poll(struct netpoll *np) 182void netpoll_poll_dev(struct net_device *dev)
183{ 183{
184 struct net_device *dev = np->dev;
185 const struct net_device_ops *ops; 184 const struct net_device_ops *ops;
186 185
187 if (!dev || !netif_running(dev)) 186 if (!dev || !netif_running(dev))
@@ -200,6 +199,13 @@ void netpoll_poll(struct netpoll *np)
200 199
201 zap_completion_queue(); 200 zap_completion_queue();
202} 201}
202EXPORT_SYMBOL(netpoll_poll_dev);
203
204void netpoll_poll(struct netpoll *np)
205{
206 netpoll_poll_dev(np->dev);
207}
208EXPORT_SYMBOL(netpoll_poll);
203 209
204static void refill_skbs(void) 210static void refill_skbs(void)
205{ 211{
@@ -282,12 +288,13 @@ static int netpoll_owner_active(struct net_device *dev)
282 return 0; 288 return 0;
283} 289}
284 290
285static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) 291void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
286{ 292{
287 int status = NETDEV_TX_BUSY; 293 int status = NETDEV_TX_BUSY;
288 unsigned long tries; 294 unsigned long tries;
289 struct net_device *dev = np->dev; 295 struct net_device *dev = np->dev;
290 const struct net_device_ops *ops = dev->netdev_ops; 296 const struct net_device_ops *ops = dev->netdev_ops;
297 /* It is up to the caller to keep npinfo alive. */
291 struct netpoll_info *npinfo = np->dev->npinfo; 298 struct netpoll_info *npinfo = np->dev->npinfo;
292 299
293 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { 300 if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
@@ -308,7 +315,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
308 tries > 0; --tries) { 315 tries > 0; --tries) {
309 if (__netif_tx_trylock(txq)) { 316 if (__netif_tx_trylock(txq)) {
310 if (!netif_tx_queue_stopped(txq)) { 317 if (!netif_tx_queue_stopped(txq)) {
318 dev->priv_flags |= IFF_IN_NETPOLL;
311 status = ops->ndo_start_xmit(skb, dev); 319 status = ops->ndo_start_xmit(skb, dev);
320 dev->priv_flags &= ~IFF_IN_NETPOLL;
312 if (status == NETDEV_TX_OK) 321 if (status == NETDEV_TX_OK)
313 txq_trans_update(txq); 322 txq_trans_update(txq);
314 } 323 }
@@ -337,6 +346,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
337 schedule_delayed_work(&npinfo->tx_work,0); 346 schedule_delayed_work(&npinfo->tx_work,0);
338 } 347 }
339} 348}
349EXPORT_SYMBOL(netpoll_send_skb);
340 350
341void netpoll_send_udp(struct netpoll *np, const char *msg, int len) 351void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
342{ 352{
@@ -398,6 +408,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
398 408
399 netpoll_send_skb(np, skb); 409 netpoll_send_skb(np, skb);
400} 410}
411EXPORT_SYMBOL(netpoll_send_udp);
401 412
402static void arp_reply(struct sk_buff *skb) 413static void arp_reply(struct sk_buff *skb)
403{ 414{
@@ -624,6 +635,7 @@ void netpoll_print_options(struct netpoll *np)
624 printk(KERN_INFO "%s: remote ethernet address %pM\n", 635 printk(KERN_INFO "%s: remote ethernet address %pM\n",
625 np->name, np->remote_mac); 636 np->name, np->remote_mac);
626} 637}
638EXPORT_SYMBOL(netpoll_print_options);
627 639
628int netpoll_parse_options(struct netpoll *np, char *opt) 640int netpoll_parse_options(struct netpoll *np, char *opt)
629{ 641{
@@ -716,30 +728,29 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
716 np->name, cur); 728 np->name, cur);
717 return -1; 729 return -1;
718} 730}
731EXPORT_SYMBOL(netpoll_parse_options);
719 732
720int netpoll_setup(struct netpoll *np) 733int __netpoll_setup(struct netpoll *np)
721{ 734{
722 struct net_device *ndev = NULL; 735 struct net_device *ndev = np->dev;
723 struct in_device *in_dev;
724 struct netpoll_info *npinfo; 736 struct netpoll_info *npinfo;
725 struct netpoll *npe, *tmp; 737 const struct net_device_ops *ops;
726 unsigned long flags; 738 unsigned long flags;
727 int err; 739 int err;
728 740
729 if (np->dev_name) 741 if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
730 ndev = dev_get_by_name(&init_net, np->dev_name); 742 !ndev->netdev_ops->ndo_poll_controller) {
731 if (!ndev) { 743 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
732 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
733 np->name, np->dev_name); 744 np->name, np->dev_name);
734 return -ENODEV; 745 err = -ENOTSUPP;
746 goto out;
735 } 747 }
736 748
737 np->dev = ndev;
738 if (!ndev->npinfo) { 749 if (!ndev->npinfo) {
739 npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); 750 npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
740 if (!npinfo) { 751 if (!npinfo) {
741 err = -ENOMEM; 752 err = -ENOMEM;
742 goto put; 753 goto out;
743 } 754 }
744 755
745 npinfo->rx_flags = 0; 756 npinfo->rx_flags = 0;
@@ -751,16 +762,51 @@ int netpoll_setup(struct netpoll *np)
751 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); 762 INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
752 763
753 atomic_set(&npinfo->refcnt, 1); 764 atomic_set(&npinfo->refcnt, 1);
765
766 ops = np->dev->netdev_ops;
767 if (ops->ndo_netpoll_setup) {
768 err = ops->ndo_netpoll_setup(ndev, npinfo);
769 if (err)
770 goto free_npinfo;
771 }
754 } else { 772 } else {
755 npinfo = ndev->npinfo; 773 npinfo = ndev->npinfo;
756 atomic_inc(&npinfo->refcnt); 774 atomic_inc(&npinfo->refcnt);
757 } 775 }
758 776
759 if (!ndev->netdev_ops->ndo_poll_controller) { 777 npinfo->netpoll = np;
760 printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", 778
779 if (np->rx_hook) {
780 spin_lock_irqsave(&npinfo->rx_lock, flags);
781 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
782 list_add_tail(&np->rx, &npinfo->rx_np);
783 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
784 }
785
786 /* last thing to do is link it to the net device structure */
787 rcu_assign_pointer(ndev->npinfo, npinfo);
788
789 return 0;
790
791free_npinfo:
792 kfree(npinfo);
793out:
794 return err;
795}
796EXPORT_SYMBOL_GPL(__netpoll_setup);
797
798int netpoll_setup(struct netpoll *np)
799{
800 struct net_device *ndev = NULL;
801 struct in_device *in_dev;
802 int err;
803
804 if (np->dev_name)
805 ndev = dev_get_by_name(&init_net, np->dev_name);
806 if (!ndev) {
807 printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
761 np->name, np->dev_name); 808 np->name, np->dev_name);
762 err = -ENOTSUPP; 809 return -ENODEV;
763 goto release;
764 } 810 }
765 811
766 if (!netif_running(ndev)) { 812 if (!netif_running(ndev)) {
@@ -776,7 +822,7 @@ int netpoll_setup(struct netpoll *np)
776 if (err) { 822 if (err) {
777 printk(KERN_ERR "%s: failed to open %s\n", 823 printk(KERN_ERR "%s: failed to open %s\n",
778 np->name, ndev->name); 824 np->name, ndev->name);
779 goto release; 825 goto put;
780 } 826 }
781 827
782 atleast = jiffies + HZ/10; 828 atleast = jiffies + HZ/10;
@@ -813,7 +859,7 @@ int netpoll_setup(struct netpoll *np)
813 printk(KERN_ERR "%s: no IP address for %s, aborting\n", 859 printk(KERN_ERR "%s: no IP address for %s, aborting\n",
814 np->name, np->dev_name); 860 np->name, np->dev_name);
815 err = -EDESTADDRREQ; 861 err = -EDESTADDRREQ;
816 goto release; 862 goto put;
817 } 863 }
818 864
819 np->local_ip = in_dev->ifa_list->ifa_local; 865 np->local_ip = in_dev->ifa_list->ifa_local;
@@ -821,38 +867,25 @@ int netpoll_setup(struct netpoll *np)
821 printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip); 867 printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip);
822 } 868 }
823 869
824 if (np->rx_hook) { 870 np->dev = ndev;
825 spin_lock_irqsave(&npinfo->rx_lock, flags);
826 npinfo->rx_flags |= NETPOLL_RX_ENABLED;
827 list_add_tail(&np->rx, &npinfo->rx_np);
828 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
829 }
830 871
831 /* fill up the skb queue */ 872 /* fill up the skb queue */
832 refill_skbs(); 873 refill_skbs();
833 874
834 /* last thing to do is link it to the net device structure */ 875 rtnl_lock();
835 ndev->npinfo = npinfo; 876 err = __netpoll_setup(np);
877 rtnl_unlock();
836 878
837 /* avoid racing with NAPI reading npinfo */ 879 if (err)
838 synchronize_rcu(); 880 goto put;
839 881
840 return 0; 882 return 0;
841 883
842 release:
843 if (!ndev->npinfo) {
844 spin_lock_irqsave(&npinfo->rx_lock, flags);
845 list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) {
846 npe->dev = NULL;
847 }
848 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
849
850 kfree(npinfo);
851 }
852put: 884put:
853 dev_put(ndev); 885 dev_put(ndev);
854 return err; 886 return err;
855} 887}
888EXPORT_SYMBOL(netpoll_setup);
856 889
857static int __init netpoll_init(void) 890static int __init netpoll_init(void)
858{ 891{
@@ -861,44 +894,65 @@ static int __init netpoll_init(void)
861} 894}
862core_initcall(netpoll_init); 895core_initcall(netpoll_init);
863 896
864void netpoll_cleanup(struct netpoll *np) 897void __netpoll_cleanup(struct netpoll *np)
865{ 898{
866 struct netpoll_info *npinfo; 899 struct netpoll_info *npinfo;
867 unsigned long flags; 900 unsigned long flags;
868 901
869 if (np->dev) { 902 npinfo = np->dev->npinfo;
870 npinfo = np->dev->npinfo; 903 if (!npinfo)
871 if (npinfo) { 904 return;
872 if (!list_empty(&npinfo->rx_np)) {
873 spin_lock_irqsave(&npinfo->rx_lock, flags);
874 list_del(&np->rx);
875 if (list_empty(&npinfo->rx_np))
876 npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
877 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
878 }
879 905
880 if (atomic_dec_and_test(&npinfo->refcnt)) { 906 if (!list_empty(&npinfo->rx_np)) {
881 skb_queue_purge(&npinfo->arp_tx); 907 spin_lock_irqsave(&npinfo->rx_lock, flags);
882 skb_queue_purge(&npinfo->txq); 908 list_del(&np->rx);
883 cancel_rearming_delayed_work(&npinfo->tx_work); 909 if (list_empty(&npinfo->rx_np))
910 npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
911 spin_unlock_irqrestore(&npinfo->rx_lock, flags);
912 }
884 913
885 /* clean after last, unfinished work */ 914 if (atomic_dec_and_test(&npinfo->refcnt)) {
886 __skb_queue_purge(&npinfo->txq); 915 const struct net_device_ops *ops;
887 kfree(npinfo); 916
888 np->dev->npinfo = NULL; 917 ops = np->dev->netdev_ops;
889 } 918 if (ops->ndo_netpoll_cleanup)
890 } 919 ops->ndo_netpoll_cleanup(np->dev);
920
921 rcu_assign_pointer(np->dev->npinfo, NULL);
922
923 /* avoid racing with NAPI reading npinfo */
924 synchronize_rcu_bh();
891 925
892 dev_put(np->dev); 926 skb_queue_purge(&npinfo->arp_tx);
927 skb_queue_purge(&npinfo->txq);
928 cancel_rearming_delayed_work(&npinfo->tx_work);
929
930 /* clean after last, unfinished work */
931 __skb_queue_purge(&npinfo->txq);
932 kfree(npinfo);
893 } 933 }
934}
935EXPORT_SYMBOL_GPL(__netpoll_cleanup);
894 936
937void netpoll_cleanup(struct netpoll *np)
938{
939 if (!np->dev)
940 return;
941
942 rtnl_lock();
943 __netpoll_cleanup(np);
944 rtnl_unlock();
945
946 dev_put(np->dev);
895 np->dev = NULL; 947 np->dev = NULL;
896} 948}
949EXPORT_SYMBOL(netpoll_cleanup);
897 950
898int netpoll_trap(void) 951int netpoll_trap(void)
899{ 952{
900 return atomic_read(&trapped); 953 return atomic_read(&trapped);
901} 954}
955EXPORT_SYMBOL(netpoll_trap);
902 956
903void netpoll_set_trap(int trap) 957void netpoll_set_trap(int trap)
904{ 958{
@@ -907,12 +961,4 @@ void netpoll_set_trap(int trap)
907 else 961 else
908 atomic_dec(&trapped); 962 atomic_dec(&trapped);
909} 963}
910
911EXPORT_SYMBOL(netpoll_set_trap); 964EXPORT_SYMBOL(netpoll_set_trap);
912EXPORT_SYMBOL(netpoll_trap);
913EXPORT_SYMBOL(netpoll_print_options);
914EXPORT_SYMBOL(netpoll_parse_options);
915EXPORT_SYMBOL(netpoll_setup);
916EXPORT_SYMBOL(netpoll_cleanup);
917EXPORT_SYMBOL(netpoll_send_udp);
918EXPORT_SYMBOL(netpoll_poll);
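[Note: the netpoll rework splits setup and teardown into locked and lock-free halves: __netpoll_setup() and __netpoll_cleanup() assume the caller already holds rtnl and invoke the new ndo_netpoll_setup()/ndo_netpoll_cleanup() driver hooks, while netpoll_setup()/netpoll_cleanup() remain the self-locking entry points. A minimal sketch of the outer wrapper, as in the hunk:

	rtnl_lock();
	err = __netpoll_setup(np);	/* publishes ndev->npinfo via rcu_assign_pointer() */
	rtnl_unlock();
	if (err)
		goto put;		/* drop the dev_get_by_name() reference */
]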
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 43923811bd6a..10a1ea72010d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -115,6 +115,9 @@
115 * command by Adit Ranadive <adit.262@gmail.com> 115 * command by Adit Ranadive <adit.262@gmail.com>
116 * 116 *
117 */ 117 */
118
119#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
120
118#include <linux/sys.h> 121#include <linux/sys.h>
119#include <linux/types.h> 122#include <linux/types.h>
120#include <linux/module.h> 123#include <linux/module.h>
@@ -169,11 +172,13 @@
169#include <asm/dma.h> 172#include <asm/dma.h>
170#include <asm/div64.h> /* do_div */ 173#include <asm/div64.h> /* do_div */
171 174
172#define VERSION "2.72" 175#define VERSION "2.74"
173#define IP_NAME_SZ 32 176#define IP_NAME_SZ 32
174#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ 177#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
175#define MPLS_STACK_BOTTOM htonl(0x00000100) 178#define MPLS_STACK_BOTTOM htonl(0x00000100)
176 179
180#define func_enter() pr_debug("entering %s\n", __func__);
181
177/* Device flag bits */ 182/* Device flag bits */
178#define F_IPSRC_RND (1<<0) /* IP-Src Random */ 183#define F_IPSRC_RND (1<<0) /* IP-Src Random */
179#define F_IPDST_RND (1<<1) /* IP-Dst Random */ 184#define F_IPDST_RND (1<<1) /* IP-Dst Random */
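[Note: defining pr_fmt before any include is what lets the rest of this patch drop the hand-written "pktgen: " prefixes: every pr_err()/pr_warning()/pr_debug() call site is expanded against that format. A minimal sketch of the effect, with a hypothetical wrapper function:

	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt	/* before any #include */
	#include <linux/kernel.h>

	static void report_missing(const char *ifname)
	{
		pr_err("no such netdevice: \"%s\"\n", ifname);
		/* logs: "pktgen: no such netdevice: ..." */
	}
]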
@@ -190,6 +195,7 @@
190#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ 195#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
191#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ 196#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
192#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */ 197#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
198#define F_NODE (1<<15) /* Node memory alloc*/
193 199
194/* Thread control flag bits */ 200/* Thread control flag bits */
195#define T_STOP (1<<0) /* Stop run */ 201#define T_STOP (1<<0) /* Stop run */
@@ -372,6 +378,7 @@ struct pktgen_dev {
372 378
373 u16 queue_map_min; 379 u16 queue_map_min;
374 u16 queue_map_max; 380 u16 queue_map_max;
381 int node; /* Memory node */
375 382
376#ifdef CONFIG_XFRM 383#ifdef CONFIG_XFRM
377 __u8 ipsmode; /* IPSEC mode (config) */ 384 __u8 ipsmode; /* IPSEC mode (config) */
@@ -422,7 +429,8 @@ static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
422} 429}
423 430
424static const char version[] = 431static const char version[] =
425 "pktgen " VERSION ": Packet Generator for packet performance testing.\n"; 432 "Packet Generator for packet performance testing. "
433 "Version: " VERSION "\n";
426 434
427static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); 435static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i);
428static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); 436static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
@@ -493,7 +501,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
493 pktgen_reset_all_threads(); 501 pktgen_reset_all_threads();
494 502
495 else 503 else
496 printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); 504 pr_warning("Unknown command: %s\n", data);
497 505
498 err = count; 506 err = count;
499 507
@@ -607,6 +615,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
607 if (pkt_dev->traffic_class) 615 if (pkt_dev->traffic_class)
608 seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class); 616 seq_printf(seq, " traffic_class: 0x%02x\n", pkt_dev->traffic_class);
609 617
618 if (pkt_dev->node >= 0)
619 seq_printf(seq, " node: %d\n", pkt_dev->node);
620
610 seq_printf(seq, " Flags: "); 621 seq_printf(seq, " Flags: ");
611 622
612 if (pkt_dev->flags & F_IPV6) 623 if (pkt_dev->flags & F_IPV6)
@@ -660,6 +671,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
660 if (pkt_dev->flags & F_SVID_RND) 671 if (pkt_dev->flags & F_SVID_RND)
661 seq_printf(seq, "SVID_RND "); 672 seq_printf(seq, "SVID_RND ");
662 673
674 if (pkt_dev->flags & F_NODE)
675 seq_printf(seq, "NODE_ALLOC ");
676
663 seq_puts(seq, "\n"); 677 seq_puts(seq, "\n");
664 678
665 /* not really stopped, more like last-running-at */ 679 /* not really stopped, more like last-running-at */
@@ -832,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file,
832 const char __user * user_buffer, size_t count, 846 const char __user * user_buffer, size_t count,
833 loff_t * offset) 847 loff_t * offset)
834{ 848{
835 struct seq_file *seq = (struct seq_file *)file->private_data; 849 struct seq_file *seq = file->private_data;
836 struct pktgen_dev *pkt_dev = seq->private; 850 struct pktgen_dev *pkt_dev = seq->private;
837 int i = 0, max, len; 851 int i = 0, max, len;
838 char name[16], valstr[32]; 852 char name[16], valstr[32];
@@ -844,14 +858,14 @@ static ssize_t pktgen_if_write(struct file *file,
844 pg_result = &(pkt_dev->result[0]); 858 pg_result = &(pkt_dev->result[0]);
845 859
846 if (count < 1) { 860 if (count < 1) {
847 printk(KERN_WARNING "pktgen: wrong command format\n"); 861 pr_warning("wrong command format\n");
848 return -EINVAL; 862 return -EINVAL;
849 } 863 }
850 864
851 max = count - i; 865 max = count - i;
852 tmp = count_trail_chars(&user_buffer[i], max); 866 tmp = count_trail_chars(&user_buffer[i], max);
853 if (tmp < 0) { 867 if (tmp < 0) {
854 printk(KERN_WARNING "pktgen: illegal format\n"); 868 pr_warning("illegal format\n");
855 return tmp; 869 return tmp;
856 } 870 }
857 i += tmp; 871 i += tmp;
@@ -972,6 +986,36 @@ static ssize_t pktgen_if_write(struct file *file,
972 (unsigned long long) pkt_dev->delay); 986 (unsigned long long) pkt_dev->delay);
973 return count; 987 return count;
974 } 988 }
989 if (!strcmp(name, "rate")) {
990 len = num_arg(&user_buffer[i], 10, &value);
991 if (len < 0)
992 return len;
993
994 i += len;
995 if (!value)
996 return len;
997 pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value;
998 if (debug)
999 pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
1000
1001 sprintf(pg_result, "OK: rate=%lu", value);
1002 return count;
1003 }
1004 if (!strcmp(name, "ratep")) {
1005 len = num_arg(&user_buffer[i], 10, &value);
1006 if (len < 0)
1007 return len;
1008
1009 i += len;
1010 if (!value)
1011 return len;
1012 pkt_dev->delay = NSEC_PER_SEC/value;
1013 if (debug)
1014 pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
1015
1016 sprintf(pg_result, "OK: rate=%lu", value);
1017 return count;
1018 }
975 if (!strcmp(name, "udp_src_min")) { 1019 if (!strcmp(name, "udp_src_min")) {
976 len = num_arg(&user_buffer[i], 10, &value); 1020 len = num_arg(&user_buffer[i], 10, &value);
977 if (len < 0) 1021 if (len < 0)
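[Note: the two new commands above derive the inter-packet delay rather than taking it directly. Judging by the NSEC_PER_USEC factor, "rate" is interpreted in Mbit/s (delay_ns = bits-per-packet * 1000 / rate), while "ratep" is packets per second (delay_ns = NSEC_PER_SEC / rate). A worked example under that reading:

	/* rate=100 (Mbit/s), min_pkt_size=1000 bytes: */
	delay_ns = 1000 * 8 * NSEC_PER_USEC / 100;	/* = 80000 ns, i.e. 12500 pkt/s */
]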
@@ -1074,6 +1118,21 @@ static ssize_t pktgen_if_write(struct file *file,
1074 pkt_dev->dst_mac_count); 1118 pkt_dev->dst_mac_count);
1075 return count; 1119 return count;
1076 } 1120 }
1121 if (!strcmp(name, "node")) {
1122 len = num_arg(&user_buffer[i], 10, &value);
1123 if (len < 0)
1124 return len;
1125
1126 i += len;
1127
1128 if (node_possible(value)) {
1129 pkt_dev->node = value;
1130 sprintf(pg_result, "OK: node=%d", pkt_dev->node);
1131 }
1132 else
1133 sprintf(pg_result, "ERROR: node not possible");
1134 return count;
1135 }
1077 if (!strcmp(name, "flag")) { 1136 if (!strcmp(name, "flag")) {
1078 char f[32]; 1137 char f[32];
1079 memset(f, 0, 32); 1138 memset(f, 0, 32);
@@ -1166,12 +1225,18 @@ static ssize_t pktgen_if_write(struct file *file,
1166 else if (strcmp(f, "!IPV6") == 0) 1225 else if (strcmp(f, "!IPV6") == 0)
1167 pkt_dev->flags &= ~F_IPV6; 1226 pkt_dev->flags &= ~F_IPV6;
1168 1227
1228 else if (strcmp(f, "NODE_ALLOC") == 0)
1229 pkt_dev->flags |= F_NODE;
1230
1231 else if (strcmp(f, "!NODE_ALLOC") == 0)
1232 pkt_dev->flags &= ~F_NODE;
1233
1169 else { 1234 else {
1170 sprintf(pg_result, 1235 sprintf(pg_result,
1171 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", 1236 "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
1172 f, 1237 f,
1173 "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " 1238 "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
1174 "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n"); 1239 "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
1175 return count; 1240 return count;
1176 } 1241 }
1177 sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); 1242 sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -1369,18 +1434,12 @@ static ssize_t pktgen_if_write(struct file *file,
1369 i += len; 1434 i += len;
1370 1435
1371 for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) { 1436 for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) {
1372 if (*v >= '0' && *v <= '9') { 1437 int value;
1373 *m *= 16; 1438
1374 *m += *v - '0'; 1439 value = hex_to_bin(*v);
1375 } 1440 if (value >= 0)
1376 if (*v >= 'A' && *v <= 'F') { 1441 *m = *m * 16 + value;
1377 *m *= 16; 1442
1378 *m += *v - 'A' + 10;
1379 }
1380 if (*v >= 'a' && *v <= 'f') {
1381 *m *= 16;
1382 *m += *v - 'a' + 10;
1383 }
1384 if (*v == ':') { 1443 if (*v == ':') {
1385 m++; 1444 m++;
1386 *m = 0; 1445 *m = 0;
@@ -1411,18 +1470,12 @@ static ssize_t pktgen_if_write(struct file *file,
1411 i += len; 1470 i += len;
1412 1471
1413 for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) { 1472 for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) {
1414 if (*v >= '0' && *v <= '9') { 1473 int value;
1415 *m *= 16; 1474
1416 *m += *v - '0'; 1475 value = hex_to_bin(*v);
1417 } 1476 if (value >= 0)
1418 if (*v >= 'A' && *v <= 'F') { 1477 *m = *m * 16 + value;
1419 *m *= 16; 1478
1420 *m += *v - 'A' + 10;
1421 }
1422 if (*v >= 'a' && *v <= 'f') {
1423 *m *= 16;
1424 *m += *v - 'a' + 10;
1425 }
1426 if (*v == ':') { 1479 if (*v == ':') {
1427 m++; 1480 m++;
1428 *m = 0; 1481 *m = 0;
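[Note: both MAC parsers are collapsed onto hex_to_bin(), which returns the 0-15 value of a hex digit or a negative value for anything else, so one branch replaces the three per-case character ranges. A minimal sketch of the replacement loop body:

	int value = hex_to_bin(*v);	/* < 0 for non-hex characters */
	if (value >= 0)
		*m = *m * 16 + value;
]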
@@ -1711,7 +1764,7 @@ static ssize_t pktgen_thread_write(struct file *file,
1711 const char __user * user_buffer, 1764 const char __user * user_buffer,
1712 size_t count, loff_t * offset) 1765 size_t count, loff_t * offset)
1713{ 1766{
1714 struct seq_file *seq = (struct seq_file *)file->private_data; 1767 struct seq_file *seq = file->private_data;
1715 struct pktgen_thread *t = seq->private; 1768 struct pktgen_thread *t = seq->private;
1716 int i = 0, max, len, ret; 1769 int i = 0, max, len, ret;
1717 char name[40]; 1770 char name[40];
@@ -1752,7 +1805,7 @@ static ssize_t pktgen_thread_write(struct file *file,
1752 name, (unsigned long)count); 1805 name, (unsigned long)count);
1753 1806
1754 if (!t) { 1807 if (!t) {
1755 printk(KERN_ERR "pktgen: ERROR: No thread\n"); 1808 pr_err("ERROR: No thread\n");
1756 ret = -EINVAL; 1809 ret = -EINVAL;
1757 goto out; 1810 goto out;
1758 } 1811 }
@@ -1845,7 +1898,7 @@ static void pktgen_mark_device(const char *ifname)
1845 int i = 0; 1898 int i = 0;
1846 1899
1847 mutex_lock(&pktgen_thread_lock); 1900 mutex_lock(&pktgen_thread_lock);
1848 pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname); 1901 pr_debug("%s: marking %s for removal\n", __func__, ifname);
1849 1902
1850 while (1) { 1903 while (1) {
1851 1904
@@ -1854,15 +1907,14 @@ static void pktgen_mark_device(const char *ifname)
1854 break; /* success */ 1907 break; /* success */
1855 1908
1856 mutex_unlock(&pktgen_thread_lock); 1909 mutex_unlock(&pktgen_thread_lock);
1857 pr_debug("pktgen: pktgen_mark_device waiting for %s " 1910 pr_debug("%s: waiting for %s to disappear....\n",
1858 "to disappear....\n", ifname); 1911 __func__, ifname);
1859 schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); 1912 schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
1860 mutex_lock(&pktgen_thread_lock); 1913 mutex_lock(&pktgen_thread_lock);
1861 1914
1862 if (++i >= max_tries) { 1915 if (++i >= max_tries) {
1863 printk(KERN_ERR "pktgen_mark_device: timed out after " 1916 pr_err("%s: timed out after waiting %d msec for device %s to be removed\n",
1864 "waiting %d msec for device %s to be removed\n", 1917 __func__, msec_per_try * i, ifname);
1865 msec_per_try * i, ifname);
1866 break; 1918 break;
1867 } 1919 }
1868 1920
@@ -1889,8 +1941,8 @@ static void pktgen_change_name(struct net_device *dev)
1889 &pktgen_if_fops, 1941 &pktgen_if_fops,
1890 pkt_dev); 1942 pkt_dev);
1891 if (!pkt_dev->entry) 1943 if (!pkt_dev->entry)
1892 printk(KERN_ERR "pktgen: can't move proc " 1944 pr_err("can't move proc entry for '%s'\n",
1893 " entry for '%s'\n", dev->name); 1945 dev->name);
1894 break; 1946 break;
1895 } 1947 }
1896 } 1948 }
@@ -1954,15 +2006,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
1954 2006
1955 odev = pktgen_dev_get_by_name(pkt_dev, ifname); 2007 odev = pktgen_dev_get_by_name(pkt_dev, ifname);
1956 if (!odev) { 2008 if (!odev) {
1957 printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); 2009 pr_err("no such netdevice: \"%s\"\n", ifname);
1958 return -ENODEV; 2010 return -ENODEV;
1959 } 2011 }
1960 2012
1961 if (odev->type != ARPHRD_ETHER) { 2013 if (odev->type != ARPHRD_ETHER) {
1962 printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname); 2014 pr_err("not an ethernet device: \"%s\"\n", ifname);
1963 err = -EINVAL; 2015 err = -EINVAL;
1964 } else if (!netif_running(odev)) { 2016 } else if (!netif_running(odev)) {
1965 printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname); 2017 pr_err("device is down: \"%s\"\n", ifname);
1966 err = -ENETDOWN; 2018 err = -ENETDOWN;
1967 } else { 2019 } else {
1968 pkt_dev->odev = odev; 2020 pkt_dev->odev = odev;
@@ -1981,8 +2033,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
1981 int ntxq; 2033 int ntxq;
1982 2034
1983 if (!pkt_dev->odev) { 2035 if (!pkt_dev->odev) {
1984 printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in " 2036 pr_err("ERROR: pkt_dev->odev == NULL in setup_inject\n");
1985 "setup_inject.\n");
1986 sprintf(pkt_dev->result, 2037 sprintf(pkt_dev->result,
1987 "ERROR: pkt_dev->odev == NULL in setup_inject.\n"); 2038 "ERROR: pkt_dev->odev == NULL in setup_inject.\n");
1988 return; 2039 return;
@@ -1992,19 +2043,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
1992 ntxq = pkt_dev->odev->real_num_tx_queues; 2043 ntxq = pkt_dev->odev->real_num_tx_queues;
1993 2044
1994 if (ntxq <= pkt_dev->queue_map_min) { 2045 if (ntxq <= pkt_dev->queue_map_min) {
1995 printk(KERN_WARNING "pktgen: WARNING: Requested " 2046 pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
1996 "queue_map_min (zero-based) (%d) exceeds valid range " 2047 pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
1997 "[0 - %d] for (%d) queues on %s, resetting\n", 2048 pkt_dev->odevname);
1998 pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
1999 pkt_dev->odevname);
2000 pkt_dev->queue_map_min = ntxq - 1; 2049 pkt_dev->queue_map_min = ntxq - 1;
2001 } 2050 }
2002 if (pkt_dev->queue_map_max >= ntxq) { 2051 if (pkt_dev->queue_map_max >= ntxq) {
2003 printk(KERN_WARNING "pktgen: WARNING: Requested " 2052 pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
2004 "queue_map_max (zero-based) (%d) exceeds valid range " 2053 pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
2005 "[0 - %d] for (%d) queues on %s, resetting\n", 2054 pkt_dev->odevname);
2006 pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
2007 pkt_dev->odevname);
2008 pkt_dev->queue_map_max = ntxq - 1; 2055 pkt_dev->queue_map_max = ntxq - 1;
2009 } 2056 }
2010 2057
@@ -2064,8 +2111,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
2064 } 2111 }
2065 rcu_read_unlock(); 2112 rcu_read_unlock();
2066 if (err) 2113 if (err)
2067 printk(KERN_ERR "pktgen: ERROR: IPv6 link " 2114 pr_err("ERROR: IPv6 link address not available\n");
2068 "address not availble.\n");
2069 } 2115 }
2070#endif 2116#endif
2071 } else { 2117 } else {
@@ -2113,15 +2159,15 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
2113 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); 2159 hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
2114 hrtimer_set_expires(&t.timer, spin_until); 2160 hrtimer_set_expires(&t.timer, spin_until);
2115 2161
2116 remaining = ktime_to_us(hrtimer_expires_remaining(&t.timer)); 2162 remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
2117 if (remaining <= 0) { 2163 if (remaining <= 0) {
2118 pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); 2164 pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
2119 return; 2165 return;
2120 } 2166 }
2121 2167
2122 start_time = ktime_now(); 2168 start_time = ktime_now();
2123 if (remaining < 100) 2169 if (remaining < 100000)
2124 udelay(remaining); /* really small just spin */ 2170 ndelay(remaining); /* really small just spin */
2125 else { 2171 else {
2126 /* see do_nanosleep */ 2172 /* see do_nanosleep */
2127 hrtimer_init_sleeper(&t, current); 2173 hrtimer_init_sleeper(&t, current);
@@ -2141,7 +2187,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
2141 end_time = ktime_now(); 2187 end_time = ktime_now();
2142 2188
2143 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); 2189 pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
2144 pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay); 2190 pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
2145} 2191}
2146 2192
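[Note: spin() now works in nanoseconds end to end: the remaining time comes from ktime_to_ns(), waits under 100000 ns busy-wait with ndelay(), and next_tx is advanced from spin_until rather than the measured end_time, so scheduling jitter on one packet does not shift every later one. A minimal sketch of the branch, as in the hunk:

	if (remaining < 100000) {
		ndelay(remaining);	/* really small: just spin */
	} else {
		/* longer waits take the hrtimer sleeper path (see do_nanosleep) */
	}
	pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
]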
2147static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) 2193static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
@@ -2499,8 +2545,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
2499 if (nhead > 0) { 2545 if (nhead > 0) {
2500 ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); 2546 ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
2501 if (ret < 0) { 2547 if (ret < 0) {
2502 printk(KERN_ERR "Error expanding " 2548 pr_err("Error expanding ipsec packet %d\n",
2503 "ipsec packet %d\n", ret); 2549 ret);
2504 goto err; 2550 goto err;
2505 } 2551 }
2506 } 2552 }
@@ -2509,8 +2555,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
2509 skb_pull(skb, ETH_HLEN); 2555 skb_pull(skb, ETH_HLEN);
2510 ret = pktgen_output_ipsec(skb, pkt_dev); 2556 ret = pktgen_output_ipsec(skb, pkt_dev);
2511 if (ret) { 2557 if (ret) {
2512 printk(KERN_ERR "Error creating ipsec " 2558 pr_err("Error creating ipsec packet %d\n", ret);
2513 "packet %d\n", ret);
2514 goto err; 2559 goto err;
2515 } 2560 }
2516 /* restore ll */ 2561 /* restore ll */
@@ -2572,9 +2617,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
2572 mod_cur_headers(pkt_dev); 2617 mod_cur_headers(pkt_dev);
2573 2618
2574 datalen = (odev->hard_header_len + 16) & ~0xf; 2619 datalen = (odev->hard_header_len + 16) & ~0xf;
2575 skb = __netdev_alloc_skb(odev, 2620
2576 pkt_dev->cur_pkt_size + 64 2621 if (pkt_dev->flags & F_NODE) {
2577 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); 2622 int node;
2623
2624 if (pkt_dev->node >= 0)
2625 node = pkt_dev->node;
2626 else
2627 node = numa_node_id();
2628
2629 skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
2630 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
2631 if (likely(skb)) {
2632 skb_reserve(skb, NET_SKB_PAD);
2633 skb->dev = odev;
2634 }
2635 }
2636 else
2637 skb = __netdev_alloc_skb(odev,
2638 pkt_dev->cur_pkt_size + 64
2639 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
2640
2578 if (!skb) { 2641 if (!skb) {
2579 sprintf(pkt_dev->result, "No memory"); 2642 sprintf(pkt_dev->result, "No memory");
2580 return NULL; 2643 return NULL;
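[Note: with the NODE_ALLOC flag set, fill_packet_ipv4() allocates the skb directly on the requested (or current) NUMA node via __alloc_skb() instead of __netdev_alloc_skb(); the caller then has to reserve NET_SKB_PAD and set skb->dev itself, which __netdev_alloc_skb() would normally do. A minimal sketch of that path, with "size" standing in for the computed packet length from the hunk:

	skb = __alloc_skb(NET_SKB_PAD + size, GFP_NOWAIT, 0, node);
	if (likely(skb)) {
		skb_reserve(skb, NET_SKB_PAD);	/* headroom __netdev_alloc_skb adds */
		skb->dev = odev;
	}
]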
@@ -2968,8 +3031,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
2968 if (datalen < sizeof(struct pktgen_hdr)) { 3031 if (datalen < sizeof(struct pktgen_hdr)) {
2969 datalen = sizeof(struct pktgen_hdr); 3032 datalen = sizeof(struct pktgen_hdr);
2970 if (net_ratelimit()) 3033 if (net_ratelimit())
2971 printk(KERN_INFO "pktgen: increased datalen to %d\n", 3034 pr_info("increased datalen to %d\n", datalen);
2972 datalen);
2973 } 3035 }
2974 3036
2975 udph->source = htons(pkt_dev->cur_udp_src); 3037 udph->source = htons(pkt_dev->cur_udp_src);
@@ -3096,7 +3158,7 @@ static void pktgen_run(struct pktgen_thread *t)
3096 struct pktgen_dev *pkt_dev; 3158 struct pktgen_dev *pkt_dev;
3097 int started = 0; 3159 int started = 0;
3098 3160
3099 pr_debug("pktgen: entering pktgen_run. %p\n", t); 3161 func_enter();
3100 3162
3101 if_lock(t); 3163 if_lock(t);
3102 list_for_each_entry(pkt_dev, &t->if_list, list) { 3164 list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -3129,7 +3191,7 @@ static void pktgen_stop_all_threads_ifs(void)
3129{ 3191{
3130 struct pktgen_thread *t; 3192 struct pktgen_thread *t;
3131 3193
3132 pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n"); 3194 func_enter();
3133 3195
3134 mutex_lock(&pktgen_thread_lock); 3196 mutex_lock(&pktgen_thread_lock);
3135 3197
@@ -3194,7 +3256,7 @@ static void pktgen_run_all_threads(void)
3194{ 3256{
3195 struct pktgen_thread *t; 3257 struct pktgen_thread *t;
3196 3258
3197 pr_debug("pktgen: entering pktgen_run_all_threads.\n"); 3259 func_enter();
3198 3260
3199 mutex_lock(&pktgen_thread_lock); 3261 mutex_lock(&pktgen_thread_lock);
3200 3262
@@ -3213,7 +3275,7 @@ static void pktgen_reset_all_threads(void)
3213{ 3275{
3214 struct pktgen_thread *t; 3276 struct pktgen_thread *t;
3215 3277
3216 pr_debug("pktgen: entering pktgen_reset_all_threads.\n"); 3278 func_enter();
3217 3279
3218 mutex_lock(&pktgen_thread_lock); 3280 mutex_lock(&pktgen_thread_lock);
3219 3281
@@ -3263,8 +3325,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
3263 int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1; 3325 int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
3264 3326
3265 if (!pkt_dev->running) { 3327 if (!pkt_dev->running) {
3266 printk(KERN_WARNING "pktgen: interface: %s is already " 3328 pr_warning("interface: %s is already stopped\n",
3267 "stopped\n", pkt_dev->odevname); 3329 pkt_dev->odevname);
3268 return -EINVAL; 3330 return -EINVAL;
3269 } 3331 }
3270 3332
@@ -3300,7 +3362,7 @@ static void pktgen_stop(struct pktgen_thread *t)
3300{ 3362{
3301 struct pktgen_dev *pkt_dev; 3363 struct pktgen_dev *pkt_dev;
3302 3364
3303 pr_debug("pktgen: entering pktgen_stop\n"); 3365 func_enter();
3304 3366
3305 if_lock(t); 3367 if_lock(t);
3306 3368
@@ -3320,7 +3382,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
3320 struct list_head *q, *n; 3382 struct list_head *q, *n;
3321 struct pktgen_dev *cur; 3383 struct pktgen_dev *cur;
3322 3384
3323 pr_debug("pktgen: entering pktgen_rem_one_if\n"); 3385 func_enter();
3324 3386
3325 if_lock(t); 3387 if_lock(t);
3326 3388
@@ -3346,9 +3408,10 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3346 struct list_head *q, *n; 3408 struct list_head *q, *n;
3347 struct pktgen_dev *cur; 3409 struct pktgen_dev *cur;
3348 3410
3411 func_enter();
3412
3349 /* Remove all devices, free mem */ 3413 /* Remove all devices, free mem */
3350 3414
3351 pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
3352 if_lock(t); 3415 if_lock(t);
3353 3416
3354 list_for_each_safe(q, n, &t->if_list) { 3417 list_for_each_safe(q, n, &t->if_list) {
@@ -3430,8 +3493,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
3430 3493
3431 pkt_dev->skb = fill_packet(odev, pkt_dev); 3494 pkt_dev->skb = fill_packet(odev, pkt_dev);
3432 if (pkt_dev->skb == NULL) { 3495 if (pkt_dev->skb == NULL) {
3433 printk(KERN_ERR "pktgen: ERROR: couldn't " 3496 pr_err("ERROR: couldn't allocate skb in fill_packet\n");
3434 "allocate skb in fill_packet.\n");
3435 schedule(); 3497 schedule();
3436 pkt_dev->clone_count--; /* back out increment, OOM */ 3498 pkt_dev->clone_count--; /* back out increment, OOM */
3437 return; 3499 return;
@@ -3511,8 +3573,7 @@ static int pktgen_thread_worker(void *arg)
3511 init_waitqueue_head(&t->queue); 3573 init_waitqueue_head(&t->queue);
3512 complete(&t->start_done); 3574 complete(&t->start_done);
3513 3575
3514 pr_debug("pktgen: starting pktgen/%d: pid=%d\n", 3576 pr_debug("starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
3515 cpu, task_pid_nr(current));
3516 3577
3517 set_current_state(TASK_INTERRUPTIBLE); 3578 set_current_state(TASK_INTERRUPTIBLE);
3518 3579
@@ -3565,13 +3626,13 @@ static int pktgen_thread_worker(void *arg)
3565 set_current_state(TASK_INTERRUPTIBLE); 3626 set_current_state(TASK_INTERRUPTIBLE);
3566 } 3627 }
3567 3628
3568 pr_debug("pktgen: %s stopping all device\n", t->tsk->comm); 3629 pr_debug("%s stopping all device\n", t->tsk->comm);
3569 pktgen_stop(t); 3630 pktgen_stop(t);
3570 3631
3571 pr_debug("pktgen: %s removing all device\n", t->tsk->comm); 3632 pr_debug("%s removing all device\n", t->tsk->comm);
3572 pktgen_rem_all_ifs(t); 3633 pktgen_rem_all_ifs(t);
3573 3634
3574 pr_debug("pktgen: %s removing thread.\n", t->tsk->comm); 3635 pr_debug("%s removing thread\n", t->tsk->comm);
3575 pktgen_rem_thread(t); 3636 pktgen_rem_thread(t);
3576 3637
3577 return 0; 3638 return 0;
@@ -3595,7 +3656,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
3595 } 3656 }
3596 3657
3597 if_unlock(t); 3658 if_unlock(t);
3598 pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev); 3659 pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
3599 return pkt_dev; 3660 return pkt_dev;
3600} 3661}
3601 3662
@@ -3611,8 +3672,7 @@ static int add_dev_to_thread(struct pktgen_thread *t,
3611 if_lock(t); 3672 if_lock(t);
3612 3673
3613 if (pkt_dev->pg_thread) { 3674 if (pkt_dev->pg_thread) {
3614 printk(KERN_ERR "pktgen: ERROR: already assigned " 3675 pr_err("ERROR: already assigned to a thread\n");
3615 "to a thread.\n");
3616 rv = -EBUSY; 3676 rv = -EBUSY;
3617 goto out; 3677 goto out;
3618 } 3678 }
@@ -3638,7 +3698,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3638 3698
3639 pkt_dev = __pktgen_NN_threads(ifname, FIND); 3699 pkt_dev = __pktgen_NN_threads(ifname, FIND);
3640 if (pkt_dev) { 3700 if (pkt_dev) {
3641 printk(KERN_ERR "pktgen: ERROR: interface already used.\n"); 3701 pr_err("ERROR: interface already used\n");
3642 return -EBUSY; 3702 return -EBUSY;
3643 } 3703 }
3644 3704
@@ -3674,6 +3734,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3674 pkt_dev->svlan_p = 0; 3734 pkt_dev->svlan_p = 0;
3675 pkt_dev->svlan_cfi = 0; 3735 pkt_dev->svlan_cfi = 0;
3676 pkt_dev->svlan_id = 0xffff; 3736 pkt_dev->svlan_id = 0xffff;
3737 pkt_dev->node = -1;
3677 3738
3678 err = pktgen_setup_dev(pkt_dev, ifname); 3739 err = pktgen_setup_dev(pkt_dev, ifname);
3679 if (err) 3740 if (err)
@@ -3682,7 +3743,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
3682 pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, 3743 pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
3683 &pktgen_if_fops, pkt_dev); 3744 &pktgen_if_fops, pkt_dev);
3684 if (!pkt_dev->entry) { 3745 if (!pkt_dev->entry) {
3685 printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", 3746 pr_err("cannot create %s/%s procfs entry\n",
3686 PG_PROC_DIR, ifname); 3747 PG_PROC_DIR, ifname);
3687 err = -EINVAL; 3748 err = -EINVAL;
3688 goto out2; 3749 goto out2;
@@ -3713,8 +3774,7 @@ static int __init pktgen_create_thread(int cpu)
3713 t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL, 3774 t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL,
3714 cpu_to_node(cpu)); 3775 cpu_to_node(cpu));
3715 if (!t) { 3776 if (!t) {
3716 printk(KERN_ERR "pktgen: ERROR: out of memory, can't " 3777 pr_err("ERROR: out of memory, can't create new thread\n");
3717 "create new thread.\n");
3718 return -ENOMEM; 3778 return -ENOMEM;
3719 } 3779 }
3720 3780
@@ -3728,8 +3788,7 @@ static int __init pktgen_create_thread(int cpu)
3728 3788
3729 p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); 3789 p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
3730 if (IS_ERR(p)) { 3790 if (IS_ERR(p)) {
3731 printk(KERN_ERR "pktgen: kernel_thread() failed " 3791 pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
3732 "for cpu %d\n", t->cpu);
3733 list_del(&t->th_list); 3792 list_del(&t->th_list);
3734 kfree(t); 3793 kfree(t);
3735 return PTR_ERR(p); 3794 return PTR_ERR(p);
@@ -3740,7 +3799,7 @@ static int __init pktgen_create_thread(int cpu)
3740 pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir, 3799 pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir,
3741 &pktgen_thread_fops, t); 3800 &pktgen_thread_fops, t);
3742 if (!pe) { 3801 if (!pe) {
3743 printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n", 3802 pr_err("cannot create %s/%s procfs entry\n",
3744 PG_PROC_DIR, t->tsk->comm); 3803 PG_PROC_DIR, t->tsk->comm);
3745 kthread_stop(p); 3804 kthread_stop(p);
3746 list_del(&t->th_list); 3805 list_del(&t->th_list);
@@ -3774,11 +3833,10 @@ static int pktgen_remove_device(struct pktgen_thread *t,
3774 struct pktgen_dev *pkt_dev) 3833 struct pktgen_dev *pkt_dev)
3775{ 3834{
3776 3835
3777 pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev); 3836 pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
3778 3837
3779 if (pkt_dev->running) { 3838 if (pkt_dev->running) {
3780 printk(KERN_WARNING "pktgen: WARNING: trying to remove a " 3839 pr_warning("WARNING: trying to remove a running interface, stopping it now\n");
3781 "running interface, stopping it now.\n");
3782 pktgen_stop_device(pkt_dev); 3840 pktgen_stop_device(pkt_dev);
3783 } 3841 }
3784 3842
@@ -3809,7 +3867,7 @@ static int __init pg_init(void)
3809 int cpu; 3867 int cpu;
3810 struct proc_dir_entry *pe; 3868 struct proc_dir_entry *pe;
3811 3869
3812 printk(KERN_INFO "%s", version); 3870 pr_info("%s", version);
3813 3871
3814 pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); 3872 pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
3815 if (!pg_proc_dir) 3873 if (!pg_proc_dir)
@@ -3817,8 +3875,7 @@ static int __init pg_init(void)
3817 3875
3818 pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); 3876 pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops);
3819 if (pe == NULL) { 3877 if (pe == NULL) {
3820 printk(KERN_ERR "pktgen: ERROR: cannot create %s " 3878 pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL);
3821 "procfs entry.\n", PGCTRL);
3822 proc_net_remove(&init_net, PG_PROC_DIR); 3879 proc_net_remove(&init_net, PG_PROC_DIR);
3823 return -EINVAL; 3880 return -EINVAL;
3824 } 3881 }
@@ -3831,13 +3888,12 @@ static int __init pg_init(void)
3831 3888
3832 err = pktgen_create_thread(cpu); 3889 err = pktgen_create_thread(cpu);
3833 if (err) 3890 if (err)
3834 printk(KERN_WARNING "pktgen: WARNING: Cannot create " 3891 pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n",
3835 "thread for cpu %d (%d)\n", cpu, err); 3892 cpu, err);
3836 } 3893 }
3837 3894
3838 if (list_empty(&pktgen_threads)) { 3895 if (list_empty(&pktgen_threads)) {
3839 printk(KERN_ERR "pktgen: ERROR: Initialization failed for " 3896 pr_err("ERROR: Initialization failed for all threads\n");
3840 "all threads\n");
3841 unregister_netdevice_notifier(&pktgen_notifier_block); 3897 unregister_netdevice_notifier(&pktgen_notifier_block);
3842 remove_proc_entry(PGCTRL, pg_proc_dir); 3898 remove_proc_entry(PGCTRL, pg_proc_dir);
3843 proc_net_remove(&init_net, PG_PROC_DIR); 3899 proc_net_remove(&init_net, PG_PROC_DIR);
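
The F_NODE hunks above let pktgen pin packet buffers to a chosen NUMA node. A minimal sketch of the allocation pattern they introduce, written around a hypothetical helper (not part of the patch):

	static struct sk_buff *pktgen_alloc_skb_on_node(struct net_device *dev,
							unsigned int size,
							int configured_node)
	{
		/* honour an explicit node, else stay on the local one */
		int node = configured_node >= 0 ? configured_node
						: numa_node_id();
		struct sk_buff *skb;

		/* __alloc_skb() reserves no headroom, so add NET_SKB_PAD back */
		skb = __alloc_skb(NET_SKB_PAD + size, GFP_NOWAIT, 0, node);
		if (likely(skb)) {
			skb_reserve(skb, NET_SKB_PAD);
			skb->dev = dev;
		}
		return skb;
	}
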
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fe776c9ddeca..f78d821bd935 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -98,7 +98,7 @@ int lockdep_rtnl_is_held(void)
 EXPORT_SYMBOL(lockdep_rtnl_is_held);
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 
-static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
 
 static inline int rtm_msgindex(int msgtype)
 {
@@ -118,7 +118,11 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	tab = rtnl_msg_handlers[protocol];
+	if (protocol <= RTNL_FAMILY_MAX)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
 	if (tab == NULL || tab[msgindex].doit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
@@ -129,7 +133,11 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	tab = rtnl_msg_handlers[protocol];
+	if (protocol <= RTNL_FAMILY_MAX)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
 	if (tab == NULL || tab[msgindex].dumpit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
@@ -159,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype,
 	struct rtnl_link *tab;
 	int msgindex;
 
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
 	tab = rtnl_msg_handlers[protocol];
@@ -211,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype)
 {
 	int msgindex;
 
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
 	if (rtnl_msg_handlers[protocol] == NULL)
@@ -233,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
 */
 void rtnl_unregister_all(int protocol)
 {
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 
 	kfree(rtnl_msg_handlers[protocol]);
 	rtnl_msg_handlers[protocol] = NULL;
@@ -571,7 +579,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
 }
 
 static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
-				 const struct net_device_stats *b)
+				 const struct rtnl_link_stats64 *b)
 {
 	a->rx_packets = b->rx_packets;
 	a->tx_packets = b->tx_packets;
@@ -600,18 +608,85 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 
 	a->rx_compressed = b->rx_compressed;
 	a->tx_compressed = b->tx_compressed;
-};
+}
 
+static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
+{
+	struct rtnl_link_stats64 a;
+
+	a.rx_packets = b->rx_packets;
+	a.tx_packets = b->tx_packets;
+	a.rx_bytes = b->rx_bytes;
+	a.tx_bytes = b->tx_bytes;
+	a.rx_errors = b->rx_errors;
+	a.tx_errors = b->tx_errors;
+	a.rx_dropped = b->rx_dropped;
+	a.tx_dropped = b->tx_dropped;
+
+	a.multicast = b->multicast;
+	a.collisions = b->collisions;
+
+	a.rx_length_errors = b->rx_length_errors;
+	a.rx_over_errors = b->rx_over_errors;
+	a.rx_crc_errors = b->rx_crc_errors;
+	a.rx_frame_errors = b->rx_frame_errors;
+	a.rx_fifo_errors = b->rx_fifo_errors;
+	a.rx_missed_errors = b->rx_missed_errors;
+
+	a.tx_aborted_errors = b->tx_aborted_errors;
+	a.tx_carrier_errors = b->tx_carrier_errors;
+	a.tx_fifo_errors = b->tx_fifo_errors;
+	a.tx_heartbeat_errors = b->tx_heartbeat_errors;
+	a.tx_window_errors = b->tx_window_errors;
+
+	a.rx_compressed = b->rx_compressed;
+	a.tx_compressed = b->tx_compressed;
+	memcpy(v, &a, sizeof(a));
+}
+
+/* All VF info */
 static inline int rtnl_vfinfo_size(const struct net_device *dev)
 {
-	if (dev->dev.parent && dev_is_pci(dev->dev.parent))
-		return dev_num_vf(dev->dev.parent) *
-			sizeof(struct ifla_vf_info);
-	else
+	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
+
+		int num_vfs = dev_num_vf(dev->dev.parent);
+		size_t size = nla_total_size(sizeof(struct nlattr));
+		size += nla_total_size(num_vfs * sizeof(struct nlattr));
+		size += num_vfs *
+			(nla_total_size(sizeof(struct ifla_vf_mac)) +
+			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
+			 nla_total_size(sizeof(struct ifla_vf_tx_rate)));
+		return size;
+	} else
+		return 0;
+}
+
+static size_t rtnl_port_size(const struct net_device *dev)
+{
+	size_t port_size = nla_total_size(4)		/* PORT_VF */
+		+ nla_total_size(PORT_PROFILE_MAX)	/* PORT_PROFILE */
+		+ nla_total_size(sizeof(struct ifla_port_vsi))
+							/* PORT_VSI_TYPE */
+		+ nla_total_size(PORT_UUID_MAX)		/* PORT_INSTANCE_UUID */
+		+ nla_total_size(PORT_UUID_MAX)		/* PORT_HOST_UUID */
+		+ nla_total_size(1)			/* PROT_VDP_REQUEST */
+		+ nla_total_size(2);			/* PORT_VDP_RESPONSE */
+	size_t vf_ports_size = nla_total_size(sizeof(struct nlattr));
+	size_t vf_port_size = nla_total_size(sizeof(struct nlattr))
+		+ port_size;
+	size_t port_self_size = nla_total_size(sizeof(struct nlattr))
+		+ port_size;
+
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
 		return 0;
+	if (dev_num_vf(dev->dev.parent))
+		return port_self_size + vf_ports_size +
+		       vf_port_size * dev_num_vf(dev->dev.parent);
+	else
+		return port_self_size;
 }
 
-static inline size_t if_nlmsg_size(const struct net_device *dev)
+static noinline size_t if_nlmsg_size(const struct net_device *dev)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
 	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
@@ -619,6 +694,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
 	       + nla_total_size(sizeof(struct rtnl_link_ifmap))
 	       + nla_total_size(sizeof(struct rtnl_link_stats))
+	       + nla_total_size(sizeof(struct rtnl_link_stats64))
 	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
 	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
 	       + nla_total_size(4) /* IFLA_TXQLEN */
@@ -629,17 +705,94 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_NUM_VF */
-	       + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */
+	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
 }
 
+static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *vf_ports;
+	struct nlattr *vf_port;
+	int vf;
+	int err;
+
+	vf_ports = nla_nest_start(skb, IFLA_VF_PORTS);
+	if (!vf_ports)
+		return -EMSGSIZE;
+
+	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
+		vf_port = nla_nest_start(skb, IFLA_VF_PORT);
+		if (!vf_port)
+			goto nla_put_failure;
+		NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
+		err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
+		if (err == -EMSGSIZE)
+			goto nla_put_failure;
+		if (err) {
+			nla_nest_cancel(skb, vf_port);
+			continue;
+		}
+		nla_nest_end(skb, vf_port);
+	}
+
+	nla_nest_end(skb, vf_ports);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, vf_ports);
+	return -EMSGSIZE;
+}
+
+static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *port_self;
+	int err;
+
+	port_self = nla_nest_start(skb, IFLA_PORT_SELF);
+	if (!port_self)
+		return -EMSGSIZE;
+
+	err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
+	if (err) {
+		nla_nest_cancel(skb, port_self);
+		return (err == -EMSGSIZE) ? err : 0;
+	}
+
+	nla_nest_end(skb, port_self);
+
+	return 0;
+}
+
+static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	int err;
+
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+		return 0;
+
+	err = rtnl_port_self_fill(skb, dev);
+	if (err)
+		return err;
+
+	if (dev_num_vf(dev->dev.parent)) {
+		err = rtnl_vf_ports_fill(skb, dev);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags)
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	const struct net_device_stats *stats;
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *stats;
 	struct nlattr *attr;
 
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -695,20 +848,55 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (attr == NULL)
 		goto nla_put_failure;
 
-	stats = dev_get_stats(dev);
+	stats = dev_get_stats(dev, &temp);
 	copy_rtnl_link_stats(nla_data(attr), stats);
 
+	attr = nla_reserve(skb, IFLA_STATS64,
+			   sizeof(struct rtnl_link_stats64));
+	if (attr == NULL)
+		goto nla_put_failure;
+	copy_rtnl_link_stats64(nla_data(attr), stats);
+
+	if (dev->dev.parent)
+		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
+
 	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
 		int i;
-		struct ifla_vf_info ivi;
 
-		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
-		for (i = 0; i < dev_num_vf(dev->dev.parent); i++) {
+		struct nlattr *vfinfo, *vf;
+		int num_vfs = dev_num_vf(dev->dev.parent);
+
+		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+		if (!vfinfo)
+			goto nla_put_failure;
+		for (i = 0; i < num_vfs; i++) {
+			struct ifla_vf_info ivi;
+			struct ifla_vf_mac vf_mac;
+			struct ifla_vf_vlan vf_vlan;
+			struct ifla_vf_tx_rate vf_tx_rate;
 			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
 				break;
-			NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi);
+			vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
+			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+			vf_vlan.vlan = ivi.vlan;
+			vf_vlan.qos = ivi.qos;
+			vf_tx_rate.rate = ivi.tx_rate;
+			vf = nla_nest_start(skb, IFLA_VF_INFO);
+			if (!vf) {
+				nla_nest_cancel(skb, vfinfo);
+				goto nla_put_failure;
+			}
+			NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
+			NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
+			NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
+			nla_nest_end(skb, vf);
 		}
+		nla_nest_end(skb, vfinfo);
 	}
+
+	if (rtnl_port_fill(skb, dev))
+		goto nla_put_failure;
+
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
 			goto nla_put_failure;
@@ -769,6 +957,22 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
 	[IFLA_IFALIAS]		= { .type = NLA_STRING, .len = IFALIASZ-1 },
+	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
+	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
+	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
+};
+EXPORT_SYMBOL(ifla_policy);
+
+static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
+	[IFLA_INFO_KIND]	= { .type = NLA_STRING },
+	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
+	[IFLA_VF_INFO]		= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_MAC]		= { .type = NLA_BINARY,
 				    .len = sizeof(struct ifla_vf_mac) },
 	[IFLA_VF_VLAN]		= { .type = NLA_BINARY,
@@ -776,11 +980,19 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct ifla_vf_tx_rate) },
 };
-EXPORT_SYMBOL(ifla_policy);
 
-static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
-	[IFLA_INFO_KIND]	= { .type = NLA_STRING },
-	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
+static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
+	[IFLA_PORT_VF]		= { .type = NLA_U32 },
+	[IFLA_PORT_PROFILE]	= { .type = NLA_STRING,
+				    .len = PORT_PROFILE_MAX },
+	[IFLA_PORT_VSI_TYPE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_port_vsi)},
+	[IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY,
+				      .len = PORT_UUID_MAX },
+	[IFLA_PORT_HOST_UUID]	= { .type = NLA_STRING,
+				    .len = PORT_UUID_MAX },
+	[IFLA_PORT_REQUEST]	= { .type = NLA_U8, },
+	[IFLA_PORT_RESPONSE]	= { .type = NLA_U16, },
 };
 
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
@@ -812,6 +1024,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 	return 0;
 }
 
+static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+{
+	int rem, err = -EINVAL;
+	struct nlattr *vf;
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	nla_for_each_nested(vf, attr, rem) {
+		switch (nla_type(vf)) {
+		case IFLA_VF_MAC: {
+			struct ifla_vf_mac *ivm;
+			ivm = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_mac)
+				err = ops->ndo_set_vf_mac(dev, ivm->vf,
+							  ivm->mac);
+			break;
+		}
+		case IFLA_VF_VLAN: {
+			struct ifla_vf_vlan *ivv;
+			ivv = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_vlan)
+				err = ops->ndo_set_vf_vlan(dev, ivv->vf,
+							   ivv->vlan,
+							   ivv->qos);
+			break;
+		}
+		case IFLA_VF_TX_RATE: {
+			struct ifla_vf_tx_rate *ivt;
+			ivt = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_tx_rate)
+				err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
+							      ivt->rate);
+			break;
+		}
+		default:
+			err = -EINVAL;
+			break;
+		}
+		if (err)
+			break;
+	}
+	return err;
+}
+
 static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		      struct nlattr **tb, char *ifname, int modified)
 {
@@ -942,37 +1200,63 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		write_unlock_bh(&dev_base_lock);
 	}
 
-	if (tb[IFLA_VF_MAC]) {
-		struct ifla_vf_mac *ivm;
-		ivm = nla_data(tb[IFLA_VF_MAC]);
-		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_mac)
-			err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac);
-		if (err < 0)
-			goto errout;
-		modified = 1;
+	if (tb[IFLA_VFINFO_LIST]) {
+		struct nlattr *attr;
+		int rem;
+		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
+			if (nla_type(attr) != IFLA_VF_INFO) {
+				err = -EINVAL;
+				goto errout;
+			}
+			err = do_setvfinfo(dev, attr);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
 	}
+	err = 0;
+
+	if (tb[IFLA_VF_PORTS]) {
+		struct nlattr *port[IFLA_PORT_MAX+1];
+		struct nlattr *attr;
+		int vf;
+		int rem;
 
-	if (tb[IFLA_VF_VLAN]) {
-		struct ifla_vf_vlan *ivv;
-		ivv = nla_data(tb[IFLA_VF_VLAN]);
 		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_vlan)
-			err = ops->ndo_set_vf_vlan(dev, ivv->vf,
-						   ivv->vlan,
-						   ivv->qos);
-		if (err < 0)
+		if (!ops->ndo_set_vf_port)
 			goto errout;
-		modified = 1;
+
+		nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
+			if (nla_type(attr) != IFLA_VF_PORT)
+				continue;
+			err = nla_parse_nested(port, IFLA_PORT_MAX,
+					       attr, ifla_port_policy);
+			if (err < 0)
+				goto errout;
+			if (!port[IFLA_PORT_VF]) {
+				err = -EOPNOTSUPP;
+				goto errout;
+			}
+			vf = nla_get_u32(port[IFLA_PORT_VF]);
+			err = ops->ndo_set_vf_port(dev, vf, port);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
 	}
 	err = 0;
 
-	if (tb[IFLA_VF_TX_RATE]) {
-		struct ifla_vf_tx_rate *ivt;
-		ivt = nla_data(tb[IFLA_VF_TX_RATE]);
+	if (tb[IFLA_PORT_SELF]) {
+		struct nlattr *port[IFLA_PORT_MAX+1];
+
+		err = nla_parse_nested(port, IFLA_PORT_MAX,
+				       tb[IFLA_PORT_SELF], ifla_port_policy);
+		if (err < 0)
+			goto errout;
+
 		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_tx_rate)
-			err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate);
+		if (ops->ndo_set_vf_port)
+			err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port);
 		if (err < 0)
 			goto errout;
 		modified = 1;
@@ -1336,7 +1620,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (s_idx == 0)
 		s_idx = 1;
-	for (idx = 1; idx < NPROTO; idx++) {
+	for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
 		int type = cb->nlh->nlmsg_type-RTM_BASE;
 		if (idx < s_idx || idx == PF_PACKET)
 			continue;
@@ -1404,9 +1688,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return 0;
 
 	family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
-	if (family >= NPROTO)
-		return -EAFNOSUPPORT;
-
 	sz_idx = type>>2;
 	kind = type&3;
 
@@ -1474,6 +1755,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_POST_INIT:
 	case NETDEV_REGISTER:
 	case NETDEV_CHANGE:
+	case NETDEV_PRE_TYPE_CHANGE:
 	case NETDEV_GOING_DOWN:
 	case NETDEV_UNREGISTER:
 	case NETDEV_UNREGISTER_BATCH:
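
For orientation, the RTM_NEWLINK attribute nesting that rtnl_fill_ifinfo() emits after these hunks can be sketched as follows (derived from the code above, shown for an SR-IOV device; ordering is illustrative):

	/*
	 *   IFLA_STATS64          struct rtnl_link_stats64 (new, next to IFLA_STATS)
	 *   IFLA_NUM_VF           u32
	 *   IFLA_VFINFO_LIST      nested
	 *     IFLA_VF_INFO        nested, one per VF
	 *       IFLA_VF_MAC       struct ifla_vf_mac
	 *       IFLA_VF_VLAN      struct ifla_vf_vlan
	 *       IFLA_VF_TX_RATE   struct ifla_vf_tx_rate
	 *   IFLA_VF_PORTS         nested, only if ndo_get_vf_port is provided
	 *     IFLA_VF_PORT        nested, one per VF: IFLA_PORT_VF plus
	 *                         driver-filled IFLA_PORT_* attributes
	 *   IFLA_PORT_SELF        nested, the physical function's own port
	 */
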
diff --git a/net/core/scm.c b/net/core/scm.c
index b88f6f9d0b97..413cab89017d 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -130,6 +130,7 @@ void __scm_destroy(struct scm_cookie *scm)
 		}
 	}
 }
+EXPORT_SYMBOL(__scm_destroy);
 
 int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 {
@@ -170,6 +171,30 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 			err = scm_check_creds(&p->creds);
 			if (err)
 				goto error;
+
+			if (pid_vnr(p->pid) != p->creds.pid) {
+				struct pid *pid;
+				err = -ESRCH;
+				pid = find_get_pid(p->creds.pid);
+				if (!pid)
+					goto error;
+				put_pid(p->pid);
+				p->pid = pid;
+			}
+
+			if ((p->cred->euid != p->creds.uid) ||
+			    (p->cred->egid != p->creds.gid)) {
+				struct cred *cred;
+				err = -ENOMEM;
+				cred = prepare_creds();
+				if (!cred)
+					goto error;
+
+				cred->uid = cred->euid = p->creds.uid;
+				cred->gid = cred->egid = p->creds.uid;
+				put_cred(p->cred);
+				p->cred = cred;
+			}
 			break;
 		default:
 			goto error;
@@ -187,6 +212,7 @@ error:
 	scm_destroy(p);
 	return err;
 }
+EXPORT_SYMBOL(__scm_send);
 
 int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 {
@@ -225,6 +251,7 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 out:
 	return err;
 }
+EXPORT_SYMBOL(put_cmsg);
 
 void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 {
@@ -294,6 +321,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 	 */
 	__scm_destroy(scm);
 }
+EXPORT_SYMBOL(scm_detach_fds);
 
 struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
 {
@@ -311,9 +339,4 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
 	}
 	return new_fpl;
 }
-
-EXPORT_SYMBOL(__scm_destroy);
-EXPORT_SYMBOL(__scm_send);
-EXPORT_SYMBOL(put_cmsg);
-EXPORT_SYMBOL(scm_detach_fds);
 EXPORT_SYMBOL(scm_fp_dup);
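
The __scm_send() additions above make the kernel adopt the pid and effective uid/gid a sender passes via SCM_CREDENTIALS, once scm_check_creds() has authorized them. For context, a minimal userspace sketch (not part of the patch) that sends such a control message on a connected AF_UNIX socket:

	#define _GNU_SOURCE		/* for struct ucred */
	#include <sys/socket.h>
	#include <sys/uio.h>
	#include <string.h>
	#include <unistd.h>

	/* Send one data byte plus our own credentials. */
	static int send_creds(int fd)
	{
		struct ucred creds = {
			.pid = getpid(),
			.uid = geteuid(),
			.gid = getegid(),
		};
		char data = 'x';
		struct iovec iov = { .iov_base = &data, .iov_len = 1 };
		char cbuf[CMSG_SPACE(sizeof(creds))];
		struct msghdr msg = { 0 };
		struct cmsghdr *cmsg;

		memset(cbuf, 0, sizeof(cbuf));
		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;
		msg.msg_control = cbuf;
		msg.msg_controllen = sizeof(cbuf);

		cmsg = CMSG_FIRSTHDR(&msg);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_CREDENTIALS;
		cmsg->cmsg_len = CMSG_LEN(sizeof(creds));
		memcpy(CMSG_DATA(cmsg), &creds, sizeof(creds));

		return sendmsg(fd, &msg, 0) == 1 ? 0 : -1;
	}
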
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 93c4e060c91e..3a2513f0d0c3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -117,7 +117,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = {
 *
 *	Out of line support code for skb_put(). Not user callable.
 */
-void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
 			  "data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
-EXPORT_SYMBOL(skb_over_panic);
 
 /**
 *	skb_under_panic	-	private function
@@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic);
 *	Out of line support code for skb_push(). Not user callable.
 */
 
-void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
 			  "data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
-EXPORT_SYMBOL(skb_under_panic);
 
 /*	Allocate a new skbuff. We do this ourselves so we can fill in a few
 *	'private' fields and also do memory statistics to find all the
@@ -183,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
 	if (!skb)
 		goto out;
+	prefetchw(skb);
 
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
 					 gfp_mask, node);
 	if (!data)
 		goto nodata;
+	prefetchw(data + size);
 
 	/*
 	 * Only clear those fields we need to clear, not those that we will
@@ -210,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags  = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -489,40 +482,34 @@ EXPORT_SYMBOL(consume_skb);
 *	reference count dropping and cleans up the skbuff as if it
 *	just came from __alloc_skb().
 */
-int skb_recycle_check(struct sk_buff *skb, int skb_size)
+bool skb_recycle_check(struct sk_buff *skb, int skb_size)
 {
 	struct skb_shared_info *shinfo;
 
 	if (irqs_disabled())
-		return 0;
+		return false;
 
 	if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
-		return 0;
+		return false;
 
 	skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
 	if (skb_end_pointer(skb) - skb->head < skb_size)
-		return 0;
+		return false;
 
 	if (skb_shared(skb) || skb_cloned(skb))
-		return 0;
+		return false;
 
 	skb_release_head_state(skb);
+
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->data = skb->head + NET_SKB_PAD;
 	skb_reset_tail_pointer(skb);
 
-	return 1;
+	return true;
 }
 EXPORT_SYMBOL(skb_recycle_check);
 
@@ -533,7 +520,8 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->transport_header	= old->transport_header;
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
-	skb_dst_set(new, dst_clone(skb_dst(old)));
+	skb_dst_copy(new, old);
+	new->rxhash		= old->rxhash;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
@@ -544,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->ip_summed		= old->ip_summed;
 	skb_copy_queue_mapping(new, old);
 	new->priority		= old->priority;
+	new->deliver_no_wcard	= old->deliver_no_wcard;
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	new->ipvs_property	= old->ipvs_property;
 #endif
@@ -828,7 +817,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	memcpy(data + nhead, skb->head, skb->tail - skb->head);
 #endif
 	memcpy(data + size, skb_end_pointer(skb),
-	       sizeof(struct skb_shared_info));
+	       offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
 
 	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 		get_page(skb_shinfo(skb)->frags[i].page);
@@ -854,7 +843,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	skb->network_header   += off;
 	if (skb_mac_header_was_set(skb))
 		skb->mac_header += off;
-	skb->csum_start       += nhead;
+	/* Only adjust this if it actually is csum_start rather than csum */
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		skb->csum_start += nhead;
 	skb->cloned   = 0;
 	skb->hdr_len  = 0;
 	skb->nohdr    = 0;
@@ -941,7 +932,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 	copy_skb_header(n, skb);
 
 	off                  = newheadroom - oldheadroom;
-	n->csum_start       += off;
+	if (n->ip_summed == CHECKSUM_PARTIAL)
+		n->csum_start += off;
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 	n->transport_header += off;
 	n->network_header   += off;
@@ -1051,7 +1043,7 @@ EXPORT_SYMBOL(skb_push);
 */
 unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
 {
-	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
+	return skb_pull_inline(skb, len);
 }
 EXPORT_SYMBOL(skb_pull);
 
@@ -1417,12 +1409,13 @@ new_page:
/*
 * Fill page/offset/length into spd, if it can hold more pages.
 */
-static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+static inline int spd_fill_page(struct splice_pipe_desc *spd,
+				struct pipe_inode_info *pipe, struct page *page,
 				unsigned int *len, unsigned int offset,
 				struct sk_buff *skb, int linear,
 				struct sock *sk)
 {
-	if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+	if (unlikely(spd->nr_pages == pipe->buffers))
 		return 1;
 
 	if (linear) {
@@ -1458,7 +1451,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 				  unsigned int plen, unsigned int *off,
 				  unsigned int *len, struct sk_buff *skb,
 				  struct splice_pipe_desc *spd, int linear,
-				  struct sock *sk)
+				  struct sock *sk,
+				  struct pipe_inode_info *pipe)
 {
 	if (!*len)
 		return 1;
@@ -1481,7 +1475,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 		/* the linear region may spread across several pages  */
 		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
 
-		if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk))
+		if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
 			return 1;
 
 		__segment_seek(&page, &poff, &plen, flen);
@@ -1496,9 +1490,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 * Map linear and fragment data from the skb to spd. It reports failure if the
 * pipe is full or if we already spliced the requested length.
 */
-static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
-			     unsigned int *len, struct splice_pipe_desc *spd,
-			     struct sock *sk)
+static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
+			     unsigned int *offset, unsigned int *len,
+			     struct splice_pipe_desc *spd, struct sock *sk)
 {
 	int seg;
 
@@ -1508,7 +1502,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd, 1, sk))
+			     offset, len, skb, spd, 1, sk, pipe))
 		return 1;
 
 	/*
@@ -1518,7 +1512,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(f->page, f->page_offset, f->size,
-				     offset, len, skb, spd, 0, sk))
+				     offset, len, skb, spd, 0, sk, pipe))
 			return 1;
 	}
 
@@ -1535,8 +1529,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int tlen,
 		    unsigned int flags)
 {
-	struct partial_page partial[PIPE_BUFFERS];
-	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.partial = partial,
@@ -1546,12 +1540,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 	};
 	struct sk_buff *frag_iter;
 	struct sock *sk = skb->sk;
+	int ret = 0;
+
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
 
 	/*
 	 * __skb_splice_bits() only fails if the output has no room left,
 	 * so no point in going over the frag_list for the error case.
 	 */
-	if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk))
+	if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
 		goto done;
 	else if (!tlen)
 		goto done;
@@ -1562,14 +1560,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 	skb_walk_frags(skb, frag_iter) {
 		if (!tlen)
 			break;
-		if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk))
+		if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
 			break;
 	}
 
done:
 	if (spd.nr_pages) {
-		int ret;
-
 		/*
 		 * Drop the socket lock, otherwise we have reverse
 		 * locking dependencies between sk_lock and i_mutex
@@ -1582,10 +1578,10 @@ done:
 		release_sock(sk);
 		ret = splice_to_pipe(pipe, &spd);
 		lock_sock(sk);
-		return ret;
 	}
 
-	return 0;
+	splice_shrink_spd(pipe, &spd);
+	return ret;
 }
 
/**
@@ -2490,7 +2486,6 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 	skb_postpull_rcsum(skb, skb->data, len);
 	return skb->data += len;
 }
-
 EXPORT_SYMBOL_GPL(skb_pull_rcsum);
 
/**
@@ -2729,6 +2724,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
 	skb_shinfo(nskb)->frag_list = p;
 	skb_shinfo(nskb)->gso_size = pinfo->gso_size;
+	pinfo->gso_size = 0;
 	skb_header_release(p);
 	nskb->prev = p;
 
@@ -2971,6 +2967,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 }
 EXPORT_SYMBOL_GPL(skb_cow_data);
 
+static void sock_rmem_free(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+}
+
+/*
+ * Note: We dont mem charge error packets (no sk_forward_alloc changes)
+ */
+int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+	    (unsigned)sk->sk_rcvbuf)
+		return -ENOMEM;
+
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_rmem_free;
+	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+	skb_queue_tail(&sk->sk_error_queue, skb);
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb->len);
+	return 0;
+}
+EXPORT_SYMBOL(sock_queue_err_skb);
+
 void skb_tstamp_tx(struct sk_buff *orig_skb,
 		   struct skb_shared_hwtstamps *hwtstamps)
 {
@@ -3002,7 +3026,9 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	memset(serr, 0, sizeof(*serr));
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+
 	err = sock_queue_err_skb(sk, skb);
+
 	if (err)
 		kfree_skb(skb);
 }
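
sock_queue_err_skb(), added above, is what skb_tstamp_tx() now uses to park transmit-timestamp clones on the socket error queue. A minimal userspace sketch of draining that queue with standard recvmsg() flags (illustrative, not part of the patch):

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <sys/uio.h>

	/* Returns recvmsg()'s result; -1 with EAGAIN means the queue is empty. */
	static ssize_t read_errqueue(int fd, void *buf, size_t buflen,
				     void *cbuf, size_t cbuflen)
	{
		struct iovec iov = { .iov_base = buf, .iov_len = buflen };
		struct msghdr msg = { 0 };

		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;
		msg.msg_control = cbuf;		/* receives e.g. SCM_TIMESTAMPING */
		msg.msg_controllen = cbuflen;

		return recvmsg(fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT);
	}
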
diff --git a/net/core/sock.c b/net/core/sock.c
index c5812bbc2cc9..b05b9b6ddb87 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -110,6 +110,7 @@
 #include <linux/tcp.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
+#include <linux/user_namespace.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -123,6 +124,7 @@
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
 
 #include <linux/filter.h>
 
@@ -155,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
  "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
  "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
  "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
-  "sk_lock-AF_IEEE802154",
+  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" ,
  "sk_lock-AF_MAX"
 };
 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
@@ -171,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
  "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
  "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
  "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
-  "slock-AF_IEEE802154",
+  "slock-AF_IEEE802154", "slock-AF_CAIF" ,
  "slock-AF_MAX"
 };
 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
@@ -187,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
  "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
  "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
  "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
-  "clock-AF_IEEE802154",
+  "clock-AF_IEEE802154", "clock-AF_CAIF" ,
  "clock-AF_MAX"
 };
 
@@ -217,6 +219,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
 	struct timeval tv;
@@ -307,6 +314,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	skb_len = skb->len;
 
+	/* we escape from rcu protected region, make sure we dont leak
+	 * a norefcounted dst
+	 */
+	skb_dst_force(skb);
+
 	spin_lock_irqsave(&list->lock, flags);
 	skb->dropcount = atomic_read(&sk->sk_drops);
 	__skb_queue_tail(list, skb);
@@ -327,6 +339,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
 	skb->dev = NULL;
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
 	if (nested)
 		bh_lock_sock_nested(sk);
 	else
@@ -364,11 +380,11 @@ EXPORT_SYMBOL(sk_reset_txq);
 
 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 {
-	struct dst_entry *dst = sk->sk_dst_cache;
+	struct dst_entry *dst = __sk_dst_get(sk);
 
 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 		sk_tx_queue_clear(sk);
-		sk->sk_dst_cache = NULL;
+		rcu_assign_pointer(sk->sk_dst_cache, NULL);
 		dst_release(dst);
 		return NULL;
 	}
@@ -734,6 +750,20 @@ set_rcvbuf:
 EXPORT_SYMBOL(sock_setsockopt);
 
 
+void cred_to_ucred(struct pid *pid, const struct cred *cred,
+		   struct ucred *ucred)
+{
+	ucred->pid = pid_vnr(pid);
+	ucred->uid = ucred->gid = -1;
+	if (cred) {
+		struct user_namespace *current_ns = current_user_ns();
+
+		ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
+		ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
+	}
+}
+EXPORT_SYMBOL_GPL(cred_to_ucred);
+
 int sock_getsockopt(struct socket *sock, int level, int optname,
 		    char __user *optval, int __user *optlen)
 {
@@ -886,11 +916,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_PEERCRED:
-		if (len > sizeof(sk->sk_peercred))
-			len = sizeof(sk->sk_peercred);
-		if (copy_to_user(optval, &sk->sk_peercred, len))
+	{
+		struct ucred peercred;
+		if (len > sizeof(peercred))
+			len = sizeof(peercred);
+		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+		if (copy_to_user(optval, &peercred, len))
 			return -EFAULT;
 		goto lenout;
+	}
 
 	case SO_PEERNAME:
 	{
@@ -1041,6 +1075,17 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
1041 module_put(owner); 1075 module_put(owner);
1042} 1076}
1043 1077
1078#ifdef CONFIG_CGROUPS
1079void sock_update_classid(struct sock *sk)
1080{
1081 u32 classid = task_cls_classid(current);
1082
1083 if (classid && classid != sk->sk_classid)
1084 sk->sk_classid = classid;
1085}
1086EXPORT_SYMBOL(sock_update_classid);
1087#endif
1088
1044/** 1089/**
1045 * sk_alloc - All socket objects are allocated here 1090 * sk_alloc - All socket objects are allocated here
1046 * @net: the applicable net namespace 1091 * @net: the applicable net namespace
@@ -1064,6 +1109,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1064 sock_lock_init(sk); 1109 sock_lock_init(sk);
1065 sock_net_set(sk, get_net(net)); 1110 sock_net_set(sk, get_net(net));
1066 atomic_set(&sk->sk_wmem_alloc, 1); 1111 atomic_set(&sk->sk_wmem_alloc, 1);
1112
1113 sock_update_classid(sk);
1067 } 1114 }
1068 1115
1069 return sk; 1116 return sk;
@@ -1091,6 +1138,9 @@ static void __sk_free(struct sock *sk)
1091 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", 1138 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
1092 __func__, atomic_read(&sk->sk_omem_alloc)); 1139 __func__, atomic_read(&sk->sk_omem_alloc));
1093 1140
1141 if (sk->sk_peer_cred)
1142 put_cred(sk->sk_peer_cred);
1143 put_pid(sk->sk_peer_pid);
1094 put_net(sock_net(sk)); 1144 put_net(sock_net(sk));
1095 sk_prot_free(sk->sk_prot_creator, sk); 1145 sk_prot_free(sk->sk_prot_creator, sk);
1096} 1146}
@@ -1157,7 +1207,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1157 skb_queue_head_init(&newsk->sk_async_wait_queue); 1207 skb_queue_head_init(&newsk->sk_async_wait_queue);
1158#endif 1208#endif
1159 1209
1160 rwlock_init(&newsk->sk_dst_lock); 1210 spin_lock_init(&newsk->sk_dst_lock);
1161 rwlock_init(&newsk->sk_callback_lock); 1211 rwlock_init(&newsk->sk_callback_lock);
1162 lockdep_set_class_and_name(&newsk->sk_callback_lock, 1212 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1163 af_callback_keys + newsk->sk_family, 1213 af_callback_keys + newsk->sk_family,
@@ -1207,7 +1257,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
1207 */ 1257 */
1208 sk_refcnt_debug_inc(newsk); 1258 sk_refcnt_debug_inc(newsk);
1209 sk_set_socket(newsk, NULL); 1259 sk_set_socket(newsk, NULL);
1210 newsk->sk_sleep = NULL; 1260 newsk->sk_wq = NULL;
1211 1261
1212 if (newsk->sk_prot->sockets_allocated) 1262 if (newsk->sk_prot->sockets_allocated)
1213 percpu_counter_inc(newsk->sk_prot->sockets_allocated); 1263 percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1227,6 +1277,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1227 sk->sk_route_caps = dst->dev->features; 1277 sk->sk_route_caps = dst->dev->features;
1228 if (sk->sk_route_caps & NETIF_F_GSO) 1278 if (sk->sk_route_caps & NETIF_F_GSO)
1229 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; 1279 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1280 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1230 if (sk_can_gso(sk)) { 1281 if (sk_can_gso(sk)) {
1231 if (dst->header_len) { 1282 if (dst->header_len) {
1232 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 1283 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
@@ -1288,9 +1339,10 @@ EXPORT_SYMBOL(sock_wfree);
1288void sock_rfree(struct sk_buff *skb) 1339void sock_rfree(struct sk_buff *skb)
1289{ 1340{
1290 struct sock *sk = skb->sk; 1341 struct sock *sk = skb->sk;
1342 unsigned int len = skb->truesize;
1291 1343
1292 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); 1344 atomic_sub(len, &sk->sk_rmem_alloc);
1293 sk_mem_uncharge(skb->sk, skb->truesize); 1345 sk_mem_uncharge(sk, len);
1294} 1346}
1295EXPORT_SYMBOL(sock_rfree); 1347EXPORT_SYMBOL(sock_rfree);
1296 1348
@@ -1395,7 +1447,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
1395 if (signal_pending(current)) 1447 if (signal_pending(current))
1396 break; 1448 break;
1397 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1449 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1398 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1450 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1399 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) 1451 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1400 break; 1452 break;
1401 if (sk->sk_shutdown & SEND_SHUTDOWN) 1453 if (sk->sk_shutdown & SEND_SHUTDOWN)
@@ -1404,7 +1456,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
1404 break; 1456 break;
1405 timeo = schedule_timeout(timeo); 1457 timeo = schedule_timeout(timeo);
1406 } 1458 }
1407 finish_wait(sk->sk_sleep, &wait); 1459 finish_wait(sk_sleep(sk), &wait);
1408 return timeo; 1460 return timeo;
1409} 1461}
1410 1462
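Every former sk->sk_sleep dereference becomes sk_sleep(sk), hiding the wait queue's move into struct socket_wq. A sketch of the accessor under that assumption:

	/* sketch: the wait queue now lives in the RCU-managed socket_wq */
	static inline wait_queue_head_t *sk_sleep(struct sock *sk)
	{
		return &sk->sk_wq->wait;
	}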
@@ -1531,6 +1583,7 @@ static void __release_sock(struct sock *sk)
1531 do { 1583 do {
1532 struct sk_buff *next = skb->next; 1584 struct sk_buff *next = skb->next;
1533 1585
1586 WARN_ON_ONCE(skb_dst_is_noref(skb));
1534 skb->next = NULL; 1587 skb->next = NULL;
1535 sk_backlog_rcv(sk, skb); 1588 sk_backlog_rcv(sk, skb);
1536 1589
@@ -1570,11 +1623,11 @@ int sk_wait_data(struct sock *sk, long *timeo)
1570 int rc; 1623 int rc;
1571 DEFINE_WAIT(wait); 1624 DEFINE_WAIT(wait);
1572 1625
1573 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1626 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1574 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1627 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1575 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); 1628 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1576 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1629 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1577 finish_wait(sk->sk_sleep, &wait); 1630 finish_wait(sk_sleep(sk), &wait);
1578 return rc; 1631 return rc;
1579} 1632}
1580EXPORT_SYMBOL(sk_wait_data); 1633EXPORT_SYMBOL(sk_wait_data);
@@ -1796,41 +1849,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
1796 1849
1797static void sock_def_wakeup(struct sock *sk) 1850static void sock_def_wakeup(struct sock *sk)
1798{ 1851{
1799 read_lock(&sk->sk_callback_lock); 1852 struct socket_wq *wq;
1800 if (sk_has_sleeper(sk)) 1853
1801 wake_up_interruptible_all(sk->sk_sleep); 1854 rcu_read_lock();
1802 read_unlock(&sk->sk_callback_lock); 1855 wq = rcu_dereference(sk->sk_wq);
1856 if (wq_has_sleeper(wq))
1857 wake_up_interruptible_all(&wq->wait);
1858 rcu_read_unlock();
1803} 1859}
1804 1860
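The wakeup callbacks now run under rcu_read_lock() and test for waiters with wq_has_sleeper(). A sketch, assuming it keeps the memory-barrier pairing the old sk_has_sleeper() provided:

	/* sketch: NULL-safe waiter check; the barrier pairs with the one
	 * implied by prepare_to_wait() on the sleeping side */
	static inline bool wq_has_sleeper(struct socket_wq *wq)
	{
		smp_mb();
		return wq && waitqueue_active(&wq->wait);
	}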
1805static void sock_def_error_report(struct sock *sk) 1861static void sock_def_error_report(struct sock *sk)
1806{ 1862{
1807 read_lock(&sk->sk_callback_lock); 1863 struct socket_wq *wq;
1808 if (sk_has_sleeper(sk)) 1864
1809 wake_up_interruptible_poll(sk->sk_sleep, POLLERR); 1865 rcu_read_lock();
1866 wq = rcu_dereference(sk->sk_wq);
1867 if (wq_has_sleeper(wq))
1868 wake_up_interruptible_poll(&wq->wait, POLLERR);
1810 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); 1869 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
1811 read_unlock(&sk->sk_callback_lock); 1870 rcu_read_unlock();
1812} 1871}
1813 1872
1814static void sock_def_readable(struct sock *sk, int len) 1873static void sock_def_readable(struct sock *sk, int len)
1815{ 1874{
1816 read_lock(&sk->sk_callback_lock); 1875 struct socket_wq *wq;
1817 if (sk_has_sleeper(sk)) 1876
1818 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | 1877 rcu_read_lock();
1878 wq = rcu_dereference(sk->sk_wq);
1879 if (wq_has_sleeper(wq))
1880 wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
1819 POLLRDNORM | POLLRDBAND); 1881 POLLRDNORM | POLLRDBAND);
1820 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 1882 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1821 read_unlock(&sk->sk_callback_lock); 1883 rcu_read_unlock();
1822} 1884}
1823 1885
1824static void sock_def_write_space(struct sock *sk) 1886static void sock_def_write_space(struct sock *sk)
1825{ 1887{
1826 read_lock(&sk->sk_callback_lock); 1888 struct socket_wq *wq;
1889
1890 rcu_read_lock();
1827 1891
1828 /* Do not wake up a writer until he can make "significant" 1892 /* Do not wake up a writer until he can make "significant"
1829 * progress. --DaveM 1893 * progress. --DaveM
1830 */ 1894 */
1831 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 1895 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1832 if (sk_has_sleeper(sk)) 1896 wq = rcu_dereference(sk->sk_wq);
1833 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | 1897 if (wq_has_sleeper(wq))
1898 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
1834 POLLWRNORM | POLLWRBAND); 1899 POLLWRNORM | POLLWRBAND);
1835 1900
1836 /* Should agree with poll, otherwise some programs break */ 1901 /* Should agree with poll, otherwise some programs break */
@@ -1838,7 +1903,7 @@ static void sock_def_write_space(struct sock *sk)
1838 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 1903 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
1839 } 1904 }
1840 1905
1841 read_unlock(&sk->sk_callback_lock); 1906 rcu_read_unlock();
1842} 1907}
1843 1908
1844static void sock_def_destruct(struct sock *sk) 1909static void sock_def_destruct(struct sock *sk)
@@ -1885,7 +1950,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1885 sk->sk_allocation = GFP_KERNEL; 1950 sk->sk_allocation = GFP_KERNEL;
1886 sk->sk_rcvbuf = sysctl_rmem_default; 1951 sk->sk_rcvbuf = sysctl_rmem_default;
1887 sk->sk_sndbuf = sysctl_wmem_default; 1952 sk->sk_sndbuf = sysctl_wmem_default;
1888 sk->sk_backlog.limit = sk->sk_rcvbuf << 1;
1889 sk->sk_state = TCP_CLOSE; 1953 sk->sk_state = TCP_CLOSE;
1890 sk_set_socket(sk, sock); 1954 sk_set_socket(sk, sock);
1891 1955
@@ -1893,12 +1957,12 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1893 1957
1894 if (sock) { 1958 if (sock) {
1895 sk->sk_type = sock->type; 1959 sk->sk_type = sock->type;
1896 sk->sk_sleep = &sock->wait; 1960 sk->sk_wq = sock->wq;
1897 sock->sk = sk; 1961 sock->sk = sk;
1898 } else 1962 } else
1899 sk->sk_sleep = NULL; 1963 sk->sk_wq = NULL;
1900 1964
1901 rwlock_init(&sk->sk_dst_lock); 1965 spin_lock_init(&sk->sk_dst_lock);
1902 rwlock_init(&sk->sk_callback_lock); 1966 rwlock_init(&sk->sk_callback_lock);
1903 lockdep_set_class_and_name(&sk->sk_callback_lock, 1967 lockdep_set_class_and_name(&sk->sk_callback_lock,
1904 af_callback_keys + sk->sk_family, 1968 af_callback_keys + sk->sk_family,
@@ -1913,9 +1977,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
1913 sk->sk_sndmsg_page = NULL; 1977 sk->sk_sndmsg_page = NULL;
1914 sk->sk_sndmsg_off = 0; 1978 sk->sk_sndmsg_off = 0;
1915 1979
1916 sk->sk_peercred.pid = 0; 1980 sk->sk_peer_pid = NULL;
1917 sk->sk_peercred.uid = -1; 1981 sk->sk_peer_cred = NULL;
1918 sk->sk_peercred.gid = -1;
1919 sk->sk_write_pending = 0; 1982 sk->sk_write_pending = 0;
1920 sk->sk_rcvlowat = 1; 1983 sk->sk_rcvlowat = 1;
1921 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; 1984 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -1966,6 +2029,39 @@ void release_sock(struct sock *sk)
1966} 2029}
1967EXPORT_SYMBOL(release_sock); 2030EXPORT_SYMBOL(release_sock);
1968 2031
2032/**
2033 * lock_sock_fast - fast version of lock_sock
2034 * @sk: socket
2035 *
2036 * This version should be used for very small sections, where the process won't block.
2037 * Returns false if the fast path is taken:
2038 * sk_lock.slock locked, owned = 0, BH disabled
2039 * Returns true if the slow path is taken:
2040 * sk_lock.slock unlocked, owned = 1, BH enabled
2041 */
2042bool lock_sock_fast(struct sock *sk)
2043{
2044 might_sleep();
2045 spin_lock_bh(&sk->sk_lock.slock);
2046
2047 if (!sk->sk_lock.owned)
2048 /*
2049 * Note: the fast path returns with BH still disabled
2050 */
2051 return false;
2052
2053 __lock_sock(sk);
2054 sk->sk_lock.owned = 1;
2055 spin_unlock(&sk->sk_lock.slock);
2056 /*
2057 * The sk_lock has mutex_lock() semantics here:
2058 */
2059 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2060 local_bh_enable();
2061 return true;
2062}
2063EXPORT_SYMBOL(lock_sock_fast);
2064
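Callers must carry the return value to the unlock site; the counterpart is assumed to mirror the two paths:

	/* sketch of the expected pairing */
	static inline void unlock_sock_fast(struct sock *sk, bool slow)
	{
		if (slow)
			release_sock(sk);	/* slow path already re-enabled BH */
		else
			spin_unlock_bh(&sk->sk_lock.slock);
	}

	/* usage: keep the section short and non-blocking */
	bool slow = lock_sock_fast(sk);
	/* ... touch protocol state ... */
	unlock_sock_fast(sk, slow);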
1969int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) 2065int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1970{ 2066{
1971 struct timeval tv; 2067 struct timeval tv;
@@ -2136,8 +2232,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2136#ifdef CONFIG_NET_NS 2232#ifdef CONFIG_NET_NS
2137void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 2233void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2138{ 2234{
2139 int cpu = smp_processor_id(); 2235 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2140 per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
2141} 2236}
2142EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 2237EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2143 2238
@@ -2183,7 +2278,7 @@ static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2183 2278
2184void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) 2279void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2185{ 2280{
2186 __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val; 2281 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2187} 2282}
2188EXPORT_SYMBOL_GPL(sock_prot_inuse_add); 2283EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2189 2284
diff --git a/net/core/stream.c b/net/core/stream.c
index a37debfeb1b2..d959e0f41528 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -28,18 +28,21 @@
28void sk_stream_write_space(struct sock *sk) 28void sk_stream_write_space(struct sock *sk)
29{ 29{
30 struct socket *sock = sk->sk_socket; 30 struct socket *sock = sk->sk_socket;
31 struct socket_wq *wq;
31 32
32 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) { 33 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
33 clear_bit(SOCK_NOSPACE, &sock->flags); 34 clear_bit(SOCK_NOSPACE, &sock->flags);
34 35
35 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 36 rcu_read_lock();
36 wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | 37 wq = rcu_dereference(sk->sk_wq);
38 if (wq_has_sleeper(wq))
39 wake_up_interruptible_poll(&wq->wait, POLLOUT |
37 POLLWRNORM | POLLWRBAND); 40 POLLWRNORM | POLLWRBAND);
38 if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) 41 if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
39 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); 42 sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
43 rcu_read_unlock();
40 } 44 }
41} 45}
42
43EXPORT_SYMBOL(sk_stream_write_space); 46EXPORT_SYMBOL(sk_stream_write_space);
44 47
45/** 48/**
@@ -66,18 +69,17 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
66 if (signal_pending(tsk)) 69 if (signal_pending(tsk))
67 return sock_intr_errno(*timeo_p); 70 return sock_intr_errno(*timeo_p);
68 71
69 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 72 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
70 sk->sk_write_pending++; 73 sk->sk_write_pending++;
71 done = sk_wait_event(sk, timeo_p, 74 done = sk_wait_event(sk, timeo_p,
72 !sk->sk_err && 75 !sk->sk_err &&
73 !((1 << sk->sk_state) & 76 !((1 << sk->sk_state) &
74 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))); 77 ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
75 finish_wait(sk->sk_sleep, &wait); 78 finish_wait(sk_sleep(sk), &wait);
76 sk->sk_write_pending--; 79 sk->sk_write_pending--;
77 } while (!done); 80 } while (!done);
78 return 0; 81 return 0;
79} 82}
80
81EXPORT_SYMBOL(sk_stream_wait_connect); 83EXPORT_SYMBOL(sk_stream_wait_connect);
82 84
83/** 85/**
@@ -96,16 +98,15 @@ void sk_stream_wait_close(struct sock *sk, long timeout)
96 DEFINE_WAIT(wait); 98 DEFINE_WAIT(wait);
97 99
98 do { 100 do {
99 prepare_to_wait(sk->sk_sleep, &wait, 101 prepare_to_wait(sk_sleep(sk), &wait,
100 TASK_INTERRUPTIBLE); 102 TASK_INTERRUPTIBLE);
101 if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk))) 103 if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
102 break; 104 break;
103 } while (!signal_pending(current) && timeout); 105 } while (!signal_pending(current) && timeout);
104 106
105 finish_wait(sk->sk_sleep, &wait); 107 finish_wait(sk_sleep(sk), &wait);
106 } 108 }
107} 109}
108
109EXPORT_SYMBOL(sk_stream_wait_close); 110EXPORT_SYMBOL(sk_stream_wait_close);
110 111
111/** 112/**
@@ -126,7 +127,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
126 while (1) { 127 while (1) {
127 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 128 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
128 129
129 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 130 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
130 131
131 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 132 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
132 goto do_error; 133 goto do_error;
@@ -157,7 +158,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
157 *timeo_p = current_timeo; 158 *timeo_p = current_timeo;
158 } 159 }
159out: 160out:
160 finish_wait(sk->sk_sleep, &wait); 161 finish_wait(sk_sleep(sk), &wait);
161 return err; 162 return err;
162 163
163do_error: 164do_error:
@@ -170,7 +171,6 @@ do_interrupted:
170 err = sock_intr_errno(*timeo_p); 171 err = sock_intr_errno(*timeo_p);
171 goto out; 172 goto out;
172} 173}
173
174EXPORT_SYMBOL(sk_stream_wait_memory); 174EXPORT_SYMBOL(sk_stream_wait_memory);
175 175
176int sk_stream_error(struct sock *sk, int flags, int err) 176int sk_stream_error(struct sock *sk, int flags, int err)
@@ -181,7 +181,6 @@ int sk_stream_error(struct sock *sk, int flags, int err)
181 send_sig(SIGPIPE, current, 0); 181 send_sig(SIGPIPE, current, 0);
182 return err; 182 return err;
183} 183}
184
185EXPORT_SYMBOL(sk_stream_error); 184EXPORT_SYMBOL(sk_stream_error);
186 185
187void sk_stream_kill_queues(struct sock *sk) 186void sk_stream_kill_queues(struct sock *sk)
@@ -206,5 +205,4 @@ void sk_stream_kill_queues(struct sock *sk)
206 * have gone away, only the net layer can touch it. 205
207 */ 206 */
208} 207}
209
210EXPORT_SYMBOL(sk_stream_kill_queues); 208EXPORT_SYMBOL(sk_stream_kill_queues);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b7b6b8208f75..01eee5d984be 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,12 +11,72 @@
11#include <linux/socket.h> 11#include <linux/socket.h>
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/ratelimit.h> 13#include <linux/ratelimit.h>
14#include <linux/vmalloc.h>
14#include <linux/init.h> 15#include <linux/init.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
16 17
17#include <net/ip.h> 18#include <net/ip.h>
18#include <net/sock.h> 19#include <net/sock.h>
19 20
21#ifdef CONFIG_RPS
22static int rps_sock_flow_sysctl(ctl_table *table, int write,
23 void __user *buffer, size_t *lenp, loff_t *ppos)
24{
25 unsigned int orig_size, size;
26 int ret, i;
27 ctl_table tmp = {
28 .data = &size,
29 .maxlen = sizeof(size),
30 .mode = table->mode
31 };
32 struct rps_sock_flow_table *orig_sock_table, *sock_table;
33 static DEFINE_MUTEX(sock_flow_mutex);
34
35 mutex_lock(&sock_flow_mutex);
36
37 orig_sock_table = rps_sock_flow_table;
38 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
39
40 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
41
42 if (write) {
43 if (size) {
44 if (size > 1<<30) {
45 /* Enforce limit to prevent overflow */
46 mutex_unlock(&sock_flow_mutex);
47 return -EINVAL;
48 }
49 size = roundup_pow_of_two(size);
50 if (size != orig_size) {
51 sock_table =
52 vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
53 if (!sock_table) {
54 mutex_unlock(&sock_flow_mutex);
55 return -ENOMEM;
56 }
57
58 sock_table->mask = size - 1;
59 } else
60 sock_table = orig_sock_table;
61
62 for (i = 0; i < size; i++)
63 sock_table->ents[i] = RPS_NO_CPU;
64 } else
65 sock_table = NULL;
66
67 if (sock_table != orig_sock_table) {
68 rcu_assign_pointer(rps_sock_flow_table, sock_table);
69 synchronize_rcu();
70 vfree(orig_sock_table);
71 }
72 }
73
74 mutex_unlock(&sock_flow_mutex);
75
76 return ret;
77}
78#endif /* CONFIG_RPS */
79
20static struct ctl_table net_core_table[] = { 80static struct ctl_table net_core_table[] = {
21#ifdef CONFIG_NET 81#ifdef CONFIG_NET
22 { 82 {
@@ -62,6 +122,13 @@ static struct ctl_table net_core_table[] = {
62 .proc_handler = proc_dointvec 122 .proc_handler = proc_dointvec
63 }, 123 },
64 { 124 {
125 .procname = "netdev_tstamp_prequeue",
126 .data = &netdev_tstamp_prequeue,
127 .maxlen = sizeof(int),
128 .mode = 0644,
129 .proc_handler = proc_dointvec
130 },
131 {
65 .procname = "message_cost", 132 .procname = "message_cost",
66 .data = &net_ratelimit_state.interval, 133 .data = &net_ratelimit_state.interval,
67 .maxlen = sizeof(int), 134 .maxlen = sizeof(int),
@@ -82,6 +149,14 @@ static struct ctl_table net_core_table[] = {
82 .mode = 0644, 149 .mode = 0644,
83 .proc_handler = proc_dointvec 150 .proc_handler = proc_dointvec
84 }, 151 },
152#ifdef CONFIG_RPS
153 {
154 .procname = "rps_sock_flow_entries",
155 .maxlen = sizeof(int),
156 .mode = 0644,
157 .proc_handler = rps_sock_flow_sysctl
158 },
159#endif
85#endif /* CONFIG_NET */ 160#endif /* CONFIG_NET */
86 { 161 {
87 .procname = "netdev_budget", 162 .procname = "netdev_budget",
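The rps_sock_flow_sysctl handler above rounds any nonzero value up to a power of two and rejects sizes beyond 1<<30. A user-space snippet sizing the table through the proc path registered here:

	/* user-space example: enable RPS flow steering, 32768 entries */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/net/core/rps_sock_flow_entries", "w");

		if (!f)
			return 1;
		fprintf(f, "32768\n");	/* rounded up to a power of two */
		return fclose(f) ? 1 : 0;
	}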
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
new file mode 100644
index 000000000000..0ae6c22da85b
--- /dev/null
+++ b/net/core/timestamping.c
@@ -0,0 +1,126 @@
1/*
2 * PTP 1588 clock support - support for timestamping in PHY devices
3 *
4 * Copyright (C) 2010 OMICRON electronics GmbH
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */
20#include <linux/errqueue.h>
21#include <linux/phy.h>
22#include <linux/ptp_classify.h>
23#include <linux/skbuff.h>
24
25static struct sock_filter ptp_filter[] = {
26 PTP_FILTER
27};
28
29static unsigned int classify(struct sk_buff *skb)
30{
31 if (likely(skb->dev &&
32 skb->dev->phydev &&
33 skb->dev->phydev->drv))
34 return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
35 else
36 return PTP_CLASS_NONE;
37}
38
39void skb_clone_tx_timestamp(struct sk_buff *skb)
40{
41 struct phy_device *phydev;
42 struct sk_buff *clone;
43 struct sock *sk = skb->sk;
44 unsigned int type;
45
46 if (!sk)
47 return;
48
49 type = classify(skb);
50
51 switch (type) {
52 case PTP_CLASS_V1_IPV4:
53 case PTP_CLASS_V1_IPV6:
54 case PTP_CLASS_V2_IPV4:
55 case PTP_CLASS_V2_IPV6:
56 case PTP_CLASS_V2_L2:
57 case PTP_CLASS_V2_VLAN:
58 phydev = skb->dev->phydev;
59 if (likely(phydev->drv->txtstamp)) {
60 clone = skb_clone(skb, GFP_ATOMIC);
61 if (!clone)
62 return;
63 clone->sk = sk;
64 phydev->drv->txtstamp(phydev, clone, type);
65 }
66 break;
67 default:
68 break;
69 }
70}
71
72void skb_complete_tx_timestamp(struct sk_buff *skb,
73 struct skb_shared_hwtstamps *hwtstamps)
74{
75 struct sock *sk = skb->sk;
76 struct sock_exterr_skb *serr;
77 int err;
78
79 if (!hwtstamps)
80 return;
81
82 *skb_hwtstamps(skb) = *hwtstamps;
83 serr = SKB_EXT_ERR(skb);
84 memset(serr, 0, sizeof(*serr));
85 serr->ee.ee_errno = ENOMSG;
86 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
87 skb->sk = NULL;
88 err = sock_queue_err_skb(sk, skb);
89 if (err)
90 kfree_skb(skb);
91}
92EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
93
94bool skb_defer_rx_timestamp(struct sk_buff *skb)
95{
96 struct phy_device *phydev;
97 unsigned int type;
98
99 skb_push(skb, ETH_HLEN);
100
101 type = classify(skb);
102
103 skb_pull(skb, ETH_HLEN);
104
105 switch (type) {
106 case PTP_CLASS_V1_IPV4:
107 case PTP_CLASS_V1_IPV6:
108 case PTP_CLASS_V2_IPV4:
109 case PTP_CLASS_V2_IPV6:
110 case PTP_CLASS_V2_L2:
111 case PTP_CLASS_V2_VLAN:
112 phydev = skb->dev->phydev;
113 if (likely(phydev->drv->rxtstamp))
114 return phydev->drv->rxtstamp(phydev, skb, type);
115 break;
116 default:
117 break;
118 }
119
120 return false;
121}
122
123void __init skb_timestamping_init(void)
124{
125 BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter)));
126}
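skb_clone_tx_timestamp() and skb_defer_rx_timestamp() are no-ops unless the PHY driver supplies txtstamp/rxtstamp hooks. A hypothetical driver-side sketch, with the hook signatures inferred from the call sites above rather than from a driver header:

	/* hypothetical PHY driver hooks (signatures inferred, not authoritative) */
	static void example_txtstamp(struct phy_device *dev,
				     struct sk_buff *skb, int type)
	{
		/* queue the clone until the hardware timestamp arrives, then
		 * complete it with skb_complete_tx_timestamp(skb, &hwtstamps) */
	}

	static bool example_rxtstamp(struct phy_device *dev,
				     struct sk_buff *skb, int type)
	{
		/* return true to take ownership and deliver the skb later with
		 * its timestamp filled in; false lets it pass through now */
		return false;
	}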
diff --git a/net/core/utils.c b/net/core/utils.c
index 838250241d26..f41854470539 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -77,7 +77,6 @@ __be32 in_aton(const char *str)
77 } 77 }
78 return(htonl(l)); 78 return(htonl(l));
79} 79}
80
81EXPORT_SYMBOL(in_aton); 80EXPORT_SYMBOL(in_aton);
82 81
83#define IN6PTON_XDIGIT 0x00010000 82#define IN6PTON_XDIGIT 0x00010000
@@ -162,7 +161,6 @@ out:
162 *end = s; 161 *end = s;
163 return ret; 162 return ret;
164} 163}
165
166EXPORT_SYMBOL(in4_pton); 164EXPORT_SYMBOL(in4_pton);
167 165
168int in6_pton(const char *src, int srclen, 166int in6_pton(const char *src, int srclen,
@@ -280,7 +278,6 @@ out:
280 *end = s; 278 *end = s;
281 return ret; 279 return ret;
282} 280}
283
284EXPORT_SYMBOL(in6_pton); 281EXPORT_SYMBOL(in6_pton);
285 282
286void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, 283void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 01e4d39fa232..92a6fcb40d7d 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -82,7 +82,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
82 elapsed_time = delta / 10; 82 elapsed_time = delta / 10;
83 83
84 if (elapsed_time != 0 && 84 if (elapsed_time != 0 &&
85 dccp_insert_option_elapsed_time(sk, skb, elapsed_time)) 85 dccp_insert_option_elapsed_time(skb, elapsed_time))
86 return -1; 86 return -1;
87 87
88 avr = dccp_ackvec_record_new(); 88 avr = dccp_ackvec_record_new();
@@ -201,7 +201,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
201 const unsigned int packets, 201 const unsigned int packets,
202 const unsigned char state) 202 const unsigned char state)
203{ 203{
204 unsigned int gap; 204 long gap;
205 long new_head; 205 long new_head;
206 206
207 if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN) 207 if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index bcd7632299f5..95f752986497 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -208,7 +208,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
208 goto restart_timer; 208 goto restart_timer;
209 } 209 }
210 210
211 ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk, 211 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
212 ccid3_tx_state_name(hc->tx_state)); 212 ccid3_tx_state_name(hc->tx_state));
213 213
214 if (hc->tx_state == TFRC_SSTATE_FBACK) 214 if (hc->tx_state == TFRC_SSTATE_FBACK)
@@ -715,9 +715,9 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
715 x_recv = htonl(hc->rx_x_recv); 715 x_recv = htonl(hc->rx_x_recv);
716 pinv = htonl(hc->rx_pinv); 716 pinv = htonl(hc->rx_pinv);
717 717
718 if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, 718 if (dccp_insert_option(skb, TFRC_OPT_LOSS_EVENT_RATE,
719 &pinv, sizeof(pinv)) || 719 &pinv, sizeof(pinv)) ||
720 dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, 720 dccp_insert_option(skb, TFRC_OPT_RECEIVE_RATE,
721 &x_recv, sizeof(x_recv))) 721 &x_recv, sizeof(x_recv)))
722 return -1; 722 return -1;
723 723
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 5ef32c2f0d6a..3ccef1b70fee 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -189,7 +189,7 @@ enum {
189#define DCCP_MIB_MAX __DCCP_MIB_MAX 189#define DCCP_MIB_MAX __DCCP_MIB_MAX
190struct dccp_mib { 190struct dccp_mib {
191 unsigned long mibs[DCCP_MIB_MAX]; 191 unsigned long mibs[DCCP_MIB_MAX];
192} __SNMP_MIB_ALIGN__; 192};
193 193
194DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); 194DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
195#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) 195#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
@@ -223,7 +223,7 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb)
223 skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0); 223 skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0);
224} 224}
225 225
226extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); 226extern void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb);
227 227
228extern int dccp_retransmit_skb(struct sock *sk); 228extern int dccp_retransmit_skb(struct sock *sk);
229 229
@@ -446,16 +446,12 @@ extern void dccp_feat_list_purge(struct list_head *fn_list);
446 446
447extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb); 447extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
448extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*); 448extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
449extern int dccp_insert_option_elapsed_time(struct sock *sk, 449extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed);
450 struct sk_buff *skb,
451 u32 elapsed_time);
452extern u32 dccp_timestamp(void); 450extern u32 dccp_timestamp(void);
453extern void dccp_timestamping_init(void); 451extern void dccp_timestamping_init(void);
454extern int dccp_insert_option_timestamp(struct sock *sk, 452extern int dccp_insert_option_timestamp(struct sk_buff *skb);
455 struct sk_buff *skb); 453extern int dccp_insert_option(struct sk_buff *skb, unsigned char option,
456extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb, 454 const void *value, unsigned char len);
457 unsigned char option,
458 const void *value, unsigned char len);
459 455
460#ifdef CONFIG_SYSCTL 456#ifdef CONFIG_SYSCTL
461extern int dccp_sysctl_init(void); 457extern int dccp_sysctl_init(void);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 9ec717426024..10c957a88f4f 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -124,9 +124,9 @@ static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
124 return queued; 124 return queued;
125} 125}
126 126
127static u8 dccp_reset_code_convert(const u8 code) 127static u16 dccp_reset_code_convert(const u8 code)
128{ 128{
129 const u8 error_code[] = { 129 const u16 error_code[] = {
130 [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */ 130 [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */
131 [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */ 131 [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */
132 [DCCP_RESET_CODE_ABORTED] = ECONNRESET, 132 [DCCP_RESET_CODE_ABORTED] = ECONNRESET,
@@ -148,7 +148,7 @@ static u8 dccp_reset_code_convert(const u8 code)
148 148
149static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) 149static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
150{ 150{
151 u8 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); 151 u16 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code);
152 152
153 sk->sk_err = err; 153 sk->sk_err = err;
154 154
@@ -415,7 +415,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
415 if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, 415 if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
416 dp->dccps_awl, dp->dccps_awh)) { 416 dp->dccps_awl, dp->dccps_awh)) {
417 dccp_pr_debug("invalid ackno: S.AWL=%llu, " 417 dccp_pr_debug("invalid ackno: S.AWL=%llu, "
418 "P.ackno=%llu, S.AWH=%llu \n", 418 "P.ackno=%llu, S.AWH=%llu\n",
419 (unsigned long long)dp->dccps_awl, 419 (unsigned long long)dp->dccps_awl,
420 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq, 420 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
421 (unsigned long long)dp->dccps_awh); 421 (unsigned long long)dp->dccps_awh);
@@ -430,7 +430,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
430 if (dccp_parse_options(sk, NULL, skb)) 430 if (dccp_parse_options(sk, NULL, skb))
431 return 1; 431 return 1;
432 432
433 /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ 433 /* Obtain usec RTT sample from SYN exchange (used by TFRC). */
434 if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) 434 if (likely(dp->dccps_options_received.dccpor_timestamp_echo))
435 dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - 435 dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
436 dp->dccps_options_received.dccpor_timestamp_echo)); 436 dp->dccps_options_received.dccpor_timestamp_echo));
@@ -535,6 +535,8 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
535 const struct dccp_hdr *dh, 535 const struct dccp_hdr *dh,
536 const unsigned len) 536 const unsigned len)
537{ 537{
538 struct dccp_sock *dp = dccp_sk(sk);
539 u32 sample = dp->dccps_options_received.dccpor_timestamp_echo;
538 int queued = 0; 540 int queued = 0;
539 541
540 switch (dh->dccph_type) { 542 switch (dh->dccph_type) {
@@ -559,7 +561,14 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
559 if (sk->sk_state == DCCP_PARTOPEN) 561 if (sk->sk_state == DCCP_PARTOPEN)
560 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 562 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
561 563
562 dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; 564 /* Obtain usec RTT sample from SYN exchange (used by TFRC). */
565 if (likely(sample)) {
566 long delta = dccp_timestamp() - sample;
567
568 dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * delta);
569 }
570
571 dp->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
563 dccp_set_state(sk, DCCP_OPEN); 572 dccp_set_state(sk, DCCP_OPEN);
564 573
565 if (dh->dccph_type == DCCP_PKT_DATAACK || 574 if (dh->dccph_type == DCCP_PKT_DATAACK ||
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 52ffa1cde15a..d4a166f0f391 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -105,7 +105,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
105 goto failure; 105 goto failure;
106 106
107 /* OK, now commit destination to socket. */ 107 /* OK, now commit destination to socket. */
108 sk_setup_caps(sk, &rt->u.dst); 108 sk_setup_caps(sk, &rt->dst);
109 109
110 dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, 110 dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr,
111 inet->inet_daddr, 111 inet->inet_daddr,
@@ -349,7 +349,7 @@ static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb,
349 return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum); 349 return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum);
350} 350}
351 351
352void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb) 352void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb)
353{ 353{
354 const struct inet_sock *inet = inet_sk(sk); 354 const struct inet_sock *inet = inet_sk(sk);
355 struct dccp_hdr *dh = dccp_hdr(skb); 355 struct dccp_hdr *dh = dccp_hdr(skb);
@@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
475 return NULL; 475 return NULL;
476 } 476 }
477 477
478 return &rt->u.dst; 478 return &rt->dst;
479} 479}
480 480
481static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, 481static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 3b11e41a2929..6e3f32575df7 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -60,8 +60,7 @@ static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
60 return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); 60 return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
61} 61}
62 62
63static inline void dccp_v6_send_check(struct sock *sk, int unused_value, 63static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
64 struct sk_buff *skb)
65{ 64{
66 struct ipv6_pinfo *np = inet6_sk(sk); 65 struct ipv6_pinfo *np = inet6_sk(sk);
67 struct dccp_hdr *dh = dccp_hdr(skb); 66 struct dccp_hdr *dh = dccp_hdr(skb);
@@ -249,7 +248,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
249 struct ipv6_pinfo *np = inet6_sk(sk); 248 struct ipv6_pinfo *np = inet6_sk(sk);
250 struct sk_buff *skb; 249 struct sk_buff *skb;
251 struct ipv6_txoptions *opt = NULL; 250 struct ipv6_txoptions *opt = NULL;
252 struct in6_addr *final_p = NULL, final; 251 struct in6_addr *final_p, final;
253 struct flowi fl; 252 struct flowi fl;
254 int err = -1; 253 int err = -1;
255 struct dst_entry *dst; 254 struct dst_entry *dst;
@@ -266,13 +265,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
266 265
267 opt = np->opt; 266 opt = np->opt;
268 267
269 if (opt != NULL && opt->srcrt != NULL) { 268 final_p = fl6_update_dst(&fl, opt, &final);
270 const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
271
272 ipv6_addr_copy(&final, &fl.fl6_dst);
273 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
274 final_p = &final;
275 }
276 269
277 err = ip6_dst_lookup(sk, &dst, &fl); 270 err = ip6_dst_lookup(sk, &dst, &fl);
278 if (err) 271 if (err)
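fl6_update_dst() factors out the routing-header handling that the removed lines open-coded in three places; a sketch reconstructed from the code it replaces:

	/* sketch, reconstructed from the open-coded blocks it replaces */
	static struct in6_addr *fl6_update_dst(struct flowi *fl,
					       const struct ipv6_txoptions *opt,
					       struct in6_addr *orig)
	{
		if (!opt || !opt->srcrt)
			return NULL;

		ipv6_addr_copy(orig, &fl->fl6_dst);
		ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr);
		return orig;
	}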
@@ -293,7 +286,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
293 &ireq6->loc_addr, 286 &ireq6->loc_addr,
294 &ireq6->rmt_addr); 287 &ireq6->rmt_addr);
295 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); 288 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
296 err = ip6_xmit(sk, skb, &fl, opt, 0); 289 err = ip6_xmit(sk, skb, &fl, opt);
297 err = net_xmit_eval(err); 290 err = net_xmit_eval(err);
298 } 291 }
299 292
@@ -348,7 +341,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
348 if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { 341 if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
349 if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { 342 if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
350 skb_dst_set(skb, dst); 343 skb_dst_set(skb, dst);
351 ip6_xmit(ctl_sk, skb, &fl, NULL, 0); 344 ip6_xmit(ctl_sk, skb, &fl, NULL);
352 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); 345 DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
353 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); 346 DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
354 return; 347 return;
@@ -546,19 +539,13 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
546 goto out_overflow; 539 goto out_overflow;
547 540
548 if (dst == NULL) { 541 if (dst == NULL) {
549 struct in6_addr *final_p = NULL, final; 542 struct in6_addr *final_p, final;
550 struct flowi fl; 543 struct flowi fl;
551 544
552 memset(&fl, 0, sizeof(fl)); 545 memset(&fl, 0, sizeof(fl));
553 fl.proto = IPPROTO_DCCP; 546 fl.proto = IPPROTO_DCCP;
554 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); 547 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
555 if (opt != NULL && opt->srcrt != NULL) { 548 final_p = fl6_update_dst(&fl, opt, &final);
556 const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
557
558 ipv6_addr_copy(&final, &fl.fl6_dst);
559 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
560 final_p = &final;
561 }
562 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); 549 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
563 fl.oif = sk->sk_bound_dev_if; 550 fl.oif = sk->sk_bound_dev_if;
564 fl.fl_ip_dport = inet_rsk(req)->rmt_port; 551 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
@@ -886,7 +873,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
886 struct inet_sock *inet = inet_sk(sk); 873 struct inet_sock *inet = inet_sk(sk);
887 struct ipv6_pinfo *np = inet6_sk(sk); 874 struct ipv6_pinfo *np = inet6_sk(sk);
888 struct dccp_sock *dp = dccp_sk(sk); 875 struct dccp_sock *dp = dccp_sk(sk);
889 struct in6_addr *saddr = NULL, *final_p = NULL, final; 876 struct in6_addr *saddr = NULL, *final_p, final;
890 struct flowi fl; 877 struct flowi fl;
891 struct dst_entry *dst; 878 struct dst_entry *dst;
892 int addr_type; 879 int addr_type;
@@ -989,13 +976,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
989 fl.fl_ip_sport = inet->inet_sport; 976 fl.fl_ip_sport = inet->inet_sport;
990 security_sk_classify_flow(sk, &fl); 977 security_sk_classify_flow(sk, &fl);
991 978
992 if (np->opt != NULL && np->opt->srcrt != NULL) { 979 final_p = fl6_update_dst(&fl, np->opt, &final);
993 const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
994
995 ipv6_addr_copy(&final, &fl.fl6_dst);
996 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
997 final_p = &final;
998 }
999 980
1000 err = ip6_dst_lookup(sk, &dst, &fl); 981 err = ip6_dst_lookup(sk, &dst, &fl);
1001 if (err) 982 if (err)
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 1b08cae9c65b..bfda087bd90d 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -296,12 +296,11 @@ static inline u8 dccp_ndp_len(const u64 ndp)
296{ 296{
297 if (likely(ndp <= 0xFF)) 297 if (likely(ndp <= 0xFF))
298 return 1; 298 return 1;
299 return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6); 299 return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6);
300} 300}
301 301
302int dccp_insert_option(struct sock *sk, struct sk_buff *skb, 302int dccp_insert_option(struct sk_buff *skb, const unsigned char option,
303 const unsigned char option, 303 const void *value, const unsigned char len)
304 const void *value, const unsigned char len)
305{ 304{
306 unsigned char *to; 305 unsigned char *to;
307 306
@@ -354,8 +353,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time)
354 return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4; 353 return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
355} 354}
356 355
357int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb, 356int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
358 u32 elapsed_time)
359{ 357{
360 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); 358 const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
361 const int len = 2 + elapsed_time_len; 359 const int len = 2 + elapsed_time_len;
@@ -386,13 +384,13 @@ int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb,
386 384
387EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); 385EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
388 386
389int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) 387int dccp_insert_option_timestamp(struct sk_buff *skb)
390{ 388{
391 __be32 now = htonl(dccp_timestamp()); 389 __be32 now = htonl(dccp_timestamp());
392 /* yes this will overflow but that is the point as we want a 390 /* yes this will overflow but that is the point as we want a
393 * 10 usec 32 bit timer which means it wraps every 11.9 hours */ 391
394 392
395 return dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); 393 return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now));
396} 394}
397 395
398EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp); 396EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
@@ -531,9 +529,9 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
531 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) { 529 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) {
532 /* 530 /*
533 * Obtain RTT sample from Request/Response exchange. 531 * Obtain RTT sample from Request/Response exchange.
534 * This is currently used in CCID 3 initialisation. 532 * This is currently used for TFRC initialisation.
535 */ 533 */
536 if (dccp_insert_option_timestamp(sk, skb)) 534 if (dccp_insert_option_timestamp(skb))
537 return -1; 535 return -1;
538 536
539 } else if (dp->dccps_hc_rx_ackvec != NULL && 537 } else if (dp->dccps_hc_rx_ackvec != NULL &&
@@ -564,6 +562,10 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
564 if (dccp_feat_insert_opts(NULL, dreq, skb)) 562 if (dccp_feat_insert_opts(NULL, dreq, skb))
565 return -1; 563 return -1;
566 564
565 /* Obtain RTT sample from Response/Ack exchange (used by TFRC). */
566 if (dccp_insert_option_timestamp(skb))
567 return -1;
568
567 if (dreq->dreq_timestamp_echo != 0 && 569 if (dreq->dreq_timestamp_echo != 0 &&
568 dccp_insert_option_timestamp_echo(NULL, dreq, skb)) 570 dccp_insert_option_timestamp_echo(NULL, dreq, skb))
569 return -1; 571 return -1;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index fc3f436440b4..aadbdb58758b 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -129,14 +129,14 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
129 break; 129 break;
130 } 130 }
131 131
132 icsk->icsk_af_ops->send_check(sk, 0, skb); 132 icsk->icsk_af_ops->send_check(sk, skb);
133 133
134 if (set_ack) 134 if (set_ack)
135 dccp_event_ack_sent(sk); 135 dccp_event_ack_sent(sk);
136 136
137 DCCP_INC_STATS(DCCP_MIB_OUTSEGS); 137 DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
138 138
139 err = icsk->icsk_af_ops->queue_xmit(skb, 0); 139 err = icsk->icsk_af_ops->queue_xmit(skb);
140 return net_xmit_eval(err); 140 return net_xmit_eval(err);
141 } 141 }
142 return -ENOBUFS; 142 return -ENOBUFS;
@@ -195,15 +195,17 @@ EXPORT_SYMBOL_GPL(dccp_sync_mss);
195 195
196void dccp_write_space(struct sock *sk) 196void dccp_write_space(struct sock *sk)
197{ 197{
198 read_lock(&sk->sk_callback_lock); 198 struct socket_wq *wq;
199 199
200 if (sk_has_sleeper(sk)) 200 rcu_read_lock();
201 wake_up_interruptible(sk->sk_sleep); 201 wq = rcu_dereference(sk->sk_wq);
202 if (wq_has_sleeper(wq))
203 wake_up_interruptible(&wq->wait);
202 /* Should agree with poll, otherwise some programs break */ 204 /* Should agree with poll, otherwise some programs break */
203 if (sock_writeable(sk)) 205 if (sock_writeable(sk))
204 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 206 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
205 207
206 read_unlock(&sk->sk_callback_lock); 208 rcu_read_unlock();
207} 209}
208 210
209/** 211/**
@@ -225,7 +227,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
225 dccp_pr_debug("delayed send by %d msec\n", delay); 227 dccp_pr_debug("delayed send by %d msec\n", delay);
226 jiffdelay = msecs_to_jiffies(delay); 228 jiffdelay = msecs_to_jiffies(delay);
227 229
228 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 230 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
229 231
230 sk->sk_write_pending++; 232 sk->sk_write_pending++;
231 release_sock(sk); 233 release_sock(sk);
@@ -241,7 +243,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
241 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb); 243 rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
242 } while ((delay = rc) > 0); 244 } while ((delay = rc) > 0);
243out: 245out:
244 finish_wait(sk->sk_sleep, &wait); 246 finish_wait(sk_sleep(sk), &wait);
245 return rc; 247 return rc;
246 248
247do_error: 249do_error:
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a0e38d8018f5..096250d1323b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -312,7 +312,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
312 unsigned int mask; 312 unsigned int mask;
313 struct sock *sk = sock->sk; 313 struct sock *sk = sock->sk;
314 314
315 sock_poll_wait(file, sk->sk_sleep, wait); 315 sock_poll_wait(file, sk_sleep(sk), wait);
316 if (sk->sk_state == DCCP_LISTEN) 316 if (sk->sk_state == DCCP_LISTEN)
317 return inet_csk_listen_poll(sk); 317 return inet_csk_listen_poll(sk);
318 318
@@ -473,14 +473,9 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type,
473 if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) 473 if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
474 return -EINVAL; 474 return -EINVAL;
475 475
476 val = kmalloc(optlen, GFP_KERNEL); 476 val = memdup_user(optval, optlen);
477 if (val == NULL) 477 if (IS_ERR(val))
478 return -ENOMEM; 478 return PTR_ERR(val);
479
480 if (copy_from_user(val, optval, optlen)) {
481 kfree(val);
482 return -EFAULT;
483 }
484 479
485 lock_sock(sk); 480 lock_sock(sk);
486 if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) 481 if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
@@ -1007,7 +1002,8 @@ EXPORT_SYMBOL_GPL(dccp_shutdown);
1007static inline int dccp_mib_init(void) 1002static inline int dccp_mib_init(void)
1008{ 1003{
1009 return snmp_mib_init((void __percpu **)dccp_statistics, 1004 return snmp_mib_init((void __percpu **)dccp_statistics,
1010 sizeof(struct dccp_mib)); 1005 sizeof(struct dccp_mib),
1006 __alignof__(struct dccp_mib));
1011} 1007}
1012 1008
1013static inline void dccp_mib_exit(void) 1009static inline void dccp_mib_exit(void)
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index bbfeb5eae46a..1a9aa05d4dc4 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -38,7 +38,7 @@ static int dccp_write_timeout(struct sock *sk)
38 38
39 if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { 39 if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
40 if (icsk->icsk_retransmits != 0) 40 if (icsk->icsk_retransmits != 0)
41 dst_negative_advice(&sk->sk_dst_cache, sk); 41 dst_negative_advice(sk);
42 retry_until = icsk->icsk_syn_retries ? 42 retry_until = icsk->icsk_syn_retries ?
43 : sysctl_dccp_request_retries; 43 : sysctl_dccp_request_retries;
44 } else { 44 } else {
@@ -63,7 +63,7 @@ static int dccp_write_timeout(struct sock *sk)
63 Golden words :-). 63 Golden words :-).
64 */ 64 */
65 65
66 dst_negative_advice(&sk->sk_dst_cache, sk); 66 dst_negative_advice(sk);
67 } 67 }
68 68
69 retry_until = sysctl_dccp_retries2; 69 retry_until = sysctl_dccp_retries2;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2b494fac9468..d6b93d19790f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -446,7 +446,7 @@ static void dn_destruct(struct sock *sk)
446 skb_queue_purge(&scp->other_xmit_queue); 446 skb_queue_purge(&scp->other_xmit_queue);
447 skb_queue_purge(&scp->other_receive_queue); 447 skb_queue_purge(&scp->other_receive_queue);
448 448
449 dst_release(xchg(&sk->sk_dst_cache, NULL)); 449 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
450} 450}
451 451
452static int dn_memory_pressure; 452static int dn_memory_pressure;
@@ -832,7 +832,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
832 scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS); 832 scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS);
833 dn_send_conn_conf(sk, allocation); 833 dn_send_conn_conf(sk, allocation);
834 834
835 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 835 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
836 for(;;) { 836 for(;;) {
837 release_sock(sk); 837 release_sock(sk);
838 if (scp->state == DN_CC) 838 if (scp->state == DN_CC)
@@ -850,9 +850,9 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
850 err = -EAGAIN; 850 err = -EAGAIN;
851 if (!*timeo) 851 if (!*timeo)
852 break; 852 break;
853 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 853 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
854 } 854 }
855 finish_wait(sk->sk_sleep, &wait); 855 finish_wait(sk_sleep(sk), &wait);
856 if (err == 0) { 856 if (err == 0) {
857 sk->sk_socket->state = SS_CONNECTED; 857 sk->sk_socket->state = SS_CONNECTED;
858 } else if (scp->state != DN_CC) { 858 } else if (scp->state != DN_CC) {
@@ -873,7 +873,7 @@ static int dn_wait_run(struct sock *sk, long *timeo)
873 if (!*timeo) 873 if (!*timeo)
874 return -EALREADY; 874 return -EALREADY;
875 875
876 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 876 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
877 for(;;) { 877 for(;;) {
878 release_sock(sk); 878 release_sock(sk);
879 if (scp->state == DN_CI || scp->state == DN_CC) 879 if (scp->state == DN_CI || scp->state == DN_CC)
@@ -891,9 +891,9 @@ static int dn_wait_run(struct sock *sk, long *timeo)
891 err = -ETIMEDOUT; 891 err = -ETIMEDOUT;
892 if (!*timeo) 892 if (!*timeo)
893 break; 893 break;
894 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 894 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
895 } 895 }
896 finish_wait(sk->sk_sleep, &wait); 896 finish_wait(sk_sleep(sk), &wait);
897out: 897out:
898 if (err == 0) { 898 if (err == 0) {
899 sk->sk_socket->state = SS_CONNECTED; 899 sk->sk_socket->state = SS_CONNECTED;
@@ -1040,7 +1040,7 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
1040 struct sk_buff *skb = NULL; 1040 struct sk_buff *skb = NULL;
1041 int err = 0; 1041 int err = 0;
1042 1042
1043 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1043 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1044 for(;;) { 1044 for(;;) {
1045 release_sock(sk); 1045 release_sock(sk);
1046 skb = skb_dequeue(&sk->sk_receive_queue); 1046 skb = skb_dequeue(&sk->sk_receive_queue);
@@ -1060,9 +1060,9 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
1060 err = -EAGAIN; 1060 err = -EAGAIN;
1061 if (!*timeo) 1061 if (!*timeo)
1062 break; 1062 break;
1063 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1063 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1064 } 1064 }
1065 finish_wait(sk->sk_sleep, &wait); 1065 finish_wait(sk_sleep(sk), &wait);
1066 1066
1067 return skb == NULL ? ERR_PTR(err) : skb; 1067 return skb == NULL ? ERR_PTR(err) : skb;
1068} 1068}
@@ -1105,7 +1105,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
1105 release_sock(sk); 1105 release_sock(sk);
1106 1106
1107 dst = skb_dst(skb); 1107 dst = skb_dst(skb);
1108 dst_release(xchg(&newsk->sk_dst_cache, dst)); 1108 sk_dst_set(newsk, dst);
1109 skb_dst_set(skb, NULL); 1109 skb_dst_set(skb, NULL);
1110 1110
1111 DN_SK(newsk)->state = DN_CR; 1111 DN_SK(newsk)->state = DN_CR;
@@ -1746,11 +1746,11 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
1746 goto out; 1746 goto out;
1747 } 1747 }
1748 1748
1749 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1749 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1750 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1750 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1751 sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target)); 1751 sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
1752 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 1752 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1753 finish_wait(sk->sk_sleep, &wait); 1753 finish_wait(sk_sleep(sk), &wait);
1754 } 1754 }
1755 1755
1756 skb_queue_walk_safe(queue, skb, n) { 1756 skb_queue_walk_safe(queue, skb, n) {
@@ -1956,7 +1956,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
1956 } 1956 }
1957 1957
1958 if ((flags & MSG_TRYHARD) && sk->sk_dst_cache) 1958 if ((flags & MSG_TRYHARD) && sk->sk_dst_cache)
1959 dst_negative_advice(&sk->sk_dst_cache, sk); 1959 dst_negative_advice(sk);
1960 1960
1961 mss = scp->segsize_rem; 1961 mss = scp->segsize_rem;
1962 fctype = scp->services_rem & NSP_FC_MASK; 1962 fctype = scp->services_rem & NSP_FC_MASK;
@@ -2003,12 +2003,12 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
2003 goto out; 2003 goto out;
2004 } 2004 }
2005 2005
2006 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 2006 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2007 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 2007 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2008 sk_wait_event(sk, &timeo, 2008 sk_wait_event(sk, &timeo,
2009 !dn_queue_too_long(scp, queue, flags)); 2009 !dn_queue_too_long(scp, queue, flags));
2010 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 2010 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
2011 finish_wait(sk->sk_sleep, &wait); 2011 finish_wait(sk_sleep(sk), &wait);
2012 continue; 2012 continue;
2013 } 2013 }
2014 2014
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index cead68eb254c..4c409b46aa35 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -350,7 +350,7 @@ static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int de
350 if (dn_db->dev->type == ARPHRD_ETHER) { 350 if (dn_db->dev->type == ARPHRD_ETHER) {
351 if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) { 351 if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) {
352 dn_dn2eth(mac_addr, ifa1->ifa_local); 352 dn_dn2eth(mac_addr, ifa1->ifa_local);
353 dev_mc_delete(dev, mac_addr, ETH_ALEN, 0); 353 dev_mc_del(dev, mac_addr);
354 } 354 }
355 } 355 }
356 356
@@ -381,7 +381,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
381 if (dev->type == ARPHRD_ETHER) { 381 if (dev->type == ARPHRD_ETHER) {
382 if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) { 382 if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) {
383 dn_dn2eth(mac_addr, ifa->ifa_local); 383 dn_dn2eth(mac_addr, ifa->ifa_local);
384 dev_mc_add(dev, mac_addr, ETH_ALEN, 0); 384 dev_mc_add(dev, mac_addr);
385 } 385 }
386 } 386 }
387 387
@@ -1001,9 +1001,9 @@ static int dn_eth_up(struct net_device *dev)
1001 struct dn_dev *dn_db = dev->dn_ptr; 1001 struct dn_dev *dn_db = dev->dn_ptr;
1002 1002
1003 if (dn_db->parms.forwarding == 0) 1003 if (dn_db->parms.forwarding == 0)
1004 dev_mc_add(dev, dn_rt_all_end_mcast, ETH_ALEN, 0); 1004 dev_mc_add(dev, dn_rt_all_end_mcast);
1005 else 1005 else
1006 dev_mc_add(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0); 1006 dev_mc_add(dev, dn_rt_all_rt_mcast);
1007 1007
1008 dn_db->use_long = 1; 1008 dn_db->use_long = 1;
1009 1009
@@ -1015,9 +1015,9 @@ static void dn_eth_down(struct net_device *dev)
1015 struct dn_dev *dn_db = dev->dn_ptr; 1015 struct dn_dev *dn_db = dev->dn_ptr;
1016 1016
1017 if (dn_db->parms.forwarding == 0) 1017 if (dn_db->parms.forwarding == 0)
1018 dev_mc_delete(dev, dn_rt_all_end_mcast, ETH_ALEN, 0); 1018 dev_mc_del(dev, dn_rt_all_end_mcast);
1019 else 1019 else
1020 dev_mc_delete(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0); 1020 dev_mc_del(dev, dn_rt_all_rt_mcast);
1021} 1021}
1022 1022
1023static void dn_dev_set_timer(struct net_device *dev); 1023static void dn_dev_set_timer(struct net_device *dev);
@@ -1220,17 +1220,14 @@ void dn_dev_down(struct net_device *dev)
1220 1220
1221void dn_dev_init_pkt(struct sk_buff *skb) 1221void dn_dev_init_pkt(struct sk_buff *skb)
1222{ 1222{
1223 return;
1224} 1223}
1225 1224
1226void dn_dev_veri_pkt(struct sk_buff *skb) 1225void dn_dev_veri_pkt(struct sk_buff *skb)
1227{ 1226{
1228 return;
1229} 1227}
1230 1228
1231void dn_dev_hello(struct sk_buff *skb) 1229void dn_dev_hello(struct sk_buff *skb)
1232{ 1230{
1233 return;
1234} 1231}
1235 1232
1236void dn_dev_devices_off(void) 1233void dn_dev_devices_off(void)
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index deb723dba44b..0363bb95cc7d 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -266,7 +266,8 @@ static int dn_long_output(struct sk_buff *skb)
266 266
267 skb_reset_network_header(skb); 267 skb_reset_network_header(skb);
268 268
269 return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet); 269 return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL,
270 neigh->dev, dn_neigh_output_packet);
270} 271}
271 272
272static int dn_short_output(struct sk_buff *skb) 273static int dn_short_output(struct sk_buff *skb)
@@ -305,7 +306,8 @@ static int dn_short_output(struct sk_buff *skb)
305 306
306 skb_reset_network_header(skb); 307 skb_reset_network_header(skb);
307 308
308 return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet); 309 return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL,
310 neigh->dev, dn_neigh_output_packet);
309} 311}
310 312
311/* 313/*
@@ -347,7 +349,8 @@ static int dn_phase3_output(struct sk_buff *skb)
347 349
348 skb_reset_network_header(skb); 350 skb_reset_network_header(skb);
349 351
350 return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet); 352 return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL,
353 neigh->dev, dn_neigh_output_packet);
351} 354}
352 355
353/* 356/*
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 25a37299bc65..b430549e2b91 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -810,7 +810,8 @@ free_out:
810 810
811int dn_nsp_rx(struct sk_buff *skb) 811int dn_nsp_rx(struct sk_buff *skb)
812{ 812{
813 return NF_HOOK(PF_DECnet, NF_DN_LOCAL_IN, skb, skb->dev, NULL, dn_nsp_rx_packet); 813 return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, skb, skb->dev, NULL,
814 dn_nsp_rx_packet);
814} 815}
815 816
816/* 817/*
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 70ebe74027d5..6585ea6d1182 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -146,13 +146,13 @@ static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
146 146
147static inline void dnrt_free(struct dn_route *rt) 147static inline void dnrt_free(struct dn_route *rt)
148{ 148{
149 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 149 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
150} 150}
151 151
152static inline void dnrt_drop(struct dn_route *rt) 152static inline void dnrt_drop(struct dn_route *rt)
153{ 153{
154 dst_release(&rt->u.dst); 154 dst_release(&rt->dst);
155 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 155 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
156} 156}
157 157
158static void dn_dst_check_expire(unsigned long dummy) 158static void dn_dst_check_expire(unsigned long dummy)
@@ -167,13 +167,13 @@ static void dn_dst_check_expire(unsigned long dummy)
167 167
168 spin_lock(&dn_rt_hash_table[i].lock); 168 spin_lock(&dn_rt_hash_table[i].lock);
169 while((rt=*rtp) != NULL) { 169 while((rt=*rtp) != NULL) {
170 if (atomic_read(&rt->u.dst.__refcnt) || 170 if (atomic_read(&rt->dst.__refcnt) ||
171 (now - rt->u.dst.lastuse) < expire) { 171 (now - rt->dst.lastuse) < expire) {
172 rtp = &rt->u.dst.dn_next; 172 rtp = &rt->dst.dn_next;
173 continue; 173 continue;
174 } 174 }
175 *rtp = rt->u.dst.dn_next; 175 *rtp = rt->dst.dn_next;
176 rt->u.dst.dn_next = NULL; 176 rt->dst.dn_next = NULL;
177 dnrt_free(rt); 177 dnrt_free(rt);
178 } 178 }
179 spin_unlock(&dn_rt_hash_table[i].lock); 179 spin_unlock(&dn_rt_hash_table[i].lock);
@@ -198,13 +198,13 @@ static int dn_dst_gc(struct dst_ops *ops)
198 rtp = &dn_rt_hash_table[i].chain; 198 rtp = &dn_rt_hash_table[i].chain;
199 199
200 while((rt=*rtp) != NULL) { 200 while((rt=*rtp) != NULL) {
201 if (atomic_read(&rt->u.dst.__refcnt) || 201 if (atomic_read(&rt->dst.__refcnt) ||
202 (now - rt->u.dst.lastuse) < expire) { 202 (now - rt->dst.lastuse) < expire) {
203 rtp = &rt->u.dst.dn_next; 203 rtp = &rt->dst.dn_next;
204 continue; 204 continue;
205 } 205 }
206 *rtp = rt->u.dst.dn_next; 206 *rtp = rt->dst.dn_next;
207 rt->u.dst.dn_next = NULL; 207 rt->dst.dn_next = NULL;
208 dnrt_drop(rt); 208 dnrt_drop(rt);
209 break; 209 break;
210 } 210 }
@@ -264,7 +264,6 @@ static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst)
264 264
265static void dn_dst_link_failure(struct sk_buff *skb) 265static void dn_dst_link_failure(struct sk_buff *skb)
266{ 266{
267 return;
268} 267}
269 268
270static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 269static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
@@ -288,25 +287,25 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route *
288 while((rth = *rthp) != NULL) { 287 while((rth = *rthp) != NULL) {
289 if (compare_keys(&rth->fl, &rt->fl)) { 288 if (compare_keys(&rth->fl, &rt->fl)) {
290 /* Put it first */ 289 /* Put it first */
291 *rthp = rth->u.dst.dn_next; 290 *rthp = rth->dst.dn_next;
292 rcu_assign_pointer(rth->u.dst.dn_next, 291 rcu_assign_pointer(rth->dst.dn_next,
293 dn_rt_hash_table[hash].chain); 292 dn_rt_hash_table[hash].chain);
294 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); 293 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
295 294
296 dst_use(&rth->u.dst, now); 295 dst_use(&rth->dst, now);
297 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 296 spin_unlock_bh(&dn_rt_hash_table[hash].lock);
298 297
299 dnrt_drop(rt); 298 dnrt_drop(rt);
300 *rp = rth; 299 *rp = rth;
301 return 0; 300 return 0;
302 } 301 }
303 rthp = &rth->u.dst.dn_next; 302 rthp = &rth->dst.dn_next;
304 } 303 }
305 304
306 rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain); 305 rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
307 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); 306 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
308 307
309 dst_use(&rt->u.dst, now); 308 dst_use(&rt->dst, now);
310 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 309 spin_unlock_bh(&dn_rt_hash_table[hash].lock);
311 *rp = rt; 310 *rp = rt;
312 return 0; 311 return 0;
@@ -324,8 +323,8 @@ static void dn_run_flush(unsigned long dummy)
324 goto nothing_to_declare; 323 goto nothing_to_declare;
325 324
326 for(; rt; rt=next) { 325 for(; rt; rt=next) {
327 next = rt->u.dst.dn_next; 326 next = rt->dst.dn_next;
328 rt->u.dst.dn_next = NULL; 327 rt->dst.dn_next = NULL;
329 dst_free((struct dst_entry *)rt); 328 dst_free((struct dst_entry *)rt);
330 } 329 }
331 330
@@ -518,7 +517,8 @@ static int dn_route_rx_long(struct sk_buff *skb)
518 ptr++; 517 ptr++;
519 cb->hops = *ptr++; /* Visit Count */ 518 cb->hops = *ptr++; /* Visit Count */
520 519
521 return NF_HOOK(PF_DECnet, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, dn_route_rx_packet); 520 return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL,
521 dn_route_rx_packet);
522 522
523drop_it: 523drop_it:
524 kfree_skb(skb); 524 kfree_skb(skb);
@@ -544,7 +544,8 @@ static int dn_route_rx_short(struct sk_buff *skb)
544 ptr += 2; 544 ptr += 2;
545 cb->hops = *ptr & 0x3f; 545 cb->hops = *ptr & 0x3f;
546 546
547 return NF_HOOK(PF_DECnet, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, dn_route_rx_packet); 547 return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL,
548 dn_route_rx_packet);
548 549
549drop_it: 550drop_it:
550 kfree_skb(skb); 551 kfree_skb(skb);
@@ -646,16 +647,24 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
646 647
647 switch(flags & DN_RT_CNTL_MSK) { 648 switch(flags & DN_RT_CNTL_MSK) {
648 case DN_RT_PKT_HELO: 649 case DN_RT_PKT_HELO:
649 return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_route_ptp_hello); 650 return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
651 skb, skb->dev, NULL,
652 dn_route_ptp_hello);
650 653
651 case DN_RT_PKT_L1RT: 654 case DN_RT_PKT_L1RT:
652 case DN_RT_PKT_L2RT: 655 case DN_RT_PKT_L2RT:
653 return NF_HOOK(PF_DECnet, NF_DN_ROUTE, skb, skb->dev, NULL, dn_route_discard); 656 return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE,
657 skb, skb->dev, NULL,
658 dn_route_discard);
654 case DN_RT_PKT_ERTH: 659 case DN_RT_PKT_ERTH:
655 return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_neigh_router_hello); 660 return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
661 skb, skb->dev, NULL,
662 dn_neigh_router_hello);
656 663
657 case DN_RT_PKT_EEDH: 664 case DN_RT_PKT_EEDH:
658 return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_neigh_endnode_hello); 665 return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
666 skb, skb->dev, NULL,
667 dn_neigh_endnode_hello);
659 } 668 }
660 } else { 669 } else {
661 if (dn->parms.state != DN_DEV_S_RU) 670 if (dn->parms.state != DN_DEV_S_RU)
@@ -704,7 +713,8 @@ static int dn_output(struct sk_buff *skb)
704 cb->rt_flags |= DN_RT_F_IE; 713 cb->rt_flags |= DN_RT_F_IE;
705 cb->hops = 0; 714 cb->hops = 0;
706 715
707 return NF_HOOK(PF_DECnet, NF_DN_LOCAL_OUT, skb, NULL, dev, neigh->output); 716 return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, skb, NULL, dev,
717 neigh->output);
708 718
709error: 719error:
710 if (net_ratelimit()) 720 if (net_ratelimit())
@@ -733,7 +743,7 @@ static int dn_forward(struct sk_buff *skb)
733 /* Ensure that we have enough space for headers */ 743 /* Ensure that we have enough space for headers */
734 rt = (struct dn_route *)skb_dst(skb); 744 rt = (struct dn_route *)skb_dst(skb);
735 header_len = dn_db->use_long ? 21 : 6; 745 header_len = dn_db->use_long ? 21 : 6;
736 if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len)) 746 if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len))
737 goto drop; 747 goto drop;
738 748
739 /* 749 /*
@@ -742,7 +752,7 @@ static int dn_forward(struct sk_buff *skb)
742 if (++cb->hops > 30) 752 if (++cb->hops > 30)
743 goto drop; 753 goto drop;
744 754
745 skb->dev = rt->u.dst.dev; 755 skb->dev = rt->dst.dev;
746 756
747 /* 757 /*
748 * If packet goes out same interface it came in on, then set 758 * If packet goes out same interface it came in on, then set
@@ -753,7 +763,8 @@ static int dn_forward(struct sk_buff *skb)
753 if (rt->rt_flags & RTCF_DOREDIRECT) 763 if (rt->rt_flags & RTCF_DOREDIRECT)
754 cb->rt_flags |= DN_RT_F_IE; 764 cb->rt_flags |= DN_RT_F_IE;
755 765
756 return NF_HOOK(PF_DECnet, NF_DN_FORWARD, skb, dev, skb->dev, neigh->output); 766 return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, skb, dev, skb->dev,
767 neigh->output);
757 768
758drop: 769drop:
759 kfree_skb(skb); 770 kfree_skb(skb);
@@ -781,7 +792,7 @@ static int dn_rt_bug(struct sk_buff *skb)
781static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) 792static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
782{ 793{
783 struct dn_fib_info *fi = res->fi; 794 struct dn_fib_info *fi = res->fi;
784 struct net_device *dev = rt->u.dst.dev; 795 struct net_device *dev = rt->dst.dev;
785 struct neighbour *n; 796 struct neighbour *n;
786 unsigned mss; 797 unsigned mss;
787 798
@@ -789,25 +800,25 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
789 if (DN_FIB_RES_GW(*res) && 800 if (DN_FIB_RES_GW(*res) &&
790 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 801 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
791 rt->rt_gateway = DN_FIB_RES_GW(*res); 802 rt->rt_gateway = DN_FIB_RES_GW(*res);
792 memcpy(rt->u.dst.metrics, fi->fib_metrics, 803 memcpy(rt->dst.metrics, fi->fib_metrics,
793 sizeof(rt->u.dst.metrics)); 804 sizeof(rt->dst.metrics));
794 } 805 }
795 rt->rt_type = res->type; 806 rt->rt_type = res->type;
796 807
797 if (dev != NULL && rt->u.dst.neighbour == NULL) { 808 if (dev != NULL && rt->dst.neighbour == NULL) {
798 n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); 809 n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
799 if (IS_ERR(n)) 810 if (IS_ERR(n))
800 return PTR_ERR(n); 811 return PTR_ERR(n);
801 rt->u.dst.neighbour = n; 812 rt->dst.neighbour = n;
802 } 813 }
803 814
804 if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 || 815 if (dst_metric(&rt->dst, RTAX_MTU) == 0 ||
805 dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu) 816 dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
806 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; 817 rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
807 mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); 818 mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
808 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 || 819 if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 ||
809 dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss) 820 dst_metric(&rt->dst, RTAX_ADVMSS) > mss)
810 rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; 821 rt->dst.metrics[RTAX_ADVMSS-1] = mss;
811 return 0; 822 return 0;
812} 823}
813 824
@@ -1085,8 +1096,8 @@ make_route:
1085 if (rt == NULL) 1096 if (rt == NULL)
1086 goto e_nobufs; 1097 goto e_nobufs;
1087 1098
1088 atomic_set(&rt->u.dst.__refcnt, 1); 1099 atomic_set(&rt->dst.__refcnt, 1);
1089 rt->u.dst.flags = DST_HOST; 1100 rt->dst.flags = DST_HOST;
1090 1101
1091 rt->fl.fld_src = oldflp->fld_src; 1102 rt->fl.fld_src = oldflp->fld_src;
1092 rt->fl.fld_dst = oldflp->fld_dst; 1103 rt->fl.fld_dst = oldflp->fld_dst;
@@ -1102,17 +1113,17 @@ make_route:
1102 rt->rt_dst_map = fl.fld_dst; 1113 rt->rt_dst_map = fl.fld_dst;
1103 rt->rt_src_map = fl.fld_src; 1114 rt->rt_src_map = fl.fld_src;
1104 1115
1105 rt->u.dst.dev = dev_out; 1116 rt->dst.dev = dev_out;
1106 dev_hold(dev_out); 1117 dev_hold(dev_out);
1107 rt->u.dst.neighbour = neigh; 1118 rt->dst.neighbour = neigh;
1108 neigh = NULL; 1119 neigh = NULL;
1109 1120
1110 rt->u.dst.lastuse = jiffies; 1121 rt->dst.lastuse = jiffies;
1111 rt->u.dst.output = dn_output; 1122 rt->dst.output = dn_output;
1112 rt->u.dst.input = dn_rt_bug; 1123 rt->dst.input = dn_rt_bug;
1113 rt->rt_flags = flags; 1124 rt->rt_flags = flags;
1114 if (flags & RTCF_LOCAL) 1125 if (flags & RTCF_LOCAL)
1115 rt->u.dst.input = dn_nsp_rx; 1126 rt->dst.input = dn_nsp_rx;
1116 1127
1117 err = dn_rt_set_next_hop(rt, &res); 1128 err = dn_rt_set_next_hop(rt, &res);
1118 if (err) 1129 if (err)
@@ -1141,7 +1152,7 @@ e_nobufs:
1141 err = -ENOBUFS; 1152 err = -ENOBUFS;
1142 goto done; 1153 goto done;
1143e_neighbour: 1154e_neighbour:
1144 dst_free(&rt->u.dst); 1155 dst_free(&rt->dst);
1145 goto e_nobufs; 1156 goto e_nobufs;
1146} 1157}
1147 1158
@@ -1157,15 +1168,15 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
1157 if (!(flags & MSG_TRYHARD)) { 1168 if (!(flags & MSG_TRYHARD)) {
1158 rcu_read_lock_bh(); 1169 rcu_read_lock_bh();
1159 for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt; 1170 for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
1160 rt = rcu_dereference_bh(rt->u.dst.dn_next)) { 1171 rt = rcu_dereference_bh(rt->dst.dn_next)) {
1161 if ((flp->fld_dst == rt->fl.fld_dst) && 1172 if ((flp->fld_dst == rt->fl.fld_dst) &&
1162 (flp->fld_src == rt->fl.fld_src) && 1173 (flp->fld_src == rt->fl.fld_src) &&
1163 (flp->mark == rt->fl.mark) && 1174 (flp->mark == rt->fl.mark) &&
1164 (rt->fl.iif == 0) && 1175 (rt->fl.iif == 0) &&
1165 (rt->fl.oif == flp->oif)) { 1176 (rt->fl.oif == flp->oif)) {
1166 dst_use(&rt->u.dst, jiffies); 1177 dst_use(&rt->dst, jiffies);
1167 rcu_read_unlock_bh(); 1178 rcu_read_unlock_bh();
1168 *pprt = &rt->u.dst; 1179 *pprt = &rt->dst;
1169 return 0; 1180 return 0;
1170 } 1181 }
1171 } 1182 }
@@ -1364,29 +1375,29 @@ make_route:
1364 rt->fl.iif = in_dev->ifindex; 1375 rt->fl.iif = in_dev->ifindex;
1365 rt->fl.mark = fl.mark; 1376 rt->fl.mark = fl.mark;
1366 1377
1367 rt->u.dst.flags = DST_HOST; 1378 rt->dst.flags = DST_HOST;
1368 rt->u.dst.neighbour = neigh; 1379 rt->dst.neighbour = neigh;
1369 rt->u.dst.dev = out_dev; 1380 rt->dst.dev = out_dev;
1370 rt->u.dst.lastuse = jiffies; 1381 rt->dst.lastuse = jiffies;
1371 rt->u.dst.output = dn_rt_bug; 1382 rt->dst.output = dn_rt_bug;
1372 switch(res.type) { 1383 switch(res.type) {
1373 case RTN_UNICAST: 1384 case RTN_UNICAST:
1374 rt->u.dst.input = dn_forward; 1385 rt->dst.input = dn_forward;
1375 break; 1386 break;
1376 case RTN_LOCAL: 1387 case RTN_LOCAL:
1377 rt->u.dst.output = dn_output; 1388 rt->dst.output = dn_output;
1378 rt->u.dst.input = dn_nsp_rx; 1389 rt->dst.input = dn_nsp_rx;
1379 rt->u.dst.dev = in_dev; 1390 rt->dst.dev = in_dev;
1380 flags |= RTCF_LOCAL; 1391 flags |= RTCF_LOCAL;
1381 break; 1392 break;
1382 default: 1393 default:
1383 case RTN_UNREACHABLE: 1394 case RTN_UNREACHABLE:
1384 case RTN_BLACKHOLE: 1395 case RTN_BLACKHOLE:
1385 rt->u.dst.input = dst_discard; 1396 rt->dst.input = dst_discard;
1386 } 1397 }
1387 rt->rt_flags = flags; 1398 rt->rt_flags = flags;
1388 if (rt->u.dst.dev) 1399 if (rt->dst.dev)
1389 dev_hold(rt->u.dst.dev); 1400 dev_hold(rt->dst.dev);
1390 1401
1391 err = dn_rt_set_next_hop(rt, &res); 1402 err = dn_rt_set_next_hop(rt, &res);
1392 if (err) 1403 if (err)
@@ -1394,7 +1405,7 @@ make_route:
1394 1405
1395 hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); 1406 hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst);
1396 dn_insert_route(rt, hash, &rt); 1407 dn_insert_route(rt, hash, &rt);
1397 skb_dst_set(skb, &rt->u.dst); 1408 skb_dst_set(skb, &rt->dst);
1398 1409
1399done: 1410done:
1400 if (neigh) 1411 if (neigh)
@@ -1416,7 +1427,7 @@ e_nobufs:
1416 goto done; 1427 goto done;
1417 1428
1418e_neighbour: 1429e_neighbour:
1419 dst_free(&rt->u.dst); 1430 dst_free(&rt->dst);
1420 goto done; 1431 goto done;
1421} 1432}
1422 1433
@@ -1431,13 +1442,13 @@ static int dn_route_input(struct sk_buff *skb)
1431 1442
1432 rcu_read_lock(); 1443 rcu_read_lock();
1433 for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; 1444 for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
1434 rt = rcu_dereference(rt->u.dst.dn_next)) { 1445 rt = rcu_dereference(rt->dst.dn_next)) {
1435 if ((rt->fl.fld_src == cb->src) && 1446 if ((rt->fl.fld_src == cb->src) &&
1436 (rt->fl.fld_dst == cb->dst) && 1447 (rt->fl.fld_dst == cb->dst) &&
1437 (rt->fl.oif == 0) && 1448 (rt->fl.oif == 0) &&
1438 (rt->fl.mark == skb->mark) && 1449 (rt->fl.mark == skb->mark) &&
1439 (rt->fl.iif == cb->iif)) { 1450 (rt->fl.iif == cb->iif)) {
1440 dst_use(&rt->u.dst, jiffies); 1451 dst_use(&rt->dst, jiffies);
1441 rcu_read_unlock(); 1452 rcu_read_unlock();
1442 skb_dst_set(skb, (struct dst_entry *)rt); 1453 skb_dst_set(skb, (struct dst_entry *)rt);
1443 return 0; 1454 return 0;
@@ -1476,8 +1487,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1476 r->rtm_src_len = 16; 1487 r->rtm_src_len = 16;
1477 RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); 1488 RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src);
1478 } 1489 }
1479 if (rt->u.dst.dev) 1490 if (rt->dst.dev)
1480 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); 1491 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex);
1481 /* 1492 /*
1482 * Note to self - change this if input routes reverse direction when 1493 * Note to self - change this if input routes reverse direction when
1483 * they deal only with inputs and not with replies like they do 1494 * they deal only with inputs and not with replies like they do
@@ -1486,11 +1497,11 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
1486 RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src); 1497 RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src);
1487 if (rt->rt_daddr != rt->rt_gateway) 1498 if (rt->rt_daddr != rt->rt_gateway)
1488 RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); 1499 RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway);
1489 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 1500 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
1490 goto rtattr_failure; 1501 goto rtattr_failure;
1491 expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; 1502 expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
1492 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, 1503 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
1493 rt->u.dst.error) < 0) 1504 rt->dst.error) < 0)
1494 goto rtattr_failure; 1505 goto rtattr_failure;
1495 if (rt->fl.iif) 1506 if (rt->fl.iif)
1496 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); 1507 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
@@ -1557,8 +1568,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1557 local_bh_enable(); 1568 local_bh_enable();
1558 memset(cb, 0, sizeof(struct dn_skb_cb)); 1569 memset(cb, 0, sizeof(struct dn_skb_cb));
1559 rt = (struct dn_route *)skb_dst(skb); 1570 rt = (struct dn_route *)skb_dst(skb);
1560 if (!err && -rt->u.dst.error) 1571 if (!err && -rt->dst.error)
1561 err = rt->u.dst.error; 1572 err = rt->dst.error;
1562 } else { 1573 } else {
1563 int oif = 0; 1574 int oif = 0;
1564 if (rta[RTA_OIF - 1]) 1575 if (rta[RTA_OIF - 1])
@@ -1572,7 +1583,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
1572 skb->dev = NULL; 1583 skb->dev = NULL;
1573 if (err) 1584 if (err)
1574 goto out_free; 1585 goto out_free;
1575 skb_dst_set(skb, &rt->u.dst); 1586 skb_dst_set(skb, &rt->dst);
1576 if (rtm->rtm_flags & RTM_F_NOTIFY) 1587 if (rtm->rtm_flags & RTM_F_NOTIFY)
1577 rt->rt_flags |= RTCF_NOTIFY; 1588 rt->rt_flags |= RTCF_NOTIFY;
1578 1589
@@ -1621,10 +1632,10 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
1621 rcu_read_lock_bh(); 1632 rcu_read_lock_bh();
1622 for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0; 1633 for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
1623 rt; 1634 rt;
1624 rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) { 1635 rt = rcu_dereference_bh(rt->dst.dn_next), idx++) {
1625 if (idx < s_idx) 1636 if (idx < s_idx)
1626 continue; 1637 continue;
1627 skb_dst_set(skb, dst_clone(&rt->u.dst)); 1638 skb_dst_set(skb, dst_clone(&rt->dst));
1628 if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 1639 if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
1629 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1640 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
1630 1, NLM_F_MULTI) <= 0) { 1641 1, NLM_F_MULTI) <= 0) {
@@ -1667,7 +1678,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
1667{ 1678{
1668 struct dn_rt_cache_iter_state *s = seq->private; 1679 struct dn_rt_cache_iter_state *s = seq->private;
1669 1680
1670 rt = rt->u.dst.dn_next; 1681 rt = rt->dst.dn_next;
1671 while(!rt) { 1682 while(!rt) {
1672 rcu_read_unlock_bh(); 1683 rcu_read_unlock_bh();
1673 if (--s->bucket < 0) 1684 if (--s->bucket < 0)
@@ -1708,12 +1719,12 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
1708 char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN]; 1719 char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
1709 1720
1710 seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", 1721 seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
1711 rt->u.dst.dev ? rt->u.dst.dev->name : "*", 1722 rt->dst.dev ? rt->dst.dev->name : "*",
1712 dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), 1723 dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
1713 dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), 1724 dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
1714 atomic_read(&rt->u.dst.__refcnt), 1725 atomic_read(&rt->dst.__refcnt),
1715 rt->u.dst.__use, 1726 rt->dst.__use,
1716 (int) dst_metric(&rt->u.dst, RTAX_RTT)); 1727 (int) dst_metric(&rt->dst, RTAX_RTT));
1717 return 0; 1728 return 0;
1718} 1729}
1719 1730
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 7466c546f286..48fdf10be7a1 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -196,7 +196,6 @@ static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
196{ 196{
197 struct dn_fib_rule *r = (struct dn_fib_rule *)rule; 197 struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
198 198
199 frh->family = AF_DECnet;
200 frh->dst_len = r->dst_len; 199 frh->dst_len = r->dst_len;
201 frh->src_len = r->src_len; 200 frh->src_len = r->src_len;
202 frh->tos = 0; 201 frh->tos = 0;
@@ -212,29 +211,12 @@ nla_put_failure:
212 return -ENOBUFS; 211 return -ENOBUFS;
213} 212}
214 213
215static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops)
216{
217 struct list_head *pos;
218 struct fib_rule *rule;
219
220 if (!list_empty(&dn_fib_rules_ops->rules_list)) {
221 pos = dn_fib_rules_ops->rules_list.next;
222 if (pos->next != &dn_fib_rules_ops->rules_list) {
223 rule = list_entry(pos->next, struct fib_rule, list);
224 if (rule->pref)
225 return rule->pref - 1;
226 }
227 }
228
229 return 0;
230}
231
232static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) 214static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
233{ 215{
234 dn_rt_cache_flush(-1); 216 dn_rt_cache_flush(-1);
235} 217}
236 218
237static struct fib_rules_ops dn_fib_rules_ops_template = { 219static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = {
238 .family = AF_DECnet, 220 .family = AF_DECnet,
239 .rule_size = sizeof(struct dn_fib_rule), 221 .rule_size = sizeof(struct dn_fib_rule),
240 .addr_size = sizeof(u16), 222 .addr_size = sizeof(u16),
@@ -243,7 +225,7 @@ static struct fib_rules_ops dn_fib_rules_ops_template = {
243 .configure = dn_fib_rule_configure, 225 .configure = dn_fib_rule_configure,
244 .compare = dn_fib_rule_compare, 226 .compare = dn_fib_rule_compare,
245 .fill = dn_fib_rule_fill, 227 .fill = dn_fib_rule_fill,
246 .default_pref = dn_fib_rule_default_pref, 228 .default_pref = fib_default_rule_pref,
247 .flush_cache = dn_fib_rule_flush_cache, 229 .flush_cache = dn_fib_rule_flush_cache,
248 .nlgroup = RTNLGRP_DECnet_RULE, 230 .nlgroup = RTNLGRP_DECnet_RULE,
249 .policy = dn_fib_rule_policy, 231 .policy = dn_fib_rule_policy,
diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig
new file mode 100644
index 000000000000..50d49f7e0472
--- /dev/null
+++ b/net/dns_resolver/Kconfig
@@ -0,0 +1,27 @@
1#
2# Configuration for DNS Resolver
3#
4config DNS_RESOLVER
5 tristate "DNS Resolver support"
6 depends on NET && KEYS
7 help
8 Saying Y here will include support for the DNS Resolver key type
9 which can be used to make upcalls to perform DNS lookups in
10 userspace.
11
12 DNS Resolver is used to query a DNS server for information. Examples
13 include resolving a UNC hostname element to an IP address for CIFS or
14 performing a DNS query for AFSDB records so that AFS can locate a
15 cell's volume location database servers.
16
17 DNS Resolver is used by the CIFS and AFS modules, and is expected to
18 support SMB2 later. DNS Resolver is serviced by the userspace upcall
19 helper "/sbin/dns.resolver" via /etc/request-key.conf.
20
21 See <file:Documentation/networking/dns_resolver.txt> for further
22 information.
23
24 To compile this as a module, choose M here: the module will be called
25 dnsresolver.
26
27 If unsure, say N.
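
The upcall mentioned in this help text is driven by the request-key facility: when the kernel requests a dns_resolver key, /sbin/request-key consults /etc/request-key.conf and execs the configured helper. Assuming the helper path named above, the wiring would plausibly be a single line (the column layout mirrors the example given later in dns_query.c; the header comment is only illustrative):

	#OP	TYPE		DESCRIPTION	CALLOUT INFO	PROGRAM ARG1 ARG2...
	create	dns_resolver	*		*		/sbin/dns.resolver %k

The helper resolves the name in userspace and instantiates the key with the result, which the kernel then reads back and caches.
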
diff --git a/net/dns_resolver/Makefile b/net/dns_resolver/Makefile
new file mode 100644
index 000000000000..c0ef4e71dc49
--- /dev/null
+++ b/net/dns_resolver/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the Linux DNS Resolver.
3#
4
5obj-$(CONFIG_DNS_RESOLVER) += dns_resolver.o
6
7dns_resolver-objs := dns_key.o dns_query.o
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
new file mode 100644
index 000000000000..400a04d5c9a1
--- /dev/null
+++ b/net/dns_resolver/dns_key.c
@@ -0,0 +1,211 @@
1/* Key type used to cache DNS lookups made by the kernel
2 *
3 * See Documentation/networking/dns_resolver.txt
4 *
5 * Copyright (c) 2007 Igor Mammedov
6 * Author(s): Igor Mammedov (niallain@gmail.com)
7 * Steve French (sfrench@us.ibm.com)
8 * Wang Lei (wang840925@gmail.com)
9 * David Howells (dhowells@redhat.com)
10 *
11 * This library is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License as published
13 * by the Free Software Foundation; either version 2.1 of the License, or
14 * (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
19 * the GNU Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with this library; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25#include <linux/module.h>
26#include <linux/moduleparam.h>
27#include <linux/slab.h>
28#include <linux/string.h>
29#include <linux/kernel.h>
30#include <linux/keyctl.h>
31#include <linux/err.h>
32#include <keys/dns_resolver-type.h>
33#include <keys/user-type.h>
34#include "internal.h"
35
36MODULE_DESCRIPTION("DNS Resolver");
37MODULE_AUTHOR("Wang Lei");
38MODULE_LICENSE("GPL");
39
40unsigned dns_resolver_debug;
41module_param_named(debug, dns_resolver_debug, uint, S_IWUSR | S_IRUGO);
42MODULE_PARM_DESC(debug, "DNS Resolver debugging mask");
43
44const struct cred *dns_resolver_cache;
45
46/*
47 * Instantiate a user defined key for dns_resolver.
48 *
49 * The data must be a NUL-terminated string, with the NUL char accounted in
50 * datalen.
51 *
52 * If the data contains '#' characters, then we take the clause after each
53 * one to be an option of the form 'key=value'. The actual data of interest is
54 * the string leading up to the first '#'. For instance:
55 *
56 * "ip1,ip2,...#foo=bar"
57 */
58static int
59dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
60{
61 struct user_key_payload *upayload;
62 int ret;
63 size_t result_len = 0;
64 const char *data = _data, *opt;
65
66 kenter("%%%d,%s,'%s',%zu",
67 key->serial, key->description, data, datalen);
68
69 if (datalen <= 1 || !data || data[datalen - 1] != '\0')
70 return -EINVAL;
71 datalen--;
72
73 /* deal with any options embedded in the data */
74 opt = memchr(data, '#', datalen);
75 if (!opt) {
76 kdebug("no options currently supported");
77 return -EINVAL;
78 }
79
80 result_len = datalen;
81 ret = key_payload_reserve(key, result_len);
82 if (ret < 0)
83 return -EINVAL;
84
85 upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL);
86 if (!upayload) {
87 kleave(" = -ENOMEM");
88 return -ENOMEM;
89 }
90
91 upayload->datalen = result_len;
92 memcpy(upayload->data, data, result_len);
93 upayload->data[result_len] = '\0';
94 rcu_assign_pointer(key->payload.data, upayload);
95
96 kleave(" = 0");
97 return 0;
98}
99
100/*
101 * The description is of the form "[<type>:]<domain_name>"
102 *
103 * The domain name may be a simple name or an absolute domain name (which
104 * should end with a period). The domain name is case-independent.
105 */
106static int
107dns_resolver_match(const struct key *key, const void *description)
108{
109 int slen, dlen, ret = 0;
110 const char *src = key->description, *dsp = description;
111
112 kenter("%s,%s", src, dsp);
113
114 if (!src || !dsp)
115 goto no_match;
116
117 if (strcasecmp(src, dsp) == 0)
118 goto matched;
119
120 slen = strlen(src);
121 dlen = strlen(dsp);
122 if (slen <= 0 || dlen <= 0)
123 goto no_match;
124 if (src[slen - 1] == '.')
125 slen--;
126 if (dsp[dlen - 1] == '.')
127 dlen--;
128 if (slen != dlen || strncasecmp(src, dsp, slen) != 0)
129 goto no_match;
130
131matched:
132 ret = 1;
133no_match:
134 kleave(" = %d", ret);
135 return ret;
136}
137
138struct key_type key_type_dns_resolver = {
139 .name = "dns_resolver",
140 .instantiate = dns_resolver_instantiate,
141 .match = dns_resolver_match,
142 .revoke = user_revoke,
143 .destroy = user_destroy,
144 .describe = user_describe,
145 .read = user_read,
146};
147
148static int __init init_dns_resolver(void)
149{
150 struct cred *cred;
151 struct key *keyring;
152 int ret;
153
154 printk(KERN_NOTICE "Registering the %s key type\n",
155 key_type_dns_resolver.name);
156
157 /* create an override credential set with a special thread keyring in
158 * which DNS requests are cached
159 *
160 * this is used to prevent malicious redirections from being installed
161 * with add_key().
162 */
163 cred = prepare_kernel_cred(NULL);
164 if (!cred)
165 return -ENOMEM;
166
167 keyring = key_alloc(&key_type_keyring, ".dns_resolver", 0, 0, cred,
168 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
169 KEY_USR_VIEW | KEY_USR_READ,
170 KEY_ALLOC_NOT_IN_QUOTA);
171 if (IS_ERR(keyring)) {
172 ret = PTR_ERR(keyring);
173 goto failed_put_cred;
174 }
175
176 ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
177 if (ret < 0)
178 goto failed_put_key;
179
180 ret = register_key_type(&key_type_dns_resolver);
181 if (ret < 0)
182 goto failed_put_key;
183
184 /* instruct request_key() to use this special keyring as a cache for
185 * the results it looks up */
186 cred->thread_keyring = keyring;
187 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
188 dns_resolver_cache = cred;
189
190 kdebug("DNS resolver keyring: %d\n", key_serial(keyring));
191 return 0;
192
193failed_put_key:
194 key_put(keyring);
195failed_put_cred:
196 put_cred(cred);
197 return ret;
198}
199
200static void __exit exit_dns_resolver(void)
201{
202 key_revoke(dns_resolver_cache->thread_keyring);
203 unregister_key_type(&key_type_dns_resolver);
204 put_cred(dns_resolver_cache);
205 printk(KERN_NOTICE "Unregistered %s key type\n",
206 key_type_dns_resolver.name);
207}
208
209module_init(init_dns_resolver)
210module_exit(exit_dns_resolver)
211MODULE_LICENSE("GPL");
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
new file mode 100644
index 000000000000..03d5255f5cf2
--- /dev/null
+++ b/net/dns_resolver/dns_query.c
@@ -0,0 +1,160 @@
1/* Upcall routine, designed to work as a key type, working through
2 * /sbin/request-key to contact userspace when handling DNS queries.
3 *
4 * See Documentation/networking/dns_resolver.txt
5 *
6 * Copyright (c) 2007 Igor Mammedov
7 * Author(s): Igor Mammedov (niallain@gmail.com)
8 * Steve French (sfrench@us.ibm.com)
9 * Wang Lei (wang840925@gmail.com)
10 * David Howells (dhowells@redhat.com)
11 *
12 * The upcall wrapper used to make an arbitrary DNS query.
13 *
14 * This function requires the appropriate userspace tool dns.upcall to be
15 * installed, and something like the following line should be added to the
16 * /etc/request-key.conf file:
17 *
18 * create dns_resolver * * /sbin/dns.upcall %k
19 *
20 * For example, to use this module to query an AFSDB RR:
21 *
22 * create dns_resolver afsdb:* * /sbin/dns.afsdb %k
23 *
24 * This library is free software; you can redistribute it and/or modify
25 * it under the terms of the GNU Lesser General Public License as published
26 * by the Free Software Foundation; either version 2.1 of the License, or
27 * (at your option) any later version.
28 *
29 * This library is distributed in the hope that it will be useful,
30 * but WITHOUT ANY WARRANTY; without even the implied warranty of
31 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
32 * the GNU Lesser General Public License for more details.
33 *
34 * You should have received a copy of the GNU Lesser General Public License
35 * along with this library; if not, write to the Free Software
36 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
37 */
38
39#include <linux/module.h>
40#include <linux/slab.h>
41#include <linux/dns_resolver.h>
42#include <linux/err.h>
43#include <keys/dns_resolver-type.h>
44#include <keys/user-type.h>
45
46#include "internal.h"
47
48/**
49 * dns_query - Query the DNS
50 * @type: Query type (or NULL for straight host->IP lookup)
51 * @name: Name to look up
52 * @namelen: Length of name
53 * @options: Request options (or NULL if no options)
54 * @_result: Where to place the returned data.
55 * @_expiry: Where to store the result expiry time (or NULL)
56 *
57 * The data will be returned in the pointer at *_result, and the caller is
58 * responsible for freeing it.
59 *
60 * The description should be of the form "[<query_type>:]<domain_name>", and
61 * the options need to be appropriate for the query type requested. If no
62 * query_type is given, then the query is a straight hostname to IP address
63 * lookup.
64 *
65 * The DNS resolution lookup is performed by upcalling to userspace by way of
66 * requesting a key of type dns_resolver.
67 *
68 * Returns the size of the result on success, -ve error code otherwise.
69 */
70int dns_query(const char *type, const char *name, size_t namelen,
71 const char *options, char **_result, time_t *_expiry)
72{
73 struct key *rkey;
74 struct user_key_payload *upayload;
75 const struct cred *saved_cred;
76 size_t typelen, desclen;
77 char *desc, *cp;
78 int ret, len;
79
80 kenter("%s,%*.*s,%zu,%s",
81 type, (int)namelen, (int)namelen, name, namelen, options);
82
83 if (!name || namelen == 0 || !_result)
84 return -EINVAL;
85
86 /* construct the query key description as "[<type>:]<name>" */
87 typelen = 0;
88 desclen = 0;
89 if (type) {
90 typelen = strlen(type);
91 if (typelen < 1)
92 return -EINVAL;
93 desclen += typelen + 1;
94 }
95
96 if (!namelen)
97 namelen = strlen(name);
98 if (namelen < 3)
99 return -EINVAL;
100 desclen += namelen + 1;
101
102 desc = kmalloc(desclen, GFP_KERNEL);
103 if (!desc)
104 return -ENOMEM;
105
106 cp = desc;
107 if (type) {
108 memcpy(cp, type, typelen);
109 cp += typelen;
110 *cp++ = ':';
111 }
112 memcpy(cp, name, namelen);
113 cp += namelen;
114 *cp = '\0';
115
116 if (!options)
117 options = "";
118 kdebug("call request_key(,%s,%s)", desc, options);
119
120 /* make the upcall, using special credentials to prevent the use of
121 * add_key() to preinstall malicious redirections
122 */
123 saved_cred = override_creds(dns_resolver_cache);
124 rkey = request_key(&key_type_dns_resolver, desc, options);
125 revert_creds(saved_cred);
126 kfree(desc);
127 if (IS_ERR(rkey)) {
128 ret = PTR_ERR(rkey);
129 goto out;
130 }
131
132 down_read(&rkey->sem);
133 rkey->perm |= KEY_USR_VIEW;
134
135 ret = key_validate(rkey);
136 if (ret < 0)
137 goto put;
138
139 upayload = rcu_dereference_protected(rkey->payload.data,
140 lockdep_is_held(&rkey->sem));
141 len = upayload->datalen;
142
143 ret = -ENOMEM;
144 *_result = kmalloc(len + 1, GFP_KERNEL);
145 if (!*_result)
146 goto put;
147
148 memcpy(*_result, upayload->data, len + 1);
149 if (_expiry)
150 *_expiry = rkey->expiry;
151
152 ret = len;
153put:
154 up_read(&rkey->sem);
155 key_put(rkey);
156out:
157 kleave(" = %d", ret);
158 return ret;
159}
160EXPORT_SYMBOL(dns_query);
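
For an in-kernel caller, usage follows the kernel-doc above. A minimal sketch (cell_name is a hypothetical variable; the "afsdb" query type comes from the example in the header comment):

	char *result;
	time_t expiry;
	int len;

	len = dns_query("afsdb", cell_name, strlen(cell_name), NULL,
			&result, &expiry);
	if (len < 0)
		return len;		/* -ve error code from the upcall */

	/* result is a kmalloc'd, NUL-terminated buffer of len bytes */
	kfree(result);
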
diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h
new file mode 100644
index 000000000000..189ca9e9b785
--- /dev/null
+++ b/net/dns_resolver/internal.h
@@ -0,0 +1,44 @@
1/*
2 * Copyright (c) 2010 Wang Lei
3 * Author(s): Wang Lei (wang840925@gmail.com). All Rights Reserved.
4 *
5 * Internal DNS Resolver stuff
6 *
7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published
9 * by the Free Software Foundation; either version 2.1 of the License, or
10 * (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15 * the GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/compiler.h>
23#include <linux/kernel.h>
24#include <linux/sched.h>
25
26/*
27 * dns_key.c
28 */
29extern const struct cred *dns_resolver_cache;
30
31/*
32 * debug tracing
33 */
34extern unsigned dns_resolver_debug;
35
36#define kdebug(FMT, ...) \
37do { \
38 if (unlikely(dns_resolver_debug)) \
39 printk(KERN_DEBUG "[%-6.6s] "FMT"\n", \
40 current->comm, ##__VA_ARGS__); \
41} while (0)
42
43#define kenter(FMT, ...) kdebug("==> %s("FMT")", __func__, ##__VA_ARGS__)
44#define kleave(FMT, ...) kdebug("<== %s()"FMT"", __func__, ##__VA_ARGS__)
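
Since dns_resolver_debug is exported as a writable module parameter (module_param_named() in dns_key.c, mode S_IWUSR | S_IRUGO), the tracing mask can presumably be flipped at run time through sysfs; a sketch, with the directory name depending on what the module is finally called:

	echo 1 > /sys/module/dns_resolver/parameters/debug	# enable kenter/kleave/kdebug
	echo 0 > /sys/module/dns_resolver/parameters/debug	# quiet again
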
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index c51b55400dc5..11201784d29a 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,7 +1,7 @@
1menuconfig NET_DSA 1menuconfig NET_DSA
2 bool "Distributed Switch Architecture support" 2 bool "Distributed Switch Architecture support"
3 default n 3 default n
4 depends on EXPERIMENTAL && !S390 4 depends on EXPERIMENTAL && NET_ETHERNET && !S390
5 select PHYLIB 5 select PHYLIB
6 ---help--- 6 ---help---
7 This allows you to use hardware switch chips that use 7 This allows you to use hardware switch chips that use
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2175e6d5cc8d..64ca2a6fa0d4 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -67,7 +67,7 @@ static int dsa_slave_open(struct net_device *dev)
67 return -ENETDOWN; 67 return -ENETDOWN;
68 68
69 if (compare_ether_addr(dev->dev_addr, master->dev_addr)) { 69 if (compare_ether_addr(dev->dev_addr, master->dev_addr)) {
70 err = dev_unicast_add(master, dev->dev_addr); 70 err = dev_uc_add(master, dev->dev_addr);
71 if (err < 0) 71 if (err < 0)
72 goto out; 72 goto out;
73 } 73 }
@@ -90,7 +90,7 @@ clear_allmulti:
90 dev_set_allmulti(master, -1); 90 dev_set_allmulti(master, -1);
91del_unicast: 91del_unicast:
92 if (compare_ether_addr(dev->dev_addr, master->dev_addr)) 92 if (compare_ether_addr(dev->dev_addr, master->dev_addr))
93 dev_unicast_delete(master, dev->dev_addr); 93 dev_uc_del(master, dev->dev_addr);
94out: 94out:
95 return err; 95 return err;
96} 96}
@@ -101,14 +101,14 @@ static int dsa_slave_close(struct net_device *dev)
101 struct net_device *master = p->parent->dst->master_netdev; 101 struct net_device *master = p->parent->dst->master_netdev;
102 102
103 dev_mc_unsync(master, dev); 103 dev_mc_unsync(master, dev);
104 dev_unicast_unsync(master, dev); 104 dev_uc_unsync(master, dev);
105 if (dev->flags & IFF_ALLMULTI) 105 if (dev->flags & IFF_ALLMULTI)
106 dev_set_allmulti(master, -1); 106 dev_set_allmulti(master, -1);
107 if (dev->flags & IFF_PROMISC) 107 if (dev->flags & IFF_PROMISC)
108 dev_set_promiscuity(master, -1); 108 dev_set_promiscuity(master, -1);
109 109
110 if (compare_ether_addr(dev->dev_addr, master->dev_addr)) 110 if (compare_ether_addr(dev->dev_addr, master->dev_addr))
111 dev_unicast_delete(master, dev->dev_addr); 111 dev_uc_del(master, dev->dev_addr);
112 112
113 return 0; 113 return 0;
114} 114}
@@ -130,7 +130,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
130 struct net_device *master = p->parent->dst->master_netdev; 130 struct net_device *master = p->parent->dst->master_netdev;
131 131
132 dev_mc_sync(master, dev); 132 dev_mc_sync(master, dev);
133 dev_unicast_sync(master, dev); 133 dev_uc_sync(master, dev);
134} 134}
135 135
136static int dsa_slave_set_mac_address(struct net_device *dev, void *a) 136static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
@@ -147,13 +147,13 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
147 goto out; 147 goto out;
148 148
149 if (compare_ether_addr(addr->sa_data, master->dev_addr)) { 149 if (compare_ether_addr(addr->sa_data, master->dev_addr)) {
150 err = dev_unicast_add(master, addr->sa_data); 150 err = dev_uc_add(master, addr->sa_data);
151 if (err < 0) 151 if (err < 0)
152 return err; 152 return err;
153 } 153 }
154 154
155 if (compare_ether_addr(dev->dev_addr, master->dev_addr)) 155 if (compare_ether_addr(dev->dev_addr, master->dev_addr))
156 dev_unicast_delete(master, dev->dev_addr); 156 dev_uc_del(master, dev->dev_addr);
157 157
158out: 158out:
159 memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); 159 memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
@@ -164,10 +164,9 @@ out:
164static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 164static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
165{ 165{
166 struct dsa_slave_priv *p = netdev_priv(dev); 166 struct dsa_slave_priv *p = netdev_priv(dev);
167 struct mii_ioctl_data *mii_data = if_mii(ifr);
168 167
169 if (p->phy != NULL) 168 if (p->phy != NULL)
170 return phy_mii_ioctl(p->phy, mii_data, cmd); 169 return phy_mii_ioctl(p->phy, ifr, cmd);
171 170
172 return -EOPNOTSUPP; 171 return -EOPNOTSUPP;
173} 172}
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 2a5a8053e000..dc54bd0d083b 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -48,7 +48,7 @@
48 48
49static const struct proto_ops econet_ops; 49static const struct proto_ops econet_ops;
50static struct hlist_head econet_sklist; 50static struct hlist_head econet_sklist;
51static DEFINE_RWLOCK(econet_lock); 51static DEFINE_SPINLOCK(econet_lock);
52static DEFINE_MUTEX(econet_mutex); 52static DEFINE_MUTEX(econet_mutex);
53 53
54/* Since there are only 256 possible network numbers (or fewer, depends 54/* Since there are only 256 possible network numbers (or fewer, depends
@@ -98,16 +98,16 @@ struct ec_cb
98 98
99static void econet_remove_socket(struct hlist_head *list, struct sock *sk) 99static void econet_remove_socket(struct hlist_head *list, struct sock *sk)
100{ 100{
101 write_lock_bh(&econet_lock); 101 spin_lock_bh(&econet_lock);
102 sk_del_node_init(sk); 102 sk_del_node_init(sk);
103 write_unlock_bh(&econet_lock); 103 spin_unlock_bh(&econet_lock);
104} 104}
105 105
106static void econet_insert_socket(struct hlist_head *list, struct sock *sk) 106static void econet_insert_socket(struct hlist_head *list, struct sock *sk)
107{ 107{
108 write_lock_bh(&econet_lock); 108 spin_lock_bh(&econet_lock);
109 sk_add_node(sk, list); 109 sk_add_node(sk, list);
110 write_unlock_bh(&econet_lock); 110 spin_unlock_bh(&econet_lock);
111} 111}
112 112
113/* 113/*
@@ -782,15 +782,19 @@ static struct sock *ec_listening_socket(unsigned char port, unsigned char
782 struct sock *sk; 782 struct sock *sk;
783 struct hlist_node *node; 783 struct hlist_node *node;
784 784
785 spin_lock(&econet_lock);
785 sk_for_each(sk, node, &econet_sklist) { 786 sk_for_each(sk, node, &econet_sklist) {
786 struct econet_sock *opt = ec_sk(sk); 787 struct econet_sock *opt = ec_sk(sk);
787 if ((opt->port == port || opt->port == 0) && 788 if ((opt->port == port || opt->port == 0) &&
788 (opt->station == station || opt->station == 0) && 789 (opt->station == station || opt->station == 0) &&
789 (opt->net == net || opt->net == 0)) 790 (opt->net == net || opt->net == 0)) {
791 sock_hold(sk);
790 goto found; 792 goto found;
793 }
791 } 794 }
792 sk = NULL; 795 sk = NULL;
793found: 796found:
797 spin_unlock(&econet_lock);
794 return sk; 798 return sk;
795} 799}
796 800
@@ -852,7 +856,7 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
852{ 856{
853 struct iphdr *ip = ip_hdr(skb); 857 struct iphdr *ip = ip_hdr(skb);
854 unsigned char stn = ntohl(ip->saddr) & 0xff; 858 unsigned char stn = ntohl(ip->saddr) & 0xff;
855 struct sock *sk; 859 struct sock *sk = NULL;
856 struct sk_buff *newskb; 860 struct sk_buff *newskb;
857 struct ec_device *edev = skb->dev->ec_ptr; 861 struct ec_device *edev = skb->dev->ec_ptr;
858 862
@@ -882,10 +886,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
882 } 886 }
883 887
884 aun_send_response(ip->saddr, ah->handle, 3, 0); 888 aun_send_response(ip->saddr, ah->handle, 3, 0);
889 sock_put(sk);
885 return; 890 return;
886 891
887bad: 892bad:
888 aun_send_response(ip->saddr, ah->handle, 4, 0); 893 aun_send_response(ip->saddr, ah->handle, 4, 0);
894 if (sk)
895 sock_put(sk);
889} 896}
890 897
891/* 898/*
@@ -1050,7 +1057,7 @@ release:
1050static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 1057static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
1051{ 1058{
1052 struct ec_framehdr *hdr; 1059 struct ec_framehdr *hdr;
1053 struct sock *sk; 1060 struct sock *sk = NULL;
1054 struct ec_device *edev = dev->ec_ptr; 1061 struct ec_device *edev = dev->ec_ptr;
1055 1062
1056 if (!net_eq(dev_net(dev), &init_net)) 1063 if (!net_eq(dev_net(dev), &init_net))
@@ -1085,10 +1092,12 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
1085 if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb, 1092 if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb,
1086 hdr->port)) 1093 hdr->port))
1087 goto drop; 1094 goto drop;
1088 1095 sock_put(sk);
1089 return NET_RX_SUCCESS; 1096 return NET_RX_SUCCESS;
1090 1097
1091drop: 1098drop:
1099 if (sk)
1100 sock_put(sk);
1092 kfree_skb(skb); 1101 kfree_skb(skb);
1093 return NET_RX_DROP; 1102 return NET_RX_DROP;
1094} 1103}
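
The econet changes above are the usual take-a-reference-under-the-lock conversion: the lookup pins the sock with sock_hold() before the spinlock is dropped, and every caller balances it with sock_put(). Schematically (generic names, not the exact econet symbols):

	spin_lock(&table_lock);
	sk = lookup_in_list(...);
	if (sk)
		sock_hold(sk);		/* pin before unlocking */
	spin_unlock(&table_lock);

	if (sk) {
		/* ... sk cannot be freed under us here ... */
		sock_put(sk);		/* balance the hold */
	}
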
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 205a1c12f3c0..215c83986a9d 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -136,7 +136,7 @@ int eth_rebuild_header(struct sk_buff *skb)
136 default: 136 default:
137 printk(KERN_DEBUG 137 printk(KERN_DEBUG
138 "%s: unable to resolve type %X addresses.\n", 138 "%s: unable to resolve type %X addresses.\n",
139 dev->name, (int)eth->h_proto); 139 dev->name, ntohs(eth->h_proto));
140 140
141 memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); 141 memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
142 break; 142 break;
@@ -158,11 +158,10 @@ EXPORT_SYMBOL(eth_rebuild_header);
158__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) 158__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
159{ 159{
160 struct ethhdr *eth; 160 struct ethhdr *eth;
161 unsigned char *rawp;
162 161
163 skb->dev = dev; 162 skb->dev = dev;
164 skb_reset_mac_header(skb); 163 skb_reset_mac_header(skb);
165 skb_pull(skb, ETH_HLEN); 164 skb_pull_inline(skb, ETH_HLEN);
166 eth = eth_hdr(skb); 165 eth = eth_hdr(skb);
167 166
168 if (unlikely(is_multicast_ether_addr(eth->h_dest))) { 167 if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
@@ -199,15 +198,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
199 if (ntohs(eth->h_proto) >= 1536) 198 if (ntohs(eth->h_proto) >= 1536)
200 return eth->h_proto; 199 return eth->h_proto;
201 200
202 rawp = skb->data;
203
204 /* 201 /*
205 * This is a magic hack to spot IPX packets. Older Novell breaks 202 * This is a magic hack to spot IPX packets. Older Novell breaks
206 * the protocol design and runs IPX over 802.3 without an 802.2 LLC 203 * the protocol design and runs IPX over 802.3 without an 802.2 LLC
207 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This 204 * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
208 * won't work for fault tolerant netware but does for the rest. 205 * won't work for fault tolerant netware but does for the rest.
209 */ 206 */
210 if (*(unsigned short *)rawp == 0xFFFF) 207 if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF)
211 return htons(ETH_P_802_3); 208 return htons(ETH_P_802_3);
212 209
213 /* 210 /*
diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c
index eb00796758c3..85d574addbc1 100644
--- a/net/ethernet/pe2.c
+++ b/net/ethernet/pe2.c
@@ -28,11 +28,10 @@ struct datalink_proto *make_EII_client(void)
28 28
29 return proto; 29 return proto;
30} 30}
31EXPORT_SYMBOL(make_EII_client);
31 32
32void destroy_EII_client(struct datalink_proto *dl) 33void destroy_EII_client(struct datalink_proto *dl)
33{ 34{
34 kfree(dl); 35 kfree(dl);
35} 36}
36
37EXPORT_SYMBOL(destroy_EII_client); 37EXPORT_SYMBOL(destroy_EII_client);
38EXPORT_SYMBOL(make_EII_client);
diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c
index 3d803a1b9fb6..1627ef2e8522 100644
--- a/net/ieee802154/wpan-class.c
+++ b/net/ieee802154/wpan-class.c
@@ -147,13 +147,15 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size)
147 struct wpan_phy *phy = kzalloc(sizeof(*phy) + priv_size, 147 struct wpan_phy *phy = kzalloc(sizeof(*phy) + priv_size,
148 GFP_KERNEL); 148 GFP_KERNEL);
149 149
150 if (!phy)
151 goto out;
150 mutex_lock(&wpan_phy_mutex); 152 mutex_lock(&wpan_phy_mutex);
151 phy->idx = wpan_phy_idx++; 153 phy->idx = wpan_phy_idx++;
152 if (unlikely(!wpan_phy_idx_valid(phy->idx))) { 154 if (unlikely(!wpan_phy_idx_valid(phy->idx))) {
153 wpan_phy_idx--; 155 wpan_phy_idx--;
154 mutex_unlock(&wpan_phy_mutex); 156 mutex_unlock(&wpan_phy_mutex);
155 kfree(phy); 157 kfree(phy);
156 return NULL; 158 goto out;
157 } 159 }
158 mutex_unlock(&wpan_phy_mutex); 160 mutex_unlock(&wpan_phy_mutex);
159 161
@@ -168,6 +170,9 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size)
168 phy->current_page = 0; /* for compatibility */ 170 phy->current_page = 0; /* for compatibility */
169 171
170 return phy; 172 return phy;
173
174out:
175 return NULL;
171} 176}
172EXPORT_SYMBOL(wpan_phy_alloc); 177EXPORT_SYMBOL(wpan_phy_alloc);
173 178
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 0c94a1ac2946..7c3a7d191249 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -250,6 +250,20 @@ config IP_MROUTE
250 <file:Documentation/networking/multicast.txt>. If you haven't heard 250 <file:Documentation/networking/multicast.txt>. If you haven't heard
251 about it, you don't need it. 251 about it, you don't need it.
252 252
253config IP_MROUTE_MULTIPLE_TABLES
254 bool "IP: multicast policy routing"
255 depends on IP_MROUTE && IP_ADVANCED_ROUTER
256 select FIB_RULES
257 help
258 Normally, a multicast router runs a userspace daemon and decides
259 what to do with a multicast packet based on the source and
260 destination addresses. If you say Y here, the multicast router
261 will also be able to take interfaces and packet marks into
262 account and run multiple instances of userspace daemons
263 simultaneously, each one handling a single table.
264
265 If unsure, say N.
266
253config IP_PIMSM_V1 267config IP_PIMSM_V1
254 bool "IP: PIM-SM version 1 support" 268 bool "IP: PIM-SM version 1 support"
255 depends on IP_MROUTE 269 depends on IP_MROUTE
@@ -289,7 +303,7 @@ config ARPD
289 If unsure, say N. 303 If unsure, say N.
290 304
291config SYN_COOKIES 305config SYN_COOKIES
292 bool "IP: TCP syncookie support (disabled per default)" 306 bool "IP: TCP syncookie support"
293 ---help--- 307 ---help---
294 Normal TCP/IP networking is open to an attack known as "SYN 308 Normal TCP/IP networking is open to an attack known as "SYN
295 flooding". This denial-of-service attack prevents legitimate remote 309 flooding". This denial-of-service attack prevents legitimate remote
@@ -314,13 +328,13 @@ config SYN_COOKIES
314 server is really overloaded. If this happens frequently better turn 328 server is really overloaded. If this happens frequently better turn
315 them off. 329 them off.
316 330
317 If you say Y here, note that SYN cookies aren't enabled by default; 331 If you say Y here, you can disable SYN cookies at run time by
318 you can enable them by saying Y to "/proc file system support" and 332 saying Y to "/proc file system support" and
319 "Sysctl support" below and executing the command 333 "Sysctl support" below and executing the command
320 334
321 echo 1 >/proc/sys/net/ipv4/tcp_syncookies 335 echo 0 > /proc/sys/net/ipv4/tcp_syncookies
322 336
323 at boot time after the /proc file system has been mounted. 337 after the /proc file system has been mounted.
324 338
325 If unsure, say N. 339 If unsure, say N.
326 340
@@ -587,9 +601,15 @@ choice
587 config DEFAULT_HTCP 601 config DEFAULT_HTCP
588 bool "Htcp" if TCP_CONG_HTCP=y 602 bool "Htcp" if TCP_CONG_HTCP=y
589 603
604 config DEFAULT_HYBLA
605 bool "Hybla" if TCP_CONG_HYBLA=y
606
590 config DEFAULT_VEGAS 607 config DEFAULT_VEGAS
591 bool "Vegas" if TCP_CONG_VEGAS=y 608 bool "Vegas" if TCP_CONG_VEGAS=y
592 609
610 config DEFAULT_VENO
611 bool "Veno" if TCP_CONG_VENO=y
612
593 config DEFAULT_WESTWOOD 613 config DEFAULT_WESTWOOD
594 bool "Westwood" if TCP_CONG_WESTWOOD=y 614 bool "Westwood" if TCP_CONG_WESTWOOD=y
595 615
@@ -610,8 +630,10 @@ config DEFAULT_TCP_CONG
610 default "bic" if DEFAULT_BIC 630 default "bic" if DEFAULT_BIC
611 default "cubic" if DEFAULT_CUBIC 631 default "cubic" if DEFAULT_CUBIC
612 default "htcp" if DEFAULT_HTCP 632 default "htcp" if DEFAULT_HTCP
633 default "hybla" if DEFAULT_HYBLA
613 default "vegas" if DEFAULT_VEGAS 634 default "vegas" if DEFAULT_VEGAS
614 default "westwood" if DEFAULT_WESTWOOD 635 default "westwood" if DEFAULT_WESTWOOD
636 default "veno" if DEFAULT_VENO
615 default "reno" if DEFAULT_RENO 637 default "reno" if DEFAULT_RENO
616 default "cubic" 638 default "cubic"
617 639
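The SYN_COOKIES help text now documents cookies as compiled in enabled, with the sysctl used only to turn them off at run time. A hedged sketch of flipping that knob from C rather than the shell; the /proc path is the one quoted in the help text above, everything else is illustrative:

#include <stdio.h>

/* Write "1" (enable) or "0" (disable) to the tcp_syncookies sysctl.
 * Requires root and a mounted /proc file system. */
static int set_syncookies(int on)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_syncookies", "w");

	if (!f)
		return -1;	/* no /proc, or insufficient privilege */
	fprintf(f, "%d\n", on ? 1 : 0);
	return fclose(f);	/* 0 on success */
}

int main(void)
{
	return set_syncookies(0) ? 1 : 0;	/* disable at run time */
}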
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f71357422380..6a1100c25a9f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -154,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
154 WARN_ON(sk->sk_forward_alloc); 154 WARN_ON(sk->sk_forward_alloc);
155 155
156 kfree(inet->opt); 156 kfree(inet->opt);
157 dst_release(sk->sk_dst_cache); 157 dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
158 sk_refcnt_debug_dec(sk); 158 sk_refcnt_debug_dec(sk);
159} 159}
160EXPORT_SYMBOL(inet_sock_destruct); 160EXPORT_SYMBOL(inet_sock_destruct);
@@ -355,6 +355,8 @@ lookup_protocol:
355 inet = inet_sk(sk); 355 inet = inet_sk(sk);
356 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0; 356 inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
357 357
358 inet->nodefrag = 0;
359
358 if (SOCK_RAW == sock->type) { 360 if (SOCK_RAW == sock->type) {
359 inet->inet_num = protocol; 361 inet->inet_num = protocol;
360 if (IPPROTO_RAW == protocol) 362 if (IPPROTO_RAW == protocol)
@@ -419,6 +421,8 @@ int inet_release(struct socket *sock)
419 if (sk) { 421 if (sk) {
420 long timeout; 422 long timeout;
421 423
424 sock_rps_reset_flow(sk);
425
422 /* Applications forget to leave groups before exiting */ 426 /* Applications forget to leave groups before exiting */
423 ip_mc_drop_socket(sk); 427 ip_mc_drop_socket(sk);
424 428
@@ -546,7 +550,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
546{ 550{
547 DEFINE_WAIT(wait); 551 DEFINE_WAIT(wait);
548 552
549 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 553 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
550 554
551 /* Basic assumption: if someone sets sk->sk_err, he _must_ 555 /* Basic assumption: if someone sets sk->sk_err, he _must_
552 * change state of the socket from TCP_SYN_*. 556 * change state of the socket from TCP_SYN_*.
@@ -559,9 +563,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
559 lock_sock(sk); 563 lock_sock(sk);
560 if (signal_pending(current) || !timeo) 564 if (signal_pending(current) || !timeo)
561 break; 565 break;
562 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 566 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
563 } 567 }
564 finish_wait(sk->sk_sleep, &wait); 568 finish_wait(sk_sleep(sk), &wait);
565 return timeo; 569 return timeo;
566} 570}
567 571
@@ -720,29 +724,51 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
720{ 724{
721 struct sock *sk = sock->sk; 725 struct sock *sk = sock->sk;
722 726
727 sock_rps_record_flow(sk);
728
723 /* We may need to bind the socket. */ 729 /* We may need to bind the socket. */
724 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 730 if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
731 inet_autobind(sk))
725 return -EAGAIN; 732 return -EAGAIN;
726 733
727 return sk->sk_prot->sendmsg(iocb, sk, msg, size); 734 return sk->sk_prot->sendmsg(iocb, sk, msg, size);
728} 735}
729EXPORT_SYMBOL(inet_sendmsg); 736EXPORT_SYMBOL(inet_sendmsg);
730 737
731 738ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
732static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, 739 size_t size, int flags)
733 size_t size, int flags)
734{ 740{
735 struct sock *sk = sock->sk; 741 struct sock *sk = sock->sk;
736 742
743 sock_rps_record_flow(sk);
744
737 /* We may need to bind the socket. */ 745 /* We may need to bind the socket. */
738 if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 746 if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
747 inet_autobind(sk))
739 return -EAGAIN; 748 return -EAGAIN;
740 749
741 if (sk->sk_prot->sendpage) 750 if (sk->sk_prot->sendpage)
742 return sk->sk_prot->sendpage(sk, page, offset, size, flags); 751 return sk->sk_prot->sendpage(sk, page, offset, size, flags);
743 return sock_no_sendpage(sock, page, offset, size, flags); 752 return sock_no_sendpage(sock, page, offset, size, flags);
744} 753}
754EXPORT_SYMBOL(inet_sendpage);
745 755
756int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
757 size_t size, int flags)
758{
759 struct sock *sk = sock->sk;
760 int addr_len = 0;
761 int err;
762
763 sock_rps_record_flow(sk);
764
765 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
766 flags & ~MSG_DONTWAIT, &addr_len);
767 if (err >= 0)
768 msg->msg_namelen = addr_len;
769 return err;
770}
771EXPORT_SYMBOL(inet_recvmsg);
746 772
747int inet_shutdown(struct socket *sock, int how) 773int inet_shutdown(struct socket *sock, int how)
748{ 774{
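The wrappers added above (inet_sendmsg, inet_sendpage, inet_recvmsg) exist largely so that every send and receive calls sock_rps_record_flow(), letting Receive Packet Steering learn which CPU consumes each flow; that is also why the proto_ops tables below switch from tcp_sendmsg/sock_common_recvmsg to these wrappers. A toy sketch of the underlying idea, recording the current CPU per flow hash so a steering layer can consult it later; the table and names here are hypothetical, not the kernel's data structures:

#include <stdatomic.h>

#define FLOW_TABLE_SIZE 4096	/* power of two; size is illustrative */

/* desired_cpu[hash] remembers the CPU that last consumed the flow */
static _Atomic int desired_cpu[FLOW_TABLE_SIZE];

static void flow_record(unsigned int flow_hash, int this_cpu)
{
	atomic_store_explicit(&desired_cpu[flow_hash & (FLOW_TABLE_SIZE - 1)],
			      this_cpu, memory_order_relaxed);
}

static int flow_target_cpu(unsigned int flow_hash)
{
	return atomic_load_explicit(
		&desired_cpu[flow_hash & (FLOW_TABLE_SIZE - 1)],
		memory_order_relaxed);
}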
@@ -871,10 +897,10 @@ const struct proto_ops inet_stream_ops = {
871 .shutdown = inet_shutdown, 897 .shutdown = inet_shutdown,
872 .setsockopt = sock_common_setsockopt, 898 .setsockopt = sock_common_setsockopt,
873 .getsockopt = sock_common_getsockopt, 899 .getsockopt = sock_common_getsockopt,
874 .sendmsg = tcp_sendmsg, 900 .sendmsg = inet_sendmsg,
875 .recvmsg = sock_common_recvmsg, 901 .recvmsg = inet_recvmsg,
876 .mmap = sock_no_mmap, 902 .mmap = sock_no_mmap,
877 .sendpage = tcp_sendpage, 903 .sendpage = inet_sendpage,
878 .splice_read = tcp_splice_read, 904 .splice_read = tcp_splice_read,
879#ifdef CONFIG_COMPAT 905#ifdef CONFIG_COMPAT
880 .compat_setsockopt = compat_sock_common_setsockopt, 906 .compat_setsockopt = compat_sock_common_setsockopt,
@@ -899,7 +925,7 @@ const struct proto_ops inet_dgram_ops = {
899 .setsockopt = sock_common_setsockopt, 925 .setsockopt = sock_common_setsockopt,
900 .getsockopt = sock_common_getsockopt, 926 .getsockopt = sock_common_getsockopt,
901 .sendmsg = inet_sendmsg, 927 .sendmsg = inet_sendmsg,
902 .recvmsg = sock_common_recvmsg, 928 .recvmsg = inet_recvmsg,
903 .mmap = sock_no_mmap, 929 .mmap = sock_no_mmap,
904 .sendpage = inet_sendpage, 930 .sendpage = inet_sendpage,
905#ifdef CONFIG_COMPAT 931#ifdef CONFIG_COMPAT
@@ -929,7 +955,7 @@ static const struct proto_ops inet_sockraw_ops = {
929 .setsockopt = sock_common_setsockopt, 955 .setsockopt = sock_common_setsockopt,
930 .getsockopt = sock_common_getsockopt, 956 .getsockopt = sock_common_getsockopt,
931 .sendmsg = inet_sendmsg, 957 .sendmsg = inet_sendmsg,
932 .recvmsg = sock_common_recvmsg, 958 .recvmsg = inet_recvmsg,
933 .mmap = sock_no_mmap, 959 .mmap = sock_no_mmap,
934 .sendpage = inet_sendpage, 960 .sendpage = inet_sendpage,
935#ifdef CONFIG_COMPAT 961#ifdef CONFIG_COMPAT
@@ -1079,7 +1105,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
1079 if (err) 1105 if (err)
1080 return err; 1106 return err;
1081 1107
1082 sk_setup_caps(sk, &rt->u.dst); 1108 sk_setup_caps(sk, &rt->dst);
1083 1109
1084 new_saddr = rt->rt_src; 1110 new_saddr = rt->rt_src;
1085 1111
@@ -1145,7 +1171,7 @@ int inet_sk_rebuild_header(struct sock *sk)
1145 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0); 1171 err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
1146} 1172}
1147 if (!err) 1173 if (!err)
1148 sk_setup_caps(sk, &rt->u.dst); 1174 sk_setup_caps(sk, &rt->dst);
1149 else { 1175 else {
1150 /* Routing failed... */ 1176 /* Routing failed... */
1151 sk->sk_route_caps = 0; 1177 sk->sk_route_caps = 0;
@@ -1302,8 +1328,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1302 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 1328 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
1303 goto out_unlock; 1329 goto out_unlock;
1304 1330
1305 id = ntohl(*(u32 *)&iph->id); 1331 id = ntohl(*(__be32 *)&iph->id);
1306 flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); 1332 flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
1307 id >>= 16; 1333 id >>= 16;
1308 1334
1309 for (p = *head; p; p = p->next) { 1335 for (p = *head; p; p = p->next) {
@@ -1316,8 +1342,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1316 1342
1317 if ((iph->protocol ^ iph2->protocol) | 1343 if ((iph->protocol ^ iph2->protocol) |
1318 (iph->tos ^ iph2->tos) | 1344 (iph->tos ^ iph2->tos) |
1319 (iph->saddr ^ iph2->saddr) | 1345 ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
1320 (iph->daddr ^ iph2->daddr)) { 1346 ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
1321 NAPI_GRO_CB(p)->same_flow = 0; 1347 NAPI_GRO_CB(p)->same_flow = 0;
1322 continue; 1348 continue;
1323 } 1349 }
@@ -1404,13 +1430,49 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt)
1404} 1430}
1405EXPORT_SYMBOL_GPL(snmp_fold_field); 1431EXPORT_SYMBOL_GPL(snmp_fold_field);
1406 1432
1407int snmp_mib_init(void __percpu *ptr[2], size_t mibsize) 1433#if BITS_PER_LONG==32
1434
1435u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
1436{
1437 u64 res = 0;
1438 int cpu;
1439
1440 for_each_possible_cpu(cpu) {
1441 void *bhptr, *userptr;
1442 struct u64_stats_sync *syncp;
1443 u64 v_bh, v_user;
1444 unsigned int start;
1445
1446 /* first mib used by softirq context, we must use _bh() accessors */
1447 bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
1448 syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
1449 do {
1450 start = u64_stats_fetch_begin_bh(syncp);
1451 v_bh = *(((u64 *) bhptr) + offt);
1452 } while (u64_stats_fetch_retry_bh(syncp, start));
1453
1454 /* second mib used in USER context */
1455 userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
1456 syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
1457 do {
1458 start = u64_stats_fetch_begin(syncp);
1459 v_user = *(((u64 *) userptr) + offt);
1460 } while (u64_stats_fetch_retry(syncp, start));
1461
1462 res += v_bh + v_user;
1463 }
1464 return res;
1465}
1466EXPORT_SYMBOL_GPL(snmp_fold_field64);
1467#endif
1468
1469int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
1408{ 1470{
1409 BUG_ON(ptr == NULL); 1471 BUG_ON(ptr == NULL);
1410 ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); 1472 ptr[0] = __alloc_percpu(mibsize, align);
1411 if (!ptr[0]) 1473 if (!ptr[0])
1412 goto err0; 1474 goto err0;
1413 ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); 1475 ptr[1] = __alloc_percpu(mibsize, align);
1414 if (!ptr[1]) 1476 if (!ptr[1])
1415 goto err1; 1477 goto err1;
1416 return 0; 1478 return 0;
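snmp_fold_field64() above can read 64-bit counters on 32-bit hosts without locking because each update is bracketed by a sequence counter: readers retry if the counter was odd or changed across the read. A simplified, single-writer sketch of that retry protocol; the real u64_stats_sync primitives add the exact barriers needed and compile to nothing on 64-bit:

#include <stdatomic.h>
#include <stdint.h>

struct stat64 {
	_Atomic unsigned int seq;	/* odd while an update is in flight */
	uint64_t value;			/* too wide for one 32-bit load */
};

static void stat_add(struct stat64 *s, uint64_t delta)	/* single writer */
{
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_release); /* odd */
	s->value += delta;
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_release); /* even */
}

static uint64_t stat_read(struct stat64 *s)
{
	unsigned int start;
	uint64_t v;

	do {
		start = atomic_load_explicit(&s->seq, memory_order_acquire);
		v = s->value;	/* may tear; the retry below catches it */
	} while ((start & 1) ||
		 start != atomic_load_explicit(&s->seq, memory_order_acquire));
	return v;
}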
@@ -1467,25 +1529,32 @@ static const struct net_protocol icmp_protocol = {
1467static __net_init int ipv4_mib_init_net(struct net *net) 1529static __net_init int ipv4_mib_init_net(struct net *net)
1468{ 1530{
1469 if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics, 1531 if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
1470 sizeof(struct tcp_mib)) < 0) 1532 sizeof(struct tcp_mib),
1533 __alignof__(struct tcp_mib)) < 0)
1471 goto err_tcp_mib; 1534 goto err_tcp_mib;
1472 if (snmp_mib_init((void __percpu **)net->mib.ip_statistics, 1535 if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
1473 sizeof(struct ipstats_mib)) < 0) 1536 sizeof(struct ipstats_mib),
1537 __alignof__(struct ipstats_mib)) < 0)
1474 goto err_ip_mib; 1538 goto err_ip_mib;
1475 if (snmp_mib_init((void __percpu **)net->mib.net_statistics, 1539 if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
1476 sizeof(struct linux_mib)) < 0) 1540 sizeof(struct linux_mib),
1541 __alignof__(struct linux_mib)) < 0)
1477 goto err_net_mib; 1542 goto err_net_mib;
1478 if (snmp_mib_init((void __percpu **)net->mib.udp_statistics, 1543 if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
1479 sizeof(struct udp_mib)) < 0) 1544 sizeof(struct udp_mib),
1545 __alignof__(struct udp_mib)) < 0)
1480 goto err_udp_mib; 1546 goto err_udp_mib;
1481 if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics, 1547 if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
1482 sizeof(struct udp_mib)) < 0) 1548 sizeof(struct udp_mib),
1549 __alignof__(struct udp_mib)) < 0)
1483 goto err_udplite_mib; 1550 goto err_udplite_mib;
1484 if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics, 1551 if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
1485 sizeof(struct icmp_mib)) < 0) 1552 sizeof(struct icmp_mib),
1553 __alignof__(struct icmp_mib)) < 0)
1486 goto err_icmp_mib; 1554 goto err_icmp_mib;
1487 if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, 1555 if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
1488 sizeof(struct icmpmsg_mib)) < 0) 1556 sizeof(struct icmpmsg_mib),
1557 __alignof__(struct icmpmsg_mib)) < 0)
1489 goto err_icmpmsg_mib; 1558 goto err_icmpmsg_mib;
1490 1559
1491 tcp_mib_init(net); 1560 tcp_mib_init(net);
@@ -1552,9 +1621,13 @@ static int __init inet_init(void)
1552 1621
1553 BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); 1622 BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
1554 1623
1624 sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
1625 if (!sysctl_local_reserved_ports)
1626 goto out;
1627
1555 rc = proto_register(&tcp_prot, 1); 1628 rc = proto_register(&tcp_prot, 1);
1556 if (rc) 1629 if (rc)
1557 goto out; 1630 goto out_free_reserved_ports;
1558 1631
1559 rc = proto_register(&udp_prot, 1); 1632 rc = proto_register(&udp_prot, 1);
1560 if (rc) 1633 if (rc)
@@ -1653,6 +1726,8 @@ out_unregister_udp_proto:
1653 proto_unregister(&udp_prot); 1726 proto_unregister(&udp_prot);
1654out_unregister_tcp_proto: 1727out_unregister_tcp_proto:
1655 proto_unregister(&tcp_prot); 1728 proto_unregister(&tcp_prot);
1729out_free_reserved_ports:
1730 kfree(sysctl_local_reserved_ports);
1656 goto out; 1731 goto out;
1657} 1732}
1658 1733
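inet_init() now allocates sysctl_local_reserved_ports as a 65536-bit bitmap (65536 / 8 = 8192 bytes, one bit per port number), and the new error label frees it if proto registration fails; the bind-time consumers appear in the inet_connection_sock.c and inet_hashtables.c hunks further down. A user-space sketch of the bitmap arithmetic, with hypothetical helper names:

#include <stdlib.h>

#define PORT_BITMAP_BYTES (65536 / 8)	/* one bit per TCP/UDP port */

static unsigned char *reserved_ports;

static int port_bitmap_init(void)
{
	reserved_ports = calloc(1, PORT_BITMAP_BYTES);	/* all ports free */
	return reserved_ports ? 0 : -1;
}

static void port_reserve(unsigned int port)
{
	reserved_ports[port >> 3] |= 1u << (port & 7);
}

static int port_is_reserved(unsigned int port)
{
	return (reserved_ports[port >> 3] >> (port & 7)) & 1;
}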
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 6e747065c202..96c1955b3e2f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -116,6 +116,7 @@
116#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) 116#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
117#include <net/atmclip.h> 117#include <net/atmclip.h>
118struct neigh_table *clip_tbl_hook; 118struct neigh_table *clip_tbl_hook;
119EXPORT_SYMBOL(clip_tbl_hook);
119#endif 120#endif
120 121
121#include <asm/system.h> 122#include <asm/system.h>
@@ -169,6 +170,7 @@ const struct neigh_ops arp_broken_ops = {
169 .hh_output = dev_queue_xmit, 170 .hh_output = dev_queue_xmit,
170 .queue_xmit = dev_queue_xmit, 171 .queue_xmit = dev_queue_xmit,
171}; 172};
173EXPORT_SYMBOL(arp_broken_ops);
172 174
173struct neigh_table arp_tbl = { 175struct neigh_table arp_tbl = {
174 .family = AF_INET, 176 .family = AF_INET,
@@ -198,6 +200,7 @@ struct neigh_table arp_tbl = {
198 .gc_thresh2 = 512, 200 .gc_thresh2 = 512,
199 .gc_thresh3 = 1024, 201 .gc_thresh3 = 1024,
200}; 202};
203EXPORT_SYMBOL(arp_tbl);
201 204
202int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) 205int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
203{ 206{
@@ -333,11 +336,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
333 struct net_device *dev = neigh->dev; 336 struct net_device *dev = neigh->dev;
334 __be32 target = *(__be32*)neigh->primary_key; 337 __be32 target = *(__be32*)neigh->primary_key;
335 int probes = atomic_read(&neigh->probes); 338 int probes = atomic_read(&neigh->probes);
336 struct in_device *in_dev = in_dev_get(dev); 339 struct in_device *in_dev;
337 340
338 if (!in_dev) 341 rcu_read_lock();
342 in_dev = __in_dev_get_rcu(dev);
343 if (!in_dev) {
344 rcu_read_unlock();
339 return; 345 return;
340 346 }
341 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 347 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
342 default: 348 default:
343 case 0: /* By default announce any local IP */ 349 case 0: /* By default announce any local IP */
@@ -358,9 +364,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
358 case 2: /* Avoid secondary IPs, get a primary/preferred one */ 364 case 2: /* Avoid secondary IPs, get a primary/preferred one */
359 break; 365 break;
360 } 366 }
367 rcu_read_unlock();
361 368
362 if (in_dev)
363 in_dev_put(in_dev);
364 if (!saddr) 369 if (!saddr)
365 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); 370 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
366 371
@@ -427,7 +432,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
427 432
428 if (ip_route_output_key(net, &rt, &fl) < 0) 433 if (ip_route_output_key(net, &rt, &fl) < 0)
429 return 1; 434 return 1;
430 if (rt->u.dst.dev != dev) { 435 if (rt->dst.dev != dev) {
431 NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER); 436 NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
432 flag = 1; 437 flag = 1;
433 } 438 }
@@ -497,6 +502,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
497 kfree_skb(skb); 502 kfree_skb(skb);
498 return 1; 503 return 1;
499} 504}
505EXPORT_SYMBOL(arp_find);
500 506
501/* END OF OBSOLETE FUNCTIONS */ 507/* END OF OBSOLETE FUNCTIONS */
502 508
@@ -532,7 +538,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
532 struct in_device *out_dev; 538 struct in_device *out_dev;
533 int imi, omi = -1; 539 int imi, omi = -1;
534 540
535 if (rt->u.dst.dev == dev) 541 if (rt->dst.dev == dev)
536 return 0; 542 return 0;
537 543
538 if (!IN_DEV_PROXY_ARP(in_dev)) 544 if (!IN_DEV_PROXY_ARP(in_dev))
@@ -545,10 +551,10 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
545 551
546 /* place to check for proxy_arp for routes */ 552 /* place to check for proxy_arp for routes */
547 553
548 if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) { 554 out_dev = __in_dev_get_rcu(rt->dst.dev);
555 if (out_dev)
549 omi = IN_DEV_MEDIUM_ID(out_dev); 556 omi = IN_DEV_MEDIUM_ID(out_dev);
550 in_dev_put(out_dev); 557
551 }
552 return (omi != imi && omi != -1); 558 return (omi != imi && omi != -1);
553} 559}
554 560
@@ -576,7 +582,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev,
576 __be32 sip, __be32 tip) 582 __be32 sip, __be32 tip)
577{ 583{
578 /* Private VLAN is only concerned about the same ethernet segment */ 584 /* Private VLAN is only concerned about the same ethernet segment */
579 if (rt->u.dst.dev != dev) 585 if (rt->dst.dev != dev)
580 return 0; 586 return 0;
581 587
582 /* Don't reply to self probes (often done by windowz boxes) */ 588 /* Don't reply to self probes (often done by windowz boxes) */
@@ -661,13 +667,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
661#endif 667#endif
662#endif 668#endif
663 669
664#ifdef CONFIG_FDDI 670#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE)
665 case ARPHRD_FDDI: 671 case ARPHRD_FDDI:
666 arp->ar_hrd = htons(ARPHRD_ETHER); 672 arp->ar_hrd = htons(ARPHRD_ETHER);
667 arp->ar_pro = htons(ETH_P_IP); 673 arp->ar_pro = htons(ETH_P_IP);
668 break; 674 break;
669#endif 675#endif
670#ifdef CONFIG_TR 676#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE)
671 case ARPHRD_IEEE802_TR: 677 case ARPHRD_IEEE802_TR:
672 arp->ar_hrd = htons(ARPHRD_IEEE802); 678 arp->ar_hrd = htons(ARPHRD_IEEE802);
673 arp->ar_pro = htons(ETH_P_IP); 679 arp->ar_pro = htons(ETH_P_IP);
@@ -698,6 +704,7 @@ out:
698 kfree_skb(skb); 704 kfree_skb(skb);
699 return NULL; 705 return NULL;
700} 706}
707EXPORT_SYMBOL(arp_create);
701 708
702/* 709/*
703 * Send an arp packet. 710 * Send an arp packet.
@@ -707,6 +714,7 @@ void arp_xmit(struct sk_buff *skb)
707 /* Send it off, maybe filter it using firewalling first. */ 714 /* Send it off, maybe filter it using firewalling first. */
708 NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); 715 NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
709} 716}
717EXPORT_SYMBOL(arp_xmit);
710 718
711/* 719/*
712 * Create and send an arp packet. 720 * Create and send an arp packet.
@@ -733,6 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
733 741
734 arp_xmit(skb); 742 arp_xmit(skb);
735} 743}
744EXPORT_SYMBOL(arp_send);
736 745
737/* 746/*
738 * Process an arp request. 747 * Process an arp request.
@@ -741,7 +750,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
741static int arp_process(struct sk_buff *skb) 750static int arp_process(struct sk_buff *skb)
742{ 751{
743 struct net_device *dev = skb->dev; 752 struct net_device *dev = skb->dev;
744 struct in_device *in_dev = in_dev_get(dev); 753 struct in_device *in_dev = __in_dev_get_rcu(dev);
745 struct arphdr *arp; 754 struct arphdr *arp;
746 unsigned char *arp_ptr; 755 unsigned char *arp_ptr;
747 struct rtable *rt; 756 struct rtable *rt;
@@ -854,7 +863,7 @@ static int arp_process(struct sk_buff *skb)
854 } 863 }
855 864
856 if (arp->ar_op == htons(ARPOP_REQUEST) && 865 if (arp->ar_op == htons(ARPOP_REQUEST) &&
857 ip_route_input(skb, tip, sip, 0, dev) == 0) { 866 ip_route_input_noref(skb, tip, sip, 0, dev) == 0) {
858 867
859 rt = skb_rtable(skb); 868 rt = skb_rtable(skb);
860 addr_type = rt->rt_type; 869 addr_type = rt->rt_type;
@@ -890,7 +899,6 @@ static int arp_process(struct sk_buff *skb)
890 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 899 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
891 } else { 900 } else {
892 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); 901 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
893 in_dev_put(in_dev);
894 return 0; 902 return 0;
895 } 903 }
896 goto out; 904 goto out;
@@ -936,8 +944,6 @@ static int arp_process(struct sk_buff *skb)
936 } 944 }
937 945
938out: 946out:
939 if (in_dev)
940 in_dev_put(in_dev);
941 consume_skb(skb); 947 consume_skb(skb);
942 return 0; 948 return 0;
943} 949}
@@ -1045,13 +1051,13 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1045 struct rtable * rt; 1051 struct rtable * rt;
1046 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1052 if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
1047 return err; 1053 return err;
1048 dev = rt->u.dst.dev; 1054 dev = rt->dst.dev;
1049 ip_rt_put(rt); 1055 ip_rt_put(rt);
1050 if (!dev) 1056 if (!dev)
1051 return -EINVAL; 1057 return -EINVAL;
1052 } 1058 }
1053 switch (dev->type) { 1059 switch (dev->type) {
1054#ifdef CONFIG_FDDI 1060#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE)
1055 case ARPHRD_FDDI: 1061 case ARPHRD_FDDI:
1056 /* 1062 /*
1057 * According to RFC 1390, FDDI devices should accept ARP 1063 * According to RFC 1390, FDDI devices should accept ARP
@@ -1152,7 +1158,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1152 struct rtable * rt; 1158 struct rtable * rt;
1153 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1159 if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
1154 return err; 1160 return err;
1155 dev = rt->u.dst.dev; 1161 dev = rt->dst.dev;
1156 ip_rt_put(rt); 1162 ip_rt_put(rt);
1157 if (!dev) 1163 if (!dev)
1158 return -EINVAL; 1164 return -EINVAL;
@@ -1453,14 +1459,3 @@ static int __init arp_proc_init(void)
1453} 1459}
1454 1460
1455#endif /* CONFIG_PROC_FS */ 1461#endif /* CONFIG_PROC_FS */
1456
1457EXPORT_SYMBOL(arp_broken_ops);
1458EXPORT_SYMBOL(arp_find);
1459EXPORT_SYMBOL(arp_create);
1460EXPORT_SYMBOL(arp_xmit);
1461EXPORT_SYMBOL(arp_send);
1462EXPORT_SYMBOL(arp_tbl);
1463
1464#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
1465EXPORT_SYMBOL(clip_tbl_hook);
1466#endif
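The arp.c changes above are largely one conversion applied repeatedly: reference-counted in_dev_get()/in_dev_put() pairs become rcu_read_lock()/__in_dev_get_rcu()/rcu_read_unlock() sections, trading a per-packet atomic refcount for a lock-free read-side critical section. A compilable user-space sketch of the resulting shape, using liburcu as a stand-in for the kernel primitives (struct in_dev_like and read_arp_announce are hypothetical; link with -lurcu):

#include <urcu.h>	/* userspace RCU; mirrors rcu_read_lock() et al. */
#include <stdio.h>

struct in_dev_like { int arp_announce; };

static struct in_dev_like default_cfg = { .arp_announce = 0 };
static struct in_dev_like *global_in_dev = &default_cfg;

static int read_arp_announce(void)
{
	struct in_dev_like *in_dev;
	int val = -1;

	rcu_read_lock();			/* no refcount taken */
	in_dev = rcu_dereference(global_in_dev);
	if (in_dev)
		val = in_dev->arp_announce;	/* valid only inside section */
	rcu_read_unlock();
	return val;
}

int main(void)
{
	rcu_register_thread();	/* userspace RCU readers must register */
	printf("arp_announce=%d\n", read_arp_announce());
	rcu_unregister_thread();
	return 0;
}

The same pattern explains the igmp.c hunk further down, where igmp_rcv() loses its drop_ref label because __in_dev_get_rcu() leaves nothing to release.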
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index c97cd9ff697e..3a92a76ae41d 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -290,8 +290,6 @@ void cipso_v4_cache_invalidate(void)
290 cipso_v4_cache[iter].size = 0; 290 cipso_v4_cache[iter].size = 0;
291 spin_unlock_bh(&cipso_v4_cache[iter].lock); 291 spin_unlock_bh(&cipso_v4_cache[iter].lock);
292 } 292 }
293
294 return;
295} 293}
296 294
297/** 295/**
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index fb2465811b48..f0550941df7b 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -69,9 +69,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
69 sk->sk_state = TCP_ESTABLISHED; 69 sk->sk_state = TCP_ESTABLISHED;
70 inet->inet_id = jiffies; 70 inet->inet_id = jiffies;
71 71
72 sk_dst_set(sk, &rt->u.dst); 72 sk_dst_set(sk, &rt->dst);
73 return(0); 73 return(0);
74} 74}
75
76EXPORT_SYMBOL(ip4_datagram_connect); 75EXPORT_SYMBOL(ip4_datagram_connect);
77
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 90e3d6379a42..da14c49284f4 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1081,6 +1081,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1081 } 1081 }
1082 ip_mc_up(in_dev); 1082 ip_mc_up(in_dev);
1083 /* fall through */ 1083 /* fall through */
1084 case NETDEV_NOTIFY_PEERS:
1084 case NETDEV_CHANGEADDR: 1085 case NETDEV_CHANGEADDR:
1085 /* Send gratuitous ARP to notify of link change */ 1086 /* Send gratuitous ARP to notify of link change */
1086 if (IN_DEV_ARP_NOTIFY(in_dev)) { 1087 if (IN_DEV_ARP_NOTIFY(in_dev)) {
@@ -1096,10 +1097,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1096 case NETDEV_DOWN: 1097 case NETDEV_DOWN:
1097 ip_mc_down(in_dev); 1098 ip_mc_down(in_dev);
1098 break; 1099 break;
1099 case NETDEV_BONDING_OLDTYPE: 1100 case NETDEV_PRE_TYPE_CHANGE:
1100 ip_mc_unmap(in_dev); 1101 ip_mc_unmap(in_dev);
1101 break; 1102 break;
1102 case NETDEV_BONDING_NEWTYPE: 1103 case NETDEV_POST_TYPE_CHANGE:
1103 ip_mc_remap(in_dev); 1104 ip_mc_remap(in_dev);
1104 break; 1105 break;
1105 case NETDEV_CHANGEMTU: 1106 case NETDEV_CHANGEMTU:
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4f0ed458c883..a43968918350 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -175,6 +175,7 @@ out:
175 fib_res_put(&res); 175 fib_res_put(&res);
176 return dev; 176 return dev;
177} 177}
178EXPORT_SYMBOL(ip_dev_find);
178 179
179/* 180/*
180 * Find address type as if only "dev" was present in the system. If 181 * Find address type as if only "dev" was present in the system. If
@@ -214,12 +215,14 @@ unsigned int inet_addr_type(struct net *net, __be32 addr)
214{ 215{
215 return __inet_dev_addr_type(net, NULL, addr); 216 return __inet_dev_addr_type(net, NULL, addr);
216} 217}
218EXPORT_SYMBOL(inet_addr_type);
217 219
218unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
219 __be32 addr) 221 __be32 addr)
220{ 222{
221 return __inet_dev_addr_type(net, dev, addr); 223 return __inet_dev_addr_type(net, dev, addr);
222} 224}
225EXPORT_SYMBOL(inet_dev_addr_type);
223 226
224/* Given (packet source, input interface) and optional (dst, oif, tos): 227/* Given (packet source, input interface) and optional (dst, oif, tos):
225 - (main) check, that source is valid i.e. not broadcast or our local 228 - (main) check, that source is valid i.e. not broadcast or our local
@@ -284,7 +287,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
284 if (no_addr) 287 if (no_addr)
285 goto last_resort; 288 goto last_resort;
286 if (rpf == 1) 289 if (rpf == 1)
287 goto e_inval; 290 goto e_rpf;
288 fl.oif = dev->ifindex; 291 fl.oif = dev->ifindex;
289 292
290 ret = 0; 293 ret = 0;
@@ -299,7 +302,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
299 302
300last_resort: 303last_resort:
301 if (rpf) 304 if (rpf)
302 goto e_inval; 305 goto e_rpf;
303 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 306 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
304 *itag = 0; 307 *itag = 0;
305 return 0; 308 return 0;
@@ -308,6 +311,8 @@ e_inval_res:
308 fib_res_put(&res); 311 fib_res_put(&res);
309e_inval: 312e_inval:
310 return -EINVAL; 313 return -EINVAL;
314e_rpf:
315 return -EXDEV;
311} 316}
312 317
313static inline __be32 sk_extract_addr(struct sockaddr *addr) 318static inline __be32 sk_extract_addr(struct sockaddr *addr)
@@ -1075,7 +1080,3 @@ void __init ip_fib_init(void)
1075 1080
1076 fib_hash_init(); 1081 fib_hash_init();
1077} 1082}
1078
1079EXPORT_SYMBOL(inet_addr_type);
1080EXPORT_SYMBOL(inet_dev_addr_type);
1081EXPORT_SYMBOL(ip_dev_find);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index ca2d07b1c706..76daeb5ff564 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -213,7 +213,6 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
213{ 213{
214 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 214 struct fib4_rule *rule4 = (struct fib4_rule *) rule;
215 215
216 frh->family = AF_INET;
217 frh->dst_len = rule4->dst_len; 216 frh->dst_len = rule4->dst_len;
218 frh->src_len = rule4->src_len; 217 frh->src_len = rule4->src_len;
219 frh->tos = rule4->tos; 218 frh->tos = rule4->tos;
@@ -234,23 +233,6 @@ nla_put_failure:
234 return -ENOBUFS; 233 return -ENOBUFS;
235} 234}
236 235
237static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
238{
239 struct list_head *pos;
240 struct fib_rule *rule;
241
242 if (!list_empty(&ops->rules_list)) {
243 pos = ops->rules_list.next;
244 if (pos->next != &ops->rules_list) {
245 rule = list_entry(pos->next, struct fib_rule, list);
246 if (rule->pref)
247 return rule->pref - 1;
248 }
249 }
250
251 return 0;
252}
253
254static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule) 236static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
255{ 237{
256 return nla_total_size(4) /* dst */ 238 return nla_total_size(4) /* dst */
@@ -263,7 +245,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
263 rt_cache_flush(ops->fro_net, -1); 245 rt_cache_flush(ops->fro_net, -1);
264} 246}
265 247
266static struct fib_rules_ops fib4_rules_ops_template = { 248static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
267 .family = AF_INET, 249 .family = AF_INET,
268 .rule_size = sizeof(struct fib4_rule), 250 .rule_size = sizeof(struct fib4_rule),
269 .addr_size = sizeof(u32), 251 .addr_size = sizeof(u32),
@@ -272,7 +254,7 @@ static struct fib_rules_ops fib4_rules_ops_template = {
272 .configure = fib4_rule_configure, 254 .configure = fib4_rule_configure,
273 .compare = fib4_rule_compare, 255 .compare = fib4_rule_compare,
274 .fill = fib4_rule_fill, 256 .fill = fib4_rule_fill,
275 .default_pref = fib4_rule_default_pref, 257 .default_pref = fib_default_rule_pref,
276 .nlmsg_payload = fib4_rule_nlmsg_payload, 258 .nlmsg_payload = fib4_rule_nlmsg_payload,
277 .flush_cache = fib4_rule_flush_cache, 259 .flush_cache = fib4_rule_flush_cache,
278 .nlgroup = RTNLGRP_IPV4_RULE, 260 .nlgroup = RTNLGRP_IPV4_RULE,
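The deleted fib4_rule_default_pref() is replaced by the shared fib_default_rule_pref() helper; judging by the removed body, the policy is to hand a new rule a preference one below the second entry of the sorted list, falling back to 0. A standalone sketch of that selection over a singly linked rule list (types and names hypothetical):

struct rule {
	unsigned int pref;	/* lower value = higher priority */
	struct rule *next;
};

/* first = first rule in the list (the list-head sentinel is already
 * skipped). Mirrors the removed fib4_rule_default_pref(): look at the
 * second rule and slot the newcomer just above it, or return 0. */
static unsigned int default_rule_pref(const struct rule *first)
{
	if (first && first->next && first->next->pref)
		return first->next->pref - 1;
	return 0;
}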
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c98f115fb0fd..79d057a939ba 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1022,8 +1022,6 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
1022 1022
1023 rcu_assign_pointer(t->trie, (struct node *)tn); 1023 rcu_assign_pointer(t->trie, (struct node *)tn);
1024 tnode_free_flush(); 1024 tnode_free_flush();
1025
1026 return;
1027} 1025}
1028 1026
1029/* only used from updater-side */ 1027/* only used from updater-side */
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ac4dec132735..a0d847c7cba5 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -181,6 +181,7 @@ const struct icmp_err icmp_err_convert[] = {
181 .fatal = 1, 181 .fatal = 1,
182 }, 182 },
183}; 183};
184EXPORT_SYMBOL(icmp_err_convert);
184 185
185/* 186/*
186 * ICMP control array. This specifies what to do with each ICMP. 187 * ICMP control array. This specifies what to do with each ICMP.
@@ -267,11 +268,12 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
267 dst->rate_tokens = token; 268 dst->rate_tokens = token;
268 return rc; 269 return rc;
269} 270}
271EXPORT_SYMBOL(xrlim_allow);
270 272
271static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, 273static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
272 int type, int code) 274 int type, int code)
273{ 275{
274 struct dst_entry *dst = &rt->u.dst; 276 struct dst_entry *dst = &rt->dst;
275 int rc = 1; 277 int rc = 1;
276 278
277 if (type > NR_ICMP_TYPES) 279 if (type > NR_ICMP_TYPES)
@@ -327,13 +329,14 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
327 struct sock *sk; 329 struct sock *sk;
328 struct sk_buff *skb; 330 struct sk_buff *skb;
329 331
330 sk = icmp_sk(dev_net((*rt)->u.dst.dev)); 332 sk = icmp_sk(dev_net((*rt)->dst.dev));
331 if (ip_append_data(sk, icmp_glue_bits, icmp_param, 333 if (ip_append_data(sk, icmp_glue_bits, icmp_param,
332 icmp_param->data_len+icmp_param->head_len, 334 icmp_param->data_len+icmp_param->head_len,
333 icmp_param->head_len, 335 icmp_param->head_len,
334 ipc, rt, MSG_DONTWAIT) < 0) 336 ipc, rt, MSG_DONTWAIT) < 0) {
337 ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
335 ip_flush_pending_frames(sk); 338 ip_flush_pending_frames(sk);
336 else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { 339 } else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
337 struct icmphdr *icmph = icmp_hdr(skb); 340 struct icmphdr *icmph = icmp_hdr(skb);
338 __wsum csum = 0; 341 __wsum csum = 0;
339 struct sk_buff *skb1; 342 struct sk_buff *skb1;
@@ -358,7 +361,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
358{ 361{
359 struct ipcm_cookie ipc; 362 struct ipcm_cookie ipc;
360 struct rtable *rt = skb_rtable(skb); 363 struct rtable *rt = skb_rtable(skb);
361 struct net *net = dev_net(rt->u.dst.dev); 364 struct net *net = dev_net(rt->dst.dev);
362 struct sock *sk; 365 struct sock *sk;
363 struct inet_sock *inet; 366 struct inet_sock *inet;
364 __be32 daddr; 367 __be32 daddr;
@@ -426,7 +429,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
426 429
427 if (!rt) 430 if (!rt)
428 goto out; 431 goto out;
429 net = dev_net(rt->u.dst.dev); 432 net = dev_net(rt->dst.dev);
430 433
431 /* 434 /*
432 * Find the original header. It is expected to be valid, of course. 435 * Find the original header. It is expected to be valid, of course.
@@ -586,20 +589,20 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
586 err = __ip_route_output_key(net, &rt2, &fl); 589 err = __ip_route_output_key(net, &rt2, &fl);
587 else { 590 else {
588 struct flowi fl2 = {}; 591 struct flowi fl2 = {};
589 struct dst_entry *odst; 592 unsigned long orefdst;
590 593
591 fl2.fl4_dst = fl.fl4_src; 594 fl2.fl4_dst = fl.fl4_src;
592 if (ip_route_output_key(net, &rt2, &fl2)) 595 if (ip_route_output_key(net, &rt2, &fl2))
593 goto relookup_failed; 596 goto relookup_failed;
594 597
595 /* Ugh! */ 598 /* Ugh! */
596 odst = skb_dst(skb_in); 599 orefdst = skb_in->_skb_refdst; /* save old refdst */
597 err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src, 600 err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
598 RT_TOS(tos), rt2->u.dst.dev); 601 RT_TOS(tos), rt2->dst.dev);
599 602
600 dst_release(&rt2->u.dst); 603 dst_release(&rt2->dst);
601 rt2 = skb_rtable(skb_in); 604 rt2 = skb_rtable(skb_in);
602 skb_dst_set(skb_in, odst); 605 skb_in->_skb_refdst = orefdst; /* restore old refdst */
603 } 606 }
604 607
605 if (err) 608 if (err)
@@ -609,7 +612,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
609 XFRM_LOOKUP_ICMP); 612 XFRM_LOOKUP_ICMP);
610 switch (err) { 613 switch (err) {
611 case 0: 614 case 0:
612 dst_release(&rt->u.dst); 615 dst_release(&rt->dst);
613 rt = rt2; 616 rt = rt2;
614 break; 617 break;
615 case -EPERM: 618 case -EPERM:
@@ -628,7 +631,7 @@ route_done:
628 631
629 /* RFC says return as much as we can without exceeding 576 bytes. */ 632 /* RFC says return as much as we can without exceeding 576 bytes. */
630 633
631 room = dst_mtu(&rt->u.dst); 634 room = dst_mtu(&rt->dst);
632 if (room > 576) 635 if (room > 576)
633 room = 576; 636 room = 576;
634 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; 637 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
@@ -646,6 +649,7 @@ out_unlock:
646 icmp_xmit_unlock(sk); 649 icmp_xmit_unlock(sk);
647out:; 650out:;
648} 651}
652EXPORT_SYMBOL(icmp_send);
649 653
650 654
651/* 655/*
@@ -924,6 +928,7 @@ static void icmp_address(struct sk_buff *skb)
924/* 928/*
925 * RFC1812 (4.3.3.9). A router SHOULD listen to all replies, and complain 929 * RFC1812 (4.3.3.9). A router SHOULD listen to all replies, and complain
926 * loudly if an inconsistency is found. 930 * loudly if an inconsistency is found.
931 * called with rcu_read_lock()
927 */ 932 */
928 933
929static void icmp_address_reply(struct sk_buff *skb) 934static void icmp_address_reply(struct sk_buff *skb)
@@ -934,12 +939,12 @@ static void icmp_address_reply(struct sk_buff *skb)
934 struct in_ifaddr *ifa; 939 struct in_ifaddr *ifa;
935 940
936 if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC)) 941 if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC))
937 goto out; 942 return;
938 943
939 in_dev = in_dev_get(dev); 944 in_dev = __in_dev_get_rcu(dev);
940 if (!in_dev) 945 if (!in_dev)
941 goto out; 946 return;
942 rcu_read_lock(); 947
943 if (in_dev->ifa_list && 948 if (in_dev->ifa_list &&
944 IN_DEV_LOG_MARTIANS(in_dev) && 949 IN_DEV_LOG_MARTIANS(in_dev) &&
945 IN_DEV_FORWARD(in_dev)) { 950 IN_DEV_FORWARD(in_dev)) {
@@ -957,9 +962,6 @@ static void icmp_address_reply(struct sk_buff *skb)
957 mp, dev->name, &rt->rt_src); 962 mp, dev->name, &rt->rt_src);
958 } 963 }
959 } 964 }
960 rcu_read_unlock();
961 in_dev_put(in_dev);
962out:;
963} 965}
964 966
965static void icmp_discard(struct sk_buff *skb) 967static void icmp_discard(struct sk_buff *skb)
@@ -973,7 +975,7 @@ int icmp_rcv(struct sk_buff *skb)
973{ 975{
974 struct icmphdr *icmph; 976 struct icmphdr *icmph;
975 struct rtable *rt = skb_rtable(skb); 977 struct rtable *rt = skb_rtable(skb);
976 struct net *net = dev_net(rt->u.dst.dev); 978 struct net *net = dev_net(rt->dst.dev);
977 979
978 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 980 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
979 struct sec_path *sp = skb_sec_path(skb); 981 struct sec_path *sp = skb_sec_path(skb);
@@ -1215,7 +1217,3 @@ int __init icmp_init(void)
1215{ 1217{
1216 return register_pernet_subsys(&icmp_sk_ops); 1218 return register_pernet_subsys(&icmp_sk_ops);
1217} 1219}
1218
1219EXPORT_SYMBOL(icmp_err_convert);
1220EXPORT_SYMBOL(icmp_send);
1221EXPORT_SYMBOL(xrlim_allow);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 15d3eeda92f5..a1ad0e7180d2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
312 return NULL; 312 return NULL;
313 } 313 }
314 314
315 skb_dst_set(skb, &rt->u.dst); 315 skb_dst_set(skb, &rt->dst);
316 skb->dev = dev; 316 skb->dev = dev;
317 317
318 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 318 skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
330 pip->saddr = rt->rt_src; 330 pip->saddr = rt->rt_src;
331 pip->protocol = IPPROTO_IGMP; 331 pip->protocol = IPPROTO_IGMP;
332 pip->tot_len = 0; /* filled in later */ 332 pip->tot_len = 0; /* filled in later */
333 ip_select_ident(pip, &rt->u.dst, NULL); 333 ip_select_ident(pip, &rt->dst, NULL);
334 ((u8*)&pip[1])[0] = IPOPT_RA; 334 ((u8*)&pip[1])[0] = IPOPT_RA;
335 ((u8*)&pip[1])[1] = 4; 335 ((u8*)&pip[1])[1] = 4;
336 ((u8*)&pip[1])[2] = 0; 336 ((u8*)&pip[1])[2] = 0;
@@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
660 return -1; 660 return -1;
661 } 661 }
662 662
663 skb_dst_set(skb, &rt->u.dst); 663 skb_dst_set(skb, &rt->dst);
664 664
665 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 665 skb_reserve(skb, LL_RESERVED_SPACE(dev));
666 666
@@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
676 iph->daddr = dst; 676 iph->daddr = dst;
677 iph->saddr = rt->rt_src; 677 iph->saddr = rt->rt_src;
678 iph->protocol = IPPROTO_IGMP; 678 iph->protocol = IPPROTO_IGMP;
679 ip_select_ident(iph, &rt->u.dst, NULL); 679 ip_select_ident(iph, &rt->dst, NULL);
680 ((u8*)&iph[1])[0] = IPOPT_RA; 680 ((u8*)&iph[1])[0] = IPOPT_RA;
681 ((u8*)&iph[1])[1] = 4; 681 ((u8*)&iph[1])[1] = 4;
682 ((u8*)&iph[1])[2] = 0; 682 ((u8*)&iph[1])[2] = 0;
@@ -916,18 +916,19 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
916 read_unlock(&in_dev->mc_list_lock); 916 read_unlock(&in_dev->mc_list_lock);
917} 917}
918 918
919/* called in rcu_read_lock() section */
919int igmp_rcv(struct sk_buff *skb) 920int igmp_rcv(struct sk_buff *skb)
920{ 921{
921 /* This basically follows the spec line by line -- see RFC1112 */ 922 /* This basically follows the spec line by line -- see RFC1112 */
922 struct igmphdr *ih; 923 struct igmphdr *ih;
923 struct in_device *in_dev = in_dev_get(skb->dev); 924 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
924 int len = skb->len; 925 int len = skb->len;
925 926
926 if (in_dev == NULL) 927 if (in_dev == NULL)
927 goto drop; 928 goto drop;
928 929
929 if (!pskb_may_pull(skb, sizeof(struct igmphdr))) 930 if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
930 goto drop_ref; 931 goto drop;
931 932
932 switch (skb->ip_summed) { 933 switch (skb->ip_summed) {
933 case CHECKSUM_COMPLETE: 934 case CHECKSUM_COMPLETE:
@@ -937,7 +938,7 @@ int igmp_rcv(struct sk_buff *skb)
937 case CHECKSUM_NONE: 938 case CHECKSUM_NONE:
938 skb->csum = 0; 939 skb->csum = 0;
939 if (__skb_checksum_complete(skb)) 940 if (__skb_checksum_complete(skb))
940 goto drop_ref; 941 goto drop;
941 } 942 }
942 943
943 ih = igmp_hdr(skb); 944 ih = igmp_hdr(skb);
@@ -957,7 +958,6 @@ int igmp_rcv(struct sk_buff *skb)
957 break; 958 break;
958 case IGMP_PIM: 959 case IGMP_PIM:
959#ifdef CONFIG_IP_PIMSM_V1 960#ifdef CONFIG_IP_PIMSM_V1
960 in_dev_put(in_dev);
961 return pim_rcv_v1(skb); 961 return pim_rcv_v1(skb);
962#endif 962#endif
963 case IGMPV3_HOST_MEMBERSHIP_REPORT: 963 case IGMPV3_HOST_MEMBERSHIP_REPORT:
@@ -971,8 +971,6 @@ int igmp_rcv(struct sk_buff *skb)
971 break; 971 break;
972 } 972 }
973 973
974drop_ref:
975 in_dev_put(in_dev);
976drop: 974drop:
977 kfree_skb(skb); 975 kfree_skb(skb);
978 return 0; 976 return 0;
@@ -998,7 +996,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
998 --ANK 996 --ANK
999 */ 997 */
1000 if (arp_mc_map(addr, buf, dev, 0) == 0) 998 if (arp_mc_map(addr, buf, dev, 0) == 0)
1001 dev_mc_add(dev, buf, dev->addr_len, 0); 999 dev_mc_add(dev, buf);
1002} 1000}
1003 1001
1004/* 1002/*
@@ -1011,7 +1009,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
1011 struct net_device *dev = in_dev->dev; 1009 struct net_device *dev = in_dev->dev;
1012 1010
1013 if (arp_mc_map(addr, buf, dev, 0) == 0) 1011 if (arp_mc_map(addr, buf, dev, 0) == 0)
1014 dev_mc_delete(dev, buf, dev->addr_len, 0); 1012 dev_mc_del(dev, buf);
1015} 1013}
1016 1014
1017#ifdef CONFIG_IP_MULTICAST 1015#ifdef CONFIG_IP_MULTICAST
@@ -1246,6 +1244,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
1246out: 1244out:
1247 return; 1245 return;
1248} 1246}
1247EXPORT_SYMBOL(ip_mc_inc_group);
1249 1248
1250/* 1249/*
1251 * Resend IGMP JOIN report; used for bonding. 1250 * Resend IGMP JOIN report; used for bonding.
@@ -1268,6 +1267,7 @@ void ip_mc_rejoin_group(struct ip_mc_list *im)
1268 igmp_ifc_event(in_dev); 1267 igmp_ifc_event(in_dev);
1269#endif 1268#endif
1270} 1269}
1270EXPORT_SYMBOL(ip_mc_rejoin_group);
1271 1271
1272/* 1272/*
1273 * A socket has left a multicast group on device dev 1273 * A socket has left a multicast group on device dev
@@ -1298,6 +1298,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
1298 } 1298 }
1299 } 1299 }
1300} 1300}
1301EXPORT_SYMBOL(ip_mc_dec_group);
1301 1302
1302/* Device changing type */ 1303/* Device changing type */
1303 1304
@@ -1427,7 +1428,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1427 } 1428 }
1428 1429
1429 if (!dev && !ip_route_output_key(net, &rt, &fl)) { 1430 if (!dev && !ip_route_output_key(net, &rt, &fl)) {
1430 dev = rt->u.dst.dev; 1431 dev = rt->dst.dev;
1431 ip_rt_put(rt); 1432 ip_rt_put(rt);
1432 } 1433 }
1433 if (dev) { 1434 if (dev) {
@@ -1646,8 +1647,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
1646 if (dpsf->sf_inaddr == psf->sf_inaddr) 1647 if (dpsf->sf_inaddr == psf->sf_inaddr)
1647 break; 1648 break;
1648 if (!dpsf) { 1649 if (!dpsf) {
1649 dpsf = (struct ip_sf_list *) 1650 dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
1650 kmalloc(sizeof(*dpsf), GFP_ATOMIC);
1651 if (!dpsf) 1651 if (!dpsf)
1652 continue; 1652 continue;
1653 *dpsf = *psf; 1653 *dpsf = *psf;
@@ -1807,6 +1807,7 @@ done:
1807 rtnl_unlock(); 1807 rtnl_unlock();
1808 return err; 1808 return err;
1809} 1809}
1810EXPORT_SYMBOL(ip_mc_join_group);
1810 1811
1811static void ip_sf_socklist_reclaim(struct rcu_head *rp) 1812static void ip_sf_socklist_reclaim(struct rcu_head *rp)
1812{ 1813{
@@ -2679,8 +2680,3 @@ int __init igmp_mc_proc_init(void)
2679 return register_pernet_subsys(&igmp_net_ops); 2680 return register_pernet_subsys(&igmp_net_ops);
2680} 2681}
2681#endif 2682#endif
2682
2683EXPORT_SYMBOL(ip_mc_dec_group);
2684EXPORT_SYMBOL(ip_mc_inc_group);
2685EXPORT_SYMBOL(ip_mc_join_group);
2686EXPORT_SYMBOL(ip_mc_rejoin_group);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8da6429269dd..7174370b1195 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,6 +37,9 @@ struct local_ports sysctl_local_ports __read_mostly = {
37 .range = { 32768, 61000 }, 37 .range = { 32768, 61000 },
38}; 38};
39 39
40unsigned long *sysctl_local_reserved_ports;
41EXPORT_SYMBOL(sysctl_local_reserved_ports);
42
40void inet_get_local_port_range(int *low, int *high) 43void inet_get_local_port_range(int *low, int *high)
41{ 44{
42 unsigned seq; 45 unsigned seq;
@@ -81,7 +84,6 @@ int inet_csk_bind_conflict(const struct sock *sk,
81 } 84 }
82 return node != NULL; 85 return node != NULL;
83} 86}
84
85EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); 87EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
86 88
87/* Obtain a reference to a local port for the given sock, 89/* Obtain a reference to a local port for the given sock,
@@ -108,6 +110,8 @@ again:
108 110
109 smallest_size = -1; 111 smallest_size = -1;
110 do { 112 do {
113 if (inet_is_reserved_local_port(rover))
114 goto next_nolock;
111 head = &hashinfo->bhash[inet_bhashfn(net, rover, 115 head = &hashinfo->bhash[inet_bhashfn(net, rover,
112 hashinfo->bhash_size)]; 116 hashinfo->bhash_size)];
113 spin_lock(&head->lock); 117 spin_lock(&head->lock);
@@ -130,6 +134,7 @@ again:
130 break; 134 break;
131 next: 135 next:
132 spin_unlock(&head->lock); 136 spin_unlock(&head->lock);
137 next_nolock:
133 if (++rover > high) 138 if (++rover > high)
134 rover = low; 139 rover = low;
135 } while (--remaining > 0); 140 } while (--remaining > 0);
@@ -206,7 +211,6 @@ fail:
206 local_bh_enable(); 211 local_bh_enable();
207 return ret; 212 return ret;
208} 213}
209
210EXPORT_SYMBOL_GPL(inet_csk_get_port); 214EXPORT_SYMBOL_GPL(inet_csk_get_port);
211 215
212/* 216/*
@@ -234,7 +238,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
234 * having to remove and re-insert us on the wait queue. 238 * having to remove and re-insert us on the wait queue.
235 */ 239 */
236 for (;;) { 240 for (;;) {
237 prepare_to_wait_exclusive(sk->sk_sleep, &wait, 241 prepare_to_wait_exclusive(sk_sleep(sk), &wait,
238 TASK_INTERRUPTIBLE); 242 TASK_INTERRUPTIBLE);
239 release_sock(sk); 243 release_sock(sk);
240 if (reqsk_queue_empty(&icsk->icsk_accept_queue)) 244 if (reqsk_queue_empty(&icsk->icsk_accept_queue))
@@ -253,7 +257,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
253 if (!timeo) 257 if (!timeo)
254 break; 258 break;
255 } 259 }
256 finish_wait(sk->sk_sleep, &wait); 260 finish_wait(sk_sleep(sk), &wait);
257 return err; 261 return err;
258} 262}
259 263
@@ -299,7 +303,6 @@ out_err:
299 *err = error; 303 *err = error;
300 goto out; 304 goto out;
301} 305}
302
303EXPORT_SYMBOL(inet_csk_accept); 306EXPORT_SYMBOL(inet_csk_accept);
304 307
305/* 308/*
@@ -321,7 +324,6 @@ void inet_csk_init_xmit_timers(struct sock *sk,
321 setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); 324 setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
322 icsk->icsk_pending = icsk->icsk_ack.pending = 0; 325 icsk->icsk_pending = icsk->icsk_ack.pending = 0;
323} 326}
324
325EXPORT_SYMBOL(inet_csk_init_xmit_timers); 327EXPORT_SYMBOL(inet_csk_init_xmit_timers);
326 328
327void inet_csk_clear_xmit_timers(struct sock *sk) 329void inet_csk_clear_xmit_timers(struct sock *sk)
@@ -334,21 +336,18 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
334 sk_stop_timer(sk, &icsk->icsk_delack_timer); 336 sk_stop_timer(sk, &icsk->icsk_delack_timer);
335 sk_stop_timer(sk, &sk->sk_timer); 337 sk_stop_timer(sk, &sk->sk_timer);
336} 338}
337
338EXPORT_SYMBOL(inet_csk_clear_xmit_timers); 339EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
339 340
340void inet_csk_delete_keepalive_timer(struct sock *sk) 341void inet_csk_delete_keepalive_timer(struct sock *sk)
341{ 342{
342 sk_stop_timer(sk, &sk->sk_timer); 343 sk_stop_timer(sk, &sk->sk_timer);
343} 344}
344
345EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); 345EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
346 346
347void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) 347void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
348{ 348{
349 sk_reset_timer(sk, &sk->sk_timer, jiffies + len); 349 sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
350} 350}
351
352EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); 351EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
353 352
354struct dst_entry *inet_csk_route_req(struct sock *sk, 353struct dst_entry *inet_csk_route_req(struct sock *sk,
@@ -377,7 +376,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
377 goto no_route; 376 goto no_route;
378 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) 377 if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
379 goto route_err; 378 goto route_err;
380 return &rt->u.dst; 379 return &rt->dst;
381 380
382route_err: 381route_err:
383 ip_rt_put(rt); 382 ip_rt_put(rt);
@@ -385,7 +384,6 @@ no_route:
385 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); 384 IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
386 return NULL; 385 return NULL;
387} 386}
388
389EXPORT_SYMBOL_GPL(inet_csk_route_req); 387EXPORT_SYMBOL_GPL(inet_csk_route_req);
390 388
391static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 389static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
@@ -427,7 +425,6 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
427 425
428 return req; 426 return req;
429} 427}
430
431EXPORT_SYMBOL_GPL(inet_csk_search_req); 428EXPORT_SYMBOL_GPL(inet_csk_search_req);
432 429
433void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 430void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
@@ -441,11 +438,11 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
441 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); 438 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
442 inet_csk_reqsk_queue_added(sk, timeout); 439 inet_csk_reqsk_queue_added(sk, timeout);
443} 440}
441EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
444 442
445/* Only thing we need from tcp.h */ 443/* Only thing we need from tcp.h */
446extern int sysctl_tcp_synack_retries; 444extern int sysctl_tcp_synack_retries;
447 445
448EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
449 446
450/* Decide when to expire the request and when to resend SYN-ACK */ 447/* Decide when to expire the request and when to resend SYN-ACK */
451static inline void syn_ack_recalc(struct request_sock *req, const int thresh, 448static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
@@ -563,7 +560,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
563 if (lopt->qlen) 560 if (lopt->qlen)
564 inet_csk_reset_keepalive_timer(parent, interval); 561 inet_csk_reset_keepalive_timer(parent, interval);
565} 562}
566
567EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); 563EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
568 564
569struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, 565struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
@@ -593,7 +589,6 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
593 } 589 }
594 return newsk; 590 return newsk;
595} 591}
596
597EXPORT_SYMBOL_GPL(inet_csk_clone); 592EXPORT_SYMBOL_GPL(inet_csk_clone);
598 593
599/* 594/*
@@ -624,7 +619,6 @@ void inet_csk_destroy_sock(struct sock *sk)
624 percpu_counter_dec(sk->sk_prot->orphan_count); 619 percpu_counter_dec(sk->sk_prot->orphan_count);
625 sock_put(sk); 620 sock_put(sk);
626} 621}
627
628EXPORT_SYMBOL(inet_csk_destroy_sock); 622EXPORT_SYMBOL(inet_csk_destroy_sock);
629 623
630int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) 624int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
@@ -659,7 +653,6 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
659 __reqsk_queue_destroy(&icsk->icsk_accept_queue); 653 __reqsk_queue_destroy(&icsk->icsk_accept_queue);
660 return -EADDRINUSE; 654 return -EADDRINUSE;
661} 655}
662
663EXPORT_SYMBOL_GPL(inet_csk_listen_start); 656EXPORT_SYMBOL_GPL(inet_csk_listen_start);
664 657
665/* 658/*
@@ -714,7 +707,6 @@ void inet_csk_listen_stop(struct sock *sk)
714 } 707 }
715 WARN_ON(sk->sk_ack_backlog); 708 WARN_ON(sk->sk_ack_backlog);
716} 709}
717
718EXPORT_SYMBOL_GPL(inet_csk_listen_stop); 710EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
719 711
720void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 712void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
@@ -726,7 +718,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
726 sin->sin_addr.s_addr = inet->inet_daddr; 718 sin->sin_addr.s_addr = inet->inet_daddr;
727 sin->sin_port = inet->inet_dport; 719 sin->sin_port = inet->inet_dport;
728} 720}
729
730EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); 721EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
731 722
732#ifdef CONFIG_COMPAT 723#ifdef CONFIG_COMPAT
@@ -741,7 +732,6 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
741 return icsk->icsk_af_ops->getsockopt(sk, level, optname, 732 return icsk->icsk_af_ops->getsockopt(sk, level, optname,
742 optval, optlen); 733 optval, optlen);
743} 734}
744
745EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); 735EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);
746 736
747int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, 737int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
@@ -755,6 +745,5 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
755 return icsk->icsk_af_ops->setsockopt(sk, level, optname, 745 return icsk->icsk_af_ops->setsockopt(sk, level, optname,
756 optval, optlen); 746 optval, optlen);
757} 747}
758
759EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt); 748EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
760#endif 749#endif
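inet_csk_get_port() above gains a next_nolock target so reserved ports are skipped before the hash-bucket lock is ever taken, and __inet_hash_connect() below does the same with a plain continue. The scan order, reduced to a sketch; the callbacks stand in for the reservation test and the locked bind attempt, and all names are hypothetical:

/* Walk [low, high] starting at rover, wrapping once past high; skip
 * reserved ports without taking any lock, as the hunk above does. */
static int pick_local_port(unsigned int low, unsigned int high,
			   unsigned int rover,
			   int (*is_reserved)(unsigned int),
			   int (*try_bind)(unsigned int))
{
	unsigned int remaining = high - low + 1;

	while (remaining--) {
		if (!is_reserved(rover) && try_bind(rover))
			return (int)rover;	/* bound successfully */
		if (++rover > high)
			rover = low;		/* wrap around the range */
	}
	return -1;				/* every port busy/reserved */
}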
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index a2ca6aed763b..5ff2a51b6d0c 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -114,7 +114,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
114 fq->last_in |= INET_FRAG_COMPLETE; 114 fq->last_in |= INET_FRAG_COMPLETE;
115 } 115 }
116} 116}
117
118EXPORT_SYMBOL(inet_frag_kill); 117EXPORT_SYMBOL(inet_frag_kill);
119 118
120static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, 119static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2b79377b468d..fb7ad5a21ff3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -99,7 +99,6 @@ void inet_put_port(struct sock *sk)
 	__inet_put_port(sk);
 	local_bh_enable();
 }
-
 EXPORT_SYMBOL(inet_put_port);
 
 void __inet_inherit_port(struct sock *sk, struct sock *child)
@@ -116,7 +115,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child)
 	inet_csk(child)->icsk_bind_hash = tb;
 	spin_unlock(&head->lock);
 }
-
 EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
 static inline int compute_score(struct sock *sk, struct net *net,
@@ -456,6 +454,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 	local_bh_disable();
 	for (i = 1; i <= remaining; i++) {
 		port = low + (i + offset) % remaining;
+		if (inet_is_reserved_local_port(port))
+			continue;
 		head = &hinfo->bhash[inet_bhashfn(net, port,
 				hinfo->bhash_size)];
 		spin_lock(&head->lock);
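The two lines added above make the ephemeral-port search honour administratively reserved ports (the ip_local_reserved_ports sysctl): a reserved candidate is skipped before its bind-hash bucket is ever locked. A minimal userspace sketch of the same bitmap test, with hypothetical names, not the kernel implementation:

#include <stdbool.h>
#include <stdint.h>

#define PORT_COUNT 65536

/* hypothetical stand-in for the sysctl-backed reserved-ports bitmap */
static uint64_t reserved_ports[PORT_COUNT / 64];

static bool is_reserved_local_port(uint16_t port)
{
        return reserved_ports[port / 64] & (1ULL << (port % 64));
}

/* A port-selection loop then mirrors __inet_hash_connect() above:
 * candidates that test true are skipped with `continue` before any
 * lock is taken. */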
@@ -544,7 +544,6 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
 	return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
 				   __inet_check_established, __inet_hash_nolisten);
 }
-
 EXPORT_SYMBOL_GPL(inet_hash_connect);
 
 void inet_hashinfo_init(struct inet_hashinfo *h)
@@ -558,5 +557,4 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
 				      i + LISTENING_NULLS_BASE);
 	}
 }
-
 EXPORT_SYMBOL_GPL(inet_hashinfo_init);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 6bcfe52a9c87..9ffa24b9a804 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -51,8 +51,8 @@
  *		lookups performed with disabled BHs.
  *
  *  Serialisation issues.
- *  1.  Nodes may appear in the tree only with the pool write lock held.
- *  2.  Nodes may disappear from the tree only with the pool write lock held
+ *  1.  Nodes may appear in the tree only with the pool lock held.
+ *  2.  Nodes may disappear from the tree only with the pool lock held
  *      AND reference count being 0.
  *  3.  Nodes appears and disappears from unused node list only under
  *      "inet_peer_unused_lock".
@@ -64,23 +64,31 @@
  *		usually under some other lock to prevent node disappearing
  *		dtime: unused node list lock
  *		v4daddr: unchangeable
- *		ip_id_count: idlock
+ *		ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
 
 #define node_height(x) x->avl_height
-static struct inet_peer peer_fake_node = {
-	.avl_left	= &peer_fake_node,
-	.avl_right	= &peer_fake_node,
+
+#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
+static const struct inet_peer peer_fake_node = {
+	.avl_left	= peer_avl_empty,
+	.avl_right	= peer_avl_empty,
 	.avl_height	= 0
 };
-#define peer_avl_empty (&peer_fake_node)
-static struct inet_peer *peer_root = peer_avl_empty;
-static DEFINE_RWLOCK(peer_pool_lock);
+
+static struct {
+	struct inet_peer *root;
+	spinlock_t	 lock;
+	int		 total;
+} peers = {
+	.root	= peer_avl_empty,
+	.lock	= __SPIN_LOCK_UNLOCKED(peers.lock),
+	.total	= 0,
+};
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
 
-static int peer_total;
 /* Exported for sysctl_net_ipv4. */
 int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
							 * aggressively at this stage */
@@ -89,8 +97,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min
 int inet_peer_gc_mintime __read_mostly = 10 * HZ;
 int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
 
-static LIST_HEAD(unused_peers);
-static DEFINE_SPINLOCK(inet_peer_unused_lock);
+static struct {
+	struct list_head list;
+	spinlock_t	 lock;
+} unused_peers = {
+	.list	= LIST_HEAD_INIT(unused_peers.list),
+	.lock	= __SPIN_LOCK_UNLOCKED(unused_peers.lock),
+};
 
 static void peer_check_expire(unsigned long dummy);
 static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
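This hunk and the `peers` hunk above apply one refactoring twice: a loose global and the lock that guards it are folded into a single anonymous struct, so the lock's scope is visible in the type and the initialisers sit together. Reduced to its essentials (illustrative sketch only):

static struct {
        struct list_head list;  /* the data...                    */
        spinlock_t       lock;  /* ...and the lock that guards it */
} guarded = {
        .list = LIST_HEAD_INIT(guarded.list),
        .lock = __SPIN_LOCK_UNLOCKED(guarded.lock),
};

Every access then spells out guarded.lock and guarded.list, which makes it hard to take the wrong lock for the data.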
@@ -116,7 +129,7 @@ void __init inet_initpeers(void)
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
 
 	/* All the timers, started at system startup tend
@@ -131,38 +144,69 @@ void __init inet_initpeers(void)
 /* Called with or without local BH being disabled. */
 static void unlink_from_unused(struct inet_peer *p)
 {
-	spin_lock_bh(&inet_peer_unused_lock);
-	list_del_init(&p->unused);
-	spin_unlock_bh(&inet_peer_unused_lock);
+	if (!list_empty(&p->unused)) {
+		spin_lock_bh(&unused_peers.lock);
+		list_del_init(&p->unused);
+		spin_unlock_bh(&unused_peers.lock);
+	}
 }
 
 /*
  * Called with local BH disabled and the pool lock held.
- * _stack is known to be NULL or not at compile time,
- * so compiler will optimize the if (_stack) tests.
  */
 #define lookup(_daddr, _stack)					\
 ({								\
 	struct inet_peer *u, **v;				\
-	if (_stack != NULL) {					\
-		stackptr = _stack;				\
-		*stackptr++ = &peer_root;			\
-	}							\
-	for (u = peer_root; u != peer_avl_empty; ) {		\
+								\
+	stackptr = _stack;					\
+	*stackptr++ = &peers.root;				\
+	for (u = peers.root; u != peer_avl_empty; ) {		\
 		if (_daddr == u->v4daddr)			\
 			break;					\
 		if ((__force __u32)_daddr < (__force __u32)u->v4daddr)	\
 			v = &u->avl_left;			\
 		else						\
 			v = &u->avl_right;			\
-		if (_stack != NULL)				\
-			*stackptr++ = v;			\
+		*stackptr++ = v;				\
 		u = *v;						\
 	}							\
 	u;							\
 })
 
-/* Called with local BH disabled and the pool write lock held. */
+/*
+ * Called with rcu_read_lock_bh()
+ * Because we hold no lock against a writer, its quite possible we fall
+ * in an endless loop.
+ * But every pointer we follow is guaranteed to be valid thanks to RCU.
+ * We exit from this function if number of links exceeds PEER_MAXDEPTH
+ */
+static struct inet_peer *lookup_rcu_bh(__be32 daddr)
+{
+	struct inet_peer *u = rcu_dereference_bh(peers.root);
+	int count = 0;
+
+	while (u != peer_avl_empty) {
+		if (daddr == u->v4daddr) {
+			/* Before taking a reference, check if this entry was
+			 * deleted, unlink_from_pool() sets refcnt=-1 to make
+			 * distinction between an unused entry (refcnt=0) and
+			 * a freed one.
+			 */
+			if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1)))
+				u = NULL;
+			return u;
+		}
+		if ((__force __u32)daddr < (__force __u32)u->v4daddr)
+			u = rcu_dereference_bh(u->avl_left);
+		else
+			u = rcu_dereference_bh(u->avl_right);
+		if (unlikely(++count == PEER_MAXDEPTH))
+			break;
+	}
+	return NULL;
+}
+
+/* Called with local BH disabled and the pool lock held. */
 #define lookup_rightempty(start)				\
 ({								\
 	struct inet_peer *u, **v;				\
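lookup_rcu_bh() cannot blindly atomic_inc() a node it reached without any lock: the node may already be on its way back to the slab. atomic_add_unless(&u->refcnt, 1, -1) therefore increments only while the counter is not -1, the "deleted" sentinel that unlink_from_pool() installs below. The same take-a-reference-unless-dead primitive in portable C11 atomics (a sketch, not kernel code):

#include <stdatomic.h>
#include <stdbool.h>

#define REF_DEAD (-1)   /* mirrors the refcnt = -1 sentinel */

static bool ref_get_unless_dead(atomic_int *refcnt)
{
        int old = atomic_load(refcnt);

        while (old != REF_DEAD) {
                /* on CAS failure, `old` is reloaded with the current value */
                if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
                        return true;    /* reference safely taken */
        }
        return false;   /* entry is dying; caller falls back to the slow path */
}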
@@ -176,9 +220,10 @@ static void unlink_from_unused(struct inet_peer *p)
 	u;							\
 })
 
-/* Called with local BH disabled and the pool write lock held.
+/* Called with local BH disabled and the pool lock held.
  * Variable names are the proof of operation correctness.
- * Look into mm/map_avl.c for more detail description of the ideas.  */
+ * Look into mm/map_avl.c for more detail description of the ideas.
+ */
 static void peer_avl_rebalance(struct inet_peer **stack[],
 		struct inet_peer ***stackend)
 {
@@ -254,15 +299,21 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
 	}
 }
 
-/* Called with local BH disabled and the pool write lock held. */
+/* Called with local BH disabled and the pool lock held. */
 #define link_to_pool(n)						\
 do {								\
 	n->avl_height = 1;					\
 	n->avl_left = peer_avl_empty;				\
 	n->avl_right = peer_avl_empty;				\
+	smp_wmb(); /* lockless readers can catch us now */	\
 	**--stackptr = n;					\
 	peer_avl_rebalance(stack, stackptr);			\
-} while(0)
+} while (0)
+
+static void inetpeer_free_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
+}
 
 /* May be called with local BH enabled. */
 static void unlink_from_pool(struct inet_peer *p)
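inetpeer_free_rcu() exists so that, in the next hunk, unlink_from_pool() can hand nodes to call_rcu_bh() instead of freeing them directly: the slab memory is returned only after every CPU has left its rcu_read_lock_bh() section. The reader/writer pairing, schematically (commentary, not part of the patch):

/*
 * reader                               writer
 * ------                               ------
 * rcu_read_lock_bh();                  spin_lock_bh(&peers.lock);
 * p = lookup_rcu_bh(daddr);            ...unlink p from the AVL tree...
 * ...dereference p freely...           spin_unlock_bh(&peers.lock);
 * rcu_read_unlock_bh();                call_rcu_bh(&p->rcu, inetpeer_free_rcu);
 *
 * kmem_cache_free() thus runs only once no reader can still hold a
 * pointer to p obtained from the tree.
 */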
@@ -271,13 +322,14 @@ static void unlink_from_pool(struct inet_peer *p)
 
 	do_free = 0;
 
-	write_lock_bh(&peer_pool_lock);
+	spin_lock_bh(&peers.lock);
 	/* Check the reference counter.  It was artificially incremented by 1
-	 * in cleanup() function to prevent sudden disappearing.  If the
-	 * reference count is still 1 then the node is referenced only as `p'
-	 * here and from the pool.  So under the exclusive pool lock it's safe
-	 * to remove the node and free it later. */
-	if (atomic_read(&p->refcnt) == 1) {
+	 * in cleanup() function to prevent sudden disappearing.  If we can
+	 * atomically (because of lockless readers) take this last reference,
+	 * it's safe to remove the node and free it later.
+	 * We use refcnt=-1 to alert lockless readers this entry is deleted.
+	 */
+	if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
 		struct inet_peer **stack[PEER_MAXDEPTH];
 		struct inet_peer ***stackptr, ***delp;
 		if (lookup(p->v4daddr, stack) != p)
@@ -303,20 +355,21 @@ static void unlink_from_pool(struct inet_peer *p)
 			delp[1] = &t->avl_left; /* was &p->avl_left */
 		}
 		peer_avl_rebalance(stack, stackptr);
-		peer_total--;
+		peers.total--;
 		do_free = 1;
 	}
-	write_unlock_bh(&peer_pool_lock);
+	spin_unlock_bh(&peers.lock);
 
 	if (do_free)
-		kmem_cache_free(peer_cachep, p);
+		call_rcu_bh(&p->rcu, inetpeer_free_rcu);
 	else
 		/* The node is used again.  Decrease the reference counter
 		 * back.  The loop "cleanup -> unlink_from_unused
 		 *   -> unlink_from_pool -> putpeer -> link_to_unused
 		 *   -> cleanup (for the same node)"
 		 * doesn't really exist because the entry will have a
-		 * recent deletion time and will not be cleaned again soon. */
+		 * recent deletion time and will not be cleaned again soon.
+		 */
 		inet_putpeer(p);
 }
 
@@ -326,16 +379,16 @@ static int cleanup_once(unsigned long ttl)
 	struct inet_peer *p = NULL;
 
 	/* Remove the first entry from the list of unused nodes. */
-	spin_lock_bh(&inet_peer_unused_lock);
-	if (!list_empty(&unused_peers)) {
+	spin_lock_bh(&unused_peers.lock);
+	if (!list_empty(&unused_peers.list)) {
 		__u32 delta;
 
-		p = list_first_entry(&unused_peers, struct inet_peer, unused);
+		p = list_first_entry(&unused_peers.list, struct inet_peer, unused);
 		delta = (__u32)jiffies - p->dtime;
 
 		if (delta < ttl) {
 			/* Do not prune fresh entries. */
-			spin_unlock_bh(&inet_peer_unused_lock);
+			spin_unlock_bh(&unused_peers.lock);
 			return -1;
 		}
 
@@ -345,7 +398,7 @@ static int cleanup_once(unsigned long ttl)
 		 * before unlink_from_pool() call. */
 		atomic_inc(&p->refcnt);
 	}
-	spin_unlock_bh(&inet_peer_unused_lock);
+	spin_unlock_bh(&unused_peers.lock);
 
 	if (p == NULL)
 		/* It means that the total number of USED entries has
@@ -360,62 +413,56 @@ static int cleanup_once(unsigned long ttl)
 /* Called with or without local BH being disabled. */
 struct inet_peer *inet_getpeer(__be32 daddr, int create)
 {
-	struct inet_peer *p, *n;
+	struct inet_peer *p;
 	struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
 
-	/* Look up for the address quickly. */
-	read_lock_bh(&peer_pool_lock);
-	p = lookup(daddr, NULL);
-	if (p != peer_avl_empty)
-		atomic_inc(&p->refcnt);
-	read_unlock_bh(&peer_pool_lock);
+	/* Look up for the address quickly, lockless.
+	 * Because of a concurrent writer, we might not find an existing entry.
+	 */
+	rcu_read_lock_bh();
+	p = lookup_rcu_bh(daddr);
+	rcu_read_unlock_bh();
+
+	if (p) {
+		/* The existing node has been found.
+		 * Remove the entry from unused list if it was there.
+		 */
+		unlink_from_unused(p);
+		return p;
+	}
 
+	/* retry an exact lookup, taking the lock before.
+	 * At least, nodes should be hot in our cache.
+	 */
+	spin_lock_bh(&peers.lock);
+	p = lookup(daddr, stack);
 	if (p != peer_avl_empty) {
-		/* The existing node has been found. */
+		atomic_inc(&p->refcnt);
+		spin_unlock_bh(&peers.lock);
 		/* Remove the entry from unused list if it was there. */
 		unlink_from_unused(p);
 		return p;
 	}
+	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
+	if (p) {
+		p->v4daddr = daddr;
+		atomic_set(&p->refcnt, 1);
+		atomic_set(&p->rid, 0);
+		atomic_set(&p->ip_id_count, secure_ip_id(daddr));
+		p->tcp_ts_stamp = 0;
+		INIT_LIST_HEAD(&p->unused);
+
+
+		/* Link the node. */
+		link_to_pool(p);
+		peers.total++;
+	}
+	spin_unlock_bh(&peers.lock);
 
-	if (!create)
-		return NULL;
-
-	/* Allocate the space outside the locked region. */
-	n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
-	if (n == NULL)
-		return NULL;
-	n->v4daddr = daddr;
-	atomic_set(&n->refcnt, 1);
-	atomic_set(&n->rid, 0);
-	atomic_set(&n->ip_id_count, secure_ip_id(daddr));
-	n->tcp_ts_stamp = 0;
-
-	write_lock_bh(&peer_pool_lock);
-	/* Check if an entry has suddenly appeared. */
-	p = lookup(daddr, stack);
-	if (p != peer_avl_empty)
-		goto out_free;
-
-	/* Link the node. */
-	link_to_pool(n);
-	INIT_LIST_HEAD(&n->unused);
-	peer_total++;
-	write_unlock_bh(&peer_pool_lock);
-
-	if (peer_total >= inet_peer_threshold)
+	if (peers.total >= inet_peer_threshold)
 		/* Remove one less-recently-used entry. */
 		cleanup_once(0);
 
-	return n;
-
-out_free:
-	/* The appropriate node is already in the pool. */
-	atomic_inc(&p->refcnt);
-	write_unlock_bh(&peer_pool_lock);
-	/* Remove the entry from unused list if it was there. */
-	unlink_from_unused(p);
-	/* Free preallocated the preallocated node. */
-	kmem_cache_free(peer_cachep, n);
 	return p;
 }
 
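The rewritten inet_getpeer() above is the classic lockless-fast-path, locked-slow-path shape. The RCU lookup may miss an entry that a concurrent writer is inserting, so a miss is not authoritative; only the second lookup, performed under peers.lock, decides whether a new node gets linked. Stripped of the inetpeer specifics (sketch with hypothetical helpers):

struct obj *get_or_create(key_t key, bool create)
{
        struct obj *p;

        rcu_read_lock();
        p = lookup_lockless(key);       /* may spuriously miss under churn */
        rcu_read_unlock();
        if (p)
                return p;               /* fast path: no lock taken */

        lock(&pool_lock);
        p = lookup(key);                /* authoritative while locked */
        if (p)
                take_ref(p);
        else if (create && (p = alloc_obj(key)) != NULL)
                link_obj(p);            /* insert while still holding the lock */
        unlock(&pool_lock);
        return p;
}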
@@ -425,12 +472,12 @@ static void peer_check_expire(unsigned long dummy)
 	unsigned long now = jiffies;
 	int ttl;
 
-	if (peer_total >= inet_peer_threshold)
+	if (peers.total >= inet_peer_threshold)
 		ttl = inet_peer_minttl;
 	else
 		ttl = inet_peer_maxttl
 				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
-					peer_total / inet_peer_threshold * HZ;
+					peers.total / inet_peer_threshold * HZ;
 	while (!cleanup_once(ttl)) {
 		if (jiffies != now)
 			break;
@@ -439,22 +486,25 @@ static void peer_check_expire(unsigned long dummy)
 	/* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
 	 * interval depending on the total number of entries (more entries,
 	 * less interval). */
-	if (peer_total >= inet_peer_threshold)
+	if (peers.total >= inet_peer_threshold)
 		peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
 	else
 		peer_periodic_timer.expires = jiffies
 			+ inet_peer_gc_maxtime
 			- (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
-				peer_total / inet_peer_threshold * HZ;
+				peers.total / inet_peer_threshold * HZ;
 	add_timer(&peer_periodic_timer);
 }
 
 void inet_putpeer(struct inet_peer *p)
 {
-	spin_lock_bh(&inet_peer_unused_lock);
-	if (atomic_dec_and_test(&p->refcnt)) {
-		list_add_tail(&p->unused, &unused_peers);
+	local_bh_disable();
+
+	if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) {
+		list_add_tail(&p->unused, &unused_peers.list);
 		p->dtime = (__u32)jiffies;
+		spin_unlock(&unused_peers.lock);
 	}
-	spin_unlock_bh(&inet_peer_unused_lock);
+
+	local_bh_enable();
 }
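inet_putpeer() used to take the unused-list lock on every put; atomic_dec_and_lock() now takes it only for the final 1 -> 0 transition. Its behaviour matches the generic fallback in lib/dec_and_lock.c, roughly:

int atomic_dec_and_lock(atomic_t *cnt, spinlock_t *lock)
{
        /* fast path: decrement lockless unless the count would hit zero */
        if (atomic_add_unless(cnt, -1, 1))
                return 0;               /* count was > 1: lock never taken */

        /* slow path: the 1 -> 0 transition must happen under the lock */
        spin_lock(lock);
        if (atomic_dec_and_test(cnt))
                return 1;               /* final put: caller holds the lock */
        spin_unlock(lock);
        return 0;
}

The surrounding local_bh_disable()/local_bh_enable() preserves the _bh semantics that the old spin_lock_bh() provided.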
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index af10942b326c..99461f09320f 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
 
-	if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) &&
+	if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
 		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
-		IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-			  htonl(dst_mtu(&rt->u.dst)));
+			  htonl(dst_mtu(&rt->dst)));
 		goto drop;
 	}
 
 	/* We are about to mangle packet. Copy it! */
-	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
+	if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len))
 		goto drop;
 	iph = ip_hdr(skb);
 
@@ -112,8 +112,8 @@ int ip_forward(struct sk_buff *skb)
 
 	skb->priority = rt_tos2priority(iph->tos);
 
-	return NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, rt->u.dst.dev,
-		       ip_forward_finish);
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev,
+		       rt->dst.dev, ip_forward_finish);
 
 sr_failed:
 	/*
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 75347ea70ea0..b7c41654dde5 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -124,11 +124,8 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
 }
 
 /* Memory Tracking Functions. */
-static __inline__ void frag_kfree_skb(struct netns_frags *nf,
-		struct sk_buff *skb, int *work)
+static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
 {
-	if (work)
-		*work -= skb->truesize;
 	atomic_sub(skb->truesize, &nf->mem);
 	kfree_skb(skb);
 }
@@ -309,7 +306,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	fp = qp->q.fragments;
 	do {
 		struct sk_buff *xp = fp->next;
-		frag_kfree_skb(qp->q.net, fp, NULL);
+		frag_kfree_skb(qp->q.net, fp);
 		fp = xp;
 	} while (fp);
 
@@ -317,6 +314,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	qp->q.len = 0;
 	qp->q.meat = 0;
 	qp->q.fragments = NULL;
+	qp->q.fragments_tail = NULL;
 	qp->iif = 0;
 
 	return 0;
@@ -389,6 +387,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * in the chain of fragments so far.  We must know where to put
 	 * this fragment, right?
 	 */
+	prev = qp->q.fragments_tail;
+	if (!prev || FRAG_CB(prev)->offset < offset) {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
 	for (next = qp->q.fragments; next != NULL; next = next->next) {
 		if (FRAG_CB(next)->offset >= offset)
@@ -396,6 +399,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		prev = next;
 	}
 
+found:
 	/* We found where to put this one.  Check for overlap with
 	 * preceding fragment, and, if needed, align things so that
 	 * any overlaps are eliminated.
@@ -446,7 +450,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 				qp->q.fragments = next;
 
 			qp->q.meat -= free_it->len;
-			frag_kfree_skb(qp->q.net, free_it, NULL);
+			frag_kfree_skb(qp->q.net, free_it);
 		}
 	}
 
@@ -454,6 +458,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		qp->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
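Fragments overwhelmingly arrive in order, so the new qp->q.fragments_tail pointer turns the common case from an O(n) list walk into an O(1) tail check: a fragment starting past the current tail jumps straight to `found`. The idea in isolation (sketch; frag_offset() and insert_sorted() are stand-ins for FRAG_CB(...)->offset and the existing ordered walk):

struct frag_queue {             /* reduced model of struct inet_frag_queue */
        struct sk_buff *head;
        struct sk_buff *tail;   /* newly tracked, as qp->q.fragments_tail */
};

static void frag_enqueue(struct frag_queue *q, struct sk_buff *skb, int offset)
{
        if (!q->tail || frag_offset(q->tail) < offset) {
                /* in-order arrival: O(1) append at the tail */
                skb->next = NULL;
                if (q->tail)
                        q->tail->next = skb;
                else
                        q->head = skb;
                q->tail = skb;
                return;
        }
        insert_sorted(q, skb, offset);  /* rare out-of-order case: O(n) */
}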
@@ -507,6 +513,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		goto out_nomem;
 
 	fp->next = head->next;
+	if (!fp->next)
+		qp->q.fragments_tail = fp;
 	prev->next = fp;
 
 	skb_morph(head, qp->q.fragments);
@@ -556,7 +564,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 
 	skb_shinfo(head)->frag_list = head->next;
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &qp->q.net->mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -566,8 +573,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &qp->q.net->mem);
 	}
+	atomic_sub(head->truesize, &qp->q.net->mem);
 
 	head->next = NULL;
 	head->dev = dev;
@@ -578,6 +585,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	iph->tot_len = htons(len);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
+	qp->q.fragments_tail = NULL;
 	return 0;
 
 out_nomem:
@@ -624,6 +632,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	kfree_skb(skb);
 	return -ENOMEM;
 }
+EXPORT_SYMBOL(ip_defrag);
 
 #ifdef CONFIG_SYSCTL
 static int zero;
@@ -777,5 +786,3 @@ void __init ipfrag_init(void)
 	ip4_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&ip4_frags);
 }
-
-EXPORT_SYMBOL(ip_defrag);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fe381d12ecdd..945b20a5ad50 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -502,7 +502,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 		t->err_time = jiffies;
 out:
 	rcu_read_unlock();
-	return;
 }
 
 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
@@ -538,7 +537,6 @@ static int ipgre_rcv(struct sk_buff *skb)
 	struct ip_tunnel *tunnel;
 	int    offset = 4;
 	__be16 gre_proto;
-	unsigned int len;
 
 	if (!pskb_may_pull(skb, 16))
 		goto drop_nolock;
@@ -629,8 +627,6 @@ static int ipgre_rcv(struct sk_buff *skb)
 			tunnel->i_seqno = seqno + 1;
 		}
 
-		len = skb->len;
-
 		/* Warning: All skb pointers will be invalidated! */
 		if (tunnel->dev->type == ARPHRD_ETHER) {
 			if (!pskb_may_pull(skb, ETH_HLEN)) {
@@ -644,11 +640,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 		}
 
-		stats->rx_packets++;
-		stats->rx_bytes += len;
-		skb->dev = tunnel->dev;
-		skb_dst_drop(skb);
-		nf_reset(skb);
+		skb_tunnel_rx(skb, tunnel->dev);
 
 		skb_reset_network_header(skb);
 		ipgre_ecn_decapsulate(iph, skb);
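The five open-coded lines are replaced by skb_tunnel_rx(), a helper introduced in this cycle so every tunnel driver performs identical receive bookkeeping. Conceptually it covers what was removed (a sketch of intent, not the exact upstream body):

/*
 *      dev->stats.rx_packets++;         update the tunnel device's stats
 *      dev->stats.rx_bytes += skb->len;
 *      skb->dev = dev;                  re-home the skb on the tunnel device
 *      skb_dst_drop(skb);               forget the outer route
 *      nf_reset(skb);                   forget outer-header netfilter state
 *
 * One visible side effect of the conversion: the byte count is taken
 * from skb->len at the call site, which is why the local `len`
 * snapshot could be deleted in the earlier hunks.
 */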
@@ -739,6 +731,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			tos = 0;
 			if (skb->protocol == htons(ETH_P_IP))
 				tos = old_iph->tos;
+			else if (skb->protocol == htons(ETH_P_IPV6))
+				tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
 	}
 
 	{
@@ -753,7 +747,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			goto tx_error;
 		}
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -763,7 +757,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 
 	df = tiph->frag_off;
 	if (df)
-		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
+		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
 	else
 		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
@@ -811,7 +805,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			tunnel->err_count = 0;
 	}
 
-	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
+	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
 
 	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -838,7 +832,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -861,7 +855,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
 #endif
 		else
-			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
+			iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
 	}
 
 	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
@@ -923,7 +917,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 						.proto = IPPROTO_GRE };
 		struct rtable *rt;
 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tdev = rt->u.dst.dev;
+			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
 
@@ -1182,7 +1176,7 @@ static int ipgre_open(struct net_device *dev)
 		struct rtable *rt;
 		if (ip_route_output_key(dev_net(dev), &rt, &fl))
 			return -EADDRNOTAVAIL;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (__in_dev_get_rtnl(dev) == NULL)
 			return -EADDRNOTAVAIL;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index f8ab7a380d4a..d859bcc26cb7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -146,7 +146,7 @@
 #include <linux/netlink.h>
 
 /*
- *	Process Router Attention IP option
+ *	Process Router Attention IP option (RFC 2113)
  */
 int ip_call_ra_chain(struct sk_buff *skb)
 {
@@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 	struct sock *last = NULL;
 	struct net_device *dev = skb->dev;
 
-	read_lock(&ip_ra_lock);
-	for (ra = ip_ra_chain; ra; ra = ra->next) {
+	for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
 		struct sock *sk = ra->sk;
 
 		/* If socket is bound to an interface, only report
@@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		     sk->sk_bound_dev_if == dev->ifindex) &&
 		    net_eq(sock_net(sk), dev_net(dev))) {
 			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
-					read_unlock(&ip_ra_lock);
+				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
 					return 1;
-				}
 			}
 			if (last) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
 
 	if (last) {
 		raw_rcv(last, skb);
-		read_unlock(&ip_ra_lock);
 		return 1;
 	}
-	read_unlock(&ip_ra_lock);
 	return 0;
 }
 
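The conversion above is the standard rwlock-to-RCU recipe for a read-mostly list:

/*
 *      before:                             after:
 *      read_lock(&ip_ra_lock);             (no lock on the read side)
 *      for (ra = ip_ra_chain;              for (ra = rcu_dereference(ip_ra_chain);
 *           ra; ra = ra->next)                  ra; ra = rcu_dereference(ra->next))
 *              ...                                 ...
 *      read_unlock(&ip_ra_lock);           (nothing to release on any return)
 */

Readers take no lock at all, which is also why every early-return path shrinks: there is no read_unlock() to remember. This relies on the caller already running inside an RCU read-side section on the packet-receive path, and on writers publishing and retiring entries safely via the spinlock plus RCU callback added in the net/ipv4/ip_sockglue.c hunks further down.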
@@ -266,7 +261,7 @@ int ip_local_deliver(struct sk_buff *skb)
 		return 0;
 	}
 
-	return NF_HOOK(PF_INET, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
 		       ip_local_deliver_finish);
 }
 
@@ -298,18 +293,16 @@ static inline int ip_rcv_options(struct sk_buff *skb)
 	}
 
 	if (unlikely(opt->srr)) {
-		struct in_device *in_dev = in_dev_get(dev);
+		struct in_device *in_dev = __in_dev_get_rcu(dev);
+
 		if (in_dev) {
 			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
 				if (IN_DEV_LOG_MARTIANS(in_dev) &&
 				    net_ratelimit())
 					printk(KERN_INFO "source route option %pI4 -> %pI4\n",
 					       &iph->saddr, &iph->daddr);
-				in_dev_put(in_dev);
 				goto drop;
 			}
-
-			in_dev_put(in_dev);
 		}
 
 		if (ip_options_rcv_srr(skb))
@@ -331,8 +324,8 @@ static int ip_rcv_finish(struct sk_buff *skb)
 	 *	how the packet travels inside Linux networking.
 	 */
 	if (skb_dst(skb) == NULL) {
-		int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
-					 skb->dev);
+		int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					       iph->tos, skb->dev);
 		if (unlikely(err)) {
 			if (err == -EHOSTUNREACH)
 				IP_INC_STATS_BH(dev_net(skb->dev),
@@ -340,13 +333,16 @@ static int ip_rcv_finish(struct sk_buff *skb)
 			else if (err == -ENETUNREACH)
 				IP_INC_STATS_BH(dev_net(skb->dev),
 						IPSTATS_MIB_INNOROUTES);
+			else if (err == -EXDEV)
+				NET_INC_STATS_BH(dev_net(skb->dev),
+						 LINUX_MIB_IPRPFILTER);
 			goto drop;
 		}
 	}
 
 #ifdef CONFIG_NET_CLS_ROUTE
 	if (unlikely(skb_dst(skb)->tclassid)) {
-		struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id());
+		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
 		u32 idx = skb_dst(skb)->tclassid;
 		st[idx&0xFF].o_packets++;
 		st[idx&0xFF].o_bytes += skb->len;
@@ -360,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 	if (rt->rt_type == RTN_MULTICAST) {
-		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST,
+		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST,
 				   skb->len);
 	} else if (rt->rt_type == RTN_BROADCAST)
-		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST,
+		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
 				   skb->len);
 
 	return dst_input(skb);
@@ -444,7 +440,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	/* Must drop socket now because of tproxy. */
 	skb_orphan(skb);
 
-	return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL,
 		       ip_rcv_finish);
 
 inhdr_error:
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 4c09a31fd140..ba9836c488ed 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -238,7 +238,6 @@ void ip_options_fragment(struct sk_buff * skb)
 	opt->rr_needaddr = 0;
 	opt->ts_needaddr = 0;
 	opt->ts_needtime = 0;
-	return;
 }
 
 /*
@@ -601,6 +600,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	unsigned char *optptr = skb_network_header(skb) + opt->srr;
 	struct rtable *rt = skb_rtable(skb);
 	struct rtable *rt2;
+	unsigned long orefdst;
 	int err;
 
 	if (!opt->srr)
@@ -624,16 +624,16 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 		}
 		memcpy(&nexthop, &optptr[srrptr-1], 4);
 
-		rt = skb_rtable(skb);
+		orefdst = skb->_skb_refdst;
 		skb_dst_set(skb, NULL);
 		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
 		rt2 = skb_rtable(skb);
 		if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
-			ip_rt_put(rt2);
-			skb_dst_set(skb, &rt->u.dst);
+			skb_dst_drop(skb);
+			skb->_skb_refdst = orefdst;
 			return -EINVAL;
 		}
-		ip_rt_put(rt);
+		refdst_drop(orefdst);
 		if (rt2->rt_type != RTN_LOCAL)
 			break;
 		/* Superfast 8) loopback forward */
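The srr loop above replaces a juggle of ref-counted dst pointers with a save and restore of the raw skb->_skb_refdst word, which encodes both the dst pointer and whether the skb owns a reference to it. The pattern in isolation:

/*
 *      orefdst = skb->_skb_refdst;          save pointer + "refcounted" bit
 *      skb_dst_set(skb, NULL);
 *      err = ip_route_input(...);           may install a candidate dst
 *      if (rejected) {
 *              skb_dst_drop(skb);           discard the candidate
 *              skb->_skb_refdst = orefdst;  restore the old state verbatim
 *      } else {
 *              refdst_drop(orefdst);        release the saved reference
 *      }
 *
 * Restoring the raw word recreates the previous state without an extra
 * dst_hold()/dst_release() round trip.
 */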
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d1bcc9f21d4f..04b69896df5f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -89,6 +89,7 @@ __inline__ void ip_send_check(struct iphdr *iph)
 	iph->check = 0;
 	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 }
+EXPORT_SYMBOL(ip_send_check);
 
 int __ip_local_out(struct sk_buff *skb)
 {
@@ -96,8 +97,8 @@ int __ip_local_out(struct sk_buff *skb)
 
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
-	return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
-		       dst_output);
+	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
+		       skb_dst(skb)->dev, dst_output);
 }
 
 int ip_local_out(struct sk_buff *skb)
@@ -151,15 +152,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->version  = 4;
 	iph->ihl      = 5;
 	iph->tos      = inet->tos;
-	if (ip_dont_fragment(sk, &rt->u.dst))
+	if (ip_dont_fragment(sk, &rt->dst))
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
-	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
+	iph->ttl      = ip_select_ttl(inet, &rt->dst);
 	iph->daddr    = rt->rt_dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = sk->sk_protocol;
-	ip_select_ident(iph, &rt->u.dst, sk);
+	ip_select_ident(iph, &rt->dst, sk);
 
 	if (opt && opt->optlen) {
 		iph->ihl += opt->optlen>>2;
@@ -172,7 +173,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	/* Send it out. */
 	return ip_local_out(skb);
 }
-
 EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
 
 static inline int ip_finish_output2(struct sk_buff *skb)
@@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct rtable *rt = skb_rtable(skb);
-	struct net_device *dev = rt->u.dst.dev;
+	struct net_device *dev = rt->dst.dev;
 
 	/*
 	 *	If the indicated interface is up and running, send the packet.
@@ -272,8 +272,8 @@ int ip_mc_output(struct sk_buff *skb)
 		    ) {
 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 			if (newskb)
-				NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb,
-					NULL, newskb->dev,
+				NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+					newskb, NULL, newskb->dev,
 					ip_dev_loopback_xmit);
 		}
 
@@ -288,12 +288,12 @@ int ip_mc_output(struct sk_buff *skb)
 	if (rt->rt_flags&RTCF_BROADCAST) {
 		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 		if (newskb)
-			NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, NULL,
-				newskb->dev, ip_dev_loopback_xmit);
+			NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
+				NULL, newskb->dev, ip_dev_loopback_xmit);
 	}
 
-	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
-			    ip_finish_output,
+	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
+			    skb->dev, ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
@@ -306,22 +306,24 @@ int ip_output(struct sk_buff *skb)
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
 
-	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev,
+	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev,
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
-int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
+int ip_queue_xmit(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ip_options *opt = inet->opt;
 	struct rtable *rt;
 	struct iphdr *iph;
+	int res;
 
 	/* Skip all of this if the packet is already routed,
 	 * f.e. by something like SCTP.
 	 */
+	rcu_read_lock();
 	rt = skb_rtable(skb);
 	if (rt != NULL)
 		goto packet_routed;
@@ -357,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
 				goto no_route;
 		}
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	}
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set_noref(skb, &rt->dst);
 
 packet_routed:
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
@@ -370,11 +372,11 @@ packet_routed:
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
+	if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
-	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
+	iph->ttl      = ip_select_ttl(inet, &rt->dst);
 	iph->protocol = sk->sk_protocol;
 	iph->saddr    = rt->rt_src;
 	iph->daddr    = rt->rt_dst;
@@ -385,19 +387,23 @@ packet_routed:
 		ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
 	}
 
-	ip_select_ident_more(iph, &rt->u.dst, sk,
+	ip_select_ident_more(iph, &rt->dst, sk,
 			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 
-	return ip_local_out(skb);
+	res = ip_local_out(skb);
+	rcu_read_unlock();
+	return res;
 
 no_route:
+	rcu_read_unlock();
 	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);
 	return -EHOSTUNREACH;
 }
+EXPORT_SYMBOL(ip_queue_xmit);
 
 
 static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
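skb_dst_set_noref() is the heart of the ip_queue_xmit() change: the skb borrows the socket's cached route instead of taking its own reference, eliminating an atomic increment/decrement pair per transmitted packet. A borrowed dst is only valid while it is guaranteed to stay alive, hence the function-wide RCU bracket:

/*
 *      rcu_read_lock();
 *      rt = <socket's cached route, or a fresh lookup>;
 *      skb_dst_set_noref(skb, &rt->dst);   borrow, no reference taken
 *      ...build the IP header...
 *      res = ip_local_out(skb);
 *      rcu_read_unlock();                  the borrow ends here
 *      return res;
 *
 * Both the success path and the no_route error path unlock, so every
 * return from ip_queue_xmit() leaves the read-side section balanced.
 */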
@@ -406,7 +412,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->priority = from->priority;
 	to->protocol = from->protocol;
 	skb_dst_drop(to);
-	skb_dst_set(to, dst_clone(skb_dst(from)));
+	skb_dst_copy(to, from);
 	to->dev = from->dev;
 	to->mark = from->mark;
 
@@ -437,17 +443,16 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
 	struct iphdr *iph;
-	int raw = 0;
 	int ptr;
 	struct net_device *dev;
 	struct sk_buff *skb2;
-	unsigned int mtu, hlen, left, len, ll_rs, pad;
+	unsigned int mtu, hlen, left, len, ll_rs;
 	int offset;
 	__be16 not_last_frag;
 	struct rtable *rt = skb_rtable(skb);
 	int err = 0;
 
-	dev = rt->u.dst.dev;
+	dev = rt->dst.dev;
 
 	/*
 	 *	Point into the IP datagram header.
@@ -468,7 +473,11 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	 */
 
 	hlen = iph->ihl * 4;
-	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
+	mtu = dst_mtu(&rt->dst) - hlen;	/* Size of data space */
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge)
+		mtu -= nf_bridge_mtu_reduction(skb);
+#endif
 	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
 
 	/* When frag_list is given, use it.  First, check its validity:
@@ -571,14 +580,12 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 slow_path:
 	left = skb->len - hlen;		/* Space per frame */
-	ptr = raw + hlen;		/* Where to start from */
+	ptr = hlen;			/* Where to start from */
 
 	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
 	 * we need to make room for the encapsulating header
 	 */
-	pad = nf_bridge_pad(skb);
-	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
-	mtu -= pad;
+	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));
 
 	/*
 	 *	Fragment the datagram.
@@ -688,7 +695,6 @@ fail:
688 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 695 IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
689 return err; 696 return err;
690} 697}
691
692EXPORT_SYMBOL(ip_fragment); 698EXPORT_SYMBOL(ip_fragment);
693 699
694int 700int
@@ -707,6 +713,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
707 } 713 }
708 return 0; 714 return 0;
709} 715}
716EXPORT_SYMBOL(ip_generic_getfrag);
710 717
711static inline __wsum 718static inline __wsum
712csum_page(struct page *page, int offset, int copy) 719csum_page(struct page *page, int offset, int copy)
@@ -824,13 +831,13 @@ int ip_append_data(struct sock *sk,
824 */ 831 */
825 *rtp = NULL; 832 *rtp = NULL;
826 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? 833 inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
827 rt->u.dst.dev->mtu : 834 rt->dst.dev->mtu :
828 dst_mtu(rt->u.dst.path); 835 dst_mtu(rt->dst.path);
829 inet->cork.dst = &rt->u.dst; 836 inet->cork.dst = &rt->dst;
830 inet->cork.length = 0; 837 inet->cork.length = 0;
831 sk->sk_sndmsg_page = NULL; 838 sk->sk_sndmsg_page = NULL;
832 sk->sk_sndmsg_off = 0; 839 sk->sk_sndmsg_off = 0;
833 if ((exthdrlen = rt->u.dst.header_len) != 0) { 840 if ((exthdrlen = rt->dst.header_len) != 0) {
834 length += exthdrlen; 841 length += exthdrlen;
835 transhdrlen += exthdrlen; 842 transhdrlen += exthdrlen;
836 } 843 }
@@ -843,7 +850,7 @@ int ip_append_data(struct sock *sk,
843 exthdrlen = 0; 850 exthdrlen = 0;
844 mtu = inet->cork.fragsize; 851 mtu = inet->cork.fragsize;
845 } 852 }
846 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 853 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
847 854
848 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 855 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
849 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen; 856 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -860,14 +867,16 @@ int ip_append_data(struct sock *sk,
860 */ 867 */
861 if (transhdrlen && 868 if (transhdrlen &&
862 length + fragheaderlen <= mtu && 869 length + fragheaderlen <= mtu &&
863 rt->u.dst.dev->features & NETIF_F_V4_CSUM && 870 rt->dst.dev->features & NETIF_F_V4_CSUM &&
864 !exthdrlen) 871 !exthdrlen)
865 csummode = CHECKSUM_PARTIAL; 872 csummode = CHECKSUM_PARTIAL;
866 873
874 skb = skb_peek_tail(&sk->sk_write_queue);
875
867 inet->cork.length += length; 876 inet->cork.length += length;
868 if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) && 877 if (((length > mtu) || (skb && skb_is_gso(skb))) &&
869 (sk->sk_protocol == IPPROTO_UDP) && 878 (sk->sk_protocol == IPPROTO_UDP) &&
870 (rt->u.dst.dev->features & NETIF_F_UFO)) { 879 (rt->dst.dev->features & NETIF_F_UFO)) {
871 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len, 880 err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
872 fragheaderlen, transhdrlen, mtu, 881 fragheaderlen, transhdrlen, mtu,
873 flags); 882 flags);
@@ -883,7 +892,7 @@ int ip_append_data(struct sock *sk,
883 * adding appropriate IP header. 892 * adding appropriate IP header.
884 */ 893 */
885 894
886 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 895 if (!skb)
887 goto alloc_new_skb; 896 goto alloc_new_skb;
888 897
889 while (length > 0) { 898 while (length > 0) {
@@ -915,7 +924,7 @@ alloc_new_skb:
915 fraglen = datalen + fragheaderlen; 924 fraglen = datalen + fragheaderlen;
916 925
917 if ((flags & MSG_MORE) && 926 if ((flags & MSG_MORE) &&
918 !(rt->u.dst.dev->features&NETIF_F_SG)) 927 !(rt->dst.dev->features&NETIF_F_SG))
919 alloclen = mtu; 928 alloclen = mtu;
920 else 929 else
921 alloclen = datalen + fragheaderlen; 930 alloclen = datalen + fragheaderlen;
@@ -926,7 +935,7 @@ alloc_new_skb:
926 * the last. 935 * the last.
927 */ 936 */
928 if (datalen == length + fraggap) 937 if (datalen == length + fraggap)
929 alloclen += rt->u.dst.trailer_len; 938 alloclen += rt->dst.trailer_len;
930 939
931 if (transhdrlen) { 940 if (transhdrlen) {
932 skb = sock_alloc_send_skb(sk, 941 skb = sock_alloc_send_skb(sk,
@@ -999,7 +1008,7 @@ alloc_new_skb:
999 if (copy > length) 1008 if (copy > length)
1000 copy = length; 1009 copy = length;
1001 1010
1002 if (!(rt->u.dst.dev->features&NETIF_F_SG)) { 1011 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1003 unsigned int off; 1012 unsigned int off;
1004 1013
1005 off = skb->len; 1014 off = skb->len;
@@ -1094,10 +1103,10 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1094 if (inet->cork.flags & IPCORK_OPT) 1103 if (inet->cork.flags & IPCORK_OPT)
1095 opt = inet->cork.opt; 1104 opt = inet->cork.opt;
1096 1105
1097 if (!(rt->u.dst.dev->features&NETIF_F_SG)) 1106 if (!(rt->dst.dev->features&NETIF_F_SG))
1098 return -EOPNOTSUPP; 1107 return -EOPNOTSUPP;
1099 1108
1100 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 1109 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1101 mtu = inet->cork.fragsize; 1110 mtu = inet->cork.fragsize;
1102 1111
1103 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0); 1112 fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
@@ -1112,8 +1121,9 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
1112 return -EINVAL; 1121 return -EINVAL;
1113 1122
1114 inet->cork.length += size; 1123 inet->cork.length += size;
1115 if ((sk->sk_protocol == IPPROTO_UDP) && 1124 if ((size + skb->len > mtu) &&
1116 (rt->u.dst.dev->features & NETIF_F_UFO)) { 1125 (sk->sk_protocol == IPPROTO_UDP) &&
1126 (rt->dst.dev->features & NETIF_F_UFO)) {
1117 skb_shinfo(skb)->gso_size = mtu - fragheaderlen; 1127 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1118 skb_shinfo(skb)->gso_type = SKB_GSO_UDP; 1128 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1119 } 1129 }
@@ -1265,8 +1275,8 @@ int ip_push_pending_frames(struct sock *sk)
1265 * If local_df is set too, we still allow to fragment this frame 1275 * If local_df is set too, we still allow to fragment this frame
1266 * locally. */ 1276 * locally. */
1267 if (inet->pmtudisc >= IP_PMTUDISC_DO || 1277 if (inet->pmtudisc >= IP_PMTUDISC_DO ||
1268 (skb->len <= dst_mtu(&rt->u.dst) && 1278 (skb->len <= dst_mtu(&rt->dst) &&
1269 ip_dont_fragment(sk, &rt->u.dst))) 1279 ip_dont_fragment(sk, &rt->dst)))
1270 df = htons(IP_DF); 1280 df = htons(IP_DF);
1271 1281
1272 if (inet->cork.flags & IPCORK_OPT) 1282 if (inet->cork.flags & IPCORK_OPT)
@@ -1275,7 +1285,7 @@ int ip_push_pending_frames(struct sock *sk)
1275 if (rt->rt_type == RTN_MULTICAST) 1285 if (rt->rt_type == RTN_MULTICAST)
1276 ttl = inet->mc_ttl; 1286 ttl = inet->mc_ttl;
1277 else 1287 else
1278 ttl = ip_select_ttl(inet, &rt->u.dst); 1288 ttl = ip_select_ttl(inet, &rt->dst);
1279 1289
1280 iph = (struct iphdr *)skb->data; 1290 iph = (struct iphdr *)skb->data;
1281 iph->version = 4; 1291 iph->version = 4;
@@ -1286,7 +1296,7 @@ int ip_push_pending_frames(struct sock *sk)
1286 } 1296 }
1287 iph->tos = inet->tos; 1297 iph->tos = inet->tos;
1288 iph->frag_off = df; 1298 iph->frag_off = df;
1289 ip_select_ident(iph, &rt->u.dst, sk); 1299 ip_select_ident(iph, &rt->dst, sk);
1290 iph->ttl = ttl; 1300 iph->ttl = ttl;
1291 iph->protocol = sk->sk_protocol; 1301 iph->protocol = sk->sk_protocol;
1292 iph->saddr = rt->rt_src; 1302 iph->saddr = rt->rt_src;
@@ -1299,7 +1309,7 @@ int ip_push_pending_frames(struct sock *sk)
1299 * on dst refcount 1309 * on dst refcount
1300 */ 1310 */
1301 inet->cork.dst = NULL; 1311 inet->cork.dst = NULL;
1302 skb_dst_set(skb, &rt->u.dst); 1312 skb_dst_set(skb, &rt->dst);
1303 1313
1304 if (iph->protocol == IPPROTO_ICMP) 1314 if (iph->protocol == IPPROTO_ICMP)
1305 icmp_out_count(net, ((struct icmphdr *) 1315 icmp_out_count(net, ((struct icmphdr *)
@@ -1436,7 +1446,3 @@ void __init ip_init(void)
1436 igmp_mc_proc_init(); 1446 igmp_mc_proc_init();
1437#endif 1447#endif
1438} 1448}
1439
1440EXPORT_SYMBOL(ip_generic_getfrag);
1441EXPORT_SYMBOL(ip_queue_xmit);
1442EXPORT_SYMBOL(ip_send_check);
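
The rt->u.dst to rt->dst churn in the hunks above comes from flattening the anonymous union in struct rtable so that the dst_entry is embedded directly. A minimal userspace sketch of the layout change, with simplified stand-in types rather than the kernel's real definitions:

#include <stddef.h>

/* Simplified stand-ins, not the kernel's real structures. */
struct dst_entry { unsigned int mtu; };

struct rtable_old {
	union {
		struct dst_entry dst;
	} u;				/* callers wrote rt->u.dst */
};

struct rtable_new {
	struct dst_entry dst;		/* callers now write rt->dst */
};

int main(void)
{
	/* Either way the dst_entry sits at offset 0, which is what
	 * generic code casting between rtable and dst_entry relies on;
	 * only the spelling at every call site changes. */
	return (offsetof(struct rtable_old, u.dst) == 0 &&
		offsetof(struct rtable_new, dst) == 0) ? 0 : 1;
}
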
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1e64dabbd232..6c40a8c46e79 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -239,7 +239,16 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
239 sent to multicast group to reach destination designated router. 239 sent to multicast group to reach destination designated router.
240 */ 240 */
241struct ip_ra_chain *ip_ra_chain; 241struct ip_ra_chain *ip_ra_chain;
242DEFINE_RWLOCK(ip_ra_lock); 242static DEFINE_SPINLOCK(ip_ra_lock);
243
244
245static void ip_ra_destroy_rcu(struct rcu_head *head)
246{
247 struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
248
249 sock_put(ra->saved_sk);
250 kfree(ra);
251}
243 252
244int ip_ra_control(struct sock *sk, unsigned char on, 253int ip_ra_control(struct sock *sk, unsigned char on,
245 void (*destructor)(struct sock *)) 254 void (*destructor)(struct sock *))
@@ -251,35 +260,42 @@ int ip_ra_control(struct sock *sk, unsigned char on,
251 260
252 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; 261 new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
253 262
254 write_lock_bh(&ip_ra_lock); 263 spin_lock_bh(&ip_ra_lock);
255 for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) { 264 for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
256 if (ra->sk == sk) { 265 if (ra->sk == sk) {
257 if (on) { 266 if (on) {
258 write_unlock_bh(&ip_ra_lock); 267 spin_unlock_bh(&ip_ra_lock);
259 kfree(new_ra); 268 kfree(new_ra);
260 return -EADDRINUSE; 269 return -EADDRINUSE;
261 } 270 }
 262 *rap = ra->next; 271 /* don't let ip_call_ra_chain() use sk again */
263 write_unlock_bh(&ip_ra_lock); 272 ra->sk = NULL;
273 rcu_assign_pointer(*rap, ra->next);
274 spin_unlock_bh(&ip_ra_lock);
264 275
265 if (ra->destructor) 276 if (ra->destructor)
266 ra->destructor(sk); 277 ra->destructor(sk);
267 sock_put(sk); 278 /*
268 kfree(ra); 279 * Delay sock_put(sk) and kfree(ra) after one rcu grace
280 * period. This guarantee ip_call_ra_chain() dont need
281 * to mess with socket refcounts.
282 */
283 ra->saved_sk = sk;
284 call_rcu(&ra->rcu, ip_ra_destroy_rcu);
269 return 0; 285 return 0;
270 } 286 }
271 } 287 }
272 if (new_ra == NULL) { 288 if (new_ra == NULL) {
273 write_unlock_bh(&ip_ra_lock); 289 spin_unlock_bh(&ip_ra_lock);
274 return -ENOBUFS; 290 return -ENOBUFS;
275 } 291 }
276 new_ra->sk = sk; 292 new_ra->sk = sk;
277 new_ra->destructor = destructor; 293 new_ra->destructor = destructor;
278 294
279 new_ra->next = ra; 295 new_ra->next = ra;
280 *rap = new_ra; 296 rcu_assign_pointer(*rap, new_ra);
281 sock_hold(sk); 297 sock_hold(sk);
282 write_unlock_bh(&ip_ra_lock); 298 spin_unlock_bh(&ip_ra_lock);
283 299
284 return 0; 300 return 0;
285} 301}
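
The conversion above moves ip_ra_chain maintenance from a reader/writer lock to a spinlock plus RCU: the entry is unpublished under the lock, and the free is deferred one grace period so lockless readers in ip_call_ra_chain() never touch freed memory. A kernel-style sketch of the pattern, with simplified names:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct ra_entry {
	struct ra_entry *next;
	struct rcu_head rcu;
};

static DEFINE_SPINLOCK(ra_lock);

static void ra_free_rcu(struct rcu_head *head)
{
	/* Runs after a grace period: no reader can still hold a pointer. */
	kfree(container_of(head, struct ra_entry, rcu));
}

static void ra_unlink(struct ra_entry **prev, struct ra_entry *ra)
{
	spin_lock_bh(&ra_lock);
	rcu_assign_pointer(*prev, ra->next);	/* unpublish the entry */
	spin_unlock_bh(&ra_lock);
	call_rcu(&ra->rcu, ra_free_rcu);	/* reclaim later, not now */
}
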
@@ -287,12 +303,8 @@ int ip_ra_control(struct sock *sk, unsigned char on,
287void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, 303void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
288 __be16 port, u32 info, u8 *payload) 304 __be16 port, u32 info, u8 *payload)
289{ 305{
290 struct inet_sock *inet = inet_sk(sk);
291 struct sock_exterr_skb *serr; 306 struct sock_exterr_skb *serr;
292 307
293 if (!inet->recverr)
294 return;
295
296 skb = skb_clone(skb, GFP_ATOMIC); 308 skb = skb_clone(skb, GFP_ATOMIC);
297 if (!skb) 309 if (!skb)
298 return; 310 return;
@@ -453,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
453 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | 465 (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
454 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | 466 (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
455 (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) | 467 (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
456 (1<<IP_MINTTL))) || 468 (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
457 optname == IP_MULTICAST_TTL || 469 optname == IP_MULTICAST_TTL ||
458 optname == IP_MULTICAST_ALL || 470 optname == IP_MULTICAST_ALL ||
459 optname == IP_MULTICAST_LOOP || 471 optname == IP_MULTICAST_LOOP ||
@@ -576,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
576 } 588 }
577 inet->hdrincl = val ? 1 : 0; 589 inet->hdrincl = val ? 1 : 0;
578 break; 590 break;
591 case IP_NODEFRAG:
592 if (sk->sk_type != SOCK_RAW) {
593 err = -ENOPROTOOPT;
594 break;
595 }
596 inet->nodefrag = val ? 1 : 0;
597 break;
579 case IP_MTU_DISCOVER: 598 case IP_MTU_DISCOVER:
580 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) 599 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
581 goto e_inval; 600 goto e_inval;
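
IP_NODEFRAG is honoured only on raw sockets, as the SOCK_RAW check above enforces. A hypothetical userspace sketch; the option value is an assumption taken from this series' <linux/in.h>:

#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>

#ifndef IP_NODEFRAG
#define IP_NODEFRAG 22	/* assumption: value per this series' <linux/in.h> */
#endif

int main(void)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_UDP); /* needs CAP_NET_RAW */

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Ask the stack to skip early defragmentation for this socket's
	 * traffic; on anything but SOCK_RAW the handler above returns
	 * ENOPROTOOPT. */
	if (setsockopt(fd, IPPROTO_IP, IP_NODEFRAG, &one, sizeof(one)) < 0) {
		perror("setsockopt(IP_NODEFRAG)");
		return 1;
	}
	return 0;
}
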
@@ -958,6 +977,22 @@ e_inval:
958 return -EINVAL; 977 return -EINVAL;
959} 978}
960 979
980/**
981 * ip_queue_rcv_skb - Queue an skb into sock receive queue
982 * @sk: socket
983 * @skb: buffer
984 *
 985 * Queues an skb into the socket receive queue. If the IP_CMSG_PKTINFO
 986 * option is not set, we drop the skb dst entry now, while its cache line is hot.
987 */
988int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
989{
990 if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
991 skb_dst_drop(skb);
992 return sock_queue_rcv_skb(sk, skb);
993}
994EXPORT_SYMBOL(ip_queue_rcv_skb);
995
961int ip_setsockopt(struct sock *sk, int level, 996int ip_setsockopt(struct sock *sk, int level,
962 int optname, char __user *optval, unsigned int optlen) 997 int optname, char __user *optval, unsigned int optlen)
963{ 998{
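
The dst is kept only when the receiver asked for packet-info ancillary data, which the cmsg code later fills from the attached route. For reference, a minimal userspace sketch of a receiver that opts into IP_PKTINFO and therefore skips the early drop:

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	int on = 1;
	struct sockaddr_in sa;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	/* With IP_PKTINFO set, ip_queue_rcv_skb() keeps the skb dst so the
	 * kernel can report the receiving interface and addresses. */
	setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &on, sizeof(on));

	memset(&sa, 0, sizeof(sa));
	sa.sin_family = AF_INET;
	sa.sin_port = htons(5000);	/* illustrative port */
	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		perror("bind");
		return 1;
	}
	return 0;
}
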
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 067ce9e043dc..3a6e1ec5e9ae 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -665,6 +665,13 @@ ic_dhcp_init_options(u8 *options)
665 memcpy(e, ic_req_params, sizeof(ic_req_params)); 665 memcpy(e, ic_req_params, sizeof(ic_req_params));
666 e += sizeof(ic_req_params); 666 e += sizeof(ic_req_params);
667 667
668 if (ic_host_name_set) {
669 *e++ = 12; /* host-name */
670 len = strlen(utsname()->nodename);
671 *e++ = len;
672 memcpy(e, utsname()->nodename, len);
673 e += len;
674 }
668 if (*vendor_class_identifier) { 675 if (*vendor_class_identifier) {
669 printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n", 676 printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n",
670 vendor_class_identifier); 677 vendor_class_identifier);
@@ -976,7 +983,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
976 /* Is it a reply for the device we are configuring? */ 983 /* Is it a reply for the device we are configuring? */
977 if (b->xid != ic_dev_xid) { 984 if (b->xid != ic_dev_xid) {
978 if (net_ratelimit()) 985 if (net_ratelimit())
979 printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n"); 986 printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet\n");
980 goto drop_unlock; 987 goto drop_unlock;
981 } 988 }
982 989
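
The host-name hunk above emits a standard DHCP TLV: one byte of tag (12), one byte of length, then the name bytes with no terminator. A standalone sketch of that encoding; put_dhcp_option() is an illustrative helper, not a kernel function:

#include <string.h>
#include <stdio.h>

/* DHCP options carry a single-byte length, so val is assumed < 256. */
static unsigned char *put_dhcp_option(unsigned char *e, unsigned char tag,
				      const char *val)
{
	size_t len = strlen(val);

	*e++ = tag;			/* option tag, e.g. 12 = host-name */
	*e++ = (unsigned char)len;	/* option length */
	memcpy(e, val, len);		/* option payload, no NUL */
	return e + len;
}

int main(void)
{
	unsigned char buf[64], *e = buf;

	e = put_dhcp_option(e, 12, "testhost");
	printf("%zu option bytes written\n", (size_t)(e - buf));
	return 0;
}
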
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 0b27b14dcc9d..ec036731a70b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -374,11 +374,8 @@ static int ipip_rcv(struct sk_buff *skb)
374 skb->protocol = htons(ETH_P_IP); 374 skb->protocol = htons(ETH_P_IP);
375 skb->pkt_type = PACKET_HOST; 375 skb->pkt_type = PACKET_HOST;
376 376
377 tunnel->dev->stats.rx_packets++; 377 skb_tunnel_rx(skb, tunnel->dev);
378 tunnel->dev->stats.rx_bytes += skb->len; 378
379 skb->dev = tunnel->dev;
380 skb_dst_drop(skb);
381 nf_reset(skb);
382 ipip_ecn_decapsulate(iph, skb); 379 ipip_ecn_decapsulate(iph, skb);
383 netif_rx(skb); 380 netif_rx(skb);
384 rcu_read_unlock(); 381 rcu_read_unlock();
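
skb_tunnel_rx() is a new helper that folds together the bookkeeping the removed lines did by hand. A sketch of what it is expected to do, based on this series' include/net/dst.h rather than an authoritative definition:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Sketch of the helper's effect on a freshly decapsulated skb. */
static inline void skb_tunnel_rx_sketch(struct sk_buff *skb,
					struct net_device *dev)
{
	dev->stats.rx_packets++;	/* account rx on the tunnel device */
	dev->stats.rx_bytes += skb->len;
	skb->dev = dev;			/* reattach skb to the tunnel dev */
	skb_dst_drop(skb);		/* the outer route no longer applies */
	nf_reset(skb);			/* forget outer conntrack state */
}
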
@@ -438,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
438 goto tx_error_icmp; 435 goto tx_error_icmp;
439 } 436 }
440 } 437 }
441 tdev = rt->u.dst.dev; 438 tdev = rt->dst.dev;
442 439
443 if (tdev == dev) { 440 if (tdev == dev) {
444 ip_rt_put(rt); 441 ip_rt_put(rt);
@@ -449,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
449 df |= old_iph->frag_off & htons(IP_DF); 446 df |= old_iph->frag_off & htons(IP_DF);
450 447
451 if (df) { 448 if (df) {
452 mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); 449 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
453 450
454 if (mtu < 68) { 451 if (mtu < 68) {
455 stats->collisions++; 452 stats->collisions++;
@@ -506,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
506 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 503 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
507 IPSKB_REROUTED); 504 IPSKB_REROUTED);
508 skb_dst_drop(skb); 505 skb_dst_drop(skb);
509 skb_dst_set(skb, &rt->u.dst); 506 skb_dst_set(skb, &rt->dst);
510 507
511 /* 508 /*
512 * Push down and install the IPIP header. 509 * Push down and install the IPIP header.
@@ -555,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
555 .proto = IPPROTO_IPIP }; 552 .proto = IPPROTO_IPIP };
556 struct rtable *rt; 553 struct rtable *rt;
557 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 554 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
558 tdev = rt->u.dst.dev; 555 tdev = rt->dst.dev;
559 ip_rt_put(rt); 556 ip_rt_put(rt);
560 } 557 }
561 dev->flags |= IFF_POINTOPOINT; 558 dev->flags |= IFF_POINTOPOINT;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d4f6d1340a4..179fcab866fc 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -22,7 +22,7 @@
22 * overflow. 22 * overflow.
23 * Carlos Picoto : PIMv1 Support 23 * Carlos Picoto : PIMv1 Support
24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header 24 * Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
25 * Relax this requrement to work with older peers. 25 * Relax this requirement to work with older peers.
26 * 26 *
27 */ 27 */
28 28
@@ -63,11 +63,40 @@
63#include <net/ipip.h> 63#include <net/ipip.h>
64#include <net/checksum.h> 64#include <net/checksum.h>
65#include <net/netlink.h> 65#include <net/netlink.h>
66#include <net/fib_rules.h>
66 67
67#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) 68#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
68#define CONFIG_IP_PIMSM 1 69#define CONFIG_IP_PIMSM 1
69#endif 70#endif
70 71
72struct mr_table {
73 struct list_head list;
74#ifdef CONFIG_NET_NS
75 struct net *net;
76#endif
77 u32 id;
78 struct sock *mroute_sk;
79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES];
82 struct vif_device vif_table[MAXVIFS];
83 int maxvif;
84 atomic_t cache_resolve_queue_len;
85 int mroute_do_assert;
86 int mroute_do_pim;
87#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88 int mroute_reg_vif_num;
89#endif
90};
91
92struct ipmr_rule {
93 struct fib_rule common;
94};
95
96struct ipmr_result {
97 struct mr_table *mrt;
98};
99
71/* Big lock, protecting vif table, mrt cache and mroute socket state. 100/* Big lock, protecting vif table, mrt cache and mroute socket state.
72 Note that the changes are semaphored via rtnl_lock. 101 Note that the changes are semaphored via rtnl_lock.
73 */ 102 */
@@ -78,9 +107,7 @@ static DEFINE_RWLOCK(mrt_lock);
78 * Multicast router control variables 107 * Multicast router control variables
79 */ 108 */
80 109
81#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL) 110#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
82
83static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
84 111
85/* Special spinlock for queue of unresolved entries */ 112/* Special spinlock for queue of unresolved entries */
86static DEFINE_SPINLOCK(mfc_unres_lock); 113static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -95,12 +122,217 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
95 122
96static struct kmem_cache *mrt_cachep __read_mostly; 123static struct kmem_cache *mrt_cachep __read_mostly;
97 124
98static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); 125static struct mr_table *ipmr_new_table(struct net *net, u32 id);
99static int ipmr_cache_report(struct net *net, 126static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127 struct sk_buff *skb, struct mfc_cache *cache,
128 int local);
129static int ipmr_cache_report(struct mr_table *mrt,
100 struct sk_buff *pkt, vifi_t vifi, int assert); 130 struct sk_buff *pkt, vifi_t vifi, int assert);
101static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); 131static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132 struct mfc_cache *c, struct rtmsg *rtm);
133static void ipmr_expire_process(unsigned long arg);
134
135#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136#define ipmr_for_each_table(mrt, net) \
137 list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140{
141 struct mr_table *mrt;
142
143 ipmr_for_each_table(mrt, net) {
144 if (mrt->id == id)
145 return mrt;
146 }
147 return NULL;
148}
149
150static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151 struct mr_table **mrt)
152{
153 struct ipmr_result res;
154 struct fib_lookup_arg arg = { .result = &res, };
155 int err;
156
157 err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158 if (err < 0)
159 return err;
160 *mrt = res.mrt;
161 return 0;
162}
163
164static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165 int flags, struct fib_lookup_arg *arg)
166{
167 struct ipmr_result *res = arg->result;
168 struct mr_table *mrt;
169
170 switch (rule->action) {
171 case FR_ACT_TO_TBL:
172 break;
173 case FR_ACT_UNREACHABLE:
174 return -ENETUNREACH;
175 case FR_ACT_PROHIBIT:
176 return -EACCES;
177 case FR_ACT_BLACKHOLE:
178 default:
179 return -EINVAL;
180 }
181
182 mrt = ipmr_get_table(rule->fr_net, rule->table);
183 if (mrt == NULL)
184 return -EAGAIN;
185 res->mrt = mrt;
186 return 0;
187}
188
189static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
190{
191 return 1;
192}
193
194static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195 FRA_GENERIC_POLICY,
196};
197
198static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199 struct fib_rule_hdr *frh, struct nlattr **tb)
200{
201 return 0;
202}
203
204static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205 struct nlattr **tb)
206{
207 return 1;
208}
209
210static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211 struct fib_rule_hdr *frh)
212{
213 frh->dst_len = 0;
214 frh->src_len = 0;
215 frh->tos = 0;
216 return 0;
217}
102 218
103static struct timer_list ipmr_expire_timer; 219static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
220 .family = RTNL_FAMILY_IPMR,
221 .rule_size = sizeof(struct ipmr_rule),
222 .addr_size = sizeof(u32),
223 .action = ipmr_rule_action,
224 .match = ipmr_rule_match,
225 .configure = ipmr_rule_configure,
226 .compare = ipmr_rule_compare,
227 .default_pref = fib_default_rule_pref,
228 .fill = ipmr_rule_fill,
229 .nlgroup = RTNLGRP_IPV4_RULE,
230 .policy = ipmr_rule_policy,
231 .owner = THIS_MODULE,
232};
233
234static int __net_init ipmr_rules_init(struct net *net)
235{
236 struct fib_rules_ops *ops;
237 struct mr_table *mrt;
238 int err;
239
240 ops = fib_rules_register(&ipmr_rules_ops_template, net);
241 if (IS_ERR(ops))
242 return PTR_ERR(ops);
243
244 INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246 mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247 if (mrt == NULL) {
248 err = -ENOMEM;
249 goto err1;
250 }
251
252 err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253 if (err < 0)
254 goto err2;
255
256 net->ipv4.mr_rules_ops = ops;
257 return 0;
258
259err2:
260 kfree(mrt);
261err1:
262 fib_rules_unregister(ops);
263 return err;
264}
265
266static void __net_exit ipmr_rules_exit(struct net *net)
267{
268 struct mr_table *mrt, *next;
269
270 list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
271 list_del(&mrt->list);
272 kfree(mrt);
273 }
274 fib_rules_unregister(net->ipv4.mr_rules_ops);
275}
276#else
277#define ipmr_for_each_table(mrt, net) \
278 for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
279
280static struct mr_table *ipmr_get_table(struct net *net, u32 id)
281{
282 return net->ipv4.mrt;
283}
284
285static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
286 struct mr_table **mrt)
287{
288 *mrt = net->ipv4.mrt;
289 return 0;
290}
291
292static int __net_init ipmr_rules_init(struct net *net)
293{
294 net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
295 return net->ipv4.mrt ? 0 : -ENOMEM;
296}
297
298static void __net_exit ipmr_rules_exit(struct net *net)
299{
300 kfree(net->ipv4.mrt);
301}
302#endif
303
304static struct mr_table *ipmr_new_table(struct net *net, u32 id)
305{
306 struct mr_table *mrt;
307 unsigned int i;
308
309 mrt = ipmr_get_table(net, id);
310 if (mrt != NULL)
311 return mrt;
312
313 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
314 if (mrt == NULL)
315 return NULL;
316 write_pnet(&mrt->net, net);
317 mrt->id = id;
318
319 /* Forwarding cache */
320 for (i = 0; i < MFC_LINES; i++)
321 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
322
323 INIT_LIST_HEAD(&mrt->mfc_unres_queue);
324
325 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
326 (unsigned long)mrt);
327
328#ifdef CONFIG_IP_PIMSM
329 mrt->mroute_reg_vif_num = -1;
330#endif
331#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
332 list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
333#endif
334 return mrt;
335}
104 336
105/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ 337/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
106 338
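
Without CONFIG_IP_MROUTE_MULTIPLE_TABLES, ipmr_for_each_table() above degenerates into a loop that visits the lone per-net table at most once. A standalone illustration of the idiom, with illustrative names:

#include <stdio.h>

struct mr_table { int id; };

/* Runs the body at most once: starts at the single table, then NULL. */
#define for_each_table(mrt, only) \
	for (mrt = (only); mrt; mrt = NULL)

int main(void)
{
	struct mr_table one = { .id = 0 }, *mrt;

	for_each_table(mrt, &one)
		printf("table %d\n", mrt->id);
	return 0;
}
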
@@ -201,12 +433,24 @@ failure:
201static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 433static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
202{ 434{
203 struct net *net = dev_net(dev); 435 struct net *net = dev_net(dev);
436 struct mr_table *mrt;
437 struct flowi fl = {
438 .oif = dev->ifindex,
439 .iif = skb->skb_iif,
440 .mark = skb->mark,
441 };
442 int err;
443
444 err = ipmr_fib_lookup(net, &fl, &mrt);
445 if (err < 0) {
446 kfree_skb(skb);
447 return err;
448 }
204 449
205 read_lock(&mrt_lock); 450 read_lock(&mrt_lock);
206 dev->stats.tx_bytes += skb->len; 451 dev->stats.tx_bytes += skb->len;
207 dev->stats.tx_packets++; 452 dev->stats.tx_packets++;
208 ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num, 453 ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
209 IGMPMSG_WHOLEPKT);
210 read_unlock(&mrt_lock); 454 read_unlock(&mrt_lock);
211 kfree_skb(skb); 455 kfree_skb(skb);
212 return NETDEV_TX_OK; 456 return NETDEV_TX_OK;
@@ -226,12 +470,18 @@ static void reg_vif_setup(struct net_device *dev)
226 dev->features |= NETIF_F_NETNS_LOCAL; 470 dev->features |= NETIF_F_NETNS_LOCAL;
227} 471}
228 472
229static struct net_device *ipmr_reg_vif(struct net *net) 473static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
230{ 474{
231 struct net_device *dev; 475 struct net_device *dev;
232 struct in_device *in_dev; 476 struct in_device *in_dev;
477 char name[IFNAMSIZ];
478
479 if (mrt->id == RT_TABLE_DEFAULT)
480 sprintf(name, "pimreg");
481 else
482 sprintf(name, "pimreg%u", mrt->id);
233 483
234 dev = alloc_netdev(0, "pimreg", reg_vif_setup); 484 dev = alloc_netdev(0, name, reg_vif_setup);
235 485
236 if (dev == NULL) 486 if (dev == NULL)
237 return NULL; 487 return NULL;
@@ -276,17 +526,17 @@ failure:
276 * @notify: Set to 1, if the caller is a notifier_call 526 * @notify: Set to 1, if the caller is a notifier_call
277 */ 527 */
278 528
279static int vif_delete(struct net *net, int vifi, int notify, 529static int vif_delete(struct mr_table *mrt, int vifi, int notify,
280 struct list_head *head) 530 struct list_head *head)
281{ 531{
282 struct vif_device *v; 532 struct vif_device *v;
283 struct net_device *dev; 533 struct net_device *dev;
284 struct in_device *in_dev; 534 struct in_device *in_dev;
285 535
286 if (vifi < 0 || vifi >= net->ipv4.maxvif) 536 if (vifi < 0 || vifi >= mrt->maxvif)
287 return -EADDRNOTAVAIL; 537 return -EADDRNOTAVAIL;
288 538
289 v = &net->ipv4.vif_table[vifi]; 539 v = &mrt->vif_table[vifi];
290 540
291 write_lock_bh(&mrt_lock); 541 write_lock_bh(&mrt_lock);
292 dev = v->dev; 542 dev = v->dev;
@@ -298,17 +548,17 @@ static int vif_delete(struct net *net, int vifi, int notify,
298 } 548 }
299 549
300#ifdef CONFIG_IP_PIMSM 550#ifdef CONFIG_IP_PIMSM
301 if (vifi == net->ipv4.mroute_reg_vif_num) 551 if (vifi == mrt->mroute_reg_vif_num)
302 net->ipv4.mroute_reg_vif_num = -1; 552 mrt->mroute_reg_vif_num = -1;
303#endif 553#endif
304 554
305 if (vifi+1 == net->ipv4.maxvif) { 555 if (vifi+1 == mrt->maxvif) {
306 int tmp; 556 int tmp;
307 for (tmp=vifi-1; tmp>=0; tmp--) { 557 for (tmp=vifi-1; tmp>=0; tmp--) {
308 if (VIF_EXISTS(net, tmp)) 558 if (VIF_EXISTS(mrt, tmp))
309 break; 559 break;
310 } 560 }
311 net->ipv4.maxvif = tmp+1; 561 mrt->maxvif = tmp+1;
312 } 562 }
313 563
314 write_unlock_bh(&mrt_lock); 564 write_unlock_bh(&mrt_lock);
@@ -329,7 +579,6 @@ static int vif_delete(struct net *net, int vifi, int notify,
329 579
330static inline void ipmr_cache_free(struct mfc_cache *c) 580static inline void ipmr_cache_free(struct mfc_cache *c)
331{ 581{
332 release_net(mfc_net(c));
333 kmem_cache_free(mrt_cachep, c); 582 kmem_cache_free(mrt_cachep, c);
334} 583}
335 584
@@ -337,13 +586,13 @@ static inline void ipmr_cache_free(struct mfc_cache *c)
337 and reporting error to netlink readers. 586 and reporting error to netlink readers.
338 */ 587 */
339 588
340static void ipmr_destroy_unres(struct mfc_cache *c) 589static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
341{ 590{
591 struct net *net = read_pnet(&mrt->net);
342 struct sk_buff *skb; 592 struct sk_buff *skb;
343 struct nlmsgerr *e; 593 struct nlmsgerr *e;
344 struct net *net = mfc_net(c);
345 594
346 atomic_dec(&net->ipv4.cache_resolve_queue_len); 595 atomic_dec(&mrt->cache_resolve_queue_len);
347 596
348 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { 597 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
349 if (ip_hdr(skb)->version == 0) { 598 if (ip_hdr(skb)->version == 0) {
@@ -364,42 +613,40 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
364} 613}
365 614
366 615
367/* Single timer process for all the unresolved queue. */ 616/* Timer process for the unresolved queue. */
368 617
369static void ipmr_expire_process(unsigned long dummy) 618static void ipmr_expire_process(unsigned long arg)
370{ 619{
620 struct mr_table *mrt = (struct mr_table *)arg;
371 unsigned long now; 621 unsigned long now;
372 unsigned long expires; 622 unsigned long expires;
373 struct mfc_cache *c, **cp; 623 struct mfc_cache *c, *next;
374 624
375 if (!spin_trylock(&mfc_unres_lock)) { 625 if (!spin_trylock(&mfc_unres_lock)) {
376 mod_timer(&ipmr_expire_timer, jiffies+HZ/10); 626 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
377 return; 627 return;
378 } 628 }
379 629
380 if (mfc_unres_queue == NULL) 630 if (list_empty(&mrt->mfc_unres_queue))
381 goto out; 631 goto out;
382 632
383 now = jiffies; 633 now = jiffies;
384 expires = 10*HZ; 634 expires = 10*HZ;
385 cp = &mfc_unres_queue;
386 635
387 while ((c=*cp) != NULL) { 636 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
388 if (time_after(c->mfc_un.unres.expires, now)) { 637 if (time_after(c->mfc_un.unres.expires, now)) {
389 unsigned long interval = c->mfc_un.unres.expires - now; 638 unsigned long interval = c->mfc_un.unres.expires - now;
390 if (interval < expires) 639 if (interval < expires)
391 expires = interval; 640 expires = interval;
392 cp = &c->next;
393 continue; 641 continue;
394 } 642 }
395 643
396 *cp = c->next; 644 list_del(&c->list);
397 645 ipmr_destroy_unres(mrt, c);
398 ipmr_destroy_unres(c);
399 } 646 }
400 647
401 if (mfc_unres_queue != NULL) 648 if (!list_empty(&mrt->mfc_unres_queue))
402 mod_timer(&ipmr_expire_timer, jiffies + expires); 649 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
403 650
404out: 651out:
405 spin_unlock(&mfc_unres_lock); 652 spin_unlock(&mfc_unres_lock);
@@ -407,17 +654,17 @@ out:
407 654
408/* Fill oifs list. It is called under write locked mrt_lock. */ 655/* Fill oifs list. It is called under write locked mrt_lock. */
409 656
410static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) 657static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
658 unsigned char *ttls)
411{ 659{
412 int vifi; 660 int vifi;
413 struct net *net = mfc_net(cache);
414 661
415 cache->mfc_un.res.minvif = MAXVIFS; 662 cache->mfc_un.res.minvif = MAXVIFS;
416 cache->mfc_un.res.maxvif = 0; 663 cache->mfc_un.res.maxvif = 0;
417 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 664 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
418 665
419 for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) { 666 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
420 if (VIF_EXISTS(net, vifi) && 667 if (VIF_EXISTS(mrt, vifi) &&
421 ttls[vifi] && ttls[vifi] < 255) { 668 ttls[vifi] && ttls[vifi] < 255) {
422 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 669 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
423 if (cache->mfc_un.res.minvif > vifi) 670 if (cache->mfc_un.res.minvif > vifi)
@@ -428,16 +675,17 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
428 } 675 }
429} 676}
430 677
431static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) 678static int vif_add(struct net *net, struct mr_table *mrt,
679 struct vifctl *vifc, int mrtsock)
432{ 680{
433 int vifi = vifc->vifc_vifi; 681 int vifi = vifc->vifc_vifi;
434 struct vif_device *v = &net->ipv4.vif_table[vifi]; 682 struct vif_device *v = &mrt->vif_table[vifi];
435 struct net_device *dev; 683 struct net_device *dev;
436 struct in_device *in_dev; 684 struct in_device *in_dev;
437 int err; 685 int err;
438 686
439 /* Is vif busy ? */ 687 /* Is vif busy ? */
440 if (VIF_EXISTS(net, vifi)) 688 if (VIF_EXISTS(mrt, vifi))
441 return -EADDRINUSE; 689 return -EADDRINUSE;
442 690
443 switch (vifc->vifc_flags) { 691 switch (vifc->vifc_flags) {
@@ -447,9 +695,9 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
447 * Special Purpose VIF in PIM 695 * Special Purpose VIF in PIM
448 * All the packets will be sent to the daemon 696 * All the packets will be sent to the daemon
449 */ 697 */
450 if (net->ipv4.mroute_reg_vif_num >= 0) 698 if (mrt->mroute_reg_vif_num >= 0)
451 return -EADDRINUSE; 699 return -EADDRINUSE;
452 dev = ipmr_reg_vif(net); 700 dev = ipmr_reg_vif(net, mrt);
453 if (!dev) 701 if (!dev)
454 return -ENOBUFS; 702 return -ENOBUFS;
455 err = dev_set_allmulti(dev, 1); 703 err = dev_set_allmulti(dev, 1);
@@ -525,49 +773,47 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
525 v->dev = dev; 773 v->dev = dev;
526#ifdef CONFIG_IP_PIMSM 774#ifdef CONFIG_IP_PIMSM
527 if (v->flags&VIFF_REGISTER) 775 if (v->flags&VIFF_REGISTER)
528 net->ipv4.mroute_reg_vif_num = vifi; 776 mrt->mroute_reg_vif_num = vifi;
529#endif 777#endif
530 if (vifi+1 > net->ipv4.maxvif) 778 if (vifi+1 > mrt->maxvif)
531 net->ipv4.maxvif = vifi+1; 779 mrt->maxvif = vifi+1;
532 write_unlock_bh(&mrt_lock); 780 write_unlock_bh(&mrt_lock);
533 return 0; 781 return 0;
534} 782}
535 783
536static struct mfc_cache *ipmr_cache_find(struct net *net, 784static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
537 __be32 origin, 785 __be32 origin,
538 __be32 mcastgrp) 786 __be32 mcastgrp)
539{ 787{
540 int line = MFC_HASH(mcastgrp, origin); 788 int line = MFC_HASH(mcastgrp, origin);
541 struct mfc_cache *c; 789 struct mfc_cache *c;
542 790
543 for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) { 791 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
544 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) 792 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
545 break; 793 return c;
546 } 794 }
547 return c; 795 return NULL;
548} 796}
549 797
550/* 798/*
551 * Allocate a multicast cache entry 799 * Allocate a multicast cache entry
552 */ 800 */
553static struct mfc_cache *ipmr_cache_alloc(struct net *net) 801static struct mfc_cache *ipmr_cache_alloc(void)
554{ 802{
555 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 803 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
556 if (c == NULL) 804 if (c == NULL)
557 return NULL; 805 return NULL;
558 c->mfc_un.res.minvif = MAXVIFS; 806 c->mfc_un.res.minvif = MAXVIFS;
559 mfc_net_set(c, net);
560 return c; 807 return c;
561} 808}
562 809
563static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) 810static struct mfc_cache *ipmr_cache_alloc_unres(void)
564{ 811{
565 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 812 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
566 if (c == NULL) 813 if (c == NULL)
567 return NULL; 814 return NULL;
568 skb_queue_head_init(&c->mfc_un.unres.unresolved); 815 skb_queue_head_init(&c->mfc_un.unres.unresolved);
569 c->mfc_un.unres.expires = jiffies + 10*HZ; 816 c->mfc_un.unres.expires = jiffies + 10*HZ;
570 mfc_net_set(c, net);
571 return c; 817 return c;
572} 818}
573 819
@@ -575,7 +821,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
575 * A cache entry has gone into a resolved state from queued 821 * A cache entry has gone into a resolved state from queued
576 */ 822 */
577 823
578static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) 824static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
825 struct mfc_cache *uc, struct mfc_cache *c)
579{ 826{
580 struct sk_buff *skb; 827 struct sk_buff *skb;
581 struct nlmsgerr *e; 828 struct nlmsgerr *e;
@@ -588,7 +835,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
588 if (ip_hdr(skb)->version == 0) { 835 if (ip_hdr(skb)->version == 0) {
589 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 836 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
590 837
591 if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { 838 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
592 nlh->nlmsg_len = (skb_tail_pointer(skb) - 839 nlh->nlmsg_len = (skb_tail_pointer(skb) -
593 (u8 *)nlh); 840 (u8 *)nlh);
594 } else { 841 } else {
@@ -600,9 +847,9 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
600 memset(&e->msg, 0, sizeof(e->msg)); 847 memset(&e->msg, 0, sizeof(e->msg));
601 } 848 }
602 849
603 rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid); 850 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
604 } else 851 } else
605 ip_mr_forward(skb, c, 0); 852 ip_mr_forward(net, mrt, skb, c, 0);
606 } 853 }
607} 854}
608 855
@@ -613,7 +860,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
613 * Called under mrt_lock. 860 * Called under mrt_lock.
614 */ 861 */
615 862
616static int ipmr_cache_report(struct net *net, 863static int ipmr_cache_report(struct mr_table *mrt,
617 struct sk_buff *pkt, vifi_t vifi, int assert) 864 struct sk_buff *pkt, vifi_t vifi, int assert)
618{ 865{
619 struct sk_buff *skb; 866 struct sk_buff *skb;
@@ -646,7 +893,7 @@ static int ipmr_cache_report(struct net *net,
646 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 893 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
647 msg->im_msgtype = IGMPMSG_WHOLEPKT; 894 msg->im_msgtype = IGMPMSG_WHOLEPKT;
648 msg->im_mbz = 0; 895 msg->im_mbz = 0;
649 msg->im_vif = net->ipv4.mroute_reg_vif_num; 896 msg->im_vif = mrt->mroute_reg_vif_num;
650 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 897 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
651 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 898 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
652 sizeof(struct iphdr)); 899 sizeof(struct iphdr));
@@ -678,7 +925,7 @@ static int ipmr_cache_report(struct net *net,
678 skb->transport_header = skb->network_header; 925 skb->transport_header = skb->network_header;
679 } 926 }
680 927
681 if (net->ipv4.mroute_sk == NULL) { 928 if (mrt->mroute_sk == NULL) {
682 kfree_skb(skb); 929 kfree_skb(skb);
683 return -EINVAL; 930 return -EINVAL;
684 } 931 }
@@ -686,7 +933,7 @@ static int ipmr_cache_report(struct net *net,
686 /* 933 /*
687 * Deliver to mrouted 934 * Deliver to mrouted
688 */ 935 */
689 ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb); 936 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
690 if (ret < 0) { 937 if (ret < 0) {
691 if (net_ratelimit()) 938 if (net_ratelimit())
692 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 939 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
@@ -701,27 +948,29 @@ static int ipmr_cache_report(struct net *net,
701 */ 948 */
702 949
703static int 950static int
704ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) 951ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
705{ 952{
953 bool found = false;
706 int err; 954 int err;
707 struct mfc_cache *c; 955 struct mfc_cache *c;
708 const struct iphdr *iph = ip_hdr(skb); 956 const struct iphdr *iph = ip_hdr(skb);
709 957
710 spin_lock_bh(&mfc_unres_lock); 958 spin_lock_bh(&mfc_unres_lock);
711 for (c=mfc_unres_queue; c; c=c->next) { 959 list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
712 if (net_eq(mfc_net(c), net) && 960 if (c->mfc_mcastgrp == iph->daddr &&
713 c->mfc_mcastgrp == iph->daddr && 961 c->mfc_origin == iph->saddr) {
714 c->mfc_origin == iph->saddr) 962 found = true;
715 break; 963 break;
964 }
716 } 965 }
717 966
718 if (c == NULL) { 967 if (!found) {
719 /* 968 /*
720 * Create a new entry if allowable 969 * Create a new entry if allowable
721 */ 970 */
722 971
723 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 || 972 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
724 (c = ipmr_cache_alloc_unres(net)) == NULL) { 973 (c = ipmr_cache_alloc_unres()) == NULL) {
725 spin_unlock_bh(&mfc_unres_lock); 974 spin_unlock_bh(&mfc_unres_lock);
726 975
727 kfree_skb(skb); 976 kfree_skb(skb);
@@ -738,7 +987,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
738 /* 987 /*
739 * Reflect first query at mrouted. 988 * Reflect first query at mrouted.
740 */ 989 */
741 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE); 990 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
742 if (err < 0) { 991 if (err < 0) {
743 /* If the report failed throw the cache entry 992 /* If the report failed throw the cache entry
744 out - Brad Parker 993 out - Brad Parker
@@ -750,11 +999,11 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
750 return err; 999 return err;
751 } 1000 }
752 1001
753 atomic_inc(&net->ipv4.cache_resolve_queue_len); 1002 atomic_inc(&mrt->cache_resolve_queue_len);
754 c->next = mfc_unres_queue; 1003 list_add(&c->list, &mrt->mfc_unres_queue);
755 mfc_unres_queue = c;
756 1004
757 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires); 1005 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1006 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
758 } 1007 }
759 1008
760 /* 1009 /*
@@ -776,19 +1025,18 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
776 * MFC cache manipulation by user space mroute daemon 1025 * MFC cache manipulation by user space mroute daemon
777 */ 1026 */
778 1027
779static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) 1028static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
780{ 1029{
781 int line; 1030 int line;
782 struct mfc_cache *c, **cp; 1031 struct mfc_cache *c, *next;
783 1032
784 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 1033 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
785 1034
786 for (cp = &net->ipv4.mfc_cache_array[line]; 1035 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
787 (c = *cp) != NULL; cp = &c->next) {
788 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1036 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
789 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 1037 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
790 write_lock_bh(&mrt_lock); 1038 write_lock_bh(&mrt_lock);
791 *cp = c->next; 1039 list_del(&c->list);
792 write_unlock_bh(&mrt_lock); 1040 write_unlock_bh(&mrt_lock);
793 1041
794 ipmr_cache_free(c); 1042 ipmr_cache_free(c);
@@ -798,27 +1046,30 @@ static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
798 return -ENOENT; 1046 return -ENOENT;
799} 1047}
800 1048
801static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) 1049static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1050 struct mfcctl *mfc, int mrtsock)
802{ 1051{
1052 bool found = false;
803 int line; 1053 int line;
804 struct mfc_cache *uc, *c, **cp; 1054 struct mfc_cache *uc, *c;
805 1055
806 if (mfc->mfcc_parent >= MAXVIFS) 1056 if (mfc->mfcc_parent >= MAXVIFS)
807 return -ENFILE; 1057 return -ENFILE;
808 1058
809 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 1059 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
810 1060
811 for (cp = &net->ipv4.mfc_cache_array[line]; 1061 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
812 (c = *cp) != NULL; cp = &c->next) {
813 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1062 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
814 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) 1063 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1064 found = true;
815 break; 1065 break;
1066 }
816 } 1067 }
817 1068
818 if (c != NULL) { 1069 if (found) {
819 write_lock_bh(&mrt_lock); 1070 write_lock_bh(&mrt_lock);
820 c->mfc_parent = mfc->mfcc_parent; 1071 c->mfc_parent = mfc->mfcc_parent;
821 ipmr_update_thresholds(c, mfc->mfcc_ttls); 1072 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
822 if (!mrtsock) 1073 if (!mrtsock)
823 c->mfc_flags |= MFC_STATIC; 1074 c->mfc_flags |= MFC_STATIC;
824 write_unlock_bh(&mrt_lock); 1075 write_unlock_bh(&mrt_lock);
@@ -828,43 +1079,42 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
828 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 1079 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
829 return -EINVAL; 1080 return -EINVAL;
830 1081
831 c = ipmr_cache_alloc(net); 1082 c = ipmr_cache_alloc();
832 if (c == NULL) 1083 if (c == NULL)
833 return -ENOMEM; 1084 return -ENOMEM;
834 1085
835 c->mfc_origin = mfc->mfcc_origin.s_addr; 1086 c->mfc_origin = mfc->mfcc_origin.s_addr;
836 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr; 1087 c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
837 c->mfc_parent = mfc->mfcc_parent; 1088 c->mfc_parent = mfc->mfcc_parent;
838 ipmr_update_thresholds(c, mfc->mfcc_ttls); 1089 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
839 if (!mrtsock) 1090 if (!mrtsock)
840 c->mfc_flags |= MFC_STATIC; 1091 c->mfc_flags |= MFC_STATIC;
841 1092
842 write_lock_bh(&mrt_lock); 1093 write_lock_bh(&mrt_lock);
843 c->next = net->ipv4.mfc_cache_array[line]; 1094 list_add(&c->list, &mrt->mfc_cache_array[line]);
844 net->ipv4.mfc_cache_array[line] = c;
845 write_unlock_bh(&mrt_lock); 1095 write_unlock_bh(&mrt_lock);
846 1096
847 /* 1097 /*
848 * Check to see if we resolved a queued list. If so we 1098 * Check to see if we resolved a queued list. If so we
849 * need to send on the frames and tidy up. 1099 * need to send on the frames and tidy up.
850 */ 1100 */
1101 found = false;
851 spin_lock_bh(&mfc_unres_lock); 1102 spin_lock_bh(&mfc_unres_lock);
852 for (cp = &mfc_unres_queue; (uc=*cp) != NULL; 1103 list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
853 cp = &uc->next) { 1104 if (uc->mfc_origin == c->mfc_origin &&
854 if (net_eq(mfc_net(uc), net) &&
855 uc->mfc_origin == c->mfc_origin &&
856 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 1105 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
857 *cp = uc->next; 1106 list_del(&uc->list);
858 atomic_dec(&net->ipv4.cache_resolve_queue_len); 1107 atomic_dec(&mrt->cache_resolve_queue_len);
1108 found = true;
859 break; 1109 break;
860 } 1110 }
861 } 1111 }
862 if (mfc_unres_queue == NULL) 1112 if (list_empty(&mrt->mfc_unres_queue))
863 del_timer(&ipmr_expire_timer); 1113 del_timer(&mrt->ipmr_expire_timer);
864 spin_unlock_bh(&mfc_unres_lock); 1114 spin_unlock_bh(&mfc_unres_lock);
865 1115
866 if (uc) { 1116 if (found) {
867 ipmr_cache_resolve(uc, c); 1117 ipmr_cache_resolve(net, mrt, uc, c);
868 ipmr_cache_free(uc); 1118 ipmr_cache_free(uc);
869 } 1119 }
870 return 0; 1120 return 0;
@@ -874,53 +1124,41 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
874 * Close the multicast socket, and clear the vif tables etc 1124 * Close the multicast socket, and clear the vif tables etc
875 */ 1125 */
876 1126
877static void mroute_clean_tables(struct net *net) 1127static void mroute_clean_tables(struct mr_table *mrt)
878{ 1128{
879 int i; 1129 int i;
880 LIST_HEAD(list); 1130 LIST_HEAD(list);
1131 struct mfc_cache *c, *next;
881 1132
882 /* 1133 /*
883 * Shut down all active vif entries 1134 * Shut down all active vif entries
884 */ 1135 */
885 for (i = 0; i < net->ipv4.maxvif; i++) { 1136 for (i = 0; i < mrt->maxvif; i++) {
886 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) 1137 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
887 vif_delete(net, i, 0, &list); 1138 vif_delete(mrt, i, 0, &list);
888 } 1139 }
889 unregister_netdevice_many(&list); 1140 unregister_netdevice_many(&list);
890 1141
891 /* 1142 /*
892 * Wipe the cache 1143 * Wipe the cache
893 */ 1144 */
894 for (i=0; i<MFC_LINES; i++) { 1145 for (i = 0; i < MFC_LINES; i++) {
895 struct mfc_cache *c, **cp; 1146 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
896 1147 if (c->mfc_flags&MFC_STATIC)
897 cp = &net->ipv4.mfc_cache_array[i];
898 while ((c = *cp) != NULL) {
899 if (c->mfc_flags&MFC_STATIC) {
900 cp = &c->next;
901 continue; 1148 continue;
902 }
903 write_lock_bh(&mrt_lock); 1149 write_lock_bh(&mrt_lock);
904 *cp = c->next; 1150 list_del(&c->list);
905 write_unlock_bh(&mrt_lock); 1151 write_unlock_bh(&mrt_lock);
906 1152
907 ipmr_cache_free(c); 1153 ipmr_cache_free(c);
908 } 1154 }
909 } 1155 }
910 1156
911 if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) { 1157 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
912 struct mfc_cache *c, **cp;
913
914 spin_lock_bh(&mfc_unres_lock); 1158 spin_lock_bh(&mfc_unres_lock);
915 cp = &mfc_unres_queue; 1159 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
916 while ((c = *cp) != NULL) { 1160 list_del(&c->list);
917 if (!net_eq(mfc_net(c), net)) { 1161 ipmr_destroy_unres(mrt, c);
918 cp = &c->next;
919 continue;
920 }
921 *cp = c->next;
922
923 ipmr_destroy_unres(c);
924 } 1162 }
925 spin_unlock_bh(&mfc_unres_lock); 1163 spin_unlock_bh(&mfc_unres_lock);
926 } 1164 }
@@ -929,16 +1167,19 @@ static void mroute_clean_tables(struct net *net)
929static void mrtsock_destruct(struct sock *sk) 1167static void mrtsock_destruct(struct sock *sk)
930{ 1168{
931 struct net *net = sock_net(sk); 1169 struct net *net = sock_net(sk);
1170 struct mr_table *mrt;
932 1171
933 rtnl_lock(); 1172 rtnl_lock();
934 if (sk == net->ipv4.mroute_sk) { 1173 ipmr_for_each_table(mrt, net) {
935 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1174 if (sk == mrt->mroute_sk) {
1175 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
936 1176
937 write_lock_bh(&mrt_lock); 1177 write_lock_bh(&mrt_lock);
938 net->ipv4.mroute_sk = NULL; 1178 mrt->mroute_sk = NULL;
939 write_unlock_bh(&mrt_lock); 1179 write_unlock_bh(&mrt_lock);
940 1180
941 mroute_clean_tables(net); 1181 mroute_clean_tables(mrt);
1182 }
942 } 1183 }
943 rtnl_unlock(); 1184 rtnl_unlock();
944} 1185}
@@ -956,9 +1197,14 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
956 struct vifctl vif; 1197 struct vifctl vif;
957 struct mfcctl mfc; 1198 struct mfcctl mfc;
958 struct net *net = sock_net(sk); 1199 struct net *net = sock_net(sk);
1200 struct mr_table *mrt;
1201
1202 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1203 if (mrt == NULL)
1204 return -ENOENT;
959 1205
960 if (optname != MRT_INIT) { 1206 if (optname != MRT_INIT) {
961 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN)) 1207 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
962 return -EACCES; 1208 return -EACCES;
963 } 1209 }
964 1210
@@ -971,7 +1217,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
971 return -ENOPROTOOPT; 1217 return -ENOPROTOOPT;
972 1218
973 rtnl_lock(); 1219 rtnl_lock();
974 if (net->ipv4.mroute_sk) { 1220 if (mrt->mroute_sk) {
975 rtnl_unlock(); 1221 rtnl_unlock();
976 return -EADDRINUSE; 1222 return -EADDRINUSE;
977 } 1223 }
@@ -979,7 +1225,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
979 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1225 ret = ip_ra_control(sk, 1, mrtsock_destruct);
980 if (ret == 0) { 1226 if (ret == 0) {
981 write_lock_bh(&mrt_lock); 1227 write_lock_bh(&mrt_lock);
982 net->ipv4.mroute_sk = sk; 1228 mrt->mroute_sk = sk;
983 write_unlock_bh(&mrt_lock); 1229 write_unlock_bh(&mrt_lock);
984 1230
985 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1231 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
@@ -987,7 +1233,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
987 rtnl_unlock(); 1233 rtnl_unlock();
988 return ret; 1234 return ret;
989 case MRT_DONE: 1235 case MRT_DONE:
990 if (sk != net->ipv4.mroute_sk) 1236 if (sk != mrt->mroute_sk)
991 return -EACCES; 1237 return -EACCES;
992 return ip_ra_control(sk, 0, NULL); 1238 return ip_ra_control(sk, 0, NULL);
993 case MRT_ADD_VIF: 1239 case MRT_ADD_VIF:
@@ -1000,9 +1246,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1000 return -ENFILE; 1246 return -ENFILE;
1001 rtnl_lock(); 1247 rtnl_lock();
1002 if (optname == MRT_ADD_VIF) { 1248 if (optname == MRT_ADD_VIF) {
1003 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); 1249 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1004 } else { 1250 } else {
1005 ret = vif_delete(net, vif.vifc_vifi, 0, NULL); 1251 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1006 } 1252 }
1007 rtnl_unlock(); 1253 rtnl_unlock();
1008 return ret; 1254 return ret;
@@ -1019,9 +1265,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1019 return -EFAULT; 1265 return -EFAULT;
1020 rtnl_lock(); 1266 rtnl_lock();
1021 if (optname == MRT_DEL_MFC) 1267 if (optname == MRT_DEL_MFC)
1022 ret = ipmr_mfc_delete(net, &mfc); 1268 ret = ipmr_mfc_delete(mrt, &mfc);
1023 else 1269 else
1024 ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk); 1270 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1025 rtnl_unlock(); 1271 rtnl_unlock();
1026 return ret; 1272 return ret;
1027 /* 1273 /*
@@ -1032,7 +1278,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1032 int v; 1278 int v;
1033 if (get_user(v,(int __user *)optval)) 1279 if (get_user(v,(int __user *)optval))
1034 return -EFAULT; 1280 return -EFAULT;
1035 net->ipv4.mroute_do_assert = (v) ? 1 : 0; 1281 mrt->mroute_do_assert = (v) ? 1 : 0;
1036 return 0; 1282 return 0;
1037 } 1283 }
1038#ifdef CONFIG_IP_PIMSM 1284#ifdef CONFIG_IP_PIMSM
@@ -1046,14 +1292,35 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1046 1292
1047 rtnl_lock(); 1293 rtnl_lock();
1048 ret = 0; 1294 ret = 0;
1049 if (v != net->ipv4.mroute_do_pim) { 1295 if (v != mrt->mroute_do_pim) {
1050 net->ipv4.mroute_do_pim = v; 1296 mrt->mroute_do_pim = v;
1051 net->ipv4.mroute_do_assert = v; 1297 mrt->mroute_do_assert = v;
1052 } 1298 }
1053 rtnl_unlock(); 1299 rtnl_unlock();
1054 return ret; 1300 return ret;
1055 } 1301 }
1056#endif 1302#endif
1303#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1304 case MRT_TABLE:
1305 {
1306 u32 v;
1307
1308 if (optlen != sizeof(u32))
1309 return -EINVAL;
1310 if (get_user(v, (u32 __user *)optval))
1311 return -EFAULT;
1312 if (sk == mrt->mroute_sk)
1313 return -EBUSY;
1314
1315 rtnl_lock();
1316 ret = 0;
1317 if (!ipmr_new_table(net, v))
1318 ret = -ENOMEM;
1319 raw_sk(sk)->ipmr_table = v;
1320 rtnl_unlock();
1321 return ret;
1322 }
1323#endif
1057 /* 1324 /*
1058 * Spurious command, or MRT_VERSION which you cannot 1325 * Spurious command, or MRT_VERSION which you cannot
1059 * set. 1326 * set.
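
With multiple tables compiled in, a routing daemon selects its table via MRT_TABLE before MRT_INIT; the EBUSY check above rejects changing tables once the socket is registered as mroute_sk. A hypothetical usage sketch; the option value is an assumption taken from this series' <linux/mroute.h>:

#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>

#ifndef MRT_TABLE
#define MRT_TABLE 209	/* assumption: MRT_BASE + 9 in this series */
#endif

int main(void)
{
	unsigned int table = 42;	/* illustrative table id */
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Must come before MRT_INIT: once this socket is the table's
	 * mroute_sk, the kernel above returns EBUSY. */
	if (setsockopt(fd, IPPROTO_IP, MRT_TABLE, &table, sizeof(table)) < 0) {
		perror("setsockopt(MRT_TABLE)");
		return 1;
	}
	return 0;
}
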
@@ -1072,6 +1339,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1072 int olr; 1339 int olr;
1073 int val; 1340 int val;
1074 struct net *net = sock_net(sk); 1341 struct net *net = sock_net(sk);
1342 struct mr_table *mrt;
1343
1344 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1345 if (mrt == NULL)
1346 return -ENOENT;
1075 1347
1076 if (optname != MRT_VERSION && 1348 if (optname != MRT_VERSION &&
1077#ifdef CONFIG_IP_PIMSM 1349#ifdef CONFIG_IP_PIMSM
@@ -1093,10 +1365,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1093 val = 0x0305; 1365 val = 0x0305;
1094#ifdef CONFIG_IP_PIMSM 1366#ifdef CONFIG_IP_PIMSM
1095 else if (optname == MRT_PIM) 1367 else if (optname == MRT_PIM)
1096 val = net->ipv4.mroute_do_pim; 1368 val = mrt->mroute_do_pim;
1097#endif 1369#endif
1098 else 1370 else
1099 val = net->ipv4.mroute_do_assert; 1371 val = mrt->mroute_do_assert;
1100 if (copy_to_user(optval, &val, olr)) 1372 if (copy_to_user(optval, &val, olr))
1101 return -EFAULT; 1373 return -EFAULT;
1102 return 0; 1374 return 0;
@@ -1113,16 +1385,21 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1113 struct vif_device *vif; 1385 struct vif_device *vif;
1114 struct mfc_cache *c; 1386 struct mfc_cache *c;
1115 struct net *net = sock_net(sk); 1387 struct net *net = sock_net(sk);
1388 struct mr_table *mrt;
1389
1390 mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1391 if (mrt == NULL)
1392 return -ENOENT;
1116 1393
1117 switch (cmd) { 1394 switch (cmd) {
1118 case SIOCGETVIFCNT: 1395 case SIOCGETVIFCNT:
1119 if (copy_from_user(&vr, arg, sizeof(vr))) 1396 if (copy_from_user(&vr, arg, sizeof(vr)))
1120 return -EFAULT; 1397 return -EFAULT;
1121 if (vr.vifi >= net->ipv4.maxvif) 1398 if (vr.vifi >= mrt->maxvif)
1122 return -EINVAL; 1399 return -EINVAL;
1123 read_lock(&mrt_lock); 1400 read_lock(&mrt_lock);
1124 vif = &net->ipv4.vif_table[vr.vifi]; 1401 vif = &mrt->vif_table[vr.vifi];
1125 if (VIF_EXISTS(net, vr.vifi)) { 1402 if (VIF_EXISTS(mrt, vr.vifi)) {
1126 vr.icount = vif->pkt_in; 1403 vr.icount = vif->pkt_in;
1127 vr.ocount = vif->pkt_out; 1404 vr.ocount = vif->pkt_out;
1128 vr.ibytes = vif->bytes_in; 1405 vr.ibytes = vif->bytes_in;
@@ -1140,7 +1417,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1140 return -EFAULT; 1417 return -EFAULT;
1141 1418
1142 read_lock(&mrt_lock); 1419 read_lock(&mrt_lock);
1143 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr); 1420 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1144 if (c) { 1421 if (c) {
1145 sr.pktcnt = c->mfc_un.res.pkt; 1422 sr.pktcnt = c->mfc_un.res.pkt;
1146 sr.bytecnt = c->mfc_un.res.bytes; 1423 sr.bytecnt = c->mfc_un.res.bytes;
@@ -1163,16 +1440,20 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
1163{ 1440{
1164 struct net_device *dev = ptr; 1441 struct net_device *dev = ptr;
1165 struct net *net = dev_net(dev); 1442 struct net *net = dev_net(dev);
1443 struct mr_table *mrt;
1166 struct vif_device *v; 1444 struct vif_device *v;
1167 int ct; 1445 int ct;
1168 LIST_HEAD(list); 1446 LIST_HEAD(list);
1169 1447
1170 if (event != NETDEV_UNREGISTER) 1448 if (event != NETDEV_UNREGISTER)
1171 return NOTIFY_DONE; 1449 return NOTIFY_DONE;
1172 v = &net->ipv4.vif_table[0]; 1450
1173 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { 1451 ipmr_for_each_table(mrt, net) {
1174 if (v->dev == dev) 1452 v = &mrt->vif_table[0];
1175 vif_delete(net, ct, 1, &list); 1453 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1454 if (v->dev == dev)
1455 vif_delete(mrt, ct, 1, &list);
1456 }
1176 } 1457 }
1177 unregister_netdevice_many(&list); 1458 unregister_netdevice_many(&list);
1178 return NOTIFY_DONE; 1459 return NOTIFY_DONE;
@@ -1231,11 +1512,11 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
1231 * Processing handlers for ipmr_forward 1512 * Processing handlers for ipmr_forward
1232 */ 1513 */
1233 1514
1234static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) 1515static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1516 struct sk_buff *skb, struct mfc_cache *c, int vifi)
1235{ 1517{
1236 struct net *net = mfc_net(c);
1237 const struct iphdr *iph = ip_hdr(skb); 1518 const struct iphdr *iph = ip_hdr(skb);
1238 struct vif_device *vif = &net->ipv4.vif_table[vifi]; 1519 struct vif_device *vif = &mrt->vif_table[vifi];
1239 struct net_device *dev; 1520 struct net_device *dev;
1240 struct rtable *rt; 1521 struct rtable *rt;
1241 int encap = 0; 1522 int encap = 0;
@@ -1249,7 +1530,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1249 vif->bytes_out += skb->len; 1530 vif->bytes_out += skb->len;
1250 vif->dev->stats.tx_bytes += skb->len; 1531 vif->dev->stats.tx_bytes += skb->len;
1251 vif->dev->stats.tx_packets++; 1532 vif->dev->stats.tx_packets++;
1252 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT); 1533 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1253 goto out_free; 1534 goto out_free;
1254 } 1535 }
1255#endif 1536#endif
@@ -1274,9 +1555,9 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1274 goto out_free; 1555 goto out_free;
1275 } 1556 }
1276 1557
1277 dev = rt->u.dst.dev; 1558 dev = rt->dst.dev;
1278 1559
1279 if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { 1560 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1280 /* Do not fragment multicasts. Alas, IPv4 does not 1561 /* Do not fragment multicasts. Alas, IPv4 does not
1281 allow to send ICMP, so that packets will disappear 1562 allow to send ICMP, so that packets will disappear
1282 to blackhole. 1563 to blackhole.
@@ -1287,7 +1568,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1287 goto out_free; 1568 goto out_free;
1288 } 1569 }
1289 1570
1290 encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; 1571 encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
1291 1572
1292 if (skb_cow(skb, encap)) { 1573 if (skb_cow(skb, encap)) {
1293 ip_rt_put(rt); 1574 ip_rt_put(rt);
@@ -1298,7 +1579,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1298 vif->bytes_out += skb->len; 1579 vif->bytes_out += skb->len;
1299 1580
1300 skb_dst_drop(skb); 1581 skb_dst_drop(skb);
1301 skb_dst_set(skb, &rt->u.dst); 1582 skb_dst_set(skb, &rt->dst);
1302 ip_decrease_ttl(ip_hdr(skb)); 1583 ip_decrease_ttl(ip_hdr(skb));
1303 1584
1304 /* FIXME: forward and output firewalls used to be called here. 1585 /* FIXME: forward and output firewalls used to be called here.
@@ -1323,21 +1604,20 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1323 * not mrouter) cannot join to more than one interface - it will 1604 * not mrouter) cannot join to more than one interface - it will
1324 * result in receiving multiple packets. 1605 * result in receiving multiple packets.
1325 */ 1606 */
1326 NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev, 1607 NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1327 ipmr_forward_finish); 1608 ipmr_forward_finish);
1328 return; 1609 return;
1329 1610
1330out_free: 1611out_free:
1331 kfree_skb(skb); 1612 kfree_skb(skb);
1332 return;
1333} 1613}
1334 1614
1335static int ipmr_find_vif(struct net_device *dev) 1615static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1336{ 1616{
1337 struct net *net = dev_net(dev);
1338 int ct; 1617 int ct;
1339 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) { 1618
1340 if (net->ipv4.vif_table[ct].dev == dev) 1619 for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1620 if (mrt->vif_table[ct].dev == dev)
1341 break; 1621 break;
1342 } 1622 }
1343 return ct; 1623 return ct;
@@ -1345,11 +1625,12 @@ static int ipmr_find_vif(struct net_device *dev)
1345 1625
1346/* "local" means that we should preserve one skb (for local delivery) */ 1626/* "local" means that we should preserve one skb (for local delivery) */
1347 1627
1348static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local) 1628static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1629 struct sk_buff *skb, struct mfc_cache *cache,
1630 int local)
1349{ 1631{
1350 int psend = -1; 1632 int psend = -1;
1351 int vif, ct; 1633 int vif, ct;
1352 struct net *net = mfc_net(cache);
1353 1634
1354 vif = cache->mfc_parent; 1635 vif = cache->mfc_parent;
1355 cache->mfc_un.res.pkt++; 1636 cache->mfc_un.res.pkt++;
@@ -1358,7 +1639,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1358 /* 1639 /*
1359 * Wrong interface: drop packet and (maybe) send PIM assert. 1640 * Wrong interface: drop packet and (maybe) send PIM assert.
1360 */ 1641 */
1361 if (net->ipv4.vif_table[vif].dev != skb->dev) { 1642 if (mrt->vif_table[vif].dev != skb->dev) {
1362 int true_vifi; 1643 int true_vifi;
1363 1644
1364 if (skb_rtable(skb)->fl.iif == 0) { 1645 if (skb_rtable(skb)->fl.iif == 0) {
@@ -1377,26 +1658,26 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1377 } 1658 }
1378 1659
1379 cache->mfc_un.res.wrong_if++; 1660 cache->mfc_un.res.wrong_if++;
1380 true_vifi = ipmr_find_vif(skb->dev); 1661 true_vifi = ipmr_find_vif(mrt, skb->dev);
1381 1662
1382 if (true_vifi >= 0 && net->ipv4.mroute_do_assert && 1663 if (true_vifi >= 0 && mrt->mroute_do_assert &&
 1383 /* pimsm uses asserts when switching from RPT to SPT, 1664 /* pimsm uses asserts when switching from RPT to SPT,
 1384 so we cannot check that a packet arrived on an oif. 1665 so we cannot check that a packet arrived on an oif.
 1385 It is bad, but otherwise we would need to move a pretty 1666 It is bad, but otherwise we would need to move a pretty
 1386 large chunk of pimd into the kernel. Ough... --ANK 1667 large chunk of pimd into the kernel. Ough... --ANK
1387 */ 1668 */
1388 (net->ipv4.mroute_do_pim || 1669 (mrt->mroute_do_pim ||
1389 cache->mfc_un.res.ttls[true_vifi] < 255) && 1670 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1390 time_after(jiffies, 1671 time_after(jiffies,
1391 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1672 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1392 cache->mfc_un.res.last_assert = jiffies; 1673 cache->mfc_un.res.last_assert = jiffies;
1393 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF); 1674 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1394 } 1675 }
1395 goto dont_forward; 1676 goto dont_forward;
1396 } 1677 }
1397 1678
1398 net->ipv4.vif_table[vif].pkt_in++; 1679 mrt->vif_table[vif].pkt_in++;
1399 net->ipv4.vif_table[vif].bytes_in += skb->len; 1680 mrt->vif_table[vif].bytes_in += skb->len;
1400 1681
1401 /* 1682 /*
1402 * Forward the frame 1683 * Forward the frame
@@ -1406,7 +1687,8 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1406 if (psend != -1) { 1687 if (psend != -1) {
1407 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1688 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1408 if (skb2) 1689 if (skb2)
1409 ipmr_queue_xmit(skb2, cache, psend); 1690 ipmr_queue_xmit(net, mrt, skb2, cache,
1691 psend);
1410 } 1692 }
1411 psend = ct; 1693 psend = ct;
1412 } 1694 }
@@ -1415,9 +1697,9 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1415 if (local) { 1697 if (local) {
1416 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1698 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1417 if (skb2) 1699 if (skb2)
1418 ipmr_queue_xmit(skb2, cache, psend); 1700 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1419 } else { 1701 } else {
1420 ipmr_queue_xmit(skb, cache, psend); 1702 ipmr_queue_xmit(net, mrt, skb, cache, psend);
1421 return 0; 1703 return 0;
1422 } 1704 }
1423 } 1705 }
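ip_mr_forward() fans one packet out to several vifs with the classic
consume-the-last-copy idiom: every target but the final one gets an
skb_clone(), and the original skb is handed to the last transmit, so no copy
is wasted and skb ownership stays unambiguous. A stand-alone sketch of the
idiom (xmit_one() is a hypothetical stand-in for ipmr_queue_xmit()):

    static void fan_out(struct sk_buff *skb, int ntargets)
    {
        int i;

        for (i = 0; i < ntargets - 1; i++) {
            struct sk_buff *copy = skb_clone(skb, GFP_ATOMIC);

            if (copy)                    /* clone for all but the last */
                xmit_one(copy, i);
        }
        xmit_one(skb, ntargets - 1);     /* last target eats the original */
    }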
@@ -1438,6 +1720,8 @@ int ip_mr_input(struct sk_buff *skb)
1438 struct mfc_cache *cache; 1720 struct mfc_cache *cache;
1439 struct net *net = dev_net(skb->dev); 1721 struct net *net = dev_net(skb->dev);
1440 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; 1722 int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1723 struct mr_table *mrt;
1724 int err;
1441 1725
 1442 /* A packet looped back after forwarding should not be 1726 /* A packet looped back after forwarding should not be
 1443 forwarded a second time, but it can still be delivered locally. 1727 forwarded a second time, but it can still be delivered locally.
@@ -1445,6 +1729,12 @@ int ip_mr_input(struct sk_buff *skb)
1445 if (IPCB(skb)->flags&IPSKB_FORWARDED) 1729 if (IPCB(skb)->flags&IPSKB_FORWARDED)
1446 goto dont_forward; 1730 goto dont_forward;
1447 1731
1732 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1733 if (err < 0) {
1734 kfree_skb(skb);
1735 return err;
1736 }
1737
1448 if (!local) { 1738 if (!local) {
1449 if (IPCB(skb)->opt.router_alert) { 1739 if (IPCB(skb)->opt.router_alert) {
1450 if (ip_call_ra_chain(skb)) 1740 if (ip_call_ra_chain(skb))
@@ -1457,9 +1747,9 @@ int ip_mr_input(struct sk_buff *skb)
1457 that we can forward NO IGMP messages. 1747 that we can forward NO IGMP messages.
1458 */ 1748 */
1459 read_lock(&mrt_lock); 1749 read_lock(&mrt_lock);
1460 if (net->ipv4.mroute_sk) { 1750 if (mrt->mroute_sk) {
1461 nf_reset(skb); 1751 nf_reset(skb);
1462 raw_rcv(net->ipv4.mroute_sk, skb); 1752 raw_rcv(mrt->mroute_sk, skb);
1463 read_unlock(&mrt_lock); 1753 read_unlock(&mrt_lock);
1464 return 0; 1754 return 0;
1465 } 1755 }
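ip_mr_input() now resolves its mr_table through ipmr_fib_lookup() and frees
the skb itself on failure, since it owns the packet at that point. The lookup
helper's body lies outside this excerpt; given the fib-rules plumbing the
patch adds, its shape is plausibly:

    /* Sketch under assumptions: ipmr_result and mr_rules_ops are the
     * fib-rules glue this patch introduces elsewhere in ipmr.c. */
    static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
                               struct mr_table **mrt)
    {
        struct ipmr_result res;
        struct fib_lookup_arg arg = { .result = &res, };
        int err;

        err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
        if (err < 0)
            return err;
        *mrt = res.mrt;
        return 0;
    }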
@@ -1468,7 +1758,7 @@ int ip_mr_input(struct sk_buff *skb)
1468 } 1758 }
1469 1759
1470 read_lock(&mrt_lock); 1760 read_lock(&mrt_lock);
1471 cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1761 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1472 1762
1473 /* 1763 /*
1474 * No usable cache entry 1764 * No usable cache entry
@@ -1486,19 +1776,19 @@ int ip_mr_input(struct sk_buff *skb)
1486 skb = skb2; 1776 skb = skb2;
1487 } 1777 }
1488 1778
1489 vif = ipmr_find_vif(skb->dev); 1779 vif = ipmr_find_vif(mrt, skb->dev);
1490 if (vif >= 0) { 1780 if (vif >= 0) {
1491 int err = ipmr_cache_unresolved(net, vif, skb); 1781 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1492 read_unlock(&mrt_lock); 1782 read_unlock(&mrt_lock);
1493 1783
1494 return err; 1784 return err2;
1495 } 1785 }
1496 read_unlock(&mrt_lock); 1786 read_unlock(&mrt_lock);
1497 kfree_skb(skb); 1787 kfree_skb(skb);
1498 return -ENODEV; 1788 return -ENODEV;
1499 } 1789 }
1500 1790
1501 ip_mr_forward(skb, cache, local); 1791 ip_mr_forward(net, mrt, skb, cache, local);
1502 1792
1503 read_unlock(&mrt_lock); 1793 read_unlock(&mrt_lock);
1504 1794
@@ -1515,11 +1805,11 @@ dont_forward:
1515} 1805}
1516 1806
1517#ifdef CONFIG_IP_PIMSM 1807#ifdef CONFIG_IP_PIMSM
1518static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) 1808static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1809 unsigned int pimlen)
1519{ 1810{
1520 struct net_device *reg_dev = NULL; 1811 struct net_device *reg_dev = NULL;
1521 struct iphdr *encap; 1812 struct iphdr *encap;
1522 struct net *net = dev_net(skb->dev);
1523 1813
1524 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 1814 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1525 /* 1815 /*
@@ -1534,8 +1824,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1534 return 1; 1824 return 1;
1535 1825
1536 read_lock(&mrt_lock); 1826 read_lock(&mrt_lock);
1537 if (net->ipv4.mroute_reg_vif_num >= 0) 1827 if (mrt->mroute_reg_vif_num >= 0)
1538 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev; 1828 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1539 if (reg_dev) 1829 if (reg_dev)
1540 dev_hold(reg_dev); 1830 dev_hold(reg_dev);
1541 read_unlock(&mrt_lock); 1831 read_unlock(&mrt_lock);
@@ -1546,14 +1836,12 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1546 skb->mac_header = skb->network_header; 1836 skb->mac_header = skb->network_header;
1547 skb_pull(skb, (u8*)encap - skb->data); 1837 skb_pull(skb, (u8*)encap - skb->data);
1548 skb_reset_network_header(skb); 1838 skb_reset_network_header(skb);
1549 skb->dev = reg_dev;
1550 skb->protocol = htons(ETH_P_IP); 1839 skb->protocol = htons(ETH_P_IP);
1551 skb->ip_summed = 0; 1840 skb->ip_summed = 0;
1552 skb->pkt_type = PACKET_HOST; 1841 skb->pkt_type = PACKET_HOST;
1553 skb_dst_drop(skb); 1842
1554 reg_dev->stats.rx_bytes += skb->len; 1843 skb_tunnel_rx(skb, reg_dev);
1555 reg_dev->stats.rx_packets++; 1844
1556 nf_reset(skb);
1557 netif_rx(skb); 1845 netif_rx(skb);
1558 dev_put(reg_dev); 1846 dev_put(reg_dev);
1559 1847
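The deleted open-coded receive fixup in __pim_rcv() (set skb->dev, drop the
stale dst, bump rx stats, reset conntrack) is exactly what the skb_tunnel_rx()
helper centralizes. In kernels of this vintage the helper amounts to roughly
the following paraphrase, not the verbatim source:

    static inline void skb_tunnel_rx(struct sk_buff *skb,
                                     struct net_device *dev)
    {
        dev->stats.rx_packets++;
        dev->stats.rx_bytes += skb->len;
        skb->dev = dev;        /* packet now "arrives" on the tunnel dev */
        skb_dst_drop(skb);     /* old route is meaningless after decap */
        nf_reset(skb);         /* restart netfilter state for reinjection */
    }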
@@ -1570,17 +1858,21 @@ int pim_rcv_v1(struct sk_buff * skb)
1570{ 1858{
1571 struct igmphdr *pim; 1859 struct igmphdr *pim;
1572 struct net *net = dev_net(skb->dev); 1860 struct net *net = dev_net(skb->dev);
1861 struct mr_table *mrt;
1573 1862
1574 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1863 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1575 goto drop; 1864 goto drop;
1576 1865
1577 pim = igmp_hdr(skb); 1866 pim = igmp_hdr(skb);
1578 1867
1579 if (!net->ipv4.mroute_do_pim || 1868 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1869 goto drop;
1870
1871 if (!mrt->mroute_do_pim ||
1580 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1872 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1581 goto drop; 1873 goto drop;
1582 1874
1583 if (__pim_rcv(skb, sizeof(*pim))) { 1875 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1584drop: 1876drop:
1585 kfree_skb(skb); 1877 kfree_skb(skb);
1586 } 1878 }
@@ -1592,6 +1884,8 @@ drop:
1592static int pim_rcv(struct sk_buff * skb) 1884static int pim_rcv(struct sk_buff * skb)
1593{ 1885{
1594 struct pimreghdr *pim; 1886 struct pimreghdr *pim;
1887 struct net *net = dev_net(skb->dev);
1888 struct mr_table *mrt;
1595 1889
1596 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1890 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1597 goto drop; 1891 goto drop;
@@ -1603,7 +1897,10 @@ static int pim_rcv(struct sk_buff * skb)
1603 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1897 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1604 goto drop; 1898 goto drop;
1605 1899
1606 if (__pim_rcv(skb, sizeof(*pim))) { 1900 if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1901 goto drop;
1902
1903 if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1607drop: 1904drop:
1608 kfree_skb(skb); 1905 kfree_skb(skb);
1609 } 1906 }
@@ -1611,32 +1908,31 @@ drop:
1611} 1908}
1612#endif 1909#endif
1613 1910
1614static int 1911static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1615ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) 1912 struct mfc_cache *c, struct rtmsg *rtm)
1616{ 1913{
1617 int ct; 1914 int ct;
1618 struct rtnexthop *nhp; 1915 struct rtnexthop *nhp;
1619 struct net *net = mfc_net(c);
1620 u8 *b = skb_tail_pointer(skb); 1916 u8 *b = skb_tail_pointer(skb);
1621 struct rtattr *mp_head; 1917 struct rtattr *mp_head;
1622 1918
1623 /* If cache is unresolved, don't try to parse IIF and OIF */ 1919 /* If cache is unresolved, don't try to parse IIF and OIF */
1624 if (c->mfc_parent > MAXVIFS) 1920 if (c->mfc_parent >= MAXVIFS)
1625 return -ENOENT; 1921 return -ENOENT;
1626 1922
1627 if (VIF_EXISTS(net, c->mfc_parent)) 1923 if (VIF_EXISTS(mrt, c->mfc_parent))
1628 RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex); 1924 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1629 1925
1630 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); 1926 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1631 1927
1632 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 1928 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1633 if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) { 1929 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1634 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 1930 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1635 goto rtattr_failure; 1931 goto rtattr_failure;
1636 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 1932 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1637 nhp->rtnh_flags = 0; 1933 nhp->rtnh_flags = 0;
1638 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 1934 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1639 nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex; 1935 nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1640 nhp->rtnh_len = sizeof(*nhp); 1936 nhp->rtnh_len = sizeof(*nhp);
1641 } 1937 }
1642 } 1938 }
@@ -1654,11 +1950,16 @@ int ipmr_get_route(struct net *net,
1654 struct sk_buff *skb, struct rtmsg *rtm, int nowait) 1950 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1655{ 1951{
1656 int err; 1952 int err;
1953 struct mr_table *mrt;
1657 struct mfc_cache *cache; 1954 struct mfc_cache *cache;
1658 struct rtable *rt = skb_rtable(skb); 1955 struct rtable *rt = skb_rtable(skb);
1659 1956
1957 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1958 if (mrt == NULL)
1959 return -ENOENT;
1960
1660 read_lock(&mrt_lock); 1961 read_lock(&mrt_lock);
1661 cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst); 1962 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1662 1963
1663 if (cache == NULL) { 1964 if (cache == NULL) {
1664 struct sk_buff *skb2; 1965 struct sk_buff *skb2;
@@ -1672,7 +1973,7 @@ int ipmr_get_route(struct net *net,
1672 } 1973 }
1673 1974
1674 dev = skb->dev; 1975 dev = skb->dev;
1675 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { 1976 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1676 read_unlock(&mrt_lock); 1977 read_unlock(&mrt_lock);
1677 return -ENODEV; 1978 return -ENODEV;
1678 } 1979 }
@@ -1689,24 +1990,107 @@ int ipmr_get_route(struct net *net,
1689 iph->saddr = rt->rt_src; 1990 iph->saddr = rt->rt_src;
1690 iph->daddr = rt->rt_dst; 1991 iph->daddr = rt->rt_dst;
1691 iph->version = 0; 1992 iph->version = 0;
1692 err = ipmr_cache_unresolved(net, vif, skb2); 1993 err = ipmr_cache_unresolved(mrt, vif, skb2);
1693 read_unlock(&mrt_lock); 1994 read_unlock(&mrt_lock);
1694 return err; 1995 return err;
1695 } 1996 }
1696 1997
1697 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 1998 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1698 cache->mfc_flags |= MFC_NOTIFY; 1999 cache->mfc_flags |= MFC_NOTIFY;
1699 err = ipmr_fill_mroute(skb, cache, rtm); 2000 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1700 read_unlock(&mrt_lock); 2001 read_unlock(&mrt_lock);
1701 return err; 2002 return err;
1702} 2003}
1703 2004
2005static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2006 u32 pid, u32 seq, struct mfc_cache *c)
2007{
2008 struct nlmsghdr *nlh;
2009 struct rtmsg *rtm;
2010
2011 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2012 if (nlh == NULL)
2013 return -EMSGSIZE;
2014
2015 rtm = nlmsg_data(nlh);
2016 rtm->rtm_family = RTNL_FAMILY_IPMR;
2017 rtm->rtm_dst_len = 32;
2018 rtm->rtm_src_len = 32;
2019 rtm->rtm_tos = 0;
2020 rtm->rtm_table = mrt->id;
2021 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2022 rtm->rtm_type = RTN_MULTICAST;
2023 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2024 rtm->rtm_protocol = RTPROT_UNSPEC;
2025 rtm->rtm_flags = 0;
2026
2027 NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2028 NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2029
2030 if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2031 goto nla_put_failure;
2032
2033 return nlmsg_end(skb, nlh);
2034
2035nla_put_failure:
2036 nlmsg_cancel(skb, nlh);
2037 return -EMSGSIZE;
2038}
2039
2040static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2041{
2042 struct net *net = sock_net(skb->sk);
2043 struct mr_table *mrt;
2044 struct mfc_cache *mfc;
2045 unsigned int t = 0, s_t;
2046 unsigned int h = 0, s_h;
2047 unsigned int e = 0, s_e;
2048
2049 s_t = cb->args[0];
2050 s_h = cb->args[1];
2051 s_e = cb->args[2];
2052
2053 read_lock(&mrt_lock);
2054 ipmr_for_each_table(mrt, net) {
2055 if (t < s_t)
2056 goto next_table;
2057 if (t > s_t)
2058 s_h = 0;
2059 for (h = s_h; h < MFC_LINES; h++) {
2060 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2061 if (e < s_e)
2062 goto next_entry;
2063 if (ipmr_fill_mroute(mrt, skb,
2064 NETLINK_CB(cb->skb).pid,
2065 cb->nlh->nlmsg_seq,
2066 mfc) < 0)
2067 goto done;
2068next_entry:
2069 e++;
2070 }
2071 e = s_e = 0;
2072 }
2073 s_h = 0;
2074next_table:
2075 t++;
2076 }
2077done:
2078 read_unlock(&mrt_lock);
2079
2080 cb->args[2] = e;
2081 cb->args[1] = h;
2082 cb->args[0] = t;
2083
2084 return skb->len;
2085}
2086
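ipmr_rtm_dumproute() above resumes across netlink dump callbacks by parking
its three loop indices (table, hash bucket, entry) in cb->args[] and
fast-forwarding past them on the next invocation. The same pattern in
miniature, with NBUCKETS, nentries() and fill_one() as hypothetical
placeholders:

    static int dump_things(struct sk_buff *skb, struct netlink_callback *cb)
    {
        unsigned int h, e;
        unsigned int s_h = cb->args[0];     /* bucket reached last time */
        unsigned int s_e = cb->args[1];     /* entry within that bucket */

        for (h = s_h; h < NBUCKETS; h++, s_e = 0)
            for (e = s_e; e < nentries(h); e++)
                if (fill_one(skb, h, e) < 0)
                    goto out;               /* skb full: stop here */
    out:
        cb->args[0] = h;                    /* next call resumes exactly here */
        cb->args[1] = e;
        return skb->len;                    /* 0 tells netlink the dump is done */
    }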
1704#ifdef CONFIG_PROC_FS 2087#ifdef CONFIG_PROC_FS
1705/* 2088/*
1706 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 2089 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1707 */ 2090 */
1708struct ipmr_vif_iter { 2091struct ipmr_vif_iter {
1709 struct seq_net_private p; 2092 struct seq_net_private p;
2093 struct mr_table *mrt;
1710 int ct; 2094 int ct;
1711}; 2095};
1712 2096
@@ -1714,11 +2098,13 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1714 struct ipmr_vif_iter *iter, 2098 struct ipmr_vif_iter *iter,
1715 loff_t pos) 2099 loff_t pos)
1716{ 2100{
1717 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) { 2101 struct mr_table *mrt = iter->mrt;
1718 if (!VIF_EXISTS(net, iter->ct)) 2102
2103 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2104 if (!VIF_EXISTS(mrt, iter->ct))
1719 continue; 2105 continue;
1720 if (pos-- == 0) 2106 if (pos-- == 0)
1721 return &net->ipv4.vif_table[iter->ct]; 2107 return &mrt->vif_table[iter->ct];
1722 } 2108 }
1723 return NULL; 2109 return NULL;
1724} 2110}
@@ -1726,7 +2112,15 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1726static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 2112static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1727 __acquires(mrt_lock) 2113 __acquires(mrt_lock)
1728{ 2114{
2115 struct ipmr_vif_iter *iter = seq->private;
1729 struct net *net = seq_file_net(seq); 2116 struct net *net = seq_file_net(seq);
2117 struct mr_table *mrt;
2118
2119 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2120 if (mrt == NULL)
2121 return ERR_PTR(-ENOENT);
2122
2123 iter->mrt = mrt;
1730 2124
1731 read_lock(&mrt_lock); 2125 read_lock(&mrt_lock);
1732 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) 2126 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
@@ -1737,15 +2131,16 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1737{ 2131{
1738 struct ipmr_vif_iter *iter = seq->private; 2132 struct ipmr_vif_iter *iter = seq->private;
1739 struct net *net = seq_file_net(seq); 2133 struct net *net = seq_file_net(seq);
2134 struct mr_table *mrt = iter->mrt;
1740 2135
1741 ++*pos; 2136 ++*pos;
1742 if (v == SEQ_START_TOKEN) 2137 if (v == SEQ_START_TOKEN)
1743 return ipmr_vif_seq_idx(net, iter, 0); 2138 return ipmr_vif_seq_idx(net, iter, 0);
1744 2139
1745 while (++iter->ct < net->ipv4.maxvif) { 2140 while (++iter->ct < mrt->maxvif) {
1746 if (!VIF_EXISTS(net, iter->ct)) 2141 if (!VIF_EXISTS(mrt, iter->ct))
1747 continue; 2142 continue;
1748 return &net->ipv4.vif_table[iter->ct]; 2143 return &mrt->vif_table[iter->ct];
1749 } 2144 }
1750 return NULL; 2145 return NULL;
1751} 2146}
@@ -1758,7 +2153,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1758 2153
1759static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 2154static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1760{ 2155{
1761 struct net *net = seq_file_net(seq); 2156 struct ipmr_vif_iter *iter = seq->private;
2157 struct mr_table *mrt = iter->mrt;
1762 2158
1763 if (v == SEQ_START_TOKEN) { 2159 if (v == SEQ_START_TOKEN) {
1764 seq_puts(seq, 2160 seq_puts(seq,
@@ -1769,7 +2165,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1769 2165
1770 seq_printf(seq, 2166 seq_printf(seq,
1771 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 2167 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1772 vif - net->ipv4.vif_table, 2168 vif - mrt->vif_table,
1773 name, vif->bytes_in, vif->pkt_in, 2169 name, vif->bytes_in, vif->pkt_in,
1774 vif->bytes_out, vif->pkt_out, 2170 vif->bytes_out, vif->pkt_out,
1775 vif->flags, vif->local, vif->remote); 2171 vif->flags, vif->local, vif->remote);
@@ -1800,7 +2196,8 @@ static const struct file_operations ipmr_vif_fops = {
1800 2196
1801struct ipmr_mfc_iter { 2197struct ipmr_mfc_iter {
1802 struct seq_net_private p; 2198 struct seq_net_private p;
1803 struct mfc_cache **cache; 2199 struct mr_table *mrt;
2200 struct list_head *cache;
1804 int ct; 2201 int ct;
1805}; 2202};
1806 2203
@@ -1808,22 +2205,22 @@ struct ipmr_mfc_iter {
1808static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, 2205static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1809 struct ipmr_mfc_iter *it, loff_t pos) 2206 struct ipmr_mfc_iter *it, loff_t pos)
1810{ 2207{
2208 struct mr_table *mrt = it->mrt;
1811 struct mfc_cache *mfc; 2209 struct mfc_cache *mfc;
1812 2210
1813 it->cache = net->ipv4.mfc_cache_array;
1814 read_lock(&mrt_lock); 2211 read_lock(&mrt_lock);
1815 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 2212 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
1816 for (mfc = net->ipv4.mfc_cache_array[it->ct]; 2213 it->cache = &mrt->mfc_cache_array[it->ct];
1817 mfc; mfc = mfc->next) 2214 list_for_each_entry(mfc, it->cache, list)
1818 if (pos-- == 0) 2215 if (pos-- == 0)
1819 return mfc; 2216 return mfc;
2217 }
1820 read_unlock(&mrt_lock); 2218 read_unlock(&mrt_lock);
1821 2219
1822 it->cache = &mfc_unres_queue;
1823 spin_lock_bh(&mfc_unres_lock); 2220 spin_lock_bh(&mfc_unres_lock);
1824 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) 2221 it->cache = &mrt->mfc_unres_queue;
1825 if (net_eq(mfc_net(mfc), net) && 2222 list_for_each_entry(mfc, it->cache, list)
1826 pos-- == 0) 2223 if (pos-- == 0)
1827 return mfc; 2224 return mfc;
1828 spin_unlock_bh(&mfc_unres_lock); 2225 spin_unlock_bh(&mfc_unres_lock);
1829 2226
@@ -1836,7 +2233,13 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1836{ 2233{
1837 struct ipmr_mfc_iter *it = seq->private; 2234 struct ipmr_mfc_iter *it = seq->private;
1838 struct net *net = seq_file_net(seq); 2235 struct net *net = seq_file_net(seq);
2236 struct mr_table *mrt;
1839 2237
2238 mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2239 if (mrt == NULL)
2240 return ERR_PTR(-ENOENT);
2241
2242 it->mrt = mrt;
1840 it->cache = NULL; 2243 it->cache = NULL;
1841 it->ct = 0; 2244 it->ct = 0;
1842 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 2245 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
@@ -1848,37 +2251,36 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1848 struct mfc_cache *mfc = v; 2251 struct mfc_cache *mfc = v;
1849 struct ipmr_mfc_iter *it = seq->private; 2252 struct ipmr_mfc_iter *it = seq->private;
1850 struct net *net = seq_file_net(seq); 2253 struct net *net = seq_file_net(seq);
2254 struct mr_table *mrt = it->mrt;
1851 2255
1852 ++*pos; 2256 ++*pos;
1853 2257
1854 if (v == SEQ_START_TOKEN) 2258 if (v == SEQ_START_TOKEN)
1855 return ipmr_mfc_seq_idx(net, seq->private, 0); 2259 return ipmr_mfc_seq_idx(net, seq->private, 0);
1856 2260
1857 if (mfc->next) 2261 if (mfc->list.next != it->cache)
1858 return mfc->next; 2262 return list_entry(mfc->list.next, struct mfc_cache, list);
1859 2263
1860 if (it->cache == &mfc_unres_queue) 2264 if (it->cache == &mrt->mfc_unres_queue)
1861 goto end_of_list; 2265 goto end_of_list;
1862 2266
1863 BUG_ON(it->cache != net->ipv4.mfc_cache_array); 2267 BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
1864 2268
1865 while (++it->ct < MFC_LINES) { 2269 while (++it->ct < MFC_LINES) {
1866 mfc = net->ipv4.mfc_cache_array[it->ct]; 2270 it->cache = &mrt->mfc_cache_array[it->ct];
1867 if (mfc) 2271 if (list_empty(it->cache))
1868 return mfc; 2272 continue;
2273 return list_first_entry(it->cache, struct mfc_cache, list);
1869 } 2274 }
1870 2275
1871 /* exhausted cache_array, show unresolved */ 2276 /* exhausted cache_array, show unresolved */
1872 read_unlock(&mrt_lock); 2277 read_unlock(&mrt_lock);
1873 it->cache = &mfc_unres_queue; 2278 it->cache = &mrt->mfc_unres_queue;
1874 it->ct = 0; 2279 it->ct = 0;
1875 2280
1876 spin_lock_bh(&mfc_unres_lock); 2281 spin_lock_bh(&mfc_unres_lock);
1877 mfc = mfc_unres_queue; 2282 if (!list_empty(it->cache))
1878 while (mfc && !net_eq(mfc_net(mfc), net)) 2283 return list_first_entry(it->cache, struct mfc_cache, list);
1879 mfc = mfc->next;
1880 if (mfc)
1881 return mfc;
1882 2284
1883 end_of_list: 2285 end_of_list:
1884 spin_unlock_bh(&mfc_unres_lock); 2286 spin_unlock_bh(&mfc_unres_lock);
@@ -1890,18 +2292,17 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1890static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 2292static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1891{ 2293{
1892 struct ipmr_mfc_iter *it = seq->private; 2294 struct ipmr_mfc_iter *it = seq->private;
1893 struct net *net = seq_file_net(seq); 2295 struct mr_table *mrt = it->mrt;
1894 2296
1895 if (it->cache == &mfc_unres_queue) 2297 if (it->cache == &mrt->mfc_unres_queue)
1896 spin_unlock_bh(&mfc_unres_lock); 2298 spin_unlock_bh(&mfc_unres_lock);
1897 else if (it->cache == net->ipv4.mfc_cache_array) 2299 else if (it->cache == &mrt->mfc_cache_array[it->ct])
1898 read_unlock(&mrt_lock); 2300 read_unlock(&mrt_lock);
1899} 2301}
1900 2302
1901static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2303static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1902{ 2304{
1903 int n; 2305 int n;
1904 struct net *net = seq_file_net(seq);
1905 2306
1906 if (v == SEQ_START_TOKEN) { 2307 if (v == SEQ_START_TOKEN) {
1907 seq_puts(seq, 2308 seq_puts(seq,
@@ -1909,20 +2310,21 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1909 } else { 2310 } else {
1910 const struct mfc_cache *mfc = v; 2311 const struct mfc_cache *mfc = v;
1911 const struct ipmr_mfc_iter *it = seq->private; 2312 const struct ipmr_mfc_iter *it = seq->private;
2313 const struct mr_table *mrt = it->mrt;
1912 2314
1913 seq_printf(seq, "%08lX %08lX %-3hd", 2315 seq_printf(seq, "%08X %08X %-3hd",
1914 (unsigned long) mfc->mfc_mcastgrp, 2316 (__force u32) mfc->mfc_mcastgrp,
1915 (unsigned long) mfc->mfc_origin, 2317 (__force u32) mfc->mfc_origin,
1916 mfc->mfc_parent); 2318 mfc->mfc_parent);
1917 2319
1918 if (it->cache != &mfc_unres_queue) { 2320 if (it->cache != &mrt->mfc_unres_queue) {
1919 seq_printf(seq, " %8lu %8lu %8lu", 2321 seq_printf(seq, " %8lu %8lu %8lu",
1920 mfc->mfc_un.res.pkt, 2322 mfc->mfc_un.res.pkt,
1921 mfc->mfc_un.res.bytes, 2323 mfc->mfc_un.res.bytes,
1922 mfc->mfc_un.res.wrong_if); 2324 mfc->mfc_un.res.wrong_if);
1923 for (n = mfc->mfc_un.res.minvif; 2325 for (n = mfc->mfc_un.res.minvif;
1924 n < mfc->mfc_un.res.maxvif; n++ ) { 2326 n < mfc->mfc_un.res.maxvif; n++ ) {
1925 if (VIF_EXISTS(net, n) && 2327 if (VIF_EXISTS(mrt, n) &&
1926 mfc->mfc_un.res.ttls[n] < 255) 2328 mfc->mfc_un.res.ttls[n] < 255)
1927 seq_printf(seq, 2329 seq_printf(seq,
1928 " %2d:%-3d", 2330 " %2d:%-3d",
@@ -1974,27 +2376,11 @@ static const struct net_protocol pim_protocol = {
1974 */ 2376 */
1975static int __net_init ipmr_net_init(struct net *net) 2377static int __net_init ipmr_net_init(struct net *net)
1976{ 2378{
1977 int err = 0; 2379 int err;
1978 2380
1979 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device), 2381 err = ipmr_rules_init(net);
1980 GFP_KERNEL); 2382 if (err < 0)
1981 if (!net->ipv4.vif_table) {
1982 err = -ENOMEM;
1983 goto fail; 2383 goto fail;
1984 }
1985
1986 /* Forwarding cache */
1987 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1988 sizeof(struct mfc_cache *),
1989 GFP_KERNEL);
1990 if (!net->ipv4.mfc_cache_array) {
1991 err = -ENOMEM;
1992 goto fail_mfc_cache;
1993 }
1994
1995#ifdef CONFIG_IP_PIMSM
1996 net->ipv4.mroute_reg_vif_num = -1;
1997#endif
1998 2384
1999#ifdef CONFIG_PROC_FS 2385#ifdef CONFIG_PROC_FS
2000 err = -ENOMEM; 2386 err = -ENOMEM;
@@ -2009,10 +2395,8 @@ static int __net_init ipmr_net_init(struct net *net)
2009proc_cache_fail: 2395proc_cache_fail:
2010 proc_net_remove(net, "ip_mr_vif"); 2396 proc_net_remove(net, "ip_mr_vif");
2011proc_vif_fail: 2397proc_vif_fail:
2012 kfree(net->ipv4.mfc_cache_array); 2398 ipmr_rules_exit(net);
2013#endif 2399#endif
2014fail_mfc_cache:
2015 kfree(net->ipv4.vif_table);
2016fail: 2400fail:
2017 return err; 2401 return err;
2018} 2402}
@@ -2023,8 +2407,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
2023 proc_net_remove(net, "ip_mr_cache"); 2407 proc_net_remove(net, "ip_mr_cache");
2024 proc_net_remove(net, "ip_mr_vif"); 2408 proc_net_remove(net, "ip_mr_vif");
2025#endif 2409#endif
2026 kfree(net->ipv4.mfc_cache_array); 2410 ipmr_rules_exit(net);
2027 kfree(net->ipv4.vif_table);
2028} 2411}
2029 2412
2030static struct pernet_operations ipmr_net_ops = { 2413static struct pernet_operations ipmr_net_ops = {
@@ -2047,7 +2430,6 @@ int __init ip_mr_init(void)
2047 if (err) 2430 if (err)
2048 goto reg_pernet_fail; 2431 goto reg_pernet_fail;
2049 2432
2050 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2051 err = register_netdevice_notifier(&ip_mr_notifier); 2433 err = register_netdevice_notifier(&ip_mr_notifier);
2052 if (err) 2434 if (err)
2053 goto reg_notif_fail; 2435 goto reg_notif_fail;
@@ -2058,6 +2440,7 @@ int __init ip_mr_init(void)
2058 goto add_proto_fail; 2440 goto add_proto_fail;
2059 } 2441 }
2060#endif 2442#endif
2443 rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
2061 return 0; 2444 return 0;
2062 2445
2063#ifdef CONFIG_IP_PIMSM_V2 2446#ifdef CONFIG_IP_PIMSM_V2
@@ -2065,7 +2448,6 @@ add_proto_fail:
2065 unregister_netdevice_notifier(&ip_mr_notifier); 2448 unregister_netdevice_notifier(&ip_mr_notifier);
2066#endif 2449#endif
2067reg_notif_fail: 2450reg_notif_fail:
2068 del_timer(&ipmr_expire_timer);
2069 unregister_pernet_subsys(&ipmr_net_ops); 2451 unregister_pernet_subsys(&ipmr_net_ops);
2070reg_pernet_fail: 2452reg_pernet_fail:
2071 kmem_cache_destroy(mrt_cachep); 2453 kmem_cache_destroy(mrt_cachep);
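With the table setup moved behind ipmr_rules_init()/ipmr_rules_exit(), the
pernet hooks collapse to the canonical pernet_operations contract: whatever
init allocates, exit releases, in reverse order. Reduced to its skeleton
(proc registration elided):

    static int __net_init ipmr_net_init(struct net *net)
    {
        int err = ipmr_rules_init(net);   /* per-netns tables + expire timers */

        if (err < 0)
            return err;
        /* proc entries are registered here in the real patch */
        return 0;
    }

    static void __net_exit ipmr_net_exit(struct net *net)
    {
        ipmr_rules_exit(net);             /* exact mirror of init */
    }

The deleted setup_timer()/del_timer() calls in ip_mr_init() follow from the
same move: the expire timer becomes per-table state, created and torn down
with its table rather than globally.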
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 82fb43c5c59e..d88a46c54fd1 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -17,7 +17,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
17 const struct iphdr *iph = ip_hdr(skb); 17 const struct iphdr *iph = ip_hdr(skb);
18 struct rtable *rt; 18 struct rtable *rt;
19 struct flowi fl = {}; 19 struct flowi fl = {};
20 struct dst_entry *odst; 20 unsigned long orefdst;
21 unsigned int hh_len; 21 unsigned int hh_len;
22 unsigned int type; 22 unsigned int type;
23 23
@@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
43 43
44 /* Drop old route. */ 44 /* Drop old route. */
45 skb_dst_drop(skb); 45 skb_dst_drop(skb);
46 skb_dst_set(skb, &rt->u.dst); 46 skb_dst_set(skb, &rt->dst);
47 } else { 47 } else {
48 /* non-local src, find valid iif to satisfy 48 /* non-local src, find valid iif to satisfy
49 * rp-filter when calling ip_route_input. */ 49 * rp-filter when calling ip_route_input. */
@@ -51,14 +51,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
51 if (ip_route_output_key(net, &rt, &fl) != 0) 51 if (ip_route_output_key(net, &rt, &fl) != 0)
52 return -1; 52 return -1;
53 53
54 odst = skb_dst(skb); 54 orefdst = skb->_skb_refdst;
55 if (ip_route_input(skb, iph->daddr, iph->saddr, 55 if (ip_route_input(skb, iph->daddr, iph->saddr,
56 RT_TOS(iph->tos), rt->u.dst.dev) != 0) { 56 RT_TOS(iph->tos), rt->dst.dev) != 0) {
57 dst_release(&rt->u.dst); 57 dst_release(&rt->dst);
58 return -1; 58 return -1;
59 } 59 }
60 dst_release(&rt->u.dst); 60 dst_release(&rt->dst);
61 dst_release(odst); 61 refdst_drop(orefdst);
62 } 62 }
63 63
64 if (skb_dst(skb)->error) 64 if (skb_dst(skb)->error)
@@ -212,9 +212,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
212 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, 212 skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
213 skb->len - dataoff, 0); 213 skb->len - dataoff, 0);
214 skb->ip_summed = CHECKSUM_NONE; 214 skb->ip_summed = CHECKSUM_NONE;
215 csum = __skb_checksum_complete_head(skb, dataoff + len); 215 return __skb_checksum_complete_head(skb, dataoff + len);
216 if (!csum)
217 skb->ip_summed = CHECKSUM_UNNECESSARY;
218 } 216 }
219 return csum; 217 return csum;
220} 218}
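In ip_route_me_harder() the saved "old dst" changes type from a struct
dst_entry pointer to the raw skb->_skb_refdst word. In this kernel generation
that word packs the dst pointer together with a noref bit, so a plain
dst_release() on the old value would be wrong for a non-refcounted dst;
refdst_drop() decodes the bit and releases only when a reference is actually
held. Sketched (reroute() is a hypothetical placeholder):

    unsigned long orefdst = skb->_skb_refdst;   /* pointer + noref bit */

    if (reroute(skb) != 0)
        return -1;                              /* skb keeps its old dst */

    refdst_drop(orefdst);                       /* release iff refcounted */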
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f07d77f65751..6bccba31d132 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -49,12 +49,7 @@ MODULE_DESCRIPTION("arptables core");
49#endif 49#endif
50 50
51#ifdef CONFIG_NETFILTER_DEBUG 51#ifdef CONFIG_NETFILTER_DEBUG
52#define ARP_NF_ASSERT(x) \ 52#define ARP_NF_ASSERT(x) WARN_ON(!(x))
53do { \
54 if (!(x)) \
55 printk("ARP_NF_ASSERT: %s:%s:%u\n", \
56 __func__, __FILE__, __LINE__); \
57} while(0)
58#else 53#else
59#define ARP_NF_ASSERT(x) 54#define ARP_NF_ASSERT(x)
60#endif 55#endif
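Both hand-rolled assert macros in this patch (ARP_NF_ASSERT here,
IP_NF_ASSERT in ip_tables.c below) get the same one-line replacement:

    #define ARP_NF_ASSERT(x)    WARN_ON(!(x))

which trades the bare function/file/line printk for a WARN_ON splat with a
full backtrace, so the failing call chain is visible in the log.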
@@ -224,10 +219,10 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
224} 219}
225 220
226static unsigned int 221static unsigned int
227arpt_error(struct sk_buff *skb, const struct xt_target_param *par) 222arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
228{ 223{
229 if (net_ratelimit()) 224 if (net_ratelimit())
230 printk("arp_tables: error: '%s'\n", 225 pr_err("arp_tables: error: '%s'\n",
231 (const char *)par->targinfo); 226 (const char *)par->targinfo);
232 227
233 return NF_DROP; 228 return NF_DROP;
@@ -260,12 +255,11 @@ unsigned int arpt_do_table(struct sk_buff *skb,
260 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 255 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
261 unsigned int verdict = NF_DROP; 256 unsigned int verdict = NF_DROP;
262 const struct arphdr *arp; 257 const struct arphdr *arp;
263 bool hotdrop = false;
264 struct arpt_entry *e, *back; 258 struct arpt_entry *e, *back;
265 const char *indev, *outdev; 259 const char *indev, *outdev;
266 void *table_base; 260 void *table_base;
267 const struct xt_table_info *private; 261 const struct xt_table_info *private;
268 struct xt_target_param tgpar; 262 struct xt_action_param acpar;
269 263
270 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) 264 if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
271 return NF_DROP; 265 return NF_DROP;
@@ -280,24 +274,22 @@ unsigned int arpt_do_table(struct sk_buff *skb,
280 e = get_entry(table_base, private->hook_entry[hook]); 274 e = get_entry(table_base, private->hook_entry[hook]);
281 back = get_entry(table_base, private->underflow[hook]); 275 back = get_entry(table_base, private->underflow[hook]);
282 276
283 tgpar.in = in; 277 acpar.in = in;
284 tgpar.out = out; 278 acpar.out = out;
285 tgpar.hooknum = hook; 279 acpar.hooknum = hook;
286 tgpar.family = NFPROTO_ARP; 280 acpar.family = NFPROTO_ARP;
281 acpar.hotdrop = false;
287 282
288 arp = arp_hdr(skb); 283 arp = arp_hdr(skb);
289 do { 284 do {
290 const struct arpt_entry_target *t; 285 const struct arpt_entry_target *t;
291 int hdr_len;
292 286
293 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { 287 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
294 e = arpt_next_entry(e); 288 e = arpt_next_entry(e);
295 continue; 289 continue;
296 } 290 }
297 291
298 hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + 292 ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1);
299 (2 * skb->dev->addr_len);
300 ADD_COUNTER(e->counters, hdr_len, 1);
301 293
302 t = arpt_get_target_c(e); 294 t = arpt_get_target_c(e);
303 295
@@ -333,9 +325,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
333 /* Targets which reenter must return 325 /* Targets which reenter must return
334 * abs. verdicts 326 * abs. verdicts
335 */ 327 */
336 tgpar.target = t->u.kernel.target; 328 acpar.target = t->u.kernel.target;
337 tgpar.targinfo = t->data; 329 acpar.targinfo = t->data;
338 verdict = t->u.kernel.target->target(skb, &tgpar); 330 verdict = t->u.kernel.target->target(skb, &acpar);
339 331
340 /* Target might have changed stuff. */ 332 /* Target might have changed stuff. */
341 arp = arp_hdr(skb); 333 arp = arp_hdr(skb);
@@ -345,10 +337,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
345 else 337 else
346 /* Verdict */ 338 /* Verdict */
347 break; 339 break;
348 } while (!hotdrop); 340 } while (!acpar.hotdrop);
349 xt_info_rdunlock_bh(); 341 xt_info_rdunlock_bh();
350 342
351 if (hotdrop) 343 if (acpar.hotdrop)
352 return NF_DROP; 344 return NF_DROP;
353 else 345 else
354 return verdict; 346 return verdict;
@@ -390,7 +382,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
390 int visited = e->comefrom & (1 << hook); 382 int visited = e->comefrom & (1 << hook);
391 383
392 if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { 384 if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
393 printk("arptables: loop hook %u pos %u %08X.\n", 385 pr_notice("arptables: loop hook %u pos %u %08X.\n",
394 hook, pos, e->comefrom); 386 hook, pos, e->comefrom);
395 return 0; 387 return 0;
396 } 388 }
@@ -523,13 +515,11 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
523 return ret; 515 return ret;
524 516
525 t = arpt_get_target(e); 517 t = arpt_get_target(e);
526 target = try_then_request_module(xt_find_target(NFPROTO_ARP, 518 target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
527 t->u.user.name, 519 t->u.user.revision);
528 t->u.user.revision), 520 if (IS_ERR(target)) {
529 "arpt_%s", t->u.user.name);
530 if (IS_ERR(target) || !target) {
531 duprintf("find_check_entry: `%s' not found\n", t->u.user.name); 521 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
532 ret = target ? PTR_ERR(target) : -ENOENT; 522 ret = PTR_ERR(target);
533 goto out; 523 goto out;
534 } 524 }
535 t->u.kernel.target = target; 525 t->u.kernel.target = target;
@@ -651,6 +641,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
651 if (ret != 0) 641 if (ret != 0)
652 break; 642 break;
653 ++i; 643 ++i;
644 if (strcmp(arpt_get_target(iter)->u.user.name,
645 XT_ERROR_TARGET) == 0)
646 ++newinfo->stacksize;
654 } 647 }
655 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); 648 duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
656 if (ret != 0) 649 if (ret != 0)
@@ -717,7 +710,7 @@ static void get_counters(const struct xt_table_info *t,
717 struct arpt_entry *iter; 710 struct arpt_entry *iter;
718 unsigned int cpu; 711 unsigned int cpu;
719 unsigned int i; 712 unsigned int i;
720 unsigned int curcpu; 713 unsigned int curcpu = get_cpu();
721 714
722 /* Instead of clearing (by a previous call to memset()) 715 /* Instead of clearing (by a previous call to memset())
723 * the counters and using adds, we set the counters 716 * the counters and using adds, we set the counters
@@ -727,14 +720,16 @@ static void get_counters(const struct xt_table_info *t,
727 * if new softirq were to run and call ipt_do_table 720 * if new softirq were to run and call ipt_do_table
728 */ 721 */
729 local_bh_disable(); 722 local_bh_disable();
730 curcpu = smp_processor_id();
731
732 i = 0; 723 i = 0;
733 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 724 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
734 SET_COUNTER(counters[i], iter->counters.bcnt, 725 SET_COUNTER(counters[i], iter->counters.bcnt,
735 iter->counters.pcnt); 726 iter->counters.pcnt);
736 ++i; 727 ++i;
737 } 728 }
729 local_bh_enable();
 730 /* While processing counters from the other cpus we can leave
 731 * bottom halves enabled (preemption is still disabled).
732 */
738 733
739 for_each_possible_cpu(cpu) { 734 for_each_possible_cpu(cpu) {
740 if (cpu == curcpu) 735 if (cpu == curcpu)
@@ -748,7 +743,7 @@ static void get_counters(const struct xt_table_info *t,
748 } 743 }
749 xt_info_wrunlock(cpu); 744 xt_info_wrunlock(cpu);
750 } 745 }
751 local_bh_enable(); 746 put_cpu();
752} 747}
753 748
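The reworked get_counters() pins the task to one CPU with get_cpu() for the
whole snapshot but disables bottom halves only while reading the local CPU's
entries; remote CPUs are read under their xt_info write lock, which already
excludes the packet path there. Its locking skeleton, with the counter
helpers as hypothetical stand-ins:

    unsigned int curcpu = get_cpu();      /* no migration from here on */

    local_bh_disable();                   /* quiesce local softirq updates */
    snapshot_local_counters(curcpu);
    local_bh_enable();

    for_each_possible_cpu(cpu) {
        if (cpu == curcpu)
            continue;
        xt_info_wrlock(cpu);              /* exclude that cpu's writers */
        add_remote_counters(cpu);
        xt_info_wrunlock(cpu);
    }
    put_cpu();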
754static struct xt_counters *alloc_counters(const struct xt_table *table) 749static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -762,7 +757,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
762 * about). 757 * about).
763 */ 758 */
764 countersize = sizeof(struct xt_counters) * private->number; 759 countersize = sizeof(struct xt_counters) * private->number;
765 counters = vmalloc_node(countersize, numa_node_id()); 760 counters = vmalloc(countersize);
766 761
767 if (counters == NULL) 762 if (counters == NULL)
768 return ERR_PTR(-ENOMEM); 763 return ERR_PTR(-ENOMEM);
@@ -1009,8 +1004,7 @@ static int __do_replace(struct net *net, const char *name,
1009 struct arpt_entry *iter; 1004 struct arpt_entry *iter;
1010 1005
1011 ret = 0; 1006 ret = 0;
1012 counters = vmalloc_node(num_counters * sizeof(struct xt_counters), 1007 counters = vmalloc(num_counters * sizeof(struct xt_counters));
1013 numa_node_id());
1014 if (!counters) { 1008 if (!counters) {
1015 ret = -ENOMEM; 1009 ret = -ENOMEM;
1016 goto out; 1010 goto out;
@@ -1163,7 +1157,7 @@ static int do_add_counters(struct net *net, const void __user *user,
1163 if (len != size + num_counters * sizeof(struct xt_counters)) 1157 if (len != size + num_counters * sizeof(struct xt_counters))
1164 return -EINVAL; 1158 return -EINVAL;
1165 1159
1166 paddc = vmalloc_node(len - size, numa_node_id()); 1160 paddc = vmalloc(len - size);
1167 if (!paddc) 1161 if (!paddc)
1168 return -ENOMEM; 1162 return -ENOMEM;
1169 1163
@@ -1252,14 +1246,12 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1252 entry_offset = (void *)e - (void *)base; 1246 entry_offset = (void *)e - (void *)base;
1253 1247
1254 t = compat_arpt_get_target(e); 1248 t = compat_arpt_get_target(e);
1255 target = try_then_request_module(xt_find_target(NFPROTO_ARP, 1249 target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
1256 t->u.user.name, 1250 t->u.user.revision);
1257 t->u.user.revision), 1251 if (IS_ERR(target)) {
1258 "arpt_%s", t->u.user.name);
1259 if (IS_ERR(target) || !target) {
1260 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", 1252 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1261 t->u.user.name); 1253 t->u.user.name);
1262 ret = target ? PTR_ERR(target) : -ENOENT; 1254 ret = PTR_ERR(target);
1263 goto out; 1255 goto out;
1264 } 1256 }
1265 t->u.kernel.target = target; 1257 t->u.kernel.target = target;
@@ -1778,8 +1770,7 @@ struct xt_table *arpt_register_table(struct net *net,
1778{ 1770{
1779 int ret; 1771 int ret;
1780 struct xt_table_info *newinfo; 1772 struct xt_table_info *newinfo;
1781 struct xt_table_info bootstrap 1773 struct xt_table_info bootstrap = {0};
1782 = { 0, 0, 0, { 0 }, { 0 }, { } };
1783 void *loc_cpu_entry; 1774 void *loc_cpu_entry;
1784 struct xt_table *new_table; 1775 struct xt_table *new_table;
1785 1776
@@ -1830,22 +1821,23 @@ void arpt_unregister_table(struct xt_table *table)
1830} 1821}
1831 1822
1832/* The built-in targets: standard (NULL) and error. */ 1823/* The built-in targets: standard (NULL) and error. */
1833static struct xt_target arpt_standard_target __read_mostly = { 1824static struct xt_target arpt_builtin_tg[] __read_mostly = {
1834 .name = ARPT_STANDARD_TARGET, 1825 {
1835 .targetsize = sizeof(int), 1826 .name = ARPT_STANDARD_TARGET,
1836 .family = NFPROTO_ARP, 1827 .targetsize = sizeof(int),
1828 .family = NFPROTO_ARP,
1837#ifdef CONFIG_COMPAT 1829#ifdef CONFIG_COMPAT
1838 .compatsize = sizeof(compat_int_t), 1830 .compatsize = sizeof(compat_int_t),
1839 .compat_from_user = compat_standard_from_user, 1831 .compat_from_user = compat_standard_from_user,
1840 .compat_to_user = compat_standard_to_user, 1832 .compat_to_user = compat_standard_to_user,
1841#endif 1833#endif
1842}; 1834 },
1843 1835 {
1844static struct xt_target arpt_error_target __read_mostly = { 1836 .name = ARPT_ERROR_TARGET,
1845 .name = ARPT_ERROR_TARGET, 1837 .target = arpt_error,
1846 .target = arpt_error, 1838 .targetsize = ARPT_FUNCTION_MAXNAMELEN,
1847 .targetsize = ARPT_FUNCTION_MAXNAMELEN, 1839 .family = NFPROTO_ARP,
1848 .family = NFPROTO_ARP, 1840 },
1849}; 1841};
1850 1842
1851static struct nf_sockopt_ops arpt_sockopts = { 1843static struct nf_sockopt_ops arpt_sockopts = {
@@ -1889,12 +1881,9 @@ static int __init arp_tables_init(void)
1889 goto err1; 1881 goto err1;
1890 1882
 1891 /* No one else will be downing sem now, so we won't sleep */ 1883
1892 ret = xt_register_target(&arpt_standard_target); 1884 ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
1893 if (ret < 0) 1885 if (ret < 0)
1894 goto err2; 1886 goto err2;
1895 ret = xt_register_target(&arpt_error_target);
1896 if (ret < 0)
1897 goto err3;
1898 1887
1899 /* Register setsockopt */ 1888 /* Register setsockopt */
1900 ret = nf_register_sockopt(&arpt_sockopts); 1889 ret = nf_register_sockopt(&arpt_sockopts);
@@ -1905,9 +1894,7 @@ static int __init arp_tables_init(void)
1905 return 0; 1894 return 0;
1906 1895
1907err4: 1896err4:
1908 xt_unregister_target(&arpt_error_target); 1897 xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
1909err3:
1910 xt_unregister_target(&arpt_standard_target);
1911err2: 1898err2:
1912 unregister_pernet_subsys(&arp_tables_net_ops); 1899 unregister_pernet_subsys(&arp_tables_net_ops);
1913err1: 1900err1:
@@ -1917,8 +1904,7 @@ err1:
1917static void __exit arp_tables_fini(void) 1904static void __exit arp_tables_fini(void)
1918{ 1905{
1919 nf_unregister_sockopt(&arpt_sockopts); 1906 nf_unregister_sockopt(&arpt_sockopts);
1920 xt_unregister_target(&arpt_error_target); 1907 xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
1921 xt_unregister_target(&arpt_standard_target);
1922 unregister_pernet_subsys(&arp_tables_net_ops); 1908 unregister_pernet_subsys(&arp_tables_net_ops);
1923} 1909}
1924 1910
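Merging the two built-in targets into one array is what lets the init and
exit paths above shed an error label each: xt_register_targets() and
xt_unregister_targets() handle the whole array, including rollback of the
already-registered entries on a partial failure. The usage pattern, with
illustrative names:

    static struct xt_target my_builtin_tg[] __read_mostly = {
        { .name = "STANDARD", .family = NFPROTO_ARP,
          .targetsize = sizeof(int) },
        { .name = "ERROR",    .family = NFPROTO_ARP,
          .target = my_error_tg },
    };

    ret = xt_register_targets(my_builtin_tg, ARRAY_SIZE(my_builtin_tg));
    if (ret < 0)
        goto err;
    /* ... */
    xt_unregister_targets(my_builtin_tg, ARRAY_SIZE(my_builtin_tg));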
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index b0d5b1d0a769..e1be7dd1171b 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -9,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
9MODULE_DESCRIPTION("arptables arp payload mangle target"); 9MODULE_DESCRIPTION("arptables arp payload mangle target");
10 10
11static unsigned int 11static unsigned int
12target(struct sk_buff *skb, const struct xt_target_param *par) 12target(struct sk_buff *skb, const struct xt_action_param *par)
13{ 13{
14 const struct arpt_mangle *mangle = par->targinfo; 14 const struct arpt_mangle *mangle = par->targinfo;
15 const struct arphdr *arp; 15 const struct arphdr *arp;
@@ -54,7 +54,7 @@ target(struct sk_buff *skb, const struct xt_target_param *par)
54 return mangle->target; 54 return mangle->target;
55} 55}
56 56
57static bool checkentry(const struct xt_tgchk_param *par) 57static int checkentry(const struct xt_tgchk_param *par)
58{ 58{
59 const struct arpt_mangle *mangle = par->targinfo; 59 const struct arpt_mangle *mangle = par->targinfo;
60 60
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index e2787048aa0a..d2c1311cb28d 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -42,7 +42,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
42 42
43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; 43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; 44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
45static DEFINE_RWLOCK(queue_lock); 45static DEFINE_SPINLOCK(queue_lock);
46static int peer_pid __read_mostly; 46static int peer_pid __read_mostly;
47static unsigned int copy_range __read_mostly; 47static unsigned int copy_range __read_mostly;
48static unsigned int queue_total; 48static unsigned int queue_total;
@@ -72,10 +72,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
72 break; 72 break;
73 73
74 case IPQ_COPY_PACKET: 74 case IPQ_COPY_PACKET:
75 copy_mode = mode; 75 if (range > 0xFFFF)
76 range = 0xFFFF;
76 copy_range = range; 77 copy_range = range;
77 if (copy_range > 0xFFFF) 78 copy_mode = mode;
78 copy_range = 0xFFFF;
79 break; 79 break;
80 80
81 default: 81 default:
@@ -101,7 +101,7 @@ ipq_find_dequeue_entry(unsigned long id)
101{ 101{
102 struct nf_queue_entry *entry = NULL, *i; 102 struct nf_queue_entry *entry = NULL, *i;
103 103
104 write_lock_bh(&queue_lock); 104 spin_lock_bh(&queue_lock);
105 105
106 list_for_each_entry(i, &queue_list, list) { 106 list_for_each_entry(i, &queue_list, list) {
107 if ((unsigned long)i == id) { 107 if ((unsigned long)i == id) {
@@ -115,7 +115,7 @@ ipq_find_dequeue_entry(unsigned long id)
115 queue_total--; 115 queue_total--;
116 } 116 }
117 117
118 write_unlock_bh(&queue_lock); 118 spin_unlock_bh(&queue_lock);
119 return entry; 119 return entry;
120} 120}
121 121
@@ -136,9 +136,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
136static void 136static void
137ipq_flush(ipq_cmpfn cmpfn, unsigned long data) 137ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
138{ 138{
139 write_lock_bh(&queue_lock); 139 spin_lock_bh(&queue_lock);
140 __ipq_flush(cmpfn, data); 140 __ipq_flush(cmpfn, data);
141 write_unlock_bh(&queue_lock); 141 spin_unlock_bh(&queue_lock);
142} 142}
143 143
144static struct sk_buff * 144static struct sk_buff *
@@ -152,37 +152,29 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
152 struct nlmsghdr *nlh; 152 struct nlmsghdr *nlh;
153 struct timeval tv; 153 struct timeval tv;
154 154
155 read_lock_bh(&queue_lock); 155 switch (ACCESS_ONCE(copy_mode)) {
156
157 switch (copy_mode) {
158 case IPQ_COPY_META: 156 case IPQ_COPY_META:
159 case IPQ_COPY_NONE: 157 case IPQ_COPY_NONE:
160 size = NLMSG_SPACE(sizeof(*pmsg)); 158 size = NLMSG_SPACE(sizeof(*pmsg));
161 break; 159 break;
162 160
163 case IPQ_COPY_PACKET: 161 case IPQ_COPY_PACKET:
164 if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || 162 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
165 entry->skb->ip_summed == CHECKSUM_COMPLETE) && 163 (*errp = skb_checksum_help(entry->skb)))
166 (*errp = skb_checksum_help(entry->skb))) {
167 read_unlock_bh(&queue_lock);
168 return NULL; 164 return NULL;
169 } 165
170 if (copy_range == 0 || copy_range > entry->skb->len) 166 data_len = ACCESS_ONCE(copy_range);
167 if (data_len == 0 || data_len > entry->skb->len)
171 data_len = entry->skb->len; 168 data_len = entry->skb->len;
172 else
173 data_len = copy_range;
174 169
175 size = NLMSG_SPACE(sizeof(*pmsg) + data_len); 170 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
176 break; 171 break;
177 172
178 default: 173 default:
179 *errp = -EINVAL; 174 *errp = -EINVAL;
180 read_unlock_bh(&queue_lock);
181 return NULL; 175 return NULL;
182 } 176 }
183 177
184 read_unlock_bh(&queue_lock);
185
186 skb = alloc_skb(size, GFP_ATOMIC); 178 skb = alloc_skb(size, GFP_ATOMIC);
187 if (!skb) 179 if (!skb)
188 goto nlmsg_failure; 180 goto nlmsg_failure;
@@ -243,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
243 if (nskb == NULL) 235 if (nskb == NULL)
244 return status; 236 return status;
245 237
246 write_lock_bh(&queue_lock); 238 spin_lock_bh(&queue_lock);
247 239
248 if (!peer_pid) 240 if (!peer_pid)
249 goto err_out_free_nskb; 241 goto err_out_free_nskb;
@@ -267,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
267 259
268 __ipq_enqueue_entry(entry); 260 __ipq_enqueue_entry(entry);
269 261
270 write_unlock_bh(&queue_lock); 262 spin_unlock_bh(&queue_lock);
271 return status; 263 return status;
272 264
273err_out_free_nskb: 265err_out_free_nskb:
274 kfree_skb(nskb); 266 kfree_skb(nskb);
275 267
276err_out_unlock: 268err_out_unlock:
277 write_unlock_bh(&queue_lock); 269 spin_unlock_bh(&queue_lock);
278 return status; 270 return status;
279} 271}
280 272
@@ -343,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range)
343{ 335{
344 int status; 336 int status;
345 337
346 write_lock_bh(&queue_lock); 338 spin_lock_bh(&queue_lock);
347 status = __ipq_set_mode(mode, range); 339 status = __ipq_set_mode(mode, range);
348 write_unlock_bh(&queue_lock); 340 spin_unlock_bh(&queue_lock);
349 return status; 341 return status;
350} 342}
351 343
@@ -441,11 +433,11 @@ __ipq_rcv_skb(struct sk_buff *skb)
441 if (security_netlink_recv(skb, CAP_NET_ADMIN)) 433 if (security_netlink_recv(skb, CAP_NET_ADMIN))
442 RCV_SKB_FAIL(-EPERM); 434 RCV_SKB_FAIL(-EPERM);
443 435
444 write_lock_bh(&queue_lock); 436 spin_lock_bh(&queue_lock);
445 437
446 if (peer_pid) { 438 if (peer_pid) {
447 if (peer_pid != pid) { 439 if (peer_pid != pid) {
448 write_unlock_bh(&queue_lock); 440 spin_unlock_bh(&queue_lock);
449 RCV_SKB_FAIL(-EBUSY); 441 RCV_SKB_FAIL(-EBUSY);
450 } 442 }
451 } else { 443 } else {
@@ -453,7 +445,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
453 peer_pid = pid; 445 peer_pid = pid;
454 } 446 }
455 447
456 write_unlock_bh(&queue_lock); 448 spin_unlock_bh(&queue_lock);
457 449
458 status = ipq_receive_peer(NLMSG_DATA(nlh), type, 450 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
459 nlmsglen - NLMSG_LENGTH(0)); 451 nlmsglen - NLMSG_LENGTH(0));
@@ -462,7 +454,6 @@ __ipq_rcv_skb(struct sk_buff *skb)
462 454
463 if (flags & NLM_F_ACK) 455 if (flags & NLM_F_ACK)
464 netlink_ack(skb, nlh, 0); 456 netlink_ack(skb, nlh, 0);
465 return;
466} 457}
467 458
468static void 459static void
@@ -499,10 +490,10 @@ ipq_rcv_nl_event(struct notifier_block *this,
499 struct netlink_notify *n = ptr; 490 struct netlink_notify *n = ptr;
500 491
501 if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { 492 if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
502 write_lock_bh(&queue_lock); 493 spin_lock_bh(&queue_lock);
503 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) 494 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
504 __ipq_reset(); 495 __ipq_reset();
505 write_unlock_bh(&queue_lock); 496 spin_unlock_bh(&queue_lock);
506 } 497 }
507 return NOTIFY_DONE; 498 return NOTIFY_DONE;
508} 499}
@@ -529,7 +520,7 @@ static ctl_table ipq_table[] = {
529#ifdef CONFIG_PROC_FS 520#ifdef CONFIG_PROC_FS
530static int ip_queue_show(struct seq_file *m, void *v) 521static int ip_queue_show(struct seq_file *m, void *v)
531{ 522{
532 read_lock_bh(&queue_lock); 523 spin_lock_bh(&queue_lock);
533 524
534 seq_printf(m, 525 seq_printf(m,
535 "Peer PID : %d\n" 526 "Peer PID : %d\n"
@@ -547,7 +538,7 @@ static int ip_queue_show(struct seq_file *m, void *v)
547 queue_dropped, 538 queue_dropped,
548 queue_user_dropped); 539 queue_user_dropped);
549 540
550 read_unlock_bh(&queue_lock); 541 spin_unlock_bh(&queue_lock);
551 return 0; 542 return 0;
552} 543}
553 544
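With queue_lock demoted to a plain spinlock, ipq_build_packet_message() no
longer takes it at all: copy_mode and copy_range are each read exactly once
via ACCESS_ONCE(), so a concurrent ipq_set_mode() may change them between
messages but never within one. This is also why the writer side above now
clamps the range before publishing the mode. The idiom in isolation:

    /* Writer, under queue_lock: publish range before mode. */
    copy_range = range;
    copy_mode  = mode;

    /* Lockless reader: take one stable snapshot of each variable
     * and never reread it within the same message. */
    switch (ACCESS_ONCE(copy_mode)) {
    case IPQ_COPY_PACKET:
        data_len = ACCESS_ONCE(copy_range);
        break;
    }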
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b29c66df8d1f..c439721b165a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -39,24 +39,19 @@ MODULE_DESCRIPTION("IPv4 packet filter");
39/*#define DEBUG_IP_FIREWALL_USER*/ 39/*#define DEBUG_IP_FIREWALL_USER*/
40 40
41#ifdef DEBUG_IP_FIREWALL 41#ifdef DEBUG_IP_FIREWALL
42#define dprintf(format, args...) printk(format , ## args) 42#define dprintf(format, args...) pr_info(format , ## args)
43#else 43#else
44#define dprintf(format, args...) 44#define dprintf(format, args...)
45#endif 45#endif
46 46
47#ifdef DEBUG_IP_FIREWALL_USER 47#ifdef DEBUG_IP_FIREWALL_USER
48#define duprintf(format, args...) printk(format , ## args) 48#define duprintf(format, args...) pr_info(format , ## args)
49#else 49#else
50#define duprintf(format, args...) 50#define duprintf(format, args...)
51#endif 51#endif
52 52
53#ifdef CONFIG_NETFILTER_DEBUG 53#ifdef CONFIG_NETFILTER_DEBUG
54#define IP_NF_ASSERT(x) \ 54#define IP_NF_ASSERT(x) WARN_ON(!(x))
55do { \
56 if (!(x)) \
57 printk("IP_NF_ASSERT: %s:%s:%u\n", \
58 __func__, __FILE__, __LINE__); \
59} while(0)
60#else 55#else
61#define IP_NF_ASSERT(x) 56#define IP_NF_ASSERT(x)
62#endif 57#endif
@@ -165,30 +160,14 @@ ip_checkentry(const struct ipt_ip *ip)
165} 160}
166 161
167static unsigned int 162static unsigned int
168ipt_error(struct sk_buff *skb, const struct xt_target_param *par) 163ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
169{ 164{
170 if (net_ratelimit()) 165 if (net_ratelimit())
171 printk("ip_tables: error: `%s'\n", 166 pr_info("error: `%s'\n", (const char *)par->targinfo);
172 (const char *)par->targinfo);
173 167
174 return NF_DROP; 168 return NF_DROP;
175} 169}
176 170
177/* Performance critical - called for every packet */
178static inline bool
179do_match(const struct ipt_entry_match *m, const struct sk_buff *skb,
180 struct xt_match_param *par)
181{
182 par->match = m->u.kernel.match;
183 par->matchinfo = m->data;
184
185 /* Stop iteration if it doesn't match */
186 if (!m->u.kernel.match->match(skb, par))
187 return true;
188 else
189 return false;
190}
191
192/* Performance critical */ 171/* Performance critical */
193static inline struct ipt_entry * 172static inline struct ipt_entry *
194get_entry(const void *base, unsigned int offset) 173get_entry(const void *base, unsigned int offset)
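
The removal of do_match() goes together with the xt_match_param/xt_target_param merge visible in the signatures above: matches and targets now share one parameter block, so the per-packet loop can fill it in once. An abridged sketch of the merged structure as this series shapes it (field list reconstructed from the hunks, not a verbatim copy of the header):

	struct xt_action_param {
		union {
			const struct xt_match *match;	/* valid for matches */
			const struct xt_target *target;	/* valid for targets */
		};
		union {
			const void *matchinfo, *targinfo;
		};
		const struct net_device *in, *out;
		int fragoff;			/* IPv4 fragment offset */
		unsigned int thoff;		/* transport header offset */
		unsigned int hooknum;
		u_int8_t family;
		bool hotdrop;			/* set by matches to force a drop */
	};
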
@@ -322,19 +301,16 @@ ipt_do_table(struct sk_buff *skb,
322 const struct net_device *out, 301 const struct net_device *out,
323 struct xt_table *table) 302 struct xt_table *table)
324{ 303{
325#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
326
327 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 304 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
328 const struct iphdr *ip; 305 const struct iphdr *ip;
329 bool hotdrop = false;
330 /* Initializing verdict to NF_DROP keeps gcc happy. */ 306 /* Initializing verdict to NF_DROP keeps gcc happy. */
331 unsigned int verdict = NF_DROP; 307 unsigned int verdict = NF_DROP;
332 const char *indev, *outdev; 308 const char *indev, *outdev;
333 const void *table_base; 309 const void *table_base;
334 struct ipt_entry *e, *back; 310 struct ipt_entry *e, **jumpstack;
311 unsigned int *stackptr, origptr, cpu;
335 const struct xt_table_info *private; 312 const struct xt_table_info *private;
336 struct xt_match_param mtpar; 313 struct xt_action_param acpar;
337 struct xt_target_param tgpar;
338 314
339 /* Initialization */ 315 /* Initialization */
340 ip = ip_hdr(skb); 316 ip = ip_hdr(skb);
@@ -346,42 +322,49 @@ ipt_do_table(struct sk_buff *skb,
346 * things we don't know, i.e. tcp syn flag or ports). If the 322 * things we don't know, i.e. tcp syn flag or ports). If the
347 * rule is also a fragment-specific rule, non-fragments won't 323 * rule is also a fragment-specific rule, non-fragments won't
348 * match it. */ 324 * match it. */
349 mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; 325 acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
350 mtpar.thoff = ip_hdrlen(skb); 326 acpar.thoff = ip_hdrlen(skb);
351 mtpar.hotdrop = &hotdrop; 327 acpar.hotdrop = false;
352 mtpar.in = tgpar.in = in; 328 acpar.in = in;
353 mtpar.out = tgpar.out = out; 329 acpar.out = out;
354 mtpar.family = tgpar.family = NFPROTO_IPV4; 330 acpar.family = NFPROTO_IPV4;
355 mtpar.hooknum = tgpar.hooknum = hook; 331 acpar.hooknum = hook;
356 332
357 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 333 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
358 xt_info_rdlock_bh(); 334 xt_info_rdlock_bh();
359 private = table->private; 335 private = table->private;
360 table_base = private->entries[smp_processor_id()]; 336 cpu = smp_processor_id();
337 table_base = private->entries[cpu];
338 jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
339 stackptr = per_cpu_ptr(private->stackptr, cpu);
340 origptr = *stackptr;
361 341
362 e = get_entry(table_base, private->hook_entry[hook]); 342 e = get_entry(table_base, private->hook_entry[hook]);
363 343
364 /* For return from builtin chain */ 344 pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
365 back = get_entry(table_base, private->underflow[hook]); 345 table->name, hook, origptr,
346 get_entry(table_base, private->underflow[hook]));
366 347
367 do { 348 do {
368 const struct ipt_entry_target *t; 349 const struct ipt_entry_target *t;
369 const struct xt_entry_match *ematch; 350 const struct xt_entry_match *ematch;
370 351
371 IP_NF_ASSERT(e); 352 IP_NF_ASSERT(e);
372 IP_NF_ASSERT(back);
373 if (!ip_packet_match(ip, indev, outdev, 353 if (!ip_packet_match(ip, indev, outdev,
374 &e->ip, mtpar.fragoff)) { 354 &e->ip, acpar.fragoff)) {
375 no_match: 355 no_match:
376 e = ipt_next_entry(e); 356 e = ipt_next_entry(e);
377 continue; 357 continue;
378 } 358 }
379 359
380 xt_ematch_foreach(ematch, e) 360 xt_ematch_foreach(ematch, e) {
381 if (do_match(ematch, skb, &mtpar) != 0) 361 acpar.match = ematch->u.kernel.match;
362 acpar.matchinfo = ematch->data;
363 if (!acpar.match->match(skb, &acpar))
382 goto no_match; 364 goto no_match;
365 }
383 366
384 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); 367 ADD_COUNTER(e->counters, skb->len, 1);
385 368
386 t = ipt_get_target(e); 369 t = ipt_get_target(e);
387 IP_NF_ASSERT(t->u.kernel.target); 370 IP_NF_ASSERT(t->u.kernel.target);
@@ -404,41 +387,38 @@ ipt_do_table(struct sk_buff *skb,
404 verdict = (unsigned)(-v) - 1; 387 verdict = (unsigned)(-v) - 1;
405 break; 388 break;
406 } 389 }
407 e = back; 390 if (*stackptr == 0) {
408 back = get_entry(table_base, back->comefrom); 391 e = get_entry(table_base,
392 private->underflow[hook]);
393 pr_debug("Underflow (this is normal) "
394 "to %p\n", e);
395 } else {
396 e = jumpstack[--*stackptr];
397 pr_debug("Pulled %p out from pos %u\n",
398 e, *stackptr);
399 e = ipt_next_entry(e);
400 }
409 continue; 401 continue;
410 } 402 }
411 if (table_base + v != ipt_next_entry(e) && 403 if (table_base + v != ipt_next_entry(e) &&
412 !(e->ip.flags & IPT_F_GOTO)) { 404 !(e->ip.flags & IPT_F_GOTO)) {
413 /* Save old back ptr in next entry */ 405 if (*stackptr >= private->stacksize) {
414 struct ipt_entry *next = ipt_next_entry(e); 406 verdict = NF_DROP;
415 next->comefrom = (void *)back - table_base; 407 break;
416 /* set back pointer to next entry */ 408 }
417 back = next; 409 jumpstack[(*stackptr)++] = e;
410 pr_debug("Pushed %p into pos %u\n",
411 e, *stackptr - 1);
418 } 412 }
419 413
420 e = get_entry(table_base, v); 414 e = get_entry(table_base, v);
421 continue; 415 continue;
422 } 416 }
423 417
424 /* Targets which reenter must return 418 acpar.target = t->u.kernel.target;
425 abs. verdicts */ 419 acpar.targinfo = t->data;
426 tgpar.target = t->u.kernel.target;
427 tgpar.targinfo = t->data;
428
429 420
430#ifdef CONFIG_NETFILTER_DEBUG 421 verdict = t->u.kernel.target->target(skb, &acpar);
431 tb_comefrom = 0xeeeeeeec;
432#endif
433 verdict = t->u.kernel.target->target(skb, &tgpar);
434#ifdef CONFIG_NETFILTER_DEBUG
435 if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
436 printk("Target %s reentered!\n",
437 t->u.kernel.target->name);
438 verdict = NF_DROP;
439 }
440 tb_comefrom = 0x57acc001;
441#endif
442 /* Target might have changed stuff. */ 422 /* Target might have changed stuff. */
443 ip = ip_hdr(skb); 423 ip = ip_hdr(skb);
444 if (verdict == IPT_CONTINUE) 424 if (verdict == IPT_CONTINUE)
@@ -446,18 +426,18 @@ ipt_do_table(struct sk_buff *skb,
446 else 426 else
447 /* Verdict */ 427 /* Verdict */
448 break; 428 break;
449 } while (!hotdrop); 429 } while (!acpar.hotdrop);
450 xt_info_rdunlock_bh(); 430 xt_info_rdunlock_bh();
451 431 pr_debug("Exiting %s; resetting sp from %u to %u\n",
432 __func__, *stackptr, origptr);
433 *stackptr = origptr;
452#ifdef DEBUG_ALLOW_ALL 434#ifdef DEBUG_ALLOW_ALL
453 return NF_ACCEPT; 435 return NF_ACCEPT;
454#else 436#else
455 if (hotdrop) 437 if (acpar.hotdrop)
456 return NF_DROP; 438 return NF_DROP;
457 else return verdict; 439 else return verdict;
458#endif 440#endif
459
460#undef tb_comefrom
461} 441}
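
What the rewritten ipt_do_table() above implements: the old code threaded a back-pointer (comefrom) through the table itself, which is unsafe under concurrent walks and forced reentering targets to return absolute verdicts; the new code keeps an explicit per-cpu stack of calling rules. The core of the jump/return logic, pulled out of the hunk as a sketch (counters and verdict plumbing elided):

	/* Jump to another chain: remember the calling rule first. */
	if (*stackptr >= private->stacksize) {
		verdict = NF_DROP;		/* stack exhausted: fail closed */
		break;
	}
	jumpstack[(*stackptr)++] = e;
	e = get_entry(table_base, v);

	/* RETURN: pop the caller, or fall through to the hook's
	 * underflow rule when invoked from a base chain. */
	if (*stackptr == 0)
		e = get_entry(table_base, private->underflow[hook]);
	else
		e = ipt_next_entry(jumpstack[--*stackptr]);

The stack is sized in translate_table() further down: every user-defined chain begins with an ERROR target, so counting XT_ERROR_TARGET entries bounds the possible call depth.
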
462 442
463/* Figures out from what hook each rule can be called: returns 0 if 443/* Figures out from what hook each rule can be called: returns 0 if
@@ -486,7 +466,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
486 int visited = e->comefrom & (1 << hook); 466 int visited = e->comefrom & (1 << hook);
487 467
488 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { 468 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
489 printk("iptables: loop hook %u pos %u %08X.\n", 469 pr_err("iptables: loop hook %u pos %u %08X.\n",
490 hook, pos, e->comefrom); 470 hook, pos, e->comefrom);
491 return 0; 471 return 0;
492 } 472 }
@@ -591,7 +571,7 @@ check_entry(const struct ipt_entry *e, const char *name)
591 const struct ipt_entry_target *t; 571 const struct ipt_entry_target *t;
592 572
593 if (!ip_checkentry(&e->ip)) { 573 if (!ip_checkentry(&e->ip)) {
594 duprintf("ip_tables: ip check failed %p %s.\n", e, name); 574 duprintf("ip check failed %p %s.\n", e, name);
595 return -EINVAL; 575 return -EINVAL;
596 } 576 }
597 577
@@ -618,8 +598,7 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
618 ret = xt_check_match(par, m->u.match_size - sizeof(*m), 598 ret = xt_check_match(par, m->u.match_size - sizeof(*m),
619 ip->proto, ip->invflags & IPT_INV_PROTO); 599 ip->proto, ip->invflags & IPT_INV_PROTO);
620 if (ret < 0) { 600 if (ret < 0) {
621 duprintf("ip_tables: check failed for `%s'.\n", 601 duprintf("check failed for `%s'.\n", par->match->name);
622 par.match->name);
623 return ret; 602 return ret;
624 } 603 }
625 return 0; 604 return 0;
@@ -631,12 +610,11 @@ find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
631 struct xt_match *match; 610 struct xt_match *match;
632 int ret; 611 int ret;
633 612
634 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, 613 match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
635 m->u.user.revision), 614 m->u.user.revision);
636 "ipt_%s", m->u.user.name); 615 if (IS_ERR(match)) {
637 if (IS_ERR(match) || !match) {
638 duprintf("find_check_match: `%s' not found\n", m->u.user.name); 616 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
639 return match ? PTR_ERR(match) : -ENOENT; 617 return PTR_ERR(match);
640 } 618 }
641 m->u.kernel.match = match; 619 m->u.kernel.match = match;
642 620
@@ -667,7 +645,7 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
667 ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 645 ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
668 e->ip.proto, e->ip.invflags & IPT_INV_PROTO); 646 e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
669 if (ret < 0) { 647 if (ret < 0) {
670 duprintf("ip_tables: check failed for `%s'.\n", 648 duprintf("check failed for `%s'.\n",
671 t->u.kernel.target->name); 649 t->u.kernel.target->name);
672 return ret; 650 return ret;
673 } 651 }
@@ -703,13 +681,11 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
703 } 681 }
704 682
705 t = ipt_get_target(e); 683 t = ipt_get_target(e);
706 target = try_then_request_module(xt_find_target(AF_INET, 684 target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
707 t->u.user.name, 685 t->u.user.revision);
708 t->u.user.revision), 686 if (IS_ERR(target)) {
709 "ipt_%s", t->u.user.name);
710 if (IS_ERR(target) || !target) {
711 duprintf("find_check_entry: `%s' not found\n", t->u.user.name); 687 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
712 ret = target ? PTR_ERR(target) : -ENOENT; 688 ret = PTR_ERR(target);
713 goto cleanup_matches; 689 goto cleanup_matches;
714 } 690 }
715 t->u.kernel.target = target; 691 t->u.kernel.target = target;
@@ -843,6 +819,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
843 if (ret != 0) 819 if (ret != 0)
844 return ret; 820 return ret;
845 ++i; 821 ++i;
822 if (strcmp(ipt_get_target(iter)->u.user.name,
823 XT_ERROR_TARGET) == 0)
824 ++newinfo->stacksize;
846 } 825 }
847 826
848 if (i != repl->num_entries) { 827 if (i != repl->num_entries) {
@@ -905,7 +884,7 @@ get_counters(const struct xt_table_info *t,
905 struct ipt_entry *iter; 884 struct ipt_entry *iter;
906 unsigned int cpu; 885 unsigned int cpu;
907 unsigned int i; 886 unsigned int i;
908 unsigned int curcpu; 887 unsigned int curcpu = get_cpu();
909 888
910 /* Instead of clearing (by a previous call to memset()) 889 /* Instead of clearing (by a previous call to memset())
911 * the counters and using adds, we set the counters 890 * the counters and using adds, we set the counters
@@ -915,14 +894,16 @@ get_counters(const struct xt_table_info *t,
915 * if new softirq were to run and call ipt_do_table 894 * if new softirq were to run and call ipt_do_table
916 */ 895 */
917 local_bh_disable(); 896 local_bh_disable();
918 curcpu = smp_processor_id();
919
920 i = 0; 897 i = 0;
921 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 898 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
922 SET_COUNTER(counters[i], iter->counters.bcnt, 899 SET_COUNTER(counters[i], iter->counters.bcnt,
923 iter->counters.pcnt); 900 iter->counters.pcnt);
924 ++i; 901 ++i;
925 } 902 }
903 local_bh_enable();
904 /* Processing counters from other cpus: we can leave bottom halves
905 * enabled, since preemption is still disabled.
906 */
926 907
927 for_each_possible_cpu(cpu) { 908 for_each_possible_cpu(cpu) {
928 if (cpu == curcpu) 909 if (cpu == curcpu)
@@ -936,7 +917,7 @@ get_counters(const struct xt_table_info *t,
936 } 917 }
937 xt_info_wrunlock(cpu); 918 xt_info_wrunlock(cpu);
938 } 919 }
939 local_bh_enable(); 920 put_cpu();
940} 921}
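
get_counters() now brackets the whole walk with get_cpu()/put_cpu() and narrows local_bh_disable() to the local snapshot only: softirqs must stay off while reading this CPU's own counters (ipt_do_table could otherwise update them under our feet), while the other CPUs' counters are read under xt_info_wrlock() and only need preemption disabled. A sketch of the pattern, with a hypothetical snapshot helper:

	unsigned int other, cpu = get_cpu();	/* disables preemption */

	local_bh_disable();		/* softirqs off for the local pass only */
	snapshot_counters(cpu);		/* hypothetical helper */
	local_bh_enable();

	for_each_possible_cpu(other) {
		if (other == cpu)
			continue;
		snapshot_counters(other);	/* remote: per-cpu lock taken inside */
	}
	put_cpu();			/* re-enables preemption */
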
941 922
942static struct xt_counters *alloc_counters(const struct xt_table *table) 923static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -949,7 +930,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
949 (other than comefrom, which userspace doesn't care 930 (other than comefrom, which userspace doesn't care
950 about). */ 931 about). */
951 countersize = sizeof(struct xt_counters) * private->number; 932 countersize = sizeof(struct xt_counters) * private->number;
952 counters = vmalloc_node(countersize, numa_node_id()); 933 counters = vmalloc(countersize);
953 934
954 if (counters == NULL) 935 if (counters == NULL)
955 return ERR_PTR(-ENOMEM); 936 return ERR_PTR(-ENOMEM);
@@ -1311,7 +1292,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
1311 if (ret != 0) 1292 if (ret != 0)
1312 goto free_newinfo; 1293 goto free_newinfo;
1313 1294
1314 duprintf("ip_tables: Translated table\n"); 1295 duprintf("Translated table\n");
1315 1296
1316 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, 1297 ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1317 tmp.num_counters, tmp.counters); 1298 tmp.num_counters, tmp.counters);
@@ -1373,7 +1354,7 @@ do_add_counters(struct net *net, const void __user *user,
1373 if (len != size + num_counters * sizeof(struct xt_counters)) 1354 if (len != size + num_counters * sizeof(struct xt_counters))
1374 return -EINVAL; 1355 return -EINVAL;
1375 1356
1376 paddc = vmalloc_node(len - size, numa_node_id()); 1357 paddc = vmalloc(len - size);
1377 if (!paddc) 1358 if (!paddc)
1378 return -ENOMEM; 1359 return -ENOMEM;
1379 1360
@@ -1476,13 +1457,12 @@ compat_find_calc_match(struct ipt_entry_match *m,
1476{ 1457{
1477 struct xt_match *match; 1458 struct xt_match *match;
1478 1459
1479 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, 1460 match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
1480 m->u.user.revision), 1461 m->u.user.revision);
1481 "ipt_%s", m->u.user.name); 1462 if (IS_ERR(match)) {
1482 if (IS_ERR(match) || !match) {
1483 duprintf("compat_check_calc_match: `%s' not found\n", 1463 duprintf("compat_check_calc_match: `%s' not found\n",
1484 m->u.user.name); 1464 m->u.user.name);
1485 return match ? PTR_ERR(match) : -ENOENT; 1465 return PTR_ERR(match);
1486 } 1466 }
1487 m->u.kernel.match = match; 1467 m->u.kernel.match = match;
1488 *size += xt_compat_match_offset(match); 1468 *size += xt_compat_match_offset(match);
@@ -1549,14 +1529,12 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1549 } 1529 }
1550 1530
1551 t = compat_ipt_get_target(e); 1531 t = compat_ipt_get_target(e);
1552 target = try_then_request_module(xt_find_target(AF_INET, 1532 target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
1553 t->u.user.name, 1533 t->u.user.revision);
1554 t->u.user.revision), 1534 if (IS_ERR(target)) {
1555 "ipt_%s", t->u.user.name);
1556 if (IS_ERR(target) || !target) {
1557 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", 1535 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1558 t->u.user.name); 1536 t->u.user.name);
1559 ret = target ? PTR_ERR(target) : -ENOENT; 1537 ret = PTR_ERR(target);
1560 goto release_matches; 1538 goto release_matches;
1561 } 1539 }
1562 t->u.kernel.target = target; 1540 t->u.kernel.target = target;
@@ -2094,8 +2072,7 @@ struct xt_table *ipt_register_table(struct net *net,
2094{ 2072{
2095 int ret; 2073 int ret;
2096 struct xt_table_info *newinfo; 2074 struct xt_table_info *newinfo;
2097 struct xt_table_info bootstrap 2075 struct xt_table_info bootstrap = {0};
2098 = { 0, 0, 0, { 0 }, { 0 }, { } };
2099 void *loc_cpu_entry; 2076 void *loc_cpu_entry;
2100 struct xt_table *new_table; 2077 struct xt_table *new_table;
2101 2078
@@ -2157,7 +2134,7 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
2157} 2134}
2158 2135
2159static bool 2136static bool
2160icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) 2137icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
2161{ 2138{
2162 const struct icmphdr *ic; 2139 const struct icmphdr *ic;
2163 struct icmphdr _icmph; 2140 struct icmphdr _icmph;
@@ -2173,7 +2150,7 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
2173 * can't. Hence, no choice but to drop. 2150 * can't. Hence, no choice but to drop.
2174 */ 2151 */
2175 duprintf("Dropping evil ICMP tinygram.\n"); 2152 duprintf("Dropping evil ICMP tinygram.\n");
2176 *par->hotdrop = true; 2153 par->hotdrop = true;
2177 return false; 2154 return false;
2178 } 2155 }
2179 2156
@@ -2184,31 +2161,31 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
2184 !!(icmpinfo->invflags&IPT_ICMP_INV)); 2161 !!(icmpinfo->invflags&IPT_ICMP_INV));
2185} 2162}
2186 2163
2187static bool icmp_checkentry(const struct xt_mtchk_param *par) 2164static int icmp_checkentry(const struct xt_mtchk_param *par)
2188{ 2165{
2189 const struct ipt_icmp *icmpinfo = par->matchinfo; 2166 const struct ipt_icmp *icmpinfo = par->matchinfo;
2190 2167
2191 /* Must specify no unknown invflags */ 2168 /* Must specify no unknown invflags */
2192 return !(icmpinfo->invflags & ~IPT_ICMP_INV); 2169 return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
2193} 2170}
2194 2171
2195/* The built-in targets: standard (NULL) and error. */ 2172static struct xt_target ipt_builtin_tg[] __read_mostly = {
2196static struct xt_target ipt_standard_target __read_mostly = { 2173 {
2197 .name = IPT_STANDARD_TARGET, 2174 .name = IPT_STANDARD_TARGET,
2198 .targetsize = sizeof(int), 2175 .targetsize = sizeof(int),
2199 .family = NFPROTO_IPV4, 2176 .family = NFPROTO_IPV4,
2200#ifdef CONFIG_COMPAT 2177#ifdef CONFIG_COMPAT
2201 .compatsize = sizeof(compat_int_t), 2178 .compatsize = sizeof(compat_int_t),
2202 .compat_from_user = compat_standard_from_user, 2179 .compat_from_user = compat_standard_from_user,
2203 .compat_to_user = compat_standard_to_user, 2180 .compat_to_user = compat_standard_to_user,
2204#endif 2181#endif
2205}; 2182 },
2206 2183 {
2207static struct xt_target ipt_error_target __read_mostly = { 2184 .name = IPT_ERROR_TARGET,
2208 .name = IPT_ERROR_TARGET, 2185 .target = ipt_error,
2209 .target = ipt_error, 2186 .targetsize = IPT_FUNCTION_MAXNAMELEN,
2210 .targetsize = IPT_FUNCTION_MAXNAMELEN, 2187 .family = NFPROTO_IPV4,
2211 .family = NFPROTO_IPV4, 2188 },
2212}; 2189};
2213 2190
2214static struct nf_sockopt_ops ipt_sockopts = { 2191static struct nf_sockopt_ops ipt_sockopts = {
@@ -2228,13 +2205,15 @@ static struct nf_sockopt_ops ipt_sockopts = {
2228 .owner = THIS_MODULE, 2205 .owner = THIS_MODULE,
2229}; 2206};
2230 2207
2231static struct xt_match icmp_matchstruct __read_mostly = { 2208static struct xt_match ipt_builtin_mt[] __read_mostly = {
2232 .name = "icmp", 2209 {
2233 .match = icmp_match, 2210 .name = "icmp",
2234 .matchsize = sizeof(struct ipt_icmp), 2211 .match = icmp_match,
2235 .checkentry = icmp_checkentry, 2212 .matchsize = sizeof(struct ipt_icmp),
2236 .proto = IPPROTO_ICMP, 2213 .checkentry = icmp_checkentry,
2237 .family = NFPROTO_IPV4, 2214 .proto = IPPROTO_ICMP,
2215 .family = NFPROTO_IPV4,
2216 },
2238}; 2217};
2239 2218
2240static int __net_init ip_tables_net_init(struct net *net) 2219static int __net_init ip_tables_net_init(struct net *net)
@@ -2261,13 +2240,10 @@ static int __init ip_tables_init(void)
2261 goto err1; 2240 goto err1;
2262 2241
2263 /* No one else will be downing the sem now, so we won't sleep */ 2242 /* No one else will be downing the sem now, so we won't sleep */
2264 ret = xt_register_target(&ipt_standard_target); 2243 ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
2265 if (ret < 0) 2244 if (ret < 0)
2266 goto err2; 2245 goto err2;
2267 ret = xt_register_target(&ipt_error_target); 2246 ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
2268 if (ret < 0)
2269 goto err3;
2270 ret = xt_register_match(&icmp_matchstruct);
2271 if (ret < 0) 2247 if (ret < 0)
2272 goto err4; 2248 goto err4;
2273 2249
@@ -2276,15 +2252,13 @@ static int __init ip_tables_init(void)
2276 if (ret < 0) 2252 if (ret < 0)
2277 goto err5; 2253 goto err5;
2278 2254
2279 printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n"); 2255 pr_info("(C) 2000-2006 Netfilter Core Team\n");
2280 return 0; 2256 return 0;
2281 2257
2282err5: 2258err5:
2283 xt_unregister_match(&icmp_matchstruct); 2259 xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
2284err4: 2260err4:
2285 xt_unregister_target(&ipt_error_target); 2261 xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
2286err3:
2287 xt_unregister_target(&ipt_standard_target);
2288err2: 2262err2:
2289 unregister_pernet_subsys(&ip_tables_net_ops); 2263 unregister_pernet_subsys(&ip_tables_net_ops);
2290err1: 2264err1:
@@ -2295,10 +2269,8 @@ static void __exit ip_tables_fini(void)
2295{ 2269{
2296 nf_unregister_sockopt(&ipt_sockopts); 2270 nf_unregister_sockopt(&ipt_sockopts);
2297 2271
2298 xt_unregister_match(&icmp_matchstruct); 2272 xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
2299 xt_unregister_target(&ipt_error_target); 2273 xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
2300 xt_unregister_target(&ipt_standard_target);
2301
2302 unregister_pernet_subsys(&ip_tables_net_ops); 2274 unregister_pernet_subsys(&ip_tables_net_ops);
2303} 2275}
2304 2276
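
The tail of the ip_tables.c diff collapses the three separate registrations (standard target, error target, icmp match) into arrays handled by xt_register_targets()/xt_register_matches(), which also shortens the error-unwind ladder in ip_tables_init(). The shape of the pattern, sketched with made-up names:

	static struct xt_target example_tg[] __read_mostly = {
		{ .name = "STANDARD", .family = NFPROTO_IPV4, /* ... */ },
		{ .name = "ERROR",    .family = NFPROTO_IPV4, /* ... */ },
	};

	static int __init example_init(void)
	{
		/* one call registers the whole array ... */
		return xt_register_targets(example_tg, ARRAY_SIZE(example_tg));
	}

	static void __exit example_exit(void)
	{
		/* ... and one call undoes it */
		xt_unregister_targets(example_tg, ARRAY_SIZE(example_tg));
	}
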
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index ab828400ed71..3a43cf36db87 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -9,6 +9,7 @@
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 * 10 *
11 */ 11 */
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12#include <linux/module.h> 13#include <linux/module.h>
13#include <linux/proc_fs.h> 14#include <linux/proc_fs.h>
14#include <linux/jhash.h> 15#include <linux/jhash.h>
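
The pr_fmt define added above (and repeated in the other files of this series) is what lets the messages below drop their hand-written "CLUSTERIP: "-style prefixes: the pr_info()/pr_err() macros expand their format string through pr_fmt(). Sketch of the mechanism:

	/* must come before the first include that pulls in printk helpers */
	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
	#include <linux/module.h>

	static int __init example_init(void)
	{
		pr_info("loaded\n");	/* logs as "<module name>: loaded" */
		return 0;
	}
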
@@ -52,12 +53,13 @@ struct clusterip_config {
52#endif 53#endif
53 enum clusterip_hashmode hash_mode; /* which hashing mode */ 54 enum clusterip_hashmode hash_mode; /* which hashing mode */
54 u_int32_t hash_initval; /* hash initialization */ 55 u_int32_t hash_initval; /* hash initialization */
56 struct rcu_head rcu;
55}; 57};
56 58
57static LIST_HEAD(clusterip_configs); 59static LIST_HEAD(clusterip_configs);
58 60
59/* clusterip_lock protects the clusterip_configs list */ 61/* clusterip_lock protects the clusterip_configs list */
60static DEFINE_RWLOCK(clusterip_lock); 62static DEFINE_SPINLOCK(clusterip_lock);
61 63
62#ifdef CONFIG_PROC_FS 64#ifdef CONFIG_PROC_FS
63static const struct file_operations clusterip_proc_fops; 65static const struct file_operations clusterip_proc_fops;
@@ -70,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
70 atomic_inc(&c->refcount); 72 atomic_inc(&c->refcount);
71} 73}
72 74
75
76static void clusterip_config_rcu_free(struct rcu_head *head)
77{
78 kfree(container_of(head, struct clusterip_config, rcu));
79}
80
73static inline void 81static inline void
74clusterip_config_put(struct clusterip_config *c) 82clusterip_config_put(struct clusterip_config *c)
75{ 83{
76 if (atomic_dec_and_test(&c->refcount)) 84 if (atomic_dec_and_test(&c->refcount))
77 kfree(c); 85 call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
78} 86}
79 87
80/* decrease the count of entries using/referencing this config. If last 88/* decrease the count of entries using/referencing this config. If last
@@ -83,12 +91,13 @@ clusterip_config_put(struct clusterip_config *c)
83static inline void 91static inline void
84clusterip_config_entry_put(struct clusterip_config *c) 92clusterip_config_entry_put(struct clusterip_config *c)
85{ 93{
86 write_lock_bh(&clusterip_lock); 94 local_bh_disable();
87 if (atomic_dec_and_test(&c->entries)) { 95 if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) {
88 list_del(&c->list); 96 list_del_rcu(&c->list);
89 write_unlock_bh(&clusterip_lock); 97 spin_unlock(&clusterip_lock);
98 local_bh_enable();
90 99
91 dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); 100 dev_mc_del(c->dev, c->clustermac);
92 dev_put(c->dev); 101 dev_put(c->dev);
93 102
94 /* In case anyone still accesses the file, the open/close 103 /* In case anyone still accesses the file, the open/close
@@ -99,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
99#endif 108#endif
100 return; 109 return;
101 } 110 }
102 write_unlock_bh(&clusterip_lock); 111 local_bh_enable();
103} 112}
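
clusterip_config_entry_put() above is the textbook atomic_dec_and_lock() conversion: the spinlock is taken only when the counter actually drops to zero, so the common-case put stays lock-free; the explicit local_bh_disable()/enable() pair compensates for atomic_dec_and_lock() using the plain (non-_bh) spin_lock variant. The idiom in isolation, with names from the hunk:

	local_bh_disable();
	if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) {
		/* last user: unlink under the lock, free later via RCU */
		list_del_rcu(&c->list);
		spin_unlock(&clusterip_lock);
		local_bh_enable();
		/* ... drop device references, remove /proc entry ... */
		return;
	}
	local_bh_enable();
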
104 113
105static struct clusterip_config * 114static struct clusterip_config *
@@ -107,7 +116,7 @@ __clusterip_config_find(__be32 clusterip)
107{ 116{
108 struct clusterip_config *c; 117 struct clusterip_config *c;
109 118
110 list_for_each_entry(c, &clusterip_configs, list) { 119 list_for_each_entry_rcu(c, &clusterip_configs, list) {
111 if (c->clusterip == clusterip) 120 if (c->clusterip == clusterip)
112 return c; 121 return c;
113 } 122 }
@@ -120,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry)
120{ 129{
121 struct clusterip_config *c; 130 struct clusterip_config *c;
122 131
123 read_lock_bh(&clusterip_lock); 132 rcu_read_lock_bh();
124 c = __clusterip_config_find(clusterip); 133 c = __clusterip_config_find(clusterip);
125 if (!c) { 134 if (c) {
126 read_unlock_bh(&clusterip_lock); 135 if (unlikely(!atomic_inc_not_zero(&c->refcount)))
127 return NULL; 136 c = NULL;
137 else if (entry)
138 atomic_inc(&c->entries);
128 } 139 }
129 atomic_inc(&c->refcount); 140 rcu_read_unlock_bh();
130 if (entry)
131 atomic_inc(&c->entries);
132 read_unlock_bh(&clusterip_lock);
133 141
134 return c; 142 return c;
135} 143}
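
The matching lookup side: the list is now walked under rcu_read_lock_bh() with no spinlock at all, and atomic_inc_not_zero() refuses to resurrect a config whose refcount has already reached zero — its memory may already be queued for call_rcu_bh() freeing. Sketched:

	rcu_read_lock_bh();
	c = __clusterip_config_find(clusterip);	/* list_for_each_entry_rcu() inside */
	if (c && !atomic_inc_not_zero(&c->refcount))
		c = NULL;			/* lost the race with the final put */
	rcu_read_unlock_bh();
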
@@ -180,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
180 } 188 }
181#endif 189#endif
182 190
183 write_lock_bh(&clusterip_lock); 191 spin_lock_bh(&clusterip_lock);
184 list_add(&c->list, &clusterip_configs); 192 list_add_rcu(&c->list, &clusterip_configs);
185 write_unlock_bh(&clusterip_lock); 193 spin_unlock_bh(&clusterip_lock);
186 194
187 return c; 195 return c;
188} 196}
@@ -239,8 +247,7 @@ clusterip_hashfn(const struct sk_buff *skb,
239 break; 247 break;
240 default: 248 default:
241 if (net_ratelimit()) 249 if (net_ratelimit())
242 printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", 250 pr_info("unknown protocol %u\n", iph->protocol);
243 iph->protocol);
244 sport = dport = 0; 251 sport = dport = 0;
245 } 252 }
246 253
@@ -262,7 +269,7 @@ clusterip_hashfn(const struct sk_buff *skb,
262 hashval = 0; 269 hashval = 0;
263 /* This cannot happen, unless the check function wasn't called 270 /* This cannot happen, unless the check function wasn't called
264 * at rule load time */ 271 * at rule load time */
265 printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode); 272 pr_info("unknown mode %u\n", config->hash_mode);
266 BUG(); 273 BUG();
267 break; 274 break;
268 } 275 }
@@ -282,7 +289,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
282 ***********************************************************************/ 289 ***********************************************************************/
283 290
284static unsigned int 291static unsigned int
285clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) 292clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
286{ 293{
287 const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; 294 const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
288 struct nf_conn *ct; 295 struct nf_conn *ct;
@@ -295,7 +302,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
295 302
296 ct = nf_ct_get(skb, &ctinfo); 303 ct = nf_ct_get(skb, &ctinfo);
297 if (ct == NULL) { 304 if (ct == NULL) {
298 printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); 305 pr_info("no conntrack!\n");
299 /* FIXME: need to drop invalid ones, since replies 306 /* FIXME: need to drop invalid ones, since replies
300 * to outgoing connections of other nodes will be 307 * to outgoing connections of other nodes will be
301 * marked as INVALID */ 308 * marked as INVALID */
@@ -348,25 +355,24 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
348 return XT_CONTINUE; 355 return XT_CONTINUE;
349} 356}
350 357
351static bool clusterip_tg_check(const struct xt_tgchk_param *par) 358static int clusterip_tg_check(const struct xt_tgchk_param *par)
352{ 359{
353 struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; 360 struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
354 const struct ipt_entry *e = par->entryinfo; 361 const struct ipt_entry *e = par->entryinfo;
355
356 struct clusterip_config *config; 362 struct clusterip_config *config;
363 int ret;
357 364
358 if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && 365 if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
359 cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && 366 cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
360 cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { 367 cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
361 printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n", 368 pr_info("unknown mode %u\n", cipinfo->hash_mode);
362 cipinfo->hash_mode); 369 return -EINVAL;
363 return false;
364 370
365 } 371 }
366 if (e->ip.dmsk.s_addr != htonl(0xffffffff) || 372 if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
367 e->ip.dst.s_addr == 0) { 373 e->ip.dst.s_addr == 0) {
368 printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); 374 pr_info("Please specify destination IP\n");
369 return false; 375 return -EINVAL;
370 } 376 }
371 377
372 /* FIXME: further sanity checks */ 378 /* FIXME: further sanity checks */
@@ -374,41 +380,41 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par)
374 config = clusterip_config_find_get(e->ip.dst.s_addr, 1); 380 config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
375 if (!config) { 381 if (!config) {
376 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { 382 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
377 printk(KERN_WARNING "CLUSTERIP: no config found for %pI4, need 'new'\n", &e->ip.dst.s_addr); 383 pr_info("no config found for %pI4, need 'new'\n",
378 return false; 384 &e->ip.dst.s_addr);
385 return -EINVAL;
379 } else { 386 } else {
380 struct net_device *dev; 387 struct net_device *dev;
381 388
382 if (e->ip.iniface[0] == '\0') { 389 if (e->ip.iniface[0] == '\0') {
383 printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n"); 390 pr_info("Please specify an interface name\n");
384 return false; 391 return -EINVAL;
385 } 392 }
386 393
387 dev = dev_get_by_name(&init_net, e->ip.iniface); 394 dev = dev_get_by_name(&init_net, e->ip.iniface);
388 if (!dev) { 395 if (!dev) {
389 printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); 396 pr_info("no such interface %s\n",
390 return false; 397 e->ip.iniface);
398 return -ENOENT;
391 } 399 }
392 400
393 config = clusterip_config_init(cipinfo, 401 config = clusterip_config_init(cipinfo,
394 e->ip.dst.s_addr, dev); 402 e->ip.dst.s_addr, dev);
395 if (!config) { 403 if (!config) {
396 printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n"); 404 pr_info("cannot allocate config\n");
397 dev_put(dev); 405 dev_put(dev);
398 return false; 406 return -ENOMEM;
399 } 407 }
400 dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); 408 dev_mc_add(config->dev, config->clustermac);
401 } 409 }
402 } 410 }
403 cipinfo->config = config; 411 cipinfo->config = config;
404 412
405 if (nf_ct_l3proto_try_module_get(par->target->family) < 0) { 413 ret = nf_ct_l3proto_try_module_get(par->family);
406 printk(KERN_WARNING "can't load conntrack support for " 414 if (ret < 0)
407 "proto=%u\n", par->target->family); 415 pr_info("cannot load conntrack support for proto=%u\n",
408 return false; 416 par->family);
409 } 417 return ret;
410
411 return true;
412} 418}
413 419
414/* drop reference count of cluster config when rule is deleted */ 420/* drop reference count of cluster config when rule is deleted */
@@ -422,7 +428,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
422 428
423 clusterip_config_put(cipinfo->config); 429 clusterip_config_put(cipinfo->config);
424 430
425 nf_ct_l3proto_module_put(par->target->family); 431 nf_ct_l3proto_module_put(par->family);
426} 432}
427 433
428#ifdef CONFIG_COMPAT 434#ifdef CONFIG_COMPAT
@@ -463,7 +469,7 @@ struct arp_payload {
463 __be32 src_ip; 469 __be32 src_ip;
464 u_int8_t dst_hw[ETH_ALEN]; 470 u_int8_t dst_hw[ETH_ALEN];
465 __be32 dst_ip; 471 __be32 dst_ip;
466} __attribute__ ((packed)); 472} __packed;
467 473
468#ifdef DEBUG 474#ifdef DEBUG
469static void arp_print(struct arp_payload *payload) 475static void arp_print(struct arp_payload *payload)
@@ -479,8 +485,8 @@ static void arp_print(struct arp_payload *payload)
479 } 485 }
480 hbuffer[--k]='\0'; 486 hbuffer[--k]='\0';
481 487
482 printk("src %pI4@%s, dst %pI4\n", 488 pr_debug("src %pI4@%s, dst %pI4\n",
483 &payload->src_ip, hbuffer, &payload->dst_ip); 489 &payload->src_ip, hbuffer, &payload->dst_ip);
484} 490}
485#endif 491#endif
486 492
@@ -519,7 +525,7 @@ arp_mangle(unsigned int hook,
519 * this wouldn't work, since we didn't subscribe the mcast group on 525 * this wouldn't work, since we didn't subscribe the mcast group on
520 * other interfaces */ 526 * other interfaces */
521 if (c->dev != out) { 527 if (c->dev != out) {
522 pr_debug("CLUSTERIP: not mangling arp reply on different " 528 pr_debug("not mangling arp reply on different "
523 "interface: cip'%s'-skb'%s'\n", 529 "interface: cip'%s'-skb'%s'\n",
524 c->dev->name, out->name); 530 c->dev->name, out->name);
525 clusterip_config_put(c); 531 clusterip_config_put(c);
@@ -530,7 +536,7 @@ arp_mangle(unsigned int hook,
530 memcpy(payload->src_hw, c->clustermac, arp->ar_hln); 536 memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
531 537
532#ifdef DEBUG 538#ifdef DEBUG
533 pr_debug(KERN_DEBUG "CLUSTERIP mangled arp reply: "); 539 pr_debug("mangled arp reply: ");
534 arp_print(payload); 540 arp_print(payload);
535#endif 541#endif
536 542
@@ -601,7 +607,8 @@ static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
601 607
602static void clusterip_seq_stop(struct seq_file *s, void *v) 608static void clusterip_seq_stop(struct seq_file *s, void *v)
603{ 609{
604 kfree(v); 610 if (!IS_ERR(v))
611 kfree(v);
605} 612}
606 613
607static int clusterip_seq_show(struct seq_file *s, void *v) 614static int clusterip_seq_show(struct seq_file *s, void *v)
@@ -706,13 +713,13 @@ static int __init clusterip_tg_init(void)
706#ifdef CONFIG_PROC_FS 713#ifdef CONFIG_PROC_FS
707 clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); 714 clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
708 if (!clusterip_procdir) { 715 if (!clusterip_procdir) {
709 printk(KERN_ERR "CLUSTERIP: Unable to create proc dir entry\n"); 716 pr_err("Unable to create proc dir entry\n");
710 ret = -ENOMEM; 717 ret = -ENOMEM;
711 goto cleanup_hook; 718 goto cleanup_hook;
712 } 719 }
713#endif /* CONFIG_PROC_FS */ 720#endif /* CONFIG_PROC_FS */
714 721
715 printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n", 722 pr_info("ClusterIP Version %s loaded successfully\n",
716 CLUSTERIP_VERSION); 723 CLUSTERIP_VERSION);
717 return 0; 724 return 0;
718 725
@@ -727,13 +734,15 @@ cleanup_target:
727 734
728static void __exit clusterip_tg_exit(void) 735static void __exit clusterip_tg_exit(void)
729{ 736{
730 printk(KERN_NOTICE "ClusterIP Version %s unloading\n", 737 pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
731 CLUSTERIP_VERSION);
732#ifdef CONFIG_PROC_FS 738#ifdef CONFIG_PROC_FS
733 remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); 739 remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
734#endif 740#endif
735 nf_unregister_hook(&cip_arp_ops); 741 nf_unregister_hook(&cip_arp_ops);
736 xt_unregister_target(&clusterip_tg_reg); 742 xt_unregister_target(&clusterip_tg_reg);
743
744 /* Wait for completion of call_rcu_bh() callbacks (clusterip_config_rcu_free) */
745 rcu_barrier_bh();
737} 746}
738 747
739module_init(clusterip_tg_init); 748module_init(clusterip_tg_init);
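
Because configs are freed through call_rcu_bh(), module unload must wait for every queued callback before the module text containing clusterip_config_rcu_free() goes away; that is the job of the rcu_barrier_bh() added to clusterip_tg_exit(). The general unload shape, sketched with placeholder names:

	static void __exit example_exit(void)
	{
		unregister_everything();	/* placeholder: stop queueing new call_rcu_bh() work */
		rcu_barrier_bh();		/* wait for all pending RCU-bh callbacks */
	}
	module_exit(example_exit);
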
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index ea5cea2415c1..4bf3dc49ad1e 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -6,7 +6,7 @@
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8*/ 8*/
9 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/in.h> 10#include <linux/in.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
@@ -77,7 +77,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
77} 77}
78 78
79static unsigned int 79static unsigned int
80ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) 80ecn_tg(struct sk_buff *skb, const struct xt_action_param *par)
81{ 81{
82 const struct ipt_ECN_info *einfo = par->targinfo; 82 const struct ipt_ECN_info *einfo = par->targinfo;
83 83
@@ -93,28 +93,25 @@ ecn_tg(struct sk_buff *skb, const struct xt_target_param *par)
93 return XT_CONTINUE; 93 return XT_CONTINUE;
94} 94}
95 95
96static bool ecn_tg_check(const struct xt_tgchk_param *par) 96static int ecn_tg_check(const struct xt_tgchk_param *par)
97{ 97{
98 const struct ipt_ECN_info *einfo = par->targinfo; 98 const struct ipt_ECN_info *einfo = par->targinfo;
99 const struct ipt_entry *e = par->entryinfo; 99 const struct ipt_entry *e = par->entryinfo;
100 100
101 if (einfo->operation & IPT_ECN_OP_MASK) { 101 if (einfo->operation & IPT_ECN_OP_MASK) {
102 printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", 102 pr_info("unsupported ECN operation %x\n", einfo->operation);
103 einfo->operation); 103 return -EINVAL;
104 return false;
105 } 104 }
106 if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { 105 if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
107 printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n", 106 pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect);
108 einfo->ip_ect); 107 return -EINVAL;
109 return false;
110 } 108 }
111 if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && 109 if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
112 (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { 110 (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
113 printk(KERN_WARNING "ECN: cannot use TCP operations on a " 111 pr_info("cannot use TCP operations on a non-tcp rule\n");
114 "non-tcp rule\n"); 112 return -EINVAL;
115 return false;
116 } 113 }
117 return true; 114 return 0;
118} 115}
119 116
120static struct xt_target ecn_tg_reg __read_mostly = { 117static struct xt_target ecn_tg_reg __read_mostly = {
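
ipt_ECN shows the checkentry conversion applied throughout this series: the functions now return an errno instead of a bool, so a rejected rule can tell userspace why (-EINVAL for bad options, -ENOENT for a missing interface, -ENOMEM on allocation failure, and so on). The converted shape, sketched with a hypothetical info struct and flag mask:

	static int example_tg_check(const struct xt_tgchk_param *par)
	{
		const struct example_info *info = par->targinfo;	/* hypothetical */

		if (info->flags & ~EXAMPLE_VALID_FLAGS)	/* hypothetical mask */
			return -EINVAL;		/* was: return false */
		return 0;			/* was: return true  */
	}
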
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index ee128efa1c8d..915fc17d7ce2 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -9,10 +9,11 @@
9 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
11 */ 11 */
12 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/skbuff.h> 15#include <linux/skbuff.h>
16#include <linux/if_arp.h>
16#include <linux/ip.h> 17#include <linux/ip.h>
17#include <net/icmp.h> 18#include <net/icmp.h>
18#include <net/udp.h> 19#include <net/udp.h>
@@ -363,11 +364,47 @@ static void dump_packet(const struct nf_loginfo *info,
363 /* maxlen = 230+ 91 + 230 + 252 = 803 */ 364 /* maxlen = 230+ 91 + 230 + 252 = 803 */
364} 365}
365 366
367static void dump_mac_header(const struct nf_loginfo *info,
368 const struct sk_buff *skb)
369{
370 struct net_device *dev = skb->dev;
371 unsigned int logflags = 0;
372
373 if (info->type == NF_LOG_TYPE_LOG)
374 logflags = info->u.log.logflags;
375
376 if (!(logflags & IPT_LOG_MACDECODE))
377 goto fallback;
378
379 switch (dev->type) {
380 case ARPHRD_ETHER:
381 printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
382 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
383 ntohs(eth_hdr(skb)->h_proto));
384 return;
385 default:
386 break;
387 }
388
389fallback:
390 printk("MAC=");
391 if (dev->hard_header_len &&
392 skb->mac_header != skb->network_header) {
393 const unsigned char *p = skb_mac_header(skb);
394 unsigned int i;
395
396 printk("%02x", *p++);
397 for (i = 1; i < dev->hard_header_len; i++, p++)
398 printk(":%02x", *p);
399 }
400 printk(" ");
401}
402
366static struct nf_loginfo default_loginfo = { 403static struct nf_loginfo default_loginfo = {
367 .type = NF_LOG_TYPE_LOG, 404 .type = NF_LOG_TYPE_LOG,
368 .u = { 405 .u = {
369 .log = { 406 .log = {
370 .level = 0, 407 .level = 5,
371 .logflags = NF_LOG_MASK, 408 .logflags = NF_LOG_MASK,
372 }, 409 },
373 }, 410 },
@@ -404,20 +441,9 @@ ipt_log_packet(u_int8_t pf,
404 } 441 }
405#endif 442#endif
406 443
407 if (in && !out) { 444 /* MAC logging for input path only. */
408 /* MAC logging for input chain only. */ 445 if (in && !out)
409 printk("MAC="); 446 dump_mac_header(loginfo, skb);
410 if (skb->dev && skb->dev->hard_header_len &&
411 skb->mac_header != skb->network_header) {
412 int i;
413 const unsigned char *p = skb_mac_header(skb);
414 for (i = 0; i < skb->dev->hard_header_len; i++,p++)
415 printk("%02x%c", *p,
416 i==skb->dev->hard_header_len - 1
417 ? ' ':':');
418 } else
419 printk(" ");
420 }
421 447
422 dump_packet(loginfo, skb, 0); 448 dump_packet(loginfo, skb, 0);
423 printk("\n"); 449 printk("\n");
@@ -425,7 +451,7 @@ ipt_log_packet(u_int8_t pf,
425} 451}
426 452
427static unsigned int 453static unsigned int
428log_tg(struct sk_buff *skb, const struct xt_target_param *par) 454log_tg(struct sk_buff *skb, const struct xt_action_param *par)
429{ 455{
430 const struct ipt_log_info *loginfo = par->targinfo; 456 const struct ipt_log_info *loginfo = par->targinfo;
431 struct nf_loginfo li; 457 struct nf_loginfo li;
@@ -439,20 +465,19 @@ log_tg(struct sk_buff *skb, const struct xt_target_param *par)
439 return XT_CONTINUE; 465 return XT_CONTINUE;
440} 466}
441 467
442static bool log_tg_check(const struct xt_tgchk_param *par) 468static int log_tg_check(const struct xt_tgchk_param *par)
443{ 469{
444 const struct ipt_log_info *loginfo = par->targinfo; 470 const struct ipt_log_info *loginfo = par->targinfo;
445 471
446 if (loginfo->level >= 8) { 472 if (loginfo->level >= 8) {
447 pr_debug("LOG: level %u >= 8\n", loginfo->level); 473 pr_debug("level %u >= 8\n", loginfo->level);
448 return false; 474 return -EINVAL;
449 } 475 }
450 if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { 476 if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
451 pr_debug("LOG: prefix term %i\n", 477 pr_debug("prefix is not null-terminated\n");
452 loginfo->prefix[sizeof(loginfo->prefix)-1]); 478 return -EINVAL;
453 return false;
454 } 479 }
455 return true; 480 return 0;
456} 481}
457 482
458static struct xt_target log_tg_reg __read_mostly = { 483static struct xt_target log_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 650b54042b01..d2ed9dc74ebc 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -8,7 +8,7 @@
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 */ 10 */
11 11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/inetdevice.h> 13#include <linux/inetdevice.h>
14#include <linux/ip.h> 14#include <linux/ip.h>
@@ -28,23 +28,23 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
28MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); 28MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
29 29
30/* FIXME: Multiple targets. --RR */ 30/* FIXME: Multiple targets. --RR */
31static bool masquerade_tg_check(const struct xt_tgchk_param *par) 31static int masquerade_tg_check(const struct xt_tgchk_param *par)
32{ 32{
33 const struct nf_nat_multi_range_compat *mr = par->targinfo; 33 const struct nf_nat_multi_range_compat *mr = par->targinfo;
34 34
35 if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { 35 if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
36 pr_debug("masquerade_check: bad MAP_IPS.\n"); 36 pr_debug("bad MAP_IPS.\n");
37 return false; 37 return -EINVAL;
38 } 38 }
39 if (mr->rangesize != 1) { 39 if (mr->rangesize != 1) {
40 pr_debug("masquerade_check: bad rangesize %u\n", mr->rangesize); 40 pr_debug("bad rangesize %u\n", mr->rangesize);
41 return false; 41 return -EINVAL;
42 } 42 }
43 return true; 43 return 0;
44} 44}
45 45
46static unsigned int 46static unsigned int
47masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) 47masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
48{ 48{
49 struct nf_conn *ct; 49 struct nf_conn *ct;
50 struct nf_conn_nat *nat; 50 struct nf_conn_nat *nat;
@@ -72,7 +72,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
72 rt = skb_rtable(skb); 72 rt = skb_rtable(skb);
73 newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); 73 newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
74 if (!newsrc) { 74 if (!newsrc) {
75 printk("MASQUERADE: %s ate my IP address\n", par->out->name); 75 pr_info("%s ate my IP address\n", par->out->name);
76 return NF_DROP; 76 return NF_DROP;
77 } 77 }
78 78
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 7c29582d4ec8..6cdb298f1035 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -9,7 +9,7 @@
9 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
11 */ 11 */
12 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/ip.h> 13#include <linux/ip.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/netdevice.h> 15#include <linux/netdevice.h>
@@ -22,23 +22,23 @@ MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); 22MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
23MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); 23MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
24 24
25static bool netmap_tg_check(const struct xt_tgchk_param *par) 25static int netmap_tg_check(const struct xt_tgchk_param *par)
26{ 26{
27 const struct nf_nat_multi_range_compat *mr = par->targinfo; 27 const struct nf_nat_multi_range_compat *mr = par->targinfo;
28 28
29 if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { 29 if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
30 pr_debug("NETMAP:check: bad MAP_IPS.\n"); 30 pr_debug("bad MAP_IPS.\n");
31 return false; 31 return -EINVAL;
32 } 32 }
33 if (mr->rangesize != 1) { 33 if (mr->rangesize != 1) {
34 pr_debug("NETMAP:check: bad rangesize %u.\n", mr->rangesize); 34 pr_debug("bad rangesize %u.\n", mr->rangesize);
35 return false; 35 return -EINVAL;
36 } 36 }
37 return true; 37 return 0;
38} 38}
39 39
40static unsigned int 40static unsigned int
41netmap_tg(struct sk_buff *skb, const struct xt_target_param *par) 41netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
42{ 42{
43 struct nf_conn *ct; 43 struct nf_conn *ct;
44 enum ip_conntrack_info ctinfo; 44 enum ip_conntrack_info ctinfo;
@@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_target_param *par)
48 48
49 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || 49 NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
50 par->hooknum == NF_INET_POST_ROUTING || 50 par->hooknum == NF_INET_POST_ROUTING ||
51 par->hooknum == NF_INET_LOCAL_OUT); 51 par->hooknum == NF_INET_LOCAL_OUT ||
52 par->hooknum == NF_INET_LOCAL_IN);
52 ct = nf_ct_get(skb, &ctinfo); 53 ct = nf_ct_get(skb, &ctinfo);
53 54
54 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); 55 netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
@@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = {
77 .table = "nat", 78 .table = "nat",
78 .hooks = (1 << NF_INET_PRE_ROUTING) | 79 .hooks = (1 << NF_INET_PRE_ROUTING) |
79 (1 << NF_INET_POST_ROUTING) | 80 (1 << NF_INET_POST_ROUTING) |
80 (1 << NF_INET_LOCAL_OUT), 81 (1 << NF_INET_LOCAL_OUT) |
82 (1 << NF_INET_LOCAL_IN),
81 .checkentry = netmap_tg_check, 83 .checkentry = netmap_tg_check,
82 .me = THIS_MODULE 84 .me = THIS_MODULE
83}; 85};
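
For reference, NETMAP's 1:1 rewrite keeps the host bits of the original address and swaps in the network bits of the configured range; the netmask derivation is visible in the hunk above, and the new NF_INET_LOCAL_IN hook lets the same rewrite cover locally delivered traffic. The mapping, sketched for the destination-rewrite direction (old_ip/new_ip are illustrative names):

	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
	/* host bits from the packet, network bits from the range */
	new_ip  = (old_ip & ~netmask) | (mr->range[0].min_ip & netmask);
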
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 698e5e78685b..18a0656505a0 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -6,7 +6,7 @@
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/ip.h> 11#include <linux/ip.h>
12#include <linux/timer.h> 12#include <linux/timer.h>
@@ -26,23 +26,23 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
26MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); 26MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
27 27
28/* FIXME: Take multiple ranges --RR */ 28/* FIXME: Take multiple ranges --RR */
29static bool redirect_tg_check(const struct xt_tgchk_param *par) 29static int redirect_tg_check(const struct xt_tgchk_param *par)
30{ 30{
31 const struct nf_nat_multi_range_compat *mr = par->targinfo; 31 const struct nf_nat_multi_range_compat *mr = par->targinfo;
32 32
33 if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { 33 if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
34 pr_debug("redirect_check: bad MAP_IPS.\n"); 34 pr_debug("bad MAP_IPS.\n");
35 return false; 35 return -EINVAL;
36 } 36 }
37 if (mr->rangesize != 1) { 37 if (mr->rangesize != 1) {
38 pr_debug("redirect_check: bad rangesize %u.\n", mr->rangesize); 38 pr_debug("bad rangesize %u.\n", mr->rangesize);
39 return false; 39 return -EINVAL;
40 } 40 }
41 return true; 41 return 0;
42} 42}
43 43
44static unsigned int 44static unsigned int
45redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) 45redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
46{ 46{
47 struct nf_conn *ct; 47 struct nf_conn *ct;
48 enum ip_conntrack_info ctinfo; 48 enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index a0e8bcf04159..b254dafaf429 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -9,7 +9,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
@@ -95,10 +95,11 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 	}

 	tcph->rst = 1;
-	tcph->check = tcp_v4_check(sizeof(struct tcphdr),
-				   niph->saddr, niph->daddr,
-				   csum_partial(tcph,
-						sizeof(struct tcphdr), 0));
+	tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
+				    niph->daddr, 0);
+	nskb->ip_summed = CHECKSUM_PARTIAL;
+	nskb->csum_start = (unsigned char *)tcph - nskb->head;
+	nskb->csum_offset = offsetof(struct tcphdr, check);

 	addr_type = RTN_UNSPEC;
 	if (hook != NF_INET_FORWARD
@@ -109,13 +110,12 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 		addr_type = RTN_LOCAL;

 	/* ip_route_me_harder expects skb->dst to be set */
-	skb_dst_set(nskb, dst_clone(skb_dst(oldskb)));
+	skb_dst_set_noref(nskb, skb_dst(oldskb));

 	if (ip_route_me_harder(nskb, addr_type))
 		goto free_nskb;

 	niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT);
-	nskb->ip_summed = CHECKSUM_NONE;

 	/* "Never happens" */
 	if (nskb->len > dst_mtu(skb_dst(nskb)))
@@ -136,13 +136,10 @@ static inline void send_unreach(struct sk_buff *skb_in, int code)
 }

 static unsigned int
-reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
+reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_reject_info *reject = par->targinfo;

-	/* WARNING: This code causes reentry within iptables.
-	   This means that the iptables jump stack is now crap.  We
-	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IPT_ICMP_NET_UNREACHABLE:
 		send_unreach(skb, ICMP_NET_UNREACH);
@@ -175,23 +172,23 @@ reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }

-static bool reject_tg_check(const struct xt_tgchk_param *par)
+static int reject_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_reject_info *rejinfo = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;

 	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
-		printk("ipt_REJECT: ECHOREPLY no longer supported.\n");
-		return false;
+		pr_info("ECHOREPLY no longer supported.\n");
+		return -EINVAL;
 	} else if (rejinfo->with == IPT_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
 		if (e->ip.proto != IPPROTO_TCP ||
 		    (e->ip.invflags & XT_INV_PROTO)) {
-			printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n");
-			return false;
+			pr_info("TCP_RESET invalid for non-tcp\n");
+			return -EINVAL;
 		}
 	}
-	return true;
+	return 0;
 }

 static struct xt_target reject_tg_reg __read_mostly = {
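
The send_reset() rework above stops computing the full TCP checksum by hand and defers it via checksum offload: with CHECKSUM_PARTIAL, the check field holds only the complemented pseudo-header sum, and csum_start/csum_offset tell whatever finishes the packet (software fallback or NIC) where to fold in the rest. The same pattern in isolation, as a minimal sketch for a linear skb whose TCP header is already built (the helper name is illustrative, not from this patch):

    static void tcp_csum_to_offload(struct sk_buff *skb, const struct iphdr *iph,
                                    struct tcphdr *tcph, unsigned int tcplen)
    {
        /* Seed check with only the complemented pseudo-header sum. */
        tcph->check = ~tcp_v4_check(tcplen, iph->saddr, iph->daddr, 0);

        /* Point the completion stage at the TCP header's check field. */
        skb->ip_summed   = CHECKSUM_PARTIAL;
        skb->csum_start  = (unsigned char *)tcph - skb->head;
        skb->csum_offset = offsetof(struct tcphdr, check);
    }
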
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 0dbe697f164f..446e0f467a17 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -29,7 +29,7 @@
  * Specify, after how many hundredths of a second the queue should be
  * flushed even if it is not full yet.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/socket.h>
@@ -57,8 +57,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
 #define ULOG_NL_EVENT		111		/* Harald's favorite number */
 #define ULOG_MAXNLGROUPS	32		/* numer of nlgroups */

-#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
-
 static unsigned int nlbufsiz = NLMSG_GOODSIZE;
 module_param(nlbufsiz, uint, 0400);
 MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
@@ -91,12 +89,12 @@ static void ulog_send(unsigned int nlgroupnum)
 	ulog_buff_t *ub = &ulog_buffers[nlgroupnum];

 	if (timer_pending(&ub->timer)) {
-		pr_debug("ipt_ULOG: ulog_send: timer was pending, deleting\n");
+		pr_debug("ulog_send: timer was pending, deleting\n");
 		del_timer(&ub->timer);
 	}

 	if (!ub->skb) {
-		pr_debug("ipt_ULOG: ulog_send: nothing to send\n");
+		pr_debug("ulog_send: nothing to send\n");
 		return;
 	}

@@ -105,7 +103,7 @@ static void ulog_send(unsigned int nlgroupnum)
 	ub->lastnlh->nlmsg_type = NLMSG_DONE;

 	NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
-	pr_debug("ipt_ULOG: throwing %d packets to netlink group %u\n",
+	pr_debug("throwing %d packets to netlink group %u\n",
 		 ub->qlen, nlgroupnum + 1);
 	netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);

@@ -118,7 +116,7 @@ static void ulog_send(unsigned int nlgroupnum)
 /* timer function to flush queue in flushtimeout time */
 static void ulog_timer(unsigned long data)
 {
-	pr_debug("ipt_ULOG: timer function called, calling ulog_send\n");
+	pr_debug("timer function called, calling ulog_send\n");

 	/* lock to protect against somebody modifying our structure
 	 * from ipt_ulog_target at the same time */
@@ -139,7 +137,7 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
 	n = max(size, nlbufsiz);
 	skb = alloc_skb(n, GFP_ATOMIC);
 	if (!skb) {
-		PRINTR("ipt_ULOG: can't alloc whole buffer %ub!\n", n);
+		pr_debug("cannot alloc whole buffer %ub!\n", n);

 		if (n > size) {
 			/* try to allocate only as much as we need for
@@ -147,8 +145,7 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)

 			skb = alloc_skb(size, GFP_ATOMIC);
 			if (!skb)
-				PRINTR("ipt_ULOG: can't even allocate %ub\n",
-				       size);
+				pr_debug("cannot even allocate %ub\n", size);
 		}
 	}

@@ -199,8 +196,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
 		goto alloc_failure;
 	}

-	pr_debug("ipt_ULOG: qlen %d, qthreshold %Zu\n", ub->qlen,
-		 loginfo->qthreshold);
+	pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold);

 	/* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */
 	nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
@@ -273,16 +269,14 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	return;

 nlmsg_failure:
-	PRINTR("ipt_ULOG: error during NLMSG_PUT\n");
-
+	pr_debug("error during NLMSG_PUT\n");
 alloc_failure:
-	PRINTR("ipt_ULOG: Error building netlink message\n");
-
+	pr_debug("Error building netlink message\n");
 	spin_unlock_bh(&ulog_lock);
 }

 static unsigned int
-ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	ipt_ulog_packet(par->hooknum, skb, par->in, par->out,
 			par->targinfo, NULL);
@@ -314,21 +308,20 @@ static void ipt_logfn(u_int8_t pf,
 	ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
 }

-static bool ulog_tg_check(const struct xt_tgchk_param *par)
+static int ulog_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_ulog_info *loginfo = par->targinfo;

 	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
-		pr_debug("ipt_ULOG: prefix term %i\n",
-			 loginfo->prefix[sizeof(loginfo->prefix) - 1]);
-		return false;
+		pr_debug("prefix not null-terminated\n");
+		return -EINVAL;
 	}
 	if (loginfo->qthreshold > ULOG_MAX_QLEN) {
-		pr_debug("ipt_ULOG: queue threshold %Zu > MAX_QLEN\n",
+		pr_debug("queue threshold %Zu > MAX_QLEN\n",
 			 loginfo->qthreshold);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }

 #ifdef CONFIG_COMPAT
@@ -390,10 +383,10 @@ static int __init ulog_tg_init(void)
 {
 	int ret, i;

-	pr_debug("ipt_ULOG: init module\n");
+	pr_debug("init module\n");

 	if (nlbufsiz > 128*1024) {
-		printk("Netlink buffer has to be <= 128kB\n");
+		pr_warning("Netlink buffer has to be <= 128kB\n");
 		return -EINVAL;
 	}

@@ -423,7 +416,7 @@ static void __exit ulog_tg_exit(void)
 	ulog_buff_t *ub;
 	int i;

-	pr_debug("ipt_ULOG: cleanup_module\n");
+	pr_debug("cleanup_module\n");

 	if (nflog)
 		nf_log_unregister(&ipt_ulog_logger);
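
Every message cleanup in this file leans on the pr_fmt() define added at the top: the pr_err()/pr_debug() family expands to printk(KERN_<LEVEL> pr_fmt(fmt), ...), so defining pr_fmt as KBUILD_MODNAME ": " before the first include prefixes each message with the module name automatically, which is why the hand-written "ipt_ULOG: " prefixes can be dropped from every call site. In outline (illustrative module skeleton):

    /* Must appear before the first include that pulls in the printk helpers. */
    #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
    #include <linux/kernel.h>
    #include <linux/module.h>

    static int __init example_init(void)
    {
        pr_debug("init module\n");   /* emitted as "ipt_ULOG: init module" */
        return 0;
    }
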
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 3b216be3bc9f..db8bff0fb86d 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -8,7 +8,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -30,7 +30,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
 }

 static bool
-addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct ipt_addrtype_info *info = par->matchinfo;
@@ -48,7 +48,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 }

 static bool
-addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct ipt_addrtype_info_v1 *info = par->matchinfo;
@@ -70,34 +70,34 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }

-static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
+static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 {
 	struct ipt_addrtype_info_v1 *info = par->matchinfo;

 	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
-		printk(KERN_ERR "ipt_addrtype: both incoming and outgoing "
-				"interface limitation cannot be selected\n");
-		return false;
+		pr_info("both incoming and outgoing "
+			"interface limitation cannot be selected\n");
+		return -EINVAL;
 	}

 	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
 	    (1 << NF_INET_LOCAL_IN)) &&
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
-		printk(KERN_ERR "ipt_addrtype: output interface limitation "
-				"not valid in PRE_ROUTING and INPUT\n");
-		return false;
+		pr_info("output interface limitation "
+			"not valid in PREROUTING and INPUT\n");
+		return -EINVAL;
 	}

 	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
 	    (1 << NF_INET_LOCAL_OUT)) &&
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
-		printk(KERN_ERR "ipt_addrtype: input interface limitation "
-				"not valid in POST_ROUTING and OUTPUT\n");
-		return false;
+		pr_info("input interface limitation "
+			"not valid in POSTROUTING and OUTPUT\n");
+		return -EINVAL;
 	}

-	return true;
+	return 0;
 }

 static struct xt_match addrtype_mt_reg[] __read_mostly = {
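
The checkentry conversion seen here (and repeated across the files in this series) changes the return convention from bool to int: 0 accepts the rule, and a negative errno both rejects it and reaches userspace intact instead of being flattened into a generic failure. A minimal sketch of the new shape (illustrative check, not taken from this patch):

    static int example_mt_check(const struct xt_mtchk_param *par)
    {
        const struct ipt_addrtype_info_v1 *info = par->matchinfo;

        /* Mutually exclusive flags: refuse the rule with a real errno. */
        if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
            info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
            return -EINVAL;

        return 0;   /* rule accepted */
    }
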
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 0104c0b399de..14a2aa8b8a14 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -5,7 +5,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -18,25 +18,19 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
 MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");

-#ifdef DEBUG_CONNTRACK
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
 /* Returns 1 if the spi is matched by the range, 0 otherwise */
 static inline bool
 spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 {
 	bool r;
-	duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
-		 min,spi,max);
+	pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+		 invert ? '!' : ' ', min, spi, max);
 	r=(spi >= min && spi <= max) ^ invert;
-	duprintf(" result %s\n",r? "PASS" : "FAILED");
+	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
 	return r;
 }

-static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ip_auth_hdr _ahdr;
 	const struct ip_auth_hdr *ah;
@@ -51,8 +45,8 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
-		duprintf("Dropping evil AH tinygram.\n");
-		*par->hotdrop = true;
+		pr_debug("Dropping evil AH tinygram.\n");
+		par->hotdrop = true;
 		return 0;
 	}

@@ -61,16 +55,16 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		 !!(ahinfo->invflags & IPT_AH_INV_SPI));
 }

-static bool ah_mt_check(const struct xt_mtchk_param *par)
+static int ah_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ah *ahinfo = par->matchinfo;

 	/* Must specify no unknown invflags */
 	if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
-		duprintf("ipt_ah: unknown flags %X\n", ahinfo->invflags);
-		return false;
+		pr_debug("unknown flags %X\n", ahinfo->invflags);
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }

 static struct xt_match ah_mt_reg __read_mostly = {
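
Two related API moves are visible in ah_mt(): matches now receive the merged struct xt_action_param (no longer const, since matches may set state on it), and hotdrop became a plain member written directly instead of through a pointer. A match skeleton under the new signature (illustrative, not part of the patch):

    static bool example_mt(const struct sk_buff *skb,
                           struct xt_action_param *par)
    {
        struct ip_auth_hdr _ahdr;
        const struct ip_auth_hdr *ah;

        ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr);
        if (ah == NULL) {
            par->hotdrop = true;   /* truncated header: drop the packet */
            return false;
        }
        return true;   /* real match logic would go here */
    }
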
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 2a1e56b71908..af6e9c778345 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/ip.h>
@@ -67,7 +67,7 @@ static inline bool match_tcp(const struct sk_buff *skb,
 	return true;
 }

-static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ipt_ecn_info *info = par->matchinfo;

@@ -78,32 +78,31 @@ static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
 		if (ip_hdr(skb)->protocol != IPPROTO_TCP)
 			return false;
-		if (!match_tcp(skb, info, par->hotdrop))
+		if (!match_tcp(skb, info, &par->hotdrop))
 			return false;
 	}

 	return true;
 }

-static bool ecn_mt_check(const struct xt_mtchk_param *par)
+static int ecn_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ecn_info *info = par->matchinfo;
 	const struct ipt_ip *ip = par->entryinfo;

 	if (info->operation & IPT_ECN_OP_MATCH_MASK)
-		return false;
+		return -EINVAL;

 	if (info->invert & IPT_ECN_OP_MATCH_MASK)
-		return false;
+		return -EINVAL;

 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
 	    ip->proto != IPPROTO_TCP) {
-		printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for"
-		       " non-tcp packets\n");
-		return false;
+		pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+		return -EINVAL;
 	}

-	return true;
+	return 0;
 }

 static struct xt_match ecn_mt_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 55392466daa4..c37641e819f2 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -89,7 +89,7 @@ static int __init iptable_filter_init(void)
 	int ret;

 	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
+		pr_err("iptables forward must be 0 or 1\n");
 		return -EINVAL;
 	}

diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2bb1f87051c4..5a03c02af999 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -382,32 +382,32 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)

 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv4: can't register tcp.\n");
+		pr_err("nf_conntrack_ipv4: can't register tcp.\n");
 		goto cleanup_sockopt;
 	}

 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv4: can't register udp.\n");
+		pr_err("nf_conntrack_ipv4: can't register udp.\n");
 		goto cleanup_tcp;
 	}

 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv4: can't register icmp.\n");
+		pr_err("nf_conntrack_ipv4: can't register icmp.\n");
 		goto cleanup_udp;
 	}

 	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv4: can't register ipv4\n");
+		pr_err("nf_conntrack_ipv4: can't register ipv4\n");
 		goto cleanup_icmp;
 	}

 	ret = nf_register_hooks(ipv4_conntrack_ops,
 				ARRAY_SIZE(ipv4_conntrack_ops));
 	if (ret < 0) {
-		printk("nf_conntrack_ipv4: can't register hooks.\n");
+		pr_err("nf_conntrack_ipv4: can't register hooks.\n");
 		goto cleanup_ipv4;
 	}
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 2fb7b76da94f..244f7cb08d68 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -336,12 +336,12 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	const struct ip_conntrack_stat *st = v;

 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
+		seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
 		return 0;
 	}

 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
-		   "%08x %08x %08x %08x %08x %08x %08x %08x \n",
+		   "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
 		   nr_conntracks,
 		   st->searched,
 		   st->found,
@@ -358,7 +358,8 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)

 		   st->expect_new,
 		   st->expect_create,
-		   st->expect_delete
+		   st->expect_delete,
+		   st->search_restart
 		);
 	return 0;
 }
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index cb763ae9ed90..eab8de32f200 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 					  const struct net_device *out,
 					  int (*okfn)(struct sk_buff *))
 {
+	struct inet_sock *inet = inet_sk(skb->sk);
+
+	if (inet && inet->nodefrag)
+		return NF_ACCEPT;
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
 	/* Previously seen (loopback)? Ignore. Do this before
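
The early return added above lets a socket opt out of conntrack's otherwise mandatory defragmentation: if the receiving socket carries the nodefrag flag, fragments pass through the hook untouched. The flag is driven from userspace by the IP_NODEFRAG socket option introduced alongside this change; assuming that sockopt, usage looks like:

    #include <netinet/in.h>
    #include <stdio.h>
    #include <sys/socket.h>

    int main(void)
    {
        int fd = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);
        int one = 1;

        /* Deliver IP fragments as-is instead of reassembled datagrams. */
        if (setsockopt(fd, IPPROTO_IP, IP_NODEFRAG, &one, sizeof(one)) < 0)
            perror("setsockopt(IP_NODEFRAG)");
        return 0;
    }
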
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 4f8bddb760c9..8c8632d9b93c 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -261,14 +261,9 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	rcu_read_lock();
 	proto = __nf_nat_proto_find(orig_tuple->dst.protonum);

-	/* Change protocol info to have some randomization */
-	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
-		proto->unique_tuple(tuple, range, maniptype, ct);
-		goto out;
-	}
-
 	/* Only bother mapping if it's not already in range and unique */
-	if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
+	if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) &&
+	    (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
 	     proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
 	    !nf_nat_used_tuple(tuple, ct))
 		goto out;
@@ -440,7 +435,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
 		return 0;

-	inside = (void *)skb->data + ip_hdrlen(skb);
+	inside = (void *)skb->data + hdrlen;

 	/* We're actually going to mangle it beyond trivial checksum
 	   adjustment, so make sure the current checksum is correct. */
@@ -470,12 +465,10 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	/* rcu_read_lock()ed by nf_hook_slow */
 	l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);

-	if (!nf_ct_get_tuple(skb,
-			     ip_hdrlen(skb) + sizeof(struct icmphdr),
-			     (ip_hdrlen(skb) +
+	if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr),
+			     (hdrlen +
 			      sizeof(struct icmphdr) + inside->ip.ihl * 4),
-			     (u_int16_t)AF_INET,
-			     inside->ip.protocol,
+			     (u_int16_t)AF_INET, inside->ip.protocol,
 			     &inner, l3proto, l4proto))
 		return 0;

@@ -484,15 +477,13 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 	   pass all hooks (locally-generated ICMP).  Consider incoming
 	   packet: PREROUTING (DST manip), routing produces ICMP, goes
 	   through POSTROUTING (which must correct the DST manip). */
-	if (!manip_pkt(inside->ip.protocol, skb,
-		       ip_hdrlen(skb) + sizeof(inside->icmp),
-		       &ct->tuplehash[!dir].tuple,
-		       !manip))
+	if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp),
+		       &ct->tuplehash[!dir].tuple, !manip))
 		return 0;

 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
 		/* Reloading "inside" here since manip_pkt inner. */
-		inside = (void *)skb->data + ip_hdrlen(skb);
+		inside = (void *)skb->data + hdrlen;
 		inside->icmp.checksum = 0;
 		inside->icmp.checksum =
 			csum_fold(skb_checksum(skb, hdrlen,
@@ -742,7 +733,7 @@ static int __init nf_nat_init(void)
 	spin_unlock_bh(&nf_nat_lock);

 	/* Initialize fake conntrack so that NAT will skip it */
-	nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+	nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);

 	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);

diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 7e8e6fc75413..5045196d853c 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -10,7 +10,6 @@
  */

 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/tcp.h>
 #include <net/tcp.h>

@@ -44,7 +43,7 @@ static int set_addr(struct sk_buff *skb,
 			      addroff, sizeof(buf),
 			      (char *) &buf, sizeof(buf))) {
 			if (net_ratelimit())
-				printk("nf_nat_h323: nf_nat_mangle_tcp_packet"
+				pr_notice("nf_nat_h323: nf_nat_mangle_tcp_packet"
 				       " error\n");
 			return -1;
 		}
@@ -60,7 +59,7 @@ static int set_addr(struct sk_buff *skb,
 			      addroff, sizeof(buf),
 			      (char *) &buf, sizeof(buf))) {
 			if (net_ratelimit())
-				printk("nf_nat_h323: nf_nat_mangle_udp_packet"
+				pr_notice("nf_nat_h323: nf_nat_mangle_udp_packet"
 				       " error\n");
 			return -1;
 		}
@@ -216,7 +215,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 	/* Run out of expectations */
 	if (i >= H323_RTP_CHANNEL_MAX) {
 		if (net_ratelimit())
-			printk("nf_nat_h323: out of expectations\n");
+			pr_notice("nf_nat_h323: out of expectations\n");
 		return 0;
 	}

@@ -235,7 +234,7 @@

 	if (nated_port == 0) {	/* No port available */
 		if (net_ratelimit())
-			printk("nf_nat_h323: out of RTP ports\n");
+			pr_notice("nf_nat_h323: out of RTP ports\n");
 		return 0;
 	}

@@ -292,7 +291,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,

 	if (nated_port == 0) {	/* No port available */
 		if (net_ratelimit())
-			printk("nf_nat_h323: out of TCP ports\n");
+			pr_notice("nf_nat_h323: out of TCP ports\n");
 		return 0;
 	}

@@ -342,7 +341,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,

 	if (nated_port == 0) {	/* No port available */
 		if (net_ratelimit())
-			printk("nf_nat_q931: out of TCP ports\n");
+			pr_notice("nf_nat_q931: out of TCP ports\n");
 		return 0;
 	}

@@ -426,7 +425,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,

 	if (nated_port == 0) {	/* No port available */
 		if (net_ratelimit())
-			printk("nf_nat_ras: out of TCP ports\n");
+			pr_notice("nf_nat_ras: out of TCP ports\n");
 		return 0;
 	}

@@ -508,7 +507,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,

 	if (nated_port == 0) {	/* No port available */
 		if (net_ratelimit())
-			printk("nf_nat_q931: out of TCP ports\n");
+			pr_notice("nf_nat_q931: out of TCP ports\n");
 		return 0;
 	}

diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index 6c4f11f51446..3e61faf23a9a 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -34,7 +34,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 }
 EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);

-bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 			       const struct nf_nat_range *range,
 			       enum nf_nat_manip_type maniptype,
 			       const struct nf_conn *ct,
@@ -53,7 +53,7 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
 		/* If it's dst rewrite, can't change port */
 		if (maniptype == IP_NAT_MANIP_DST)
-			return false;
+			return;

 		if (ntohs(*portptr) < 1024) {
 			/* Loose convention: >> 512 is credential passing */
@@ -81,15 +81,15 @@ bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 	else
 		off = *rover;

-	for (i = 0; i < range_size; i++, off++) {
+	for (i = 0; ; ++off) {
 		*portptr = htons(min + off % range_size);
-		if (nf_nat_used_tuple(tuple, ct))
+		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
 			continue;
 		if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
 			*rover = off;
-		return true;
+		return;
 	}
-	return false;
+	return;
 }
 EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);

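
The restructured loop is what lets nf_nat_proto_unique_tuple() lose its return value: the final candidate in the range is now accepted without the uniqueness test, so the search always yields some port (a clashing source tuple can still form a valid connection when the destination halves differ). Unrolled into more explicit control flow, and reusing the function's own locals, the new loop is equivalent to:

    for (i = 0; i < range_size; i++, off++) {
        *portptr = htons(min + off % range_size);
        if (i == range_size - 1)
            break;                       /* last try: take it anyway */
        if (!nf_nat_used_tuple(tuple, ct))
            break;                       /* found an unused port */
    }
    if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
        *rover = off;
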
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
index 22485ce306d4..570faf2667b2 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -22,14 +22,14 @@

 static u_int16_t dccp_port_rover;

-static bool
+static void
 dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
-	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-					 &dccp_port_rover);
+	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+				  &dccp_port_rover);
 }

 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index d7e89201351e..bc8d83a31c73 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -37,7 +37,7 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
 MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");

 /* generate unique tuple ... */
-static bool
+static void
 gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_nat_range *range,
 		 enum nf_nat_manip_type maniptype,
@@ -50,7 +50,7 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 	/* If there is no master conntrack we are not PPTP,
 	   do not change tuples */
 	if (!ct->master)
-		return false;
+		return;

 	if (maniptype == IP_NAT_MANIP_SRC)
 		keyptr = &tuple->src.u.gre.key;
@@ -68,14 +68,14 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,

 	pr_debug("min = %u, range_size = %u\n", min, range_size);

-	for (i = 0; i < range_size; i++, key++) {
+	for (i = 0; ; ++key) {
 		*keyptr = htons(min + key % range_size);
-		if (!nf_nat_used_tuple(tuple, ct))
-			return true;
+		if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+			return;
 	}

 	pr_debug("%p: no NAT mapping\n", ct);
-	return false;
+	return;
 }

 /* manipulate a GRE packet according to maniptype */
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 19a8b0b07d8e..5744c3ec847c 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -27,7 +27,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
 	       ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
 }

-static bool
+static void
 icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype,
@@ -42,13 +42,13 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
 	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
 		range_size = 0xFFFF;

-	for (i = 0; i < range_size; i++, id++) {
+	for (i = 0; ; ++id) {
 		tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
 					     (id % range_size));
-		if (!nf_nat_used_tuple(tuple, ct))
-			return true;
+		if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+			return;
 	}
-	return false;
+	return;
 }

 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index 3fc598eeeb1a..756331d42661 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -16,14 +16,14 @@

 static u_int16_t nf_sctp_port_rover;

-static bool
+static void
 sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		  const struct nf_nat_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
-	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-					 &nf_sctp_port_rover);
+	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+				  &nf_sctp_port_rover);
 }

 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 399e2cfa263b..aa460a595d5d 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -20,14 +20,13 @@

 static u_int16_t tcp_port_rover;

-static bool
+static void
 tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_nat_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
-	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-					 &tcp_port_rover);
+	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover);
 }

 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 9e61c79492e4..dfe65c7e2925 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -19,14 +19,13 @@

 static u_int16_t udp_port_rover;

-static bool
+static void
 udp_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_nat_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
-	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-					 &udp_port_rover);
+	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover);
 }

 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
index 440a229bbd87..3cc8c8af39ef 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -18,14 +18,14 @@

 static u_int16_t udplite_port_rover;

-static bool
+static void
 udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
 		     const struct nf_nat_range *range,
 		     enum nf_nat_manip_type maniptype,
 		     const struct nf_conn *ct)
 {
-	return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
-					 &udplite_port_rover);
+	nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
+				  &udplite_port_rover);
 }

 static bool
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index 14381c62acea..a50f2bc1c732 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -26,14 +26,14 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
 	return true;
 }

-static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
+static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
 				 const struct nf_nat_range *range,
 				 enum nf_nat_manip_type maniptype,
 				 const struct nf_conn *ct)
 {
 	/* Sorry: we can't help you; if it's not unique, we can't frob
 	   anything. */
-	return false;
+	return;
 }

 static bool
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 26de2c1f7fab..ebbd319f62f5 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -7,6 +7,7 @@
  */

 /* Everything about the rules for NAT. */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/types.h>
 #include <linux/ip.h>
 #include <linux/netfilter.h>
@@ -27,7 +28,8 @@

 #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
 			 (1 << NF_INET_POST_ROUTING) | \
-			 (1 << NF_INET_LOCAL_OUT))
+			 (1 << NF_INET_LOCAL_OUT) | \
+			 (1 << NF_INET_LOCAL_IN))

 static const struct xt_table nat_table = {
 	.name		= "nat",
@@ -38,13 +40,14 @@ static const struct xt_table nat_table = {

 /* Source NAT */
 static unsigned int
-ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par)
+ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;

-	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
+	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_IN);

 	ct = nf_ct_get(skb, &ctinfo);

@@ -57,7 +60,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par)
 }

 static unsigned int
-ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par)
+ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -74,31 +77,31 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par)
 	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
 }

-static bool ipt_snat_checkentry(const struct xt_tgchk_param *par)
+static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;

 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
-		printk("SNAT: multiple ranges no longer supported\n");
-		return false;
+		pr_info("SNAT: multiple ranges no longer supported\n");
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }

-static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par)
+static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;

 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
-		printk("DNAT: multiple ranges no longer supported\n");
-		return false;
+		pr_info("DNAT: multiple ranges no longer supported\n");
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }

-unsigned int
+static unsigned int
 alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
@@ -140,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
 	.target		= ipt_snat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= 1 << NF_INET_POST_ROUTING,
+	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
 	.checkentry	= ipt_snat_checkentry,
 	.family		= AF_INET,
 };
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 4d85b6e55f29..1679e2c0963d 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -401,7 +401,7 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
 	*octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
 	if (*octets == NULL) {
 		if (net_ratelimit())
-			printk("OOM in bsalg (%d)\n", __LINE__);
+			pr_notice("OOM in bsalg (%d)\n", __LINE__);
 		return 0;
 	}

@@ -452,7 +452,7 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
 	*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
 	if (*oid == NULL) {
 		if (net_ratelimit())
-			printk("OOM in bsalg (%d)\n", __LINE__);
+			pr_notice("OOM in bsalg (%d)\n", __LINE__);
 		return 0;
 	}

@@ -729,7 +729,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 		if (*obj == NULL) {
 			kfree(id);
 			if (net_ratelimit())
-				printk("OOM in bsalg (%d)\n", __LINE__);
+				pr_notice("OOM in bsalg (%d)\n", __LINE__);
 			return 0;
 		}
 		(*obj)->syntax.l[0] = l;
@@ -746,7 +746,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 			kfree(p);
 			kfree(id);
 			if (net_ratelimit())
-				printk("OOM in bsalg (%d)\n", __LINE__);
+				pr_notice("OOM in bsalg (%d)\n", __LINE__);
 			return 0;
 		}
 		memcpy((*obj)->syntax.c, p, len);
@@ -761,7 +761,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 		if (*obj == NULL) {
 			kfree(id);
 			if (net_ratelimit())
-				printk("OOM in bsalg (%d)\n", __LINE__);
+				pr_notice("OOM in bsalg (%d)\n", __LINE__);
 			return 0;
 		}
 		if (!asn1_null_decode(ctx, end)) {
@@ -782,7 +782,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 			kfree(lp);
 			kfree(id);
 			if (net_ratelimit())
-				printk("OOM in bsalg (%d)\n", __LINE__);
+				pr_notice("OOM in bsalg (%d)\n", __LINE__);
 			return 0;
 		}
 		memcpy((*obj)->syntax.ul, lp, len);
@@ -803,7 +803,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 			kfree(p);
 			kfree(id);
 			if (net_ratelimit())
-				printk("OOM in bsalg (%d)\n", __LINE__);
+				pr_notice("OOM in bsalg (%d)\n", __LINE__);
 			return 0;
 		}
 		memcpy((*obj)->syntax.uc, p, len);
@@ -821,7 +821,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 		if (*obj == NULL) {
 			kfree(id);
 			if (net_ratelimit())
-				printk("OOM in bsalg (%d)\n", __LINE__);
+				pr_notice("OOM in bsalg (%d)\n", __LINE__);
 			return 0;
 		}
 		(*obj)->syntax.ul[0] = ul;
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index c39c9cf6bee6..95481fee8bdb 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum,
 		return NF_ACCEPT;

 	/* Don't try to NAT if this packet is not conntracked */
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return NF_ACCEPT;

 	nat = nfct_nat(ct);
@@ -131,16 +131,9 @@ nf_nat_fn(unsigned int hooknum,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;

-			if (hooknum == NF_INET_LOCAL_IN)
-				/* LOCAL_IN hook doesn't have a chain! */
-				ret = alloc_null_binding(ct, hooknum);
-			else
-				ret = nf_nat_rule_find(skb, hooknum, in, out,
-						       ct);
-
-			if (ret != NF_ACCEPT) {
+			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+			if (ret != NF_ACCEPT)
 				return ret;
-			}
 		} else
 			pr_debug("Already setup manip %s for ct %p\n",
 				 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
@@ -294,12 +287,12 @@ static int __init nf_nat_standalone_init(void)
 #endif
 	ret = nf_nat_rule_init();
 	if (ret < 0) {
-		printk("nf_nat_init: can't setup rules.\n");
+		pr_err("nf_nat_init: can't setup rules.\n");
 		goto cleanup_decode_session;
 	}
 	ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
 	if (ret < 0) {
-		printk("nf_nat_init: can't register hooks.\n");
+		pr_err("nf_nat_init: can't register hooks.\n");
 		goto cleanup_rule_init;
 	}
 	return ret;
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index b096e81500ae..7274a43c7a12 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -6,7 +6,6 @@
  */

 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/udp.h>

 #include <net/netfilter/nf_nat_helper.h>
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4f1f337f4337..4ae1f203f7cb 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -251,6 +251,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
 	SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
 	SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
+	SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
+	SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
 	SNMP_MIB_SENTINEL
 };

@@ -341,10 +343,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		   IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
 		   sysctl_ip_default_ttl);

+	BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.ip_statistics,
-					   snmp4_ipstats_list[i].entry));
+		seq_printf(seq, " %llu",
+			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+					     snmp4_ipstats_list[i].entry,
+					     offsetof(struct ipstats_mib, syncp)));

 	icmp_put(seq);	/* RFC 2011 compatibility */
 	icmpmsg_put(seq);
@@ -430,9 +434,10 @@ static int netstat_seq_show(struct seq_file *seq, void *v)

 	seq_puts(seq, "\nIpExt:");
 	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.ip_statistics,
-					   snmp4_ipextstats_list[i].entry));
+		seq_printf(seq, " %llu",
+			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+					     snmp4_ipextstats_list[i].entry,
+					     offsetof(struct ipstats_mib, syncp)));

 	seq_putc(seq, '\n');
 	return 0;
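
The move from snmp_fold_field() to snmp_fold_field64() accompanies making the IP MIB counters 64-bit on all architectures; the extra offsetof(struct ipstats_mib, syncp) argument locates a u64_stats_sync sequence counter so 32-bit hosts can read each per-cpu 64-bit counter without tearing. A sketch of what such a fold involves, assuming counters live in mibs[] (pinned at offset 0 by the BUILD_BUG_ON above) next to a syncp member (helper name illustrative):

    static u64 fold_mib64(struct ipstats_mib __percpu *mib, int field)
    {
        u64 sum = 0;
        int cpu;

        for_each_possible_cpu(cpu) {
            const struct ipstats_mib *m = per_cpu_ptr(mib, cpu);
            unsigned int start;
            u64 v;

            do {   /* retry if a writer updated the counter mid-read */
                start = u64_stats_fetch_begin(&m->syncp);
                v = m->mibs[field];
            } while (u64_stats_fetch_retry(&m->syncp, start));
            sum += v;
        }
        return sum;
    }
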
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 542f22fc98b3..f2d297351405 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -52,6 +52,7 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)

 	return ret;
 }
+EXPORT_SYMBOL(inet_add_protocol);

 /*
  *	Remove a protocol from the hash tables.
@@ -76,6 +77,4 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)

 	return ret;
 }
-
-EXPORT_SYMBOL(inet_add_protocol);
 EXPORT_SYMBOL(inet_del_protocol);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index cc6f097fbd5f..009a7b2aa1ef 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -290,7 +290,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
290{ 290{
291 /* Charge it to the socket. */ 291 /* Charge it to the socket. */
292 292
293 if (sock_queue_rcv_skb(sk, skb) < 0) { 293 if (ip_queue_rcv_skb(sk, skb) < 0) {
294 kfree_skb(skb); 294 kfree_skb(skb);
295 return NET_RX_DROP; 295 return NET_RX_DROP;
296 } 296 }
@@ -314,7 +314,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
314} 314}
315 315
316static int raw_send_hdrinc(struct sock *sk, void *from, size_t length, 316static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
317 struct rtable *rt, 317 struct rtable **rtp,
318 unsigned int flags) 318 unsigned int flags)
319{ 319{
320 struct inet_sock *inet = inet_sk(sk); 320 struct inet_sock *inet = inet_sk(sk);
@@ -323,25 +323,27 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
323 struct sk_buff *skb; 323 struct sk_buff *skb;
324 unsigned int iphlen; 324 unsigned int iphlen;
325 int err; 325 int err;
326 struct rtable *rt = *rtp;
326 327
327 if (length > rt->u.dst.dev->mtu) { 328 if (length > rt->dst.dev->mtu) {
328 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport, 329 ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
329 rt->u.dst.dev->mtu); 330 rt->dst.dev->mtu);
330 return -EMSGSIZE; 331 return -EMSGSIZE;
331 } 332 }
332 if (flags&MSG_PROBE) 333 if (flags&MSG_PROBE)
333 goto out; 334 goto out;
334 335
335 skb = sock_alloc_send_skb(sk, 336 skb = sock_alloc_send_skb(sk,
336 length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15, 337 length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
337 flags & MSG_DONTWAIT, &err); 338 flags & MSG_DONTWAIT, &err);
338 if (skb == NULL) 339 if (skb == NULL)
339 goto error; 340 goto error;
340 skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev)); 341 skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
341 342
342 skb->priority = sk->sk_priority; 343 skb->priority = sk->sk_priority;
343 skb->mark = sk->sk_mark; 344 skb->mark = sk->sk_mark;
344 skb_dst_set(skb, dst_clone(&rt->u.dst)); 345 skb_dst_set(skb, &rt->dst);
346 *rtp = NULL;
345 347
346 skb_reset_network_header(skb); 348 skb_reset_network_header(skb);
347 iph = ip_hdr(skb); 349 iph = ip_hdr(skb);
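
Two things happen in this hunk. First, the rt->u.dst accesses become rt->dst: elsewhere in this series struct rtable loses the single-member union that wrapped its embedded struct dst_entry, so the .u indirection disappears throughout the rest of this diff. Second, raw_send_hdrinc() now receives struct rtable **rtp and takes ownership of the caller's route reference: skb_dst_set(skb, &rt->dst) attaches the route without dst_clone()'s atomic increment, and *rtp = NULL tells the caller the reference was consumed. A sketch of the transfer contract, with hypothetical helpers:

    #include <linux/skbuff.h>
    #include <net/route.h>

    static struct sk_buff *demo_build_skb(struct sock *sk, void *from,
                                          size_t len);   /* hypothetical */
    static int demo_xmit(struct sk_buff *skb);            /* hypothetical */

    static int demo_send(struct sock *sk, void *from, size_t len,
                         struct rtable **rtp)
    {
        struct rtable *rt = *rtp;
        struct sk_buff *skb = demo_build_skb(sk, from, len);

        if (skb == NULL)
            return -ENOBUFS;          /* caller still owns the route */

        skb_dst_set(skb, &rt->dst);   /* steals the reference, no clone */
        *rtp = NULL;                  /* signal the transfer */
        return demo_xmit(skb);
    }

The caller's common exit path can keep an unconditional ip_rt_put(rt): once the reference has been transferred, rt is NULL, and ip_rt_put() ignores NULL.
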
@@ -373,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
373 iph->check = 0; 375 iph->check = 0;
374 iph->tot_len = htons(length); 376 iph->tot_len = htons(length);
375 if (!iph->id) 377 if (!iph->id)
376 ip_select_ident(iph, &rt->u.dst, NULL); 378 ip_select_ident(iph, &rt->dst, NULL);
377 379
378 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); 380 iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
379 } 381 }
@@ -381,8 +383,8 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
381 icmp_out_count(net, ((struct icmphdr *) 383 icmp_out_count(net, ((struct icmphdr *)
382 skb_transport_header(skb))->type); 384 skb_transport_header(skb))->type);
383 385
384 err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, 386 err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
385 dst_output); 387 rt->dst.dev, dst_output);
386 if (err > 0) 388 if (err > 0)
387 err = net_xmit_errno(err); 389 err = net_xmit_errno(err);
388 if (err) 390 if (err)

(The NF_HOOK invocation in the preceding hunk also switches its family argument from the socket-family constant PF_INET to the netfilter-specific NFPROTO_IPV4; the values are numerically identical, but NFPROTO_* is the canonical namespace for hook registration and traversal.)

@@ -576,7 +578,7 @@ back_from_confirm:
576 578
577 if (inet->hdrincl) 579 if (inet->hdrincl)
578 err = raw_send_hdrinc(sk, msg->msg_iov, len, 580 err = raw_send_hdrinc(sk, msg->msg_iov, len,
579 rt, msg->msg_flags); 581 &rt, msg->msg_flags);
580 582
581 else { 583 else {
582 if (!ipc.addr) 584 if (!ipc.addr)
@@ -604,7 +606,7 @@ out:
604 return len; 606 return len;
605 607
606do_confirm: 608do_confirm:
607 dst_confirm(&rt->u.dst); 609 dst_confirm(&rt->dst);
608 if (!(msg->msg_flags & MSG_PROBE) || len) 610 if (!(msg->msg_flags & MSG_PROBE) || len)
609 goto back_from_confirm; 611 goto back_from_confirm;
610 err = 0; 612 err = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cb562fdd9b9a..3f56b6e6c6aa 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8;
129static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; 129static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
130static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; 130static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
131static int ip_rt_min_advmss __read_mostly = 256; 131static int ip_rt_min_advmss __read_mostly = 256;
132static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
133static int rt_chain_length_max __read_mostly = 20; 132static int rt_chain_length_max __read_mostly = 20;
134 133
135static struct delayed_work expires_work; 134static struct delayed_work expires_work;
@@ -254,14 +253,12 @@ static unsigned rt_hash_mask __read_mostly;
254static unsigned int rt_hash_log __read_mostly; 253static unsigned int rt_hash_log __read_mostly;
255 254
256static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); 255static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
257#define RT_CACHE_STAT_INC(field) \ 256#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
258 (__raw_get_cpu_var(rt_cache_stat).field++)
259 257
260static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx, 258static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
261 int genid) 259 int genid)
262{ 260{
263 return jhash_3words((__force u32)(__be32)(daddr), 261 return jhash_3words((__force u32)daddr, (__force u32)saddr,
264 (__force u32)(__be32)(saddr),
265 idx, genid) 262 idx, genid)
266 & rt_hash_mask; 263 & rt_hash_mask;
267} 264}
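
Two cleanups in this hunk. RT_CACHE_STAT_INC() switches to __this_cpu_inc(), which on architectures with addressable per-cpu segments (x86, for one) compiles to a single increment instead of first materializing the per-cpu address via __raw_get_cpu_var(); the double-underscore variant assumes the caller already runs non-preemptible, which holds on these packet-processing paths. And rt_hash() drops the redundant (__be32) casts, keeping only the (__force u32) conversions that sparse requires. The per-cpu counter idiom in isolation:

    #include <linux/percpu.h>

    struct demo_stat {
        unsigned int in_hit;
        unsigned int in_slow_tot;
    };

    static DEFINE_PER_CPU(struct demo_stat, demo_stat);

    /* Single-instruction increment where the arch allows it; the __
     * variant does not disable preemption, so callers must be atomic. */
    #define DEMO_STAT_INC(field)    __this_cpu_inc(demo_stat.field)
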
@@ -289,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
289 rcu_read_lock_bh(); 286 rcu_read_lock_bh();
290 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); 287 r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
291 while (r) { 288 while (r) {
292 if (dev_net(r->u.dst.dev) == seq_file_net(seq) && 289 if (dev_net(r->dst.dev) == seq_file_net(seq) &&
293 r->rt_genid == st->genid) 290 r->rt_genid == st->genid)
294 return r; 291 return r;
295 r = rcu_dereference_bh(r->u.dst.rt_next); 292 r = rcu_dereference_bh(r->dst.rt_next);
296 } 293 }
297 rcu_read_unlock_bh(); 294 rcu_read_unlock_bh();
298 } 295 }
@@ -304,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
304{ 301{
305 struct rt_cache_iter_state *st = seq->private; 302 struct rt_cache_iter_state *st = seq->private;
306 303
307 r = r->u.dst.rt_next; 304 r = r->dst.rt_next;
308 while (!r) { 305 while (!r) {
309 rcu_read_unlock_bh(); 306 rcu_read_unlock_bh();
310 do { 307 do {
@@ -322,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq,
322{ 319{
323 struct rt_cache_iter_state *st = seq->private; 320 struct rt_cache_iter_state *st = seq->private;
324 while ((r = __rt_cache_get_next(seq, r)) != NULL) { 321 while ((r = __rt_cache_get_next(seq, r)) != NULL) {
325 if (dev_net(r->u.dst.dev) != seq_file_net(seq)) 322 if (dev_net(r->dst.dev) != seq_file_net(seq))
326 continue; 323 continue;
327 if (r->rt_genid == st->genid) 324 if (r->rt_genid == st->genid)
328 break; 325 break;
@@ -378,20 +375,21 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
378 struct rtable *r = v; 375 struct rtable *r = v;
379 int len; 376 int len;
380 377
381 seq_printf(seq, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" 378 seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
382 "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n", 379 "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
383 r->u.dst.dev ? r->u.dst.dev->name : "*", 380 r->dst.dev ? r->dst.dev->name : "*",
384 (unsigned long)r->rt_dst, (unsigned long)r->rt_gateway, 381 (__force u32)r->rt_dst,
385 r->rt_flags, atomic_read(&r->u.dst.__refcnt), 382 (__force u32)r->rt_gateway,
386 r->u.dst.__use, 0, (unsigned long)r->rt_src, 383 r->rt_flags, atomic_read(&r->dst.__refcnt),
387 (dst_metric(&r->u.dst, RTAX_ADVMSS) ? 384 r->dst.__use, 0, (__force u32)r->rt_src,
388 (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0), 385 (dst_metric(&r->dst, RTAX_ADVMSS) ?
389 dst_metric(&r->u.dst, RTAX_WINDOW), 386 (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
390 (int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) + 387 dst_metric(&r->dst, RTAX_WINDOW),
391 dst_metric(&r->u.dst, RTAX_RTTVAR)), 388 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
389 dst_metric(&r->dst, RTAX_RTTVAR)),
392 r->fl.fl4_tos, 390 r->fl.fl4_tos,
393 r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1, 391 r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
394 r->u.dst.hh ? (r->u.dst.hh->hh_output == 392 r->dst.hh ? (r->dst.hh->hh_output ==
395 dev_queue_xmit) : 0, 393 dev_queue_xmit) : 0,
396 r->rt_spec_dst, &len); 394 r->rt_spec_dst, &len);
397 395
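
rt_cache_seq_show() used to launder the __be32 addresses through unsigned long and print them with %08lX; the replacement casts to (__force u32) and uses %08X. The printed bytes are unchanged: the point is that __force documents a deliberate escape from the endianness annotations, keeping sparse quiet without hiding the conversion behind a wider integer type. The compare_hash_inputs()/compare_keys() hunks further down apply the same rule to the XOR-based key comparisons. In isolation:

    #include <linux/seq_file.h>

    static void demo_print_be32(struct seq_file *seq, __be32 addr)
    {
        /* __force: intentionally reinterpret the big-endian value
         * as a plain u32 for hex output; sparse-clean. */
        seq_printf(seq, "%08X", (__force u32)addr);
    }
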
@@ -610,13 +608,13 @@ static inline int ip_rt_proc_init(void)
610 608
611static inline void rt_free(struct rtable *rt) 609static inline void rt_free(struct rtable *rt)
612{ 610{
613 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 611 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
614} 612}
615 613
616static inline void rt_drop(struct rtable *rt) 614static inline void rt_drop(struct rtable *rt)
617{ 615{
618 ip_rt_put(rt); 616 ip_rt_put(rt);
619 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 617 call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
620} 618}
621 619
622static inline int rt_fast_clean(struct rtable *rth) 620static inline int rt_fast_clean(struct rtable *rth)
@@ -624,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth)
624 /* Kill broadcast/multicast entries very aggressively, if they 622 /* Kill broadcast/multicast entries very aggressively, if they
625 collide in hash table with more useful entries */ 623 collide in hash table with more useful entries */
626 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) && 624 return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
627 rth->fl.iif && rth->u.dst.rt_next; 625 rth->fl.iif && rth->dst.rt_next;
628} 626}
629 627
630static inline int rt_valuable(struct rtable *rth) 628static inline int rt_valuable(struct rtable *rth)
631{ 629{
632 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 630 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
633 rth->u.dst.expires; 631 rth->dst.expires;
634} 632}
635 633
636static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) 634static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -638,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
638 unsigned long age; 636 unsigned long age;
639 int ret = 0; 637 int ret = 0;
640 638
641 if (atomic_read(&rth->u.dst.__refcnt)) 639 if (atomic_read(&rth->dst.__refcnt))
642 goto out; 640 goto out;
643 641
644 ret = 1; 642 ret = 1;
645 if (rth->u.dst.expires && 643 if (rth->dst.expires &&
646 time_after_eq(jiffies, rth->u.dst.expires)) 644 time_after_eq(jiffies, rth->dst.expires))
647 goto out; 645 goto out;
648 646
649 age = jiffies - rth->u.dst.lastuse; 647 age = jiffies - rth->dst.lastuse;
650 ret = 0; 648 ret = 0;
651 if ((age <= tmo1 && !rt_fast_clean(rth)) || 649 if ((age <= tmo1 && !rt_fast_clean(rth)) ||
652 (age <= tmo2 && rt_valuable(rth))) 650 (age <= tmo2 && rt_valuable(rth)))
@@ -662,7 +660,7 @@ out: return ret;
662 */ 660 */
663static inline u32 rt_score(struct rtable *rt) 661static inline u32 rt_score(struct rtable *rt)
664{ 662{
665 u32 score = jiffies - rt->u.dst.lastuse; 663 u32 score = jiffies - rt->dst.lastuse;
666 664
667 score = ~score & ~(3<<30); 665 score = ~score & ~(3<<30);
668 666
@@ -685,30 +683,29 @@ static inline bool rt_caching(const struct net *net)
685static inline bool compare_hash_inputs(const struct flowi *fl1, 683static inline bool compare_hash_inputs(const struct flowi *fl1,
686 const struct flowi *fl2) 684 const struct flowi *fl2)
687{ 685{
688 return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | 686 return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
689 (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | 687 ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
690 (fl1->iif ^ fl2->iif)) == 0); 688 (fl1->iif ^ fl2->iif)) == 0);
691} 689}
692 690
693static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 691static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
694{ 692{
695 return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | 693 return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
696 (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | 694 ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
697 (fl1->mark ^ fl2->mark) | 695 (fl1->mark ^ fl2->mark) |
698 (*(u16 *)&fl1->nl_u.ip4_u.tos ^ 696 (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
699 *(u16 *)&fl2->nl_u.ip4_u.tos) |
700 (fl1->oif ^ fl2->oif) | 697 (fl1->oif ^ fl2->oif) |
701 (fl1->iif ^ fl2->iif)) == 0; 698 (fl1->iif ^ fl2->iif)) == 0;
702} 699}
703 700
704static inline int compare_netns(struct rtable *rt1, struct rtable *rt2) 701static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
705{ 702{
706 return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev)); 703 return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
707} 704}
708 705
709static inline int rt_is_expired(struct rtable *rth) 706static inline int rt_is_expired(struct rtable *rth)
710{ 707{
711 return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev)); 708 return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
712} 709}
713 710
714/* 711/*
@@ -737,7 +734,7 @@ static void rt_do_flush(int process_context)
737 rth = rt_hash_table[i].chain; 734 rth = rt_hash_table[i].chain;
738 735
739 /* defer releasing the head of the list after spin_unlock */ 736 /* defer releasing the head of the list after spin_unlock */
740 for (tail = rth; tail; tail = tail->u.dst.rt_next) 737 for (tail = rth; tail; tail = tail->dst.rt_next)
741 if (!rt_is_expired(tail)) 738 if (!rt_is_expired(tail))
742 break; 739 break;
743 if (rth != tail) 740 if (rth != tail)
@@ -746,9 +743,9 @@ static void rt_do_flush(int process_context)
746 /* call rt_free on entries after the tail requiring flush */ 743 /* call rt_free on entries after the tail requiring flush */
747 prev = &rt_hash_table[i].chain; 744 prev = &rt_hash_table[i].chain;
748 for (p = *prev; p; p = next) { 745 for (p = *prev; p; p = next) {
749 next = p->u.dst.rt_next; 746 next = p->dst.rt_next;
750 if (!rt_is_expired(p)) { 747 if (!rt_is_expired(p)) {
751 prev = &p->u.dst.rt_next; 748 prev = &p->dst.rt_next;
752 } else { 749 } else {
753 *prev = next; 750 *prev = next;
754 rt_free(p); 751 rt_free(p);
@@ -763,7 +760,7 @@ static void rt_do_flush(int process_context)
763 spin_unlock_bh(rt_hash_lock_addr(i)); 760 spin_unlock_bh(rt_hash_lock_addr(i));
764 761
765 for (; rth != tail; rth = next) { 762 for (; rth != tail; rth = next) {
766 next = rth->u.dst.rt_next; 763 next = rth->dst.rt_next;
767 rt_free(rth); 764 rt_free(rth);
768 } 765 }
769 } 766 }
@@ -794,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
794 while (aux != rth) { 791 while (aux != rth) {
795 if (compare_hash_inputs(&aux->fl, &rth->fl)) 792 if (compare_hash_inputs(&aux->fl, &rth->fl))
796 return 0; 793 return 0;
797 aux = aux->u.dst.rt_next; 794 aux = aux->dst.rt_next;
798 } 795 }
799 return ONE; 796 return ONE;
800} 797}
@@ -834,18 +831,18 @@ static void rt_check_expire(void)
834 length = 0; 831 length = 0;
835 spin_lock_bh(rt_hash_lock_addr(i)); 832 spin_lock_bh(rt_hash_lock_addr(i));
836 while ((rth = *rthp) != NULL) { 833 while ((rth = *rthp) != NULL) {
837 prefetch(rth->u.dst.rt_next); 834 prefetch(rth->dst.rt_next);
838 if (rt_is_expired(rth)) { 835 if (rt_is_expired(rth)) {
839 *rthp = rth->u.dst.rt_next; 836 *rthp = rth->dst.rt_next;
840 rt_free(rth); 837 rt_free(rth);
841 continue; 838 continue;
842 } 839 }
843 if (rth->u.dst.expires) { 840 if (rth->dst.expires) {
844 /* Entry is expired even if it is in use */ 841 /* Entry is expired even if it is in use */
845 if (time_before_eq(jiffies, rth->u.dst.expires)) { 842 if (time_before_eq(jiffies, rth->dst.expires)) {
846nofree: 843nofree:
847 tmo >>= 1; 844 tmo >>= 1;
848 rthp = &rth->u.dst.rt_next; 845 rthp = &rth->dst.rt_next;
849 /* 846 /*
850 * We only count entries on 847 * We only count entries on
851 * a chain with equal hash inputs once 848 * a chain with equal hash inputs once
@@ -861,7 +858,7 @@ nofree:
861 goto nofree; 858 goto nofree;
862 859
863 /* Cleanup aged off entries. */ 860 /* Cleanup aged off entries. */
864 *rthp = rth->u.dst.rt_next; 861 *rthp = rth->dst.rt_next;
865 rt_free(rth); 862 rt_free(rth);
866 } 863 }
867 spin_unlock_bh(rt_hash_lock_addr(i)); 864 spin_unlock_bh(rt_hash_lock_addr(i));
@@ -919,32 +916,11 @@ void rt_cache_flush_batch(void)
919 rt_do_flush(!in_softirq()); 916 rt_do_flush(!in_softirq());
920} 917}
921 918
922/*
923 * We change rt_genid and let gc do the cleanup
924 */
925static void rt_secret_rebuild(unsigned long __net)
926{
927 struct net *net = (struct net *)__net;
928 rt_cache_invalidate(net);
929 mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
930}
931
932static void rt_secret_rebuild_oneshot(struct net *net)
933{
934 del_timer_sync(&net->ipv4.rt_secret_timer);
935 rt_cache_invalidate(net);
936 if (ip_rt_secret_interval)
937 mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
938}
939
940static void rt_emergency_hash_rebuild(struct net *net) 919static void rt_emergency_hash_rebuild(struct net *net)
941{ 920{
942 if (net_ratelimit()) { 921 if (net_ratelimit())
943 printk(KERN_WARNING "Route hash chain too long!\n"); 922 printk(KERN_WARNING "Route hash chain too long!\n");
944 printk(KERN_WARNING "Adjust your secret_interval!\n"); 923 rt_cache_invalidate(net);
945 }
946
947 rt_secret_rebuild_oneshot(net);
948} 924}
949 925
950/* 926/*
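
The rt_secret_rebuild timer and its ip_rt_secret_interval sysctl are removed. Instead of periodically re-keying and rebuilding the hash, an emergency rebuild now just calls rt_cache_invalidate(), which bumps the per-namespace generation number; rt_is_expired() (visible earlier in this file) compares each entry's rt_genid against the current value, so every pre-bump entry becomes stale in O(1) and is reaped lazily by normal GC. The generation-counter idiom, sketched against a hypothetical cache:

    #include <linux/atomic.h>
    #include <linux/types.h>

    static atomic_t demo_genid = ATOMIC_INIT(0);

    struct demo_entry {
        int genid;    /* generation this entry was created in */
    };

    static void demo_cache_invalidate(void)
    {
        atomic_inc(&demo_genid);    /* O(1): all existing entries stale */
    }

    static bool demo_entry_is_expired(const struct demo_entry *e)
    {
        return e->genid != atomic_read(&demo_genid);
    }
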
@@ -1023,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops)
1023 if (!rt_is_expired(rth) && 999 if (!rt_is_expired(rth) &&
1024 !rt_may_expire(rth, tmo, expire)) { 1000 !rt_may_expire(rth, tmo, expire)) {
1025 tmo >>= 1; 1001 tmo >>= 1;
1026 rthp = &rth->u.dst.rt_next; 1002 rthp = &rth->dst.rt_next;
1027 continue; 1003 continue;
1028 } 1004 }
1029 *rthp = rth->u.dst.rt_next; 1005 *rthp = rth->dst.rt_next;
1030 rt_free(rth); 1006 rt_free(rth);
1031 goal--; 1007 goal--;
1032 } 1008 }
@@ -1092,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head)
1092 1068
1093 while (rth) { 1069 while (rth) {
1094 length += has_noalias(head, rth); 1070 length += has_noalias(head, rth);
1095 rth = rth->u.dst.rt_next; 1071 rth = rth->dst.rt_next;
1096 } 1072 }
1097 return length >> FRACT_BITS; 1073 return length >> FRACT_BITS;
1098} 1074}
@@ -1114,7 +1090,7 @@ restart:
1114 candp = NULL; 1090 candp = NULL;
1115 now = jiffies; 1091 now = jiffies;
1116 1092
1117 if (!rt_caching(dev_net(rt->u.dst.dev))) { 1093 if (!rt_caching(dev_net(rt->dst.dev))) {
1118 /* 1094 /*
1119 * If we're not caching, just tell the caller we 1095 * If we're not caching, just tell the caller we
1120 * were successful and don't touch the route. The 1096 * were successful and don't touch the route. The
@@ -1132,7 +1108,7 @@ restart:
1132 */ 1108 */
1133 1109
1134 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1110 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1135 int err = arp_bind_neighbour(&rt->u.dst); 1111 int err = arp_bind_neighbour(&rt->dst);
1136 if (err) { 1112 if (err) {
1137 if (net_ratelimit()) 1113 if (net_ratelimit())
1138 printk(KERN_WARNING 1114 printk(KERN_WARNING
@@ -1151,19 +1127,19 @@ restart:
1151 spin_lock_bh(rt_hash_lock_addr(hash)); 1127 spin_lock_bh(rt_hash_lock_addr(hash));
1152 while ((rth = *rthp) != NULL) { 1128 while ((rth = *rthp) != NULL) {
1153 if (rt_is_expired(rth)) { 1129 if (rt_is_expired(rth)) {
1154 *rthp = rth->u.dst.rt_next; 1130 *rthp = rth->dst.rt_next;
1155 rt_free(rth); 1131 rt_free(rth);
1156 continue; 1132 continue;
1157 } 1133 }
1158 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) { 1134 if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
1159 /* Put it first */ 1135 /* Put it first */
1160 *rthp = rth->u.dst.rt_next; 1136 *rthp = rth->dst.rt_next;
1161 /* 1137 /*
1162 * Since lookup is lockfree, the deletion 1138 * Since lookup is lockfree, the deletion
1163 * must be visible to another weakly ordered CPU before 1139 * must be visible to another weakly ordered CPU before
1164 * the insertion at the start of the hash chain. 1140 * the insertion at the start of the hash chain.
1165 */ 1141 */
1166 rcu_assign_pointer(rth->u.dst.rt_next, 1142 rcu_assign_pointer(rth->dst.rt_next,
1167 rt_hash_table[hash].chain); 1143 rt_hash_table[hash].chain);
1168 /* 1144 /*
1169 * Since lookup is lockfree, the update writes 1145 * Since lookup is lockfree, the update writes
@@ -1171,18 +1147,18 @@ restart:
1171 */ 1147 */
1172 rcu_assign_pointer(rt_hash_table[hash].chain, rth); 1148 rcu_assign_pointer(rt_hash_table[hash].chain, rth);
1173 1149
1174 dst_use(&rth->u.dst, now); 1150 dst_use(&rth->dst, now);
1175 spin_unlock_bh(rt_hash_lock_addr(hash)); 1151 spin_unlock_bh(rt_hash_lock_addr(hash));
1176 1152
1177 rt_drop(rt); 1153 rt_drop(rt);
1178 if (rp) 1154 if (rp)
1179 *rp = rth; 1155 *rp = rth;
1180 else 1156 else
1181 skb_dst_set(skb, &rth->u.dst); 1157 skb_dst_set(skb, &rth->dst);
1182 return 0; 1158 return 0;
1183 } 1159 }
1184 1160
1185 if (!atomic_read(&rth->u.dst.__refcnt)) { 1161 if (!atomic_read(&rth->dst.__refcnt)) {
1186 u32 score = rt_score(rth); 1162 u32 score = rt_score(rth);
1187 1163
1188 if (score <= min_score) { 1164 if (score <= min_score) {
@@ -1194,7 +1170,7 @@ restart:
1194 1170
1195 chain_length++; 1171 chain_length++;
1196 1172
1197 rthp = &rth->u.dst.rt_next; 1173 rthp = &rth->dst.rt_next;
1198 } 1174 }
1199 1175
1200 if (cand) { 1176 if (cand) {
@@ -1205,17 +1181,17 @@ restart:
1205 * only 2 entries per bucket. We will see. 1181 * only 2 entries per bucket. We will see.
1206 */ 1182 */
1207 if (chain_length > ip_rt_gc_elasticity) { 1183 if (chain_length > ip_rt_gc_elasticity) {
1208 *candp = cand->u.dst.rt_next; 1184 *candp = cand->dst.rt_next;
1209 rt_free(cand); 1185 rt_free(cand);
1210 } 1186 }
1211 } else { 1187 } else {
1212 if (chain_length > rt_chain_length_max && 1188 if (chain_length > rt_chain_length_max &&
1213 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) { 1189 slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
1214 struct net *net = dev_net(rt->u.dst.dev); 1190 struct net *net = dev_net(rt->dst.dev);
1215 int num = ++net->ipv4.current_rt_cache_rebuild_count; 1191 int num = ++net->ipv4.current_rt_cache_rebuild_count;
1216 if (!rt_caching(net)) { 1192 if (!rt_caching(net)) {
1217 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", 1193 printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
1218 rt->u.dst.dev->name, num); 1194 rt->dst.dev->name, num);
1219 } 1195 }
1220 rt_emergency_hash_rebuild(net); 1196 rt_emergency_hash_rebuild(net);
1221 spin_unlock_bh(rt_hash_lock_addr(hash)); 1197 spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1230,7 +1206,7 @@ restart:
1230 route or unicast forwarding path. 1206 route or unicast forwarding path.
1231 */ 1207 */
1232 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1208 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1233 int err = arp_bind_neighbour(&rt->u.dst); 1209 int err = arp_bind_neighbour(&rt->dst);
1234 if (err) { 1210 if (err) {
1235 spin_unlock_bh(rt_hash_lock_addr(hash)); 1211 spin_unlock_bh(rt_hash_lock_addr(hash));
1236 1212
@@ -1261,14 +1237,14 @@ restart:
1261 } 1237 }
1262 } 1238 }
1263 1239
1264 rt->u.dst.rt_next = rt_hash_table[hash].chain; 1240 rt->dst.rt_next = rt_hash_table[hash].chain;
1265 1241
1266#if RT_CACHE_DEBUG >= 2 1242#if RT_CACHE_DEBUG >= 2
1267 if (rt->u.dst.rt_next) { 1243 if (rt->dst.rt_next) {
1268 struct rtable *trt; 1244 struct rtable *trt;
1269 printk(KERN_DEBUG "rt_cache @%02x: %pI4", 1245 printk(KERN_DEBUG "rt_cache @%02x: %pI4",
1270 hash, &rt->rt_dst); 1246 hash, &rt->rt_dst);
1271 for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) 1247 for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
1272 printk(" . %pI4", &trt->rt_dst); 1248 printk(" . %pI4", &trt->rt_dst);
1273 printk("\n"); 1249 printk("\n");
1274 } 1250 }
@@ -1286,7 +1262,7 @@ skip_hashing:
1286 if (rp) 1262 if (rp)
1287 *rp = rt; 1263 *rp = rt;
1288 else 1264 else
1289 skb_dst_set(skb, &rt->u.dst); 1265 skb_dst_set(skb, &rt->dst);
1290 return 0; 1266 return 0;
1291} 1267}
1292 1268
@@ -1348,6 +1324,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
1348 1324
1349 ip_select_fb_ident(iph); 1325 ip_select_fb_ident(iph);
1350} 1326}
1327EXPORT_SYMBOL(__ip_select_ident);
1351 1328
1352static void rt_del(unsigned hash, struct rtable *rt) 1329static void rt_del(unsigned hash, struct rtable *rt)
1353{ 1330{
@@ -1358,20 +1335,21 @@ static void rt_del(unsigned hash, struct rtable *rt)
1358 ip_rt_put(rt); 1335 ip_rt_put(rt);
1359 while ((aux = *rthp) != NULL) { 1336 while ((aux = *rthp) != NULL) {
1360 if (aux == rt || rt_is_expired(aux)) { 1337 if (aux == rt || rt_is_expired(aux)) {
1361 *rthp = aux->u.dst.rt_next; 1338 *rthp = aux->dst.rt_next;
1362 rt_free(aux); 1339 rt_free(aux);
1363 continue; 1340 continue;
1364 } 1341 }
1365 rthp = &aux->u.dst.rt_next; 1342 rthp = &aux->dst.rt_next;
1366 } 1343 }
1367 spin_unlock_bh(rt_hash_lock_addr(hash)); 1344 spin_unlock_bh(rt_hash_lock_addr(hash));
1368} 1345}
1369 1346
1347/* called in rcu_read_lock() section */
1370void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 1348void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1371 __be32 saddr, struct net_device *dev) 1349 __be32 saddr, struct net_device *dev)
1372{ 1350{
1373 int i, k; 1351 int i, k;
1374 struct in_device *in_dev = in_dev_get(dev); 1352 struct in_device *in_dev = __in_dev_get_rcu(dev);
1375 struct rtable *rth, **rthp; 1353 struct rtable *rth, **rthp;
1376 __be32 skeys[2] = { saddr, 0 }; 1354 __be32 skeys[2] = { saddr, 0 };
1377 int ikeys[2] = { dev->ifindex, 0 }; 1355 int ikeys[2] = { dev->ifindex, 0 };
@@ -1407,7 +1385,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1407 1385
1408 rthp=&rt_hash_table[hash].chain; 1386 rthp=&rt_hash_table[hash].chain;
1409 1387
1410 rcu_read_lock();
1411 while ((rth = rcu_dereference(*rthp)) != NULL) { 1388 while ((rth = rcu_dereference(*rthp)) != NULL) {
1412 struct rtable *rt; 1389 struct rtable *rt;
1413 1390
@@ -1416,44 +1393,42 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1416 rth->fl.oif != ikeys[k] || 1393 rth->fl.oif != ikeys[k] ||
1417 rth->fl.iif != 0 || 1394 rth->fl.iif != 0 ||
1418 rt_is_expired(rth) || 1395 rt_is_expired(rth) ||
1419 !net_eq(dev_net(rth->u.dst.dev), net)) { 1396 !net_eq(dev_net(rth->dst.dev), net)) {
1420 rthp = &rth->u.dst.rt_next; 1397 rthp = &rth->dst.rt_next;
1421 continue; 1398 continue;
1422 } 1399 }
1423 1400
1424 if (rth->rt_dst != daddr || 1401 if (rth->rt_dst != daddr ||
1425 rth->rt_src != saddr || 1402 rth->rt_src != saddr ||
1426 rth->u.dst.error || 1403 rth->dst.error ||
1427 rth->rt_gateway != old_gw || 1404 rth->rt_gateway != old_gw ||
1428 rth->u.dst.dev != dev) 1405 rth->dst.dev != dev)
1429 break; 1406 break;
1430 1407
1431 dst_hold(&rth->u.dst); 1408 dst_hold(&rth->dst);
1432 rcu_read_unlock();
1433 1409
1434 rt = dst_alloc(&ipv4_dst_ops); 1410 rt = dst_alloc(&ipv4_dst_ops);
1435 if (rt == NULL) { 1411 if (rt == NULL) {
1436 ip_rt_put(rth); 1412 ip_rt_put(rth);
1437 in_dev_put(in_dev);
1438 return; 1413 return;
1439 } 1414 }
1440 1415
1441 /* Copy all the information. */ 1416 /* Copy all the information. */
1442 *rt = *rth; 1417 *rt = *rth;
1443 rt->u.dst.__use = 1; 1418 rt->dst.__use = 1;
1444 atomic_set(&rt->u.dst.__refcnt, 1); 1419 atomic_set(&rt->dst.__refcnt, 1);
1445 rt->u.dst.child = NULL; 1420 rt->dst.child = NULL;
1446 if (rt->u.dst.dev) 1421 if (rt->dst.dev)
1447 dev_hold(rt->u.dst.dev); 1422 dev_hold(rt->dst.dev);
1448 if (rt->idev) 1423 if (rt->idev)
1449 in_dev_hold(rt->idev); 1424 in_dev_hold(rt->idev);
1450 rt->u.dst.obsolete = -1; 1425 rt->dst.obsolete = -1;
1451 rt->u.dst.lastuse = jiffies; 1426 rt->dst.lastuse = jiffies;
1452 rt->u.dst.path = &rt->u.dst; 1427 rt->dst.path = &rt->dst;
1453 rt->u.dst.neighbour = NULL; 1428 rt->dst.neighbour = NULL;
1454 rt->u.dst.hh = NULL; 1429 rt->dst.hh = NULL;
1455#ifdef CONFIG_XFRM 1430#ifdef CONFIG_XFRM
1456 rt->u.dst.xfrm = NULL; 1431 rt->dst.xfrm = NULL;
1457#endif 1432#endif
1458 rt->rt_genid = rt_genid(net); 1433 rt->rt_genid = rt_genid(net);
1459 rt->rt_flags |= RTCF_REDIRECTED; 1434 rt->rt_flags |= RTCF_REDIRECTED;
@@ -1462,23 +1437,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1462 rt->rt_gateway = new_gw; 1437 rt->rt_gateway = new_gw;
1463 1438
1464 /* Redirect received -> path was valid */ 1439 /* Redirect received -> path was valid */
1465 dst_confirm(&rth->u.dst); 1440 dst_confirm(&rth->dst);
1466 1441
1467 if (rt->peer) 1442 if (rt->peer)
1468 atomic_inc(&rt->peer->refcnt); 1443 atomic_inc(&rt->peer->refcnt);
1469 1444
1470 if (arp_bind_neighbour(&rt->u.dst) || 1445 if (arp_bind_neighbour(&rt->dst) ||
1471 !(rt->u.dst.neighbour->nud_state & 1446 !(rt->dst.neighbour->nud_state &
1472 NUD_VALID)) { 1447 NUD_VALID)) {
1473 if (rt->u.dst.neighbour) 1448 if (rt->dst.neighbour)
1474 neigh_event_send(rt->u.dst.neighbour, NULL); 1449 neigh_event_send(rt->dst.neighbour, NULL);
1475 ip_rt_put(rth); 1450 ip_rt_put(rth);
1476 rt_drop(rt); 1451 rt_drop(rt);
1477 goto do_next; 1452 goto do_next;
1478 } 1453 }
1479 1454
1480 netevent.old = &rth->u.dst; 1455 netevent.old = &rth->dst;
1481 netevent.new = &rt->u.dst; 1456 netevent.new = &rt->dst;
1482 call_netevent_notifiers(NETEVENT_REDIRECT, 1457 call_netevent_notifiers(NETEVENT_REDIRECT,
1483 &netevent); 1458 &netevent);
1484 1459
@@ -1487,12 +1462,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
1487 ip_rt_put(rt); 1462 ip_rt_put(rt);
1488 goto do_next; 1463 goto do_next;
1489 } 1464 }
1490 rcu_read_unlock();
1491 do_next: 1465 do_next:
1492 ; 1466 ;
1493 } 1467 }
1494 } 1468 }
1495 in_dev_put(in_dev);
1496 return; 1469 return;
1497 1470
1498reject_redirect: 1471reject_redirect:
@@ -1503,7 +1476,7 @@ reject_redirect:
1503 &old_gw, dev->name, &new_gw, 1476 &old_gw, dev->name, &new_gw,
1504 &saddr, &daddr); 1477 &saddr, &daddr);
1505#endif 1478#endif
1506 in_dev_put(in_dev); 1479 ;
1507} 1480}
1508 1481
1509static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 1482static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
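
ip_rt_redirect() is now annotated "called in rcu_read_lock() section": its caller holds the RCU read lock for the whole call, so the function swaps the refcounted in_dev_get()/in_dev_put() pair for __in_dev_get_rcu(), drops its own rcu_read_lock()/rcu_read_unlock() calls, and the in_dev_put() on the reject_redirect path degenerates to a bare ';' (kept only so the label still owns a statement when CONFIG_IP_ROUTE_VERBOSE is off). The resulting pattern:

    #include <linux/inetdevice.h>
    #include <linux/rcupdate.h>

    static void demo_inspect(struct in_device *in_dev);    /* hypothetical */

    static void demo_use_in_device(struct net_device *dev)
    {
        struct in_device *in_dev;

        rcu_read_lock();                 /* the patched callers hold this */
        in_dev = __in_dev_get_rcu(dev);  /* borrowed pointer, no refcount */
        if (in_dev)
            demo_inspect(in_dev);        /* must complete before unlock */
        rcu_read_unlock();
    }
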
@@ -1516,8 +1489,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
1516 ip_rt_put(rt); 1489 ip_rt_put(rt);
1517 ret = NULL; 1490 ret = NULL;
1518 } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1491 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
1519 (rt->u.dst.expires && 1492 (rt->dst.expires &&
1520 time_after_eq(jiffies, rt->u.dst.expires))) { 1493 time_after_eq(jiffies, rt->dst.expires))) {
1521 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src, 1494 unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
1522 rt->fl.oif, 1495 rt->fl.oif,
1523 rt_genid(dev_net(dst->dev))); 1496 rt_genid(dev_net(dst->dev)));
@@ -1555,7 +1528,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1555 int log_martians; 1528 int log_martians;
1556 1529
1557 rcu_read_lock(); 1530 rcu_read_lock();
1558 in_dev = __in_dev_get_rcu(rt->u.dst.dev); 1531 in_dev = __in_dev_get_rcu(rt->dst.dev);
1559 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { 1532 if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
1560 rcu_read_unlock(); 1533 rcu_read_unlock();
1561 return; 1534 return;
@@ -1566,30 +1539,30 @@ void ip_rt_send_redirect(struct sk_buff *skb)
1566 /* No redirected packets during ip_rt_redirect_silence; 1539 /* No redirected packets during ip_rt_redirect_silence;
1567 * reset the algorithm. 1540 * reset the algorithm.
1568 */ 1541 */
1569 if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence)) 1542 if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
1570 rt->u.dst.rate_tokens = 0; 1543 rt->dst.rate_tokens = 0;
1571 1544
1572 /* Too many ignored redirects; do not send anything 1545 /* Too many ignored redirects; do not send anything
1573 * set u.dst.rate_last to the last seen redirected packet. 1546 * set dst.rate_last to the last seen redirected packet.
1574 */ 1547 */
1575 if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) { 1548 if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
1576 rt->u.dst.rate_last = jiffies; 1549 rt->dst.rate_last = jiffies;
1577 return; 1550 return;
1578 } 1551 }
1579 1552
1580 /* Check for load limit; set rate_last to the latest sent 1553 /* Check for load limit; set rate_last to the latest sent
1581 * redirect. 1554 * redirect.
1582 */ 1555 */
1583 if (rt->u.dst.rate_tokens == 0 || 1556 if (rt->dst.rate_tokens == 0 ||
1584 time_after(jiffies, 1557 time_after(jiffies,
1585 (rt->u.dst.rate_last + 1558 (rt->dst.rate_last +
1586 (ip_rt_redirect_load << rt->u.dst.rate_tokens)))) { 1559 (ip_rt_redirect_load << rt->dst.rate_tokens)))) {
1587 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1560 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
1588 rt->u.dst.rate_last = jiffies; 1561 rt->dst.rate_last = jiffies;
1589 ++rt->u.dst.rate_tokens; 1562 ++rt->dst.rate_tokens;
1590#ifdef CONFIG_IP_ROUTE_VERBOSE 1563#ifdef CONFIG_IP_ROUTE_VERBOSE
1591 if (log_martians && 1564 if (log_martians &&
1592 rt->u.dst.rate_tokens == ip_rt_redirect_number && 1565 rt->dst.rate_tokens == ip_rt_redirect_number &&
1593 net_ratelimit()) 1566 net_ratelimit())
1594 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1567 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
1595 &rt->rt_src, rt->rt_iif, 1568 &rt->rt_src, rt->rt_iif,
@@ -1604,7 +1577,7 @@ static int ip_error(struct sk_buff *skb)
1604 unsigned long now; 1577 unsigned long now;
1605 int code; 1578 int code;
1606 1579
1607 switch (rt->u.dst.error) { 1580 switch (rt->dst.error) {
1608 case EINVAL: 1581 case EINVAL:
1609 default: 1582 default:
1610 goto out; 1583 goto out;
@@ -1613,7 +1586,7 @@ static int ip_error(struct sk_buff *skb)
1613 break; 1586 break;
1614 case ENETUNREACH: 1587 case ENETUNREACH:
1615 code = ICMP_NET_UNREACH; 1588 code = ICMP_NET_UNREACH;
1616 IP_INC_STATS_BH(dev_net(rt->u.dst.dev), 1589 IP_INC_STATS_BH(dev_net(rt->dst.dev),
1617 IPSTATS_MIB_INNOROUTES); 1590 IPSTATS_MIB_INNOROUTES);
1618 break; 1591 break;
1619 case EACCES: 1592 case EACCES:
@@ -1622,12 +1595,12 @@ static int ip_error(struct sk_buff *skb)
1622 } 1595 }
1623 1596
1624 now = jiffies; 1597 now = jiffies;
1625 rt->u.dst.rate_tokens += now - rt->u.dst.rate_last; 1598 rt->dst.rate_tokens += now - rt->dst.rate_last;
1626 if (rt->u.dst.rate_tokens > ip_rt_error_burst) 1599 if (rt->dst.rate_tokens > ip_rt_error_burst)
1627 rt->u.dst.rate_tokens = ip_rt_error_burst; 1600 rt->dst.rate_tokens = ip_rt_error_burst;
1628 rt->u.dst.rate_last = now; 1601 rt->dst.rate_last = now;
1629 if (rt->u.dst.rate_tokens >= ip_rt_error_cost) { 1602 if (rt->dst.rate_tokens >= ip_rt_error_cost) {
1630 rt->u.dst.rate_tokens -= ip_rt_error_cost; 1603 rt->dst.rate_tokens -= ip_rt_error_cost;
1631 icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 1604 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
1632 } 1605 }
1633 1606
@@ -1672,7 +1645,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1672 1645
1673 rcu_read_lock(); 1646 rcu_read_lock();
1674 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 1647 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
1675 rth = rcu_dereference(rth->u.dst.rt_next)) { 1648 rth = rcu_dereference(rth->dst.rt_next)) {
1676 unsigned short mtu = new_mtu; 1649 unsigned short mtu = new_mtu;
1677 1650
1678 if (rth->fl.fl4_dst != daddr || 1651 if (rth->fl.fl4_dst != daddr ||
@@ -1681,8 +1654,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1681 rth->rt_src != iph->saddr || 1654 rth->rt_src != iph->saddr ||
1682 rth->fl.oif != ikeys[k] || 1655 rth->fl.oif != ikeys[k] ||
1683 rth->fl.iif != 0 || 1656 rth->fl.iif != 0 ||
1684 dst_metric_locked(&rth->u.dst, RTAX_MTU) || 1657 dst_metric_locked(&rth->dst, RTAX_MTU) ||
1685 !net_eq(dev_net(rth->u.dst.dev), net) || 1658 !net_eq(dev_net(rth->dst.dev), net) ||
1686 rt_is_expired(rth)) 1659 rt_is_expired(rth))
1687 continue; 1660 continue;
1688 1661
@@ -1690,22 +1663,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
1690 1663
1691 /* BSD 4.2 compatibility hack :-( */ 1664 /* BSD 4.2 compatibility hack :-( */
1692 if (mtu == 0 && 1665 if (mtu == 0 &&
1693 old_mtu >= dst_mtu(&rth->u.dst) && 1666 old_mtu >= dst_mtu(&rth->dst) &&
1694 old_mtu >= 68 + (iph->ihl << 2)) 1667 old_mtu >= 68 + (iph->ihl << 2))
1695 old_mtu -= iph->ihl << 2; 1668 old_mtu -= iph->ihl << 2;
1696 1669
1697 mtu = guess_mtu(old_mtu); 1670 mtu = guess_mtu(old_mtu);
1698 } 1671 }
1699 if (mtu <= dst_mtu(&rth->u.dst)) { 1672 if (mtu <= dst_mtu(&rth->dst)) {
1700 if (mtu < dst_mtu(&rth->u.dst)) { 1673 if (mtu < dst_mtu(&rth->dst)) {
1701 dst_confirm(&rth->u.dst); 1674 dst_confirm(&rth->dst);
1702 if (mtu < ip_rt_min_pmtu) { 1675 if (mtu < ip_rt_min_pmtu) {
1703 mtu = ip_rt_min_pmtu; 1676 mtu = ip_rt_min_pmtu;
1704 rth->u.dst.metrics[RTAX_LOCK-1] |= 1677 rth->dst.metrics[RTAX_LOCK-1] |=
1705 (1 << RTAX_MTU); 1678 (1 << RTAX_MTU);
1706 } 1679 }
1707 rth->u.dst.metrics[RTAX_MTU-1] = mtu; 1680 rth->dst.metrics[RTAX_MTU-1] = mtu;
1708 dst_set_expires(&rth->u.dst, 1681 dst_set_expires(&rth->dst,
1709 ip_rt_mtu_expires); 1682 ip_rt_mtu_expires);
1710 } 1683 }
1711 est_mtu = mtu; 1684 est_mtu = mtu;
@@ -1778,7 +1751,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
1778 1751
1779 rt = skb_rtable(skb); 1752 rt = skb_rtable(skb);
1780 if (rt) 1753 if (rt)
1781 dst_set_expires(&rt->u.dst, 0); 1754 dst_set_expires(&rt->dst, 0);
1782} 1755}
1783 1756
1784static int ip_rt_bug(struct sk_buff *skb) 1757static int ip_rt_bug(struct sk_buff *skb)
@@ -1806,11 +1779,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1806 1779
1807 if (rt->fl.iif == 0) 1780 if (rt->fl.iif == 0)
1808 src = rt->rt_src; 1781 src = rt->rt_src;
1809 else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) { 1782 else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
1810 src = FIB_RES_PREFSRC(res); 1783 src = FIB_RES_PREFSRC(res);
1811 fib_res_put(&res); 1784 fib_res_put(&res);
1812 } else 1785 } else
1813 src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, 1786 src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
1814 RT_SCOPE_UNIVERSE); 1787 RT_SCOPE_UNIVERSE);
1815 memcpy(addr, &src, 4); 1788 memcpy(addr, &src, 4);
1816} 1789}
@@ -1818,10 +1791,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
1818#ifdef CONFIG_NET_CLS_ROUTE 1791#ifdef CONFIG_NET_CLS_ROUTE
1819static void set_class_tag(struct rtable *rt, u32 tag) 1792static void set_class_tag(struct rtable *rt, u32 tag)
1820{ 1793{
1821 if (!(rt->u.dst.tclassid & 0xFFFF)) 1794 if (!(rt->dst.tclassid & 0xFFFF))
1822 rt->u.dst.tclassid |= tag & 0xFFFF; 1795 rt->dst.tclassid |= tag & 0xFFFF;
1823 if (!(rt->u.dst.tclassid & 0xFFFF0000)) 1796 if (!(rt->dst.tclassid & 0xFFFF0000))
1824 rt->u.dst.tclassid |= tag & 0xFFFF0000; 1797 rt->dst.tclassid |= tag & 0xFFFF0000;
1825} 1798}
1826#endif 1799#endif
1827 1800
@@ -1833,30 +1806,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1833 if (FIB_RES_GW(*res) && 1806 if (FIB_RES_GW(*res) &&
1834 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1807 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
1835 rt->rt_gateway = FIB_RES_GW(*res); 1808 rt->rt_gateway = FIB_RES_GW(*res);
1836 memcpy(rt->u.dst.metrics, fi->fib_metrics, 1809 memcpy(rt->dst.metrics, fi->fib_metrics,
1837 sizeof(rt->u.dst.metrics)); 1810 sizeof(rt->dst.metrics));
1838 if (fi->fib_mtu == 0) { 1811 if (fi->fib_mtu == 0) {
1839 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; 1812 rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
1840 if (dst_metric_locked(&rt->u.dst, RTAX_MTU) && 1813 if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
1841 rt->rt_gateway != rt->rt_dst && 1814 rt->rt_gateway != rt->rt_dst &&
1842 rt->u.dst.dev->mtu > 576) 1815 rt->dst.dev->mtu > 576)
1843 rt->u.dst.metrics[RTAX_MTU-1] = 576; 1816 rt->dst.metrics[RTAX_MTU-1] = 576;
1844 } 1817 }
1845#ifdef CONFIG_NET_CLS_ROUTE 1818#ifdef CONFIG_NET_CLS_ROUTE
1846 rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid; 1819 rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
1847#endif 1820#endif
1848 } else 1821 } else
1849 rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; 1822 rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
1850 1823
1851 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) 1824 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1852 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; 1825 rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
1853 if (dst_mtu(&rt->u.dst) > IP_MAX_MTU) 1826 if (dst_mtu(&rt->dst) > IP_MAX_MTU)
1854 rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; 1827 rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
1855 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0) 1828 if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
1856 rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40, 1829 rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
1857 ip_rt_min_advmss); 1830 ip_rt_min_advmss);
1858 if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40) 1831 if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
1859 rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; 1832 rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
1860 1833
1861#ifdef CONFIG_NET_CLS_ROUTE 1834#ifdef CONFIG_NET_CLS_ROUTE
1862#ifdef CONFIG_IP_MULTIPLE_TABLES 1835#ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1867,14 +1840,16 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
1867 rt->rt_type = res->type; 1840 rt->rt_type = res->type;
1868} 1841}
1869 1842
1843/* called in rcu_read_lock() section */
1870static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, 1844static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1871 u8 tos, struct net_device *dev, int our) 1845 u8 tos, struct net_device *dev, int our)
1872{ 1846{
1873 unsigned hash; 1847 unsigned int hash;
1874 struct rtable *rth; 1848 struct rtable *rth;
1875 __be32 spec_dst; 1849 __be32 spec_dst;
1876 struct in_device *in_dev = in_dev_get(dev); 1850 struct in_device *in_dev = __in_dev_get_rcu(dev);
1877 u32 itag = 0; 1851 u32 itag = 0;
1852 int err;
1878 1853
1879 /* Primary sanity checks. */ 1854 /* Primary sanity checks. */
1880 1855
@@ -1889,21 +1864,23 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1889 if (!ipv4_is_local_multicast(daddr)) 1864 if (!ipv4_is_local_multicast(daddr))
1890 goto e_inval; 1865 goto e_inval;
1891 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); 1866 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1892 } else if (fib_validate_source(saddr, 0, tos, 0, 1867 } else {
1893 dev, &spec_dst, &itag, 0) < 0) 1868 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
1894 goto e_inval; 1869 &itag, 0);
1895 1870 if (err < 0)
1871 goto e_err;
1872 }
1896 rth = dst_alloc(&ipv4_dst_ops); 1873 rth = dst_alloc(&ipv4_dst_ops);
1897 if (!rth) 1874 if (!rth)
1898 goto e_nobufs; 1875 goto e_nobufs;
1899 1876
1900 rth->u.dst.output = ip_rt_bug; 1877 rth->dst.output = ip_rt_bug;
1901 rth->u.dst.obsolete = -1; 1878 rth->dst.obsolete = -1;
1902 1879
1903 atomic_set(&rth->u.dst.__refcnt, 1); 1880 atomic_set(&rth->dst.__refcnt, 1);
1904 rth->u.dst.flags= DST_HOST; 1881 rth->dst.flags= DST_HOST;
1905 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 1882 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
1906 rth->u.dst.flags |= DST_NOPOLICY; 1883 rth->dst.flags |= DST_NOPOLICY;
1907 rth->fl.fl4_dst = daddr; 1884 rth->fl.fl4_dst = daddr;
1908 rth->rt_dst = daddr; 1885 rth->rt_dst = daddr;
1909 rth->fl.fl4_tos = tos; 1886 rth->fl.fl4_tos = tos;
@@ -1911,13 +1888,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1911 rth->fl.fl4_src = saddr; 1888 rth->fl.fl4_src = saddr;
1912 rth->rt_src = saddr; 1889 rth->rt_src = saddr;
1913#ifdef CONFIG_NET_CLS_ROUTE 1890#ifdef CONFIG_NET_CLS_ROUTE
1914 rth->u.dst.tclassid = itag; 1891 rth->dst.tclassid = itag;
1915#endif 1892#endif
1916 rth->rt_iif = 1893 rth->rt_iif =
1917 rth->fl.iif = dev->ifindex; 1894 rth->fl.iif = dev->ifindex;
1918 rth->u.dst.dev = init_net.loopback_dev; 1895 rth->dst.dev = init_net.loopback_dev;
1919 dev_hold(rth->u.dst.dev); 1896 dev_hold(rth->dst.dev);
1920 rth->idev = in_dev_get(rth->u.dst.dev); 1897 rth->idev = in_dev_get(rth->dst.dev);
1921 rth->fl.oif = 0; 1898 rth->fl.oif = 0;
1922 rth->rt_gateway = daddr; 1899 rth->rt_gateway = daddr;
1923 rth->rt_spec_dst= spec_dst; 1900 rth->rt_spec_dst= spec_dst;
@@ -1925,27 +1902,25 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1925 rth->rt_flags = RTCF_MULTICAST; 1902 rth->rt_flags = RTCF_MULTICAST;
1926 rth->rt_type = RTN_MULTICAST; 1903 rth->rt_type = RTN_MULTICAST;
1927 if (our) { 1904 if (our) {
1928 rth->u.dst.input= ip_local_deliver; 1905 rth->dst.input= ip_local_deliver;
1929 rth->rt_flags |= RTCF_LOCAL; 1906 rth->rt_flags |= RTCF_LOCAL;
1930 } 1907 }
1931 1908
1932#ifdef CONFIG_IP_MROUTE 1909#ifdef CONFIG_IP_MROUTE
1933 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) 1910 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
1934 rth->u.dst.input = ip_mr_input; 1911 rth->dst.input = ip_mr_input;
1935#endif 1912#endif
1936 RT_CACHE_STAT_INC(in_slow_mc); 1913 RT_CACHE_STAT_INC(in_slow_mc);
1937 1914
1938 in_dev_put(in_dev);
1939 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev))); 1915 hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
1940 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex); 1916 return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
1941 1917
1942e_nobufs: 1918e_nobufs:
1943 in_dev_put(in_dev);
1944 return -ENOBUFS; 1919 return -ENOBUFS;
1945
1946e_inval: 1920e_inval:
1947 in_dev_put(in_dev);
1948 return -EINVAL; 1921 return -EINVAL;
1922e_err:
1923 return err;
1949} 1924}
1950 1925
1951 1926
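
ip_route_input_mc() gets the same RCU treatment (in_dev comes from __in_dev_get_rcu() and the in_dev_put() calls on every exit path disappear), and it stops collapsing fib_validate_source() failures into -EINVAL: the restructured else branch stores the return value, and the new e_err label propagates it unchanged. ip_route_input_slow() below does the same through its martian_source_keep_err label, which is also why __mkroute_input no longer overwrites err with -EINVAL before its cleanup jump. The error-propagation shape, reduced to a sketch:

    struct demo_ctx;
    static int demo_validate_source(struct demo_ctx *ctx);    /* hypothetical */

    static int demo_input(struct demo_ctx *ctx)
    {
        int err = demo_validate_source(ctx);

        if (err < 0)
            goto e_err;    /* keep the callee's errno, not -EINVAL */
        return 0;
    e_err:
        return err;
    }
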
@@ -1979,22 +1954,22 @@ static void ip_handle_martian_source(struct net_device *dev,
1979#endif 1954#endif
1980} 1955}
1981 1956
1957/* called in rcu_read_lock() section */
1982static int __mkroute_input(struct sk_buff *skb, 1958static int __mkroute_input(struct sk_buff *skb,
1983 struct fib_result *res, 1959 struct fib_result *res,
1984 struct in_device *in_dev, 1960 struct in_device *in_dev,
1985 __be32 daddr, __be32 saddr, u32 tos, 1961 __be32 daddr, __be32 saddr, u32 tos,
1986 struct rtable **result) 1962 struct rtable **result)
1987{ 1963{
1988
1989 struct rtable *rth; 1964 struct rtable *rth;
1990 int err; 1965 int err;
1991 struct in_device *out_dev; 1966 struct in_device *out_dev;
1992 unsigned flags = 0; 1967 unsigned int flags = 0;
1993 __be32 spec_dst; 1968 __be32 spec_dst;
1994 u32 itag; 1969 u32 itag;
1995 1970
1996 /* get a working reference to the output device */ 1971 /* get a working reference to the output device */
1997 out_dev = in_dev_get(FIB_RES_DEV(*res)); 1972 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
1998 if (out_dev == NULL) { 1973 if (out_dev == NULL) {
1999 if (net_ratelimit()) 1974 if (net_ratelimit())
2000 printk(KERN_CRIT "Bug in ip_route_input" \ 1975 printk(KERN_CRIT "Bug in ip_route_input" \
@@ -2009,7 +1984,6 @@ static int __mkroute_input(struct sk_buff *skb,
2009 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, 1984 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
2010 saddr); 1985 saddr);
2011 1986
2012 err = -EINVAL;
2013 goto cleanup; 1987 goto cleanup;
2014 } 1988 }
2015 1989
@@ -2043,12 +2017,12 @@ static int __mkroute_input(struct sk_buff *skb,
2043 goto cleanup; 2017 goto cleanup;
2044 } 2018 }
2045 2019
2046 atomic_set(&rth->u.dst.__refcnt, 1); 2020 atomic_set(&rth->dst.__refcnt, 1);
2047 rth->u.dst.flags= DST_HOST; 2021 rth->dst.flags= DST_HOST;
2048 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2022 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2049 rth->u.dst.flags |= DST_NOPOLICY; 2023 rth->dst.flags |= DST_NOPOLICY;
2050 if (IN_DEV_CONF_GET(out_dev, NOXFRM)) 2024 if (IN_DEV_CONF_GET(out_dev, NOXFRM))
2051 rth->u.dst.flags |= DST_NOXFRM; 2025 rth->dst.flags |= DST_NOXFRM;
2052 rth->fl.fl4_dst = daddr; 2026 rth->fl.fl4_dst = daddr;
2053 rth->rt_dst = daddr; 2027 rth->rt_dst = daddr;
2054 rth->fl.fl4_tos = tos; 2028 rth->fl.fl4_tos = tos;
@@ -2058,16 +2032,16 @@ static int __mkroute_input(struct sk_buff *skb,
2058 rth->rt_gateway = daddr; 2032 rth->rt_gateway = daddr;
2059 rth->rt_iif = 2033 rth->rt_iif =
2060 rth->fl.iif = in_dev->dev->ifindex; 2034 rth->fl.iif = in_dev->dev->ifindex;
2061 rth->u.dst.dev = (out_dev)->dev; 2035 rth->dst.dev = (out_dev)->dev;
2062 dev_hold(rth->u.dst.dev); 2036 dev_hold(rth->dst.dev);
2063 rth->idev = in_dev_get(rth->u.dst.dev); 2037 rth->idev = in_dev_get(rth->dst.dev);
2064 rth->fl.oif = 0; 2038 rth->fl.oif = 0;
2065 rth->rt_spec_dst= spec_dst; 2039 rth->rt_spec_dst= spec_dst;
2066 2040
2067 rth->u.dst.obsolete = -1; 2041 rth->dst.obsolete = -1;
2068 rth->u.dst.input = ip_forward; 2042 rth->dst.input = ip_forward;
2069 rth->u.dst.output = ip_output; 2043 rth->dst.output = ip_output;
2070 rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev)); 2044 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
2071 2045
2072 rt_set_nexthop(rth, res, itag); 2046 rt_set_nexthop(rth, res, itag);
2073 2047
@@ -2076,8 +2050,6 @@ static int __mkroute_input(struct sk_buff *skb,
2076 *result = rth; 2050 *result = rth;
2077 err = 0; 2051 err = 0;
2078 cleanup: 2052 cleanup:
2079 /* release the working reference to the output device */
2080 in_dev_put(out_dev);
2081 return err; 2053 return err;
2082} 2054}
2083 2055
@@ -2103,7 +2075,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
2103 2075
2104 /* put it into the cache */ 2076 /* put it into the cache */
2105 hash = rt_hash(daddr, saddr, fl->iif, 2077 hash = rt_hash(daddr, saddr, fl->iif,
2106 rt_genid(dev_net(rth->u.dst.dev))); 2078 rt_genid(dev_net(rth->dst.dev)));
2107 return rt_intern_hash(hash, rth, NULL, skb, fl->iif); 2079 return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
2108} 2080}
2109 2081
@@ -2121,7 +2093,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2121 u8 tos, struct net_device *dev) 2093 u8 tos, struct net_device *dev)
2122{ 2094{
2123 struct fib_result res; 2095 struct fib_result res;
2124 struct in_device *in_dev = in_dev_get(dev); 2096 struct in_device *in_dev = __in_dev_get_rcu(dev);
2125 struct flowi fl = { .nl_u = { .ip4_u = 2097 struct flowi fl = { .nl_u = { .ip4_u =
2126 { .daddr = daddr, 2098 { .daddr = daddr,
2127 .saddr = saddr, 2099 .saddr = saddr,
@@ -2181,13 +2153,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2181 goto brd_input; 2153 goto brd_input;
2182 2154
2183 if (res.type == RTN_LOCAL) { 2155 if (res.type == RTN_LOCAL) {
2184 int result; 2156 err = fib_validate_source(saddr, daddr, tos,
2185 result = fib_validate_source(saddr, daddr, tos,
2186 net->loopback_dev->ifindex, 2157 net->loopback_dev->ifindex,
2187 dev, &spec_dst, &itag, skb->mark); 2158 dev, &spec_dst, &itag, skb->mark);
2188 if (result < 0) 2159 if (err < 0)
2189 goto martian_source; 2160 goto martian_source_keep_err;
2190 if (result) 2161 if (err)
2191 flags |= RTCF_DIRECTSRC; 2162 flags |= RTCF_DIRECTSRC;
2192 spec_dst = daddr; 2163 spec_dst = daddr;
2193 goto local_input; 2164 goto local_input;
@@ -2200,7 +2171,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2200 2171
2201 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); 2172 err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
2202done: 2173done:
2203 in_dev_put(in_dev);
2204 if (free_res) 2174 if (free_res)
2205 fib_res_put(&res); 2175 fib_res_put(&res);
2206out: return err; 2176out: return err;
@@ -2215,7 +2185,7 @@ brd_input:
2215 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst, 2185 err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
2216 &itag, skb->mark); 2186 &itag, skb->mark);
2217 if (err < 0) 2187 if (err < 0)
2218 goto martian_source; 2188 goto martian_source_keep_err;
2219 if (err) 2189 if (err)
2220 flags |= RTCF_DIRECTSRC; 2190 flags |= RTCF_DIRECTSRC;
2221 } 2191 }
@@ -2228,14 +2198,14 @@ local_input:
2228 if (!rth) 2198 if (!rth)
2229 goto e_nobufs; 2199 goto e_nobufs;
2230 2200
2231 rth->u.dst.output= ip_rt_bug; 2201 rth->dst.output= ip_rt_bug;
2232 rth->u.dst.obsolete = -1; 2202 rth->dst.obsolete = -1;
2233 rth->rt_genid = rt_genid(net); 2203 rth->rt_genid = rt_genid(net);
2234 2204
2235 atomic_set(&rth->u.dst.__refcnt, 1); 2205 atomic_set(&rth->dst.__refcnt, 1);
2236 rth->u.dst.flags= DST_HOST; 2206 rth->dst.flags= DST_HOST;
2237 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2207 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2238 rth->u.dst.flags |= DST_NOPOLICY; 2208 rth->dst.flags |= DST_NOPOLICY;
2239 rth->fl.fl4_dst = daddr; 2209 rth->fl.fl4_dst = daddr;
2240 rth->rt_dst = daddr; 2210 rth->rt_dst = daddr;
2241 rth->fl.fl4_tos = tos; 2211 rth->fl.fl4_tos = tos;
@@ -2243,20 +2213,20 @@ local_input:
2243 rth->fl.fl4_src = saddr; 2213 rth->fl.fl4_src = saddr;
2244 rth->rt_src = saddr; 2214 rth->rt_src = saddr;
2245#ifdef CONFIG_NET_CLS_ROUTE 2215#ifdef CONFIG_NET_CLS_ROUTE
2246 rth->u.dst.tclassid = itag; 2216 rth->dst.tclassid = itag;
2247#endif 2217#endif
2248 rth->rt_iif = 2218 rth->rt_iif =
2249 rth->fl.iif = dev->ifindex; 2219 rth->fl.iif = dev->ifindex;
2250 rth->u.dst.dev = net->loopback_dev; 2220 rth->dst.dev = net->loopback_dev;
2251 dev_hold(rth->u.dst.dev); 2221 dev_hold(rth->dst.dev);
2252 rth->idev = in_dev_get(rth->u.dst.dev); 2222 rth->idev = in_dev_get(rth->dst.dev);
2253 rth->rt_gateway = daddr; 2223 rth->rt_gateway = daddr;
2254 rth->rt_spec_dst= spec_dst; 2224 rth->rt_spec_dst= spec_dst;
2255 rth->u.dst.input= ip_local_deliver; 2225 rth->dst.input= ip_local_deliver;
2256 rth->rt_flags = flags|RTCF_LOCAL; 2226 rth->rt_flags = flags|RTCF_LOCAL;
2257 if (res.type == RTN_UNREACHABLE) { 2227 if (res.type == RTN_UNREACHABLE) {
2258 rth->u.dst.input= ip_error; 2228 rth->dst.input= ip_error;
2259 rth->u.dst.error= -err; 2229 rth->dst.error= -err;
2260 rth->rt_flags &= ~RTCF_LOCAL; 2230 rth->rt_flags &= ~RTCF_LOCAL;
2261 } 2231 }
2262 rth->rt_type = res.type; 2232 rth->rt_type = res.type;
@@ -2296,46 +2266,54 @@ e_nobufs:
2296 goto done; 2266 goto done;
2297 2267
2298martian_source: 2268martian_source:
2269 err = -EINVAL;
2270martian_source_keep_err:
2299 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); 2271 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
2300 goto e_inval; 2272 goto done;
2301} 2273}
2302 2274
2303int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, 2275int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
2304 u8 tos, struct net_device *dev) 2276 u8 tos, struct net_device *dev, bool noref)
2305{ 2277{
2306 struct rtable * rth; 2278 struct rtable * rth;
2307 unsigned hash; 2279 unsigned hash;
2308 int iif = dev->ifindex; 2280 int iif = dev->ifindex;
2309 struct net *net; 2281 struct net *net;
2282 int res;
2310 2283
2311 net = dev_net(dev); 2284 net = dev_net(dev);
2312 2285
2286 rcu_read_lock();
2287
2313 if (!rt_caching(net)) 2288 if (!rt_caching(net))
2314 goto skip_cache; 2289 goto skip_cache;
2315 2290
2316 tos &= IPTOS_RT_MASK; 2291 tos &= IPTOS_RT_MASK;
2317 hash = rt_hash(daddr, saddr, iif, rt_genid(net)); 2292 hash = rt_hash(daddr, saddr, iif, rt_genid(net));
2318 2293
2319 rcu_read_lock();
2320 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth; 2294 for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
2321 rth = rcu_dereference(rth->u.dst.rt_next)) { 2295 rth = rcu_dereference(rth->dst.rt_next)) {
2322 if (((rth->fl.fl4_dst ^ daddr) | 2296 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
2323 (rth->fl.fl4_src ^ saddr) | 2297 ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
2324 (rth->fl.iif ^ iif) | 2298 (rth->fl.iif ^ iif) |
2325 rth->fl.oif | 2299 rth->fl.oif |
2326 (rth->fl.fl4_tos ^ tos)) == 0 && 2300 (rth->fl.fl4_tos ^ tos)) == 0 &&
2327 rth->fl.mark == skb->mark && 2301 rth->fl.mark == skb->mark &&
2328 net_eq(dev_net(rth->u.dst.dev), net) && 2302 net_eq(dev_net(rth->dst.dev), net) &&
2329 !rt_is_expired(rth)) { 2303 !rt_is_expired(rth)) {
2330 dst_use(&rth->u.dst, jiffies); 2304 if (noref) {
2305 dst_use_noref(&rth->dst, jiffies);
2306 skb_dst_set_noref(skb, &rth->dst);
2307 } else {
2308 dst_use(&rth->dst, jiffies);
2309 skb_dst_set(skb, &rth->dst);
2310 }
2331 RT_CACHE_STAT_INC(in_hit); 2311 RT_CACHE_STAT_INC(in_hit);
2332 rcu_read_unlock(); 2312 rcu_read_unlock();
2333 skb_dst_set(skb, &rth->u.dst);
2334 return 0; 2313 return 0;
2335 } 2314 }
2336 RT_CACHE_STAT_INC(in_hlist_search); 2315 RT_CACHE_STAT_INC(in_hlist_search);
2337 } 2316 }
2338 rcu_read_unlock();
2339 2317
2340skip_cache: 2318skip_cache:
2341 /* Multicast recognition logic is moved from route cache to here. 2319 /* Multicast recognition logic is moved from route cache to here.
@@ -2350,12 +2328,11 @@ skip_cache:
2350 route cache entry is created eventually. 2328 route cache entry is created eventually.
2351 */ 2329 */
2352 if (ipv4_is_multicast(daddr)) { 2330 if (ipv4_is_multicast(daddr)) {
2353 struct in_device *in_dev; 2331 struct in_device *in_dev = __in_dev_get_rcu(dev);
2354 2332
2355 rcu_read_lock(); 2333 if (in_dev) {
2356 if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
2357 int our = ip_check_mc(in_dev, daddr, saddr, 2334 int our = ip_check_mc(in_dev, daddr, saddr,
2358 ip_hdr(skb)->protocol); 2335 ip_hdr(skb)->protocol);
2359 if (our 2336 if (our
2360#ifdef CONFIG_IP_MROUTE 2337#ifdef CONFIG_IP_MROUTE
2361 || 2338 ||
@@ -2363,16 +2340,20 @@ skip_cache:
2363 IN_DEV_MFORWARD(in_dev)) 2340 IN_DEV_MFORWARD(in_dev))
2364#endif 2341#endif
2365 ) { 2342 ) {
2343 int res = ip_route_input_mc(skb, daddr, saddr,
2344 tos, dev, our);
2366 rcu_read_unlock(); 2345 rcu_read_unlock();
2367 return ip_route_input_mc(skb, daddr, saddr, 2346 return res;
2368 tos, dev, our);
2369 } 2347 }
2370 } 2348 }
2371 rcu_read_unlock(); 2349 rcu_read_unlock();
2372 return -EINVAL; 2350 return -EINVAL;
2373 } 2351 }
2374 return ip_route_input_slow(skb, daddr, saddr, tos, dev); 2352 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
2353 rcu_read_unlock();
2354 return res;
2375} 2355}
2356EXPORT_SYMBOL(ip_route_input_common);
2376 2357
2377static int __mkroute_output(struct rtable **result, 2358static int __mkroute_output(struct rtable **result,
2378 struct fib_result *res, 2359 struct fib_result *res,
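
Note on the hunk above: ip_route_input_common() now holds the RCU read lock for the whole lookup, slow path included, and on a cache hit the new noref variants skip the atomic refcount when the caller guarantees the skb will not outlive the RCU section. A minimal sketch of the two attachment modes, assuming dst_use_noref()/skb_dst_set_noref() behave as introduced by this series; must run under rcu_read_lock():

	/* Sketch only: refcounted vs. noref dst attachment on a cache hit. */
	if (noref) {
		/* caller promises the skb is consumed before rcu_read_unlock() */
		dst_use_noref(&rth->dst, jiffies);	/* bump stats, no refcount */
		skb_dst_set_noref(skb, &rth->dst);
	} else {
		dst_use(&rth->dst, jiffies);		/* takes a real reference */
		skb_dst_set(skb, &rth->dst);
	}
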
@@ -2432,12 +2413,12 @@ static int __mkroute_output(struct rtable **result,
2432 goto cleanup; 2413 goto cleanup;
2433 } 2414 }
2434 2415
2435 atomic_set(&rth->u.dst.__refcnt, 1); 2416 atomic_set(&rth->dst.__refcnt, 1);
2436 rth->u.dst.flags= DST_HOST; 2417 rth->dst.flags= DST_HOST;
2437 if (IN_DEV_CONF_GET(in_dev, NOXFRM)) 2418 if (IN_DEV_CONF_GET(in_dev, NOXFRM))
2438 rth->u.dst.flags |= DST_NOXFRM; 2419 rth->dst.flags |= DST_NOXFRM;
2439 if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) 2420 if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
2440 rth->u.dst.flags |= DST_NOPOLICY; 2421 rth->dst.flags |= DST_NOPOLICY;
2441 2422
2442 rth->fl.fl4_dst = oldflp->fl4_dst; 2423 rth->fl.fl4_dst = oldflp->fl4_dst;
2443 rth->fl.fl4_tos = tos; 2424 rth->fl.fl4_tos = tos;
@@ -2449,35 +2430,35 @@ static int __mkroute_output(struct rtable **result,
2449 rth->rt_iif = oldflp->oif ? : dev_out->ifindex; 2430 rth->rt_iif = oldflp->oif ? : dev_out->ifindex;
2450 /* get references to the devices that are to be held by the routing 2431 /* get references to the devices that are to be held by the routing
2451 cache entry */ 2432 cache entry */
2452 rth->u.dst.dev = dev_out; 2433 rth->dst.dev = dev_out;
2453 dev_hold(dev_out); 2434 dev_hold(dev_out);
2454 rth->idev = in_dev_get(dev_out); 2435 rth->idev = in_dev_get(dev_out);
2455 rth->rt_gateway = fl->fl4_dst; 2436 rth->rt_gateway = fl->fl4_dst;
2456 rth->rt_spec_dst= fl->fl4_src; 2437 rth->rt_spec_dst= fl->fl4_src;
2457 2438
2458 rth->u.dst.output=ip_output; 2439 rth->dst.output=ip_output;
2459 rth->u.dst.obsolete = -1; 2440 rth->dst.obsolete = -1;
2460 rth->rt_genid = rt_genid(dev_net(dev_out)); 2441 rth->rt_genid = rt_genid(dev_net(dev_out));
2461 2442
2462 RT_CACHE_STAT_INC(out_slow_tot); 2443 RT_CACHE_STAT_INC(out_slow_tot);
2463 2444
2464 if (flags & RTCF_LOCAL) { 2445 if (flags & RTCF_LOCAL) {
2465 rth->u.dst.input = ip_local_deliver; 2446 rth->dst.input = ip_local_deliver;
2466 rth->rt_spec_dst = fl->fl4_dst; 2447 rth->rt_spec_dst = fl->fl4_dst;
2467 } 2448 }
2468 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { 2449 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
2469 rth->rt_spec_dst = fl->fl4_src; 2450 rth->rt_spec_dst = fl->fl4_src;
2470 if (flags & RTCF_LOCAL && 2451 if (flags & RTCF_LOCAL &&
2471 !(dev_out->flags & IFF_LOOPBACK)) { 2452 !(dev_out->flags & IFF_LOOPBACK)) {
2472 rth->u.dst.output = ip_mc_output; 2453 rth->dst.output = ip_mc_output;
2473 RT_CACHE_STAT_INC(out_slow_mc); 2454 RT_CACHE_STAT_INC(out_slow_mc);
2474 } 2455 }
2475#ifdef CONFIG_IP_MROUTE 2456#ifdef CONFIG_IP_MROUTE
2476 if (res->type == RTN_MULTICAST) { 2457 if (res->type == RTN_MULTICAST) {
2477 if (IN_DEV_MFORWARD(in_dev) && 2458 if (IN_DEV_MFORWARD(in_dev) &&
2478 !ipv4_is_local_multicast(oldflp->fl4_dst)) { 2459 !ipv4_is_local_multicast(oldflp->fl4_dst)) {
2479 rth->u.dst.input = ip_mr_input; 2460 rth->dst.input = ip_mr_input;
2480 rth->u.dst.output = ip_mc_output; 2461 rth->dst.output = ip_mc_output;
2481 } 2462 }
2482 } 2463 }
2483#endif 2464#endif
@@ -2732,7 +2713,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2732 2713
2733 rcu_read_lock_bh(); 2714 rcu_read_lock_bh();
2734 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth; 2715 for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
2735 rth = rcu_dereference_bh(rth->u.dst.rt_next)) { 2716 rth = rcu_dereference_bh(rth->dst.rt_next)) {
2736 if (rth->fl.fl4_dst == flp->fl4_dst && 2717 if (rth->fl.fl4_dst == flp->fl4_dst &&
2737 rth->fl.fl4_src == flp->fl4_src && 2718 rth->fl.fl4_src == flp->fl4_src &&
2738 rth->fl.iif == 0 && 2719 rth->fl.iif == 0 &&
@@ -2740,9 +2721,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2740 rth->fl.mark == flp->mark && 2721 rth->fl.mark == flp->mark &&
2741 !((rth->fl.fl4_tos ^ flp->fl4_tos) & 2722 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
2742 (IPTOS_RT_MASK | RTO_ONLINK)) && 2723 (IPTOS_RT_MASK | RTO_ONLINK)) &&
2743 net_eq(dev_net(rth->u.dst.dev), net) && 2724 net_eq(dev_net(rth->dst.dev), net) &&
2744 !rt_is_expired(rth)) { 2725 !rt_is_expired(rth)) {
2745 dst_use(&rth->u.dst, jiffies); 2726 dst_use(&rth->dst, jiffies);
2746 RT_CACHE_STAT_INC(out_hit); 2727 RT_CACHE_STAT_INC(out_hit);
2747 rcu_read_unlock_bh(); 2728 rcu_read_unlock_bh();
2748 *rp = rth; 2729 *rp = rth;
@@ -2755,7 +2736,6 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
2755slow_output: 2736slow_output:
2756 return ip_route_output_slow(net, rp, flp); 2737 return ip_route_output_slow(net, rp, flp);
2757} 2738}
2758
2759EXPORT_SYMBOL_GPL(__ip_route_output_key); 2739EXPORT_SYMBOL_GPL(__ip_route_output_key);
2760 2740
2761static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 2741static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -2779,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2779 dst_alloc(&ipv4_dst_blackhole_ops); 2759 dst_alloc(&ipv4_dst_blackhole_ops);
2780 2760
2781 if (rt) { 2761 if (rt) {
2782 struct dst_entry *new = &rt->u.dst; 2762 struct dst_entry *new = &rt->dst;
2783 2763
2784 atomic_set(&new->__refcnt, 1); 2764 atomic_set(&new->__refcnt, 1);
2785 new->__use = 1; 2765 new->__use = 1;
2786 new->input = dst_discard; 2766 new->input = dst_discard;
2787 new->output = dst_discard; 2767 new->output = dst_discard;
2788 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 2768 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
2789 2769
2790 new->dev = ort->u.dst.dev; 2770 new->dev = ort->dst.dev;
2791 if (new->dev) 2771 if (new->dev)
2792 dev_hold(new->dev); 2772 dev_hold(new->dev);
2793 2773
@@ -2811,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
2811 dst_free(new); 2791 dst_free(new);
2812 } 2792 }
2813 2793
2814 dst_release(&(*rp)->u.dst); 2794 dst_release(&(*rp)->dst);
2815 *rp = rt; 2795 *rp = rt;
2816 return (rt ? 0 : -ENOMEM); 2796 return (rt ? 0 : -ENOMEM);
2817} 2797}
@@ -2839,13 +2819,13 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
2839 2819
2840 return 0; 2820 return 0;
2841} 2821}
2842
2843EXPORT_SYMBOL_GPL(ip_route_output_flow); 2822EXPORT_SYMBOL_GPL(ip_route_output_flow);
2844 2823
2845int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) 2824int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2846{ 2825{
2847 return ip_route_output_flow(net, rp, flp, NULL, 0); 2826 return ip_route_output_flow(net, rp, flp, NULL, 0);
2848} 2827}
2828EXPORT_SYMBOL(ip_route_output_key);
2849 2829
2850static int rt_fill_info(struct net *net, 2830static int rt_fill_info(struct net *net,
2851 struct sk_buff *skb, u32 pid, u32 seq, int event, 2831 struct sk_buff *skb, u32 pid, u32 seq, int event,
@@ -2881,11 +2861,11 @@ static int rt_fill_info(struct net *net,
2881 r->rtm_src_len = 32; 2861 r->rtm_src_len = 32;
2882 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src); 2862 NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
2883 } 2863 }
2884 if (rt->u.dst.dev) 2864 if (rt->dst.dev)
2885 NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); 2865 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2886#ifdef CONFIG_NET_CLS_ROUTE 2866#ifdef CONFIG_NET_CLS_ROUTE
2887 if (rt->u.dst.tclassid) 2867 if (rt->dst.tclassid)
2888 NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); 2868 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
2889#endif 2869#endif
2890 if (rt->fl.iif) 2870 if (rt->fl.iif)
2891 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); 2871 NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
@@ -2895,12 +2875,16 @@ static int rt_fill_info(struct net *net,
2895 if (rt->rt_dst != rt->rt_gateway) 2875 if (rt->rt_dst != rt->rt_gateway)
2896 NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); 2876 NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
2897 2877
2898 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2878 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2899 goto nla_put_failure; 2879 goto nla_put_failure;
2900 2880
2901 error = rt->u.dst.error; 2881 if (rt->fl.mark)
2902 expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0; 2882 NLA_PUT_BE32(skb, RTA_MARK, rt->fl.mark);
2883
2884 error = rt->dst.error;
2885 expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
2903 if (rt->peer) { 2886 if (rt->peer) {
2887 inet_peer_refcheck(rt->peer);
2904 id = atomic_read(&rt->peer->ip_id_count) & 0xffff; 2888 id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
2905 if (rt->peer->tcp_ts_stamp) { 2889 if (rt->peer->tcp_ts_stamp) {
2906 ts = rt->peer->tcp_ts; 2890 ts = rt->peer->tcp_ts;
@@ -2931,7 +2915,7 @@ static int rt_fill_info(struct net *net,
2931 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); 2915 NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
2932 } 2916 }
2933 2917
2934 if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage, 2918 if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
2935 expires, error) < 0) 2919 expires, error) < 0)
2936 goto nla_put_failure; 2920 goto nla_put_failure;
2937 2921
@@ -2952,6 +2936,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2952 __be32 src = 0; 2936 __be32 src = 0;
2953 u32 iif; 2937 u32 iif;
2954 int err; 2938 int err;
2939 int mark;
2955 struct sk_buff *skb; 2940 struct sk_buff *skb;
2956 2941
2957 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); 2942 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
@@ -2979,6 +2964,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2979 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0; 2964 src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
2980 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0; 2965 dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
2981 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; 2966 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
2967 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
2982 2968
2983 if (iif) { 2969 if (iif) {
2984 struct net_device *dev; 2970 struct net_device *dev;
@@ -2991,13 +2977,14 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2991 2977
2992 skb->protocol = htons(ETH_P_IP); 2978 skb->protocol = htons(ETH_P_IP);
2993 skb->dev = dev; 2979 skb->dev = dev;
2980 skb->mark = mark;
2994 local_bh_disable(); 2981 local_bh_disable();
2995 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); 2982 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2996 local_bh_enable(); 2983 local_bh_enable();
2997 2984
2998 rt = skb_rtable(skb); 2985 rt = skb_rtable(skb);
2999 if (err == 0 && rt->u.dst.error) 2986 if (err == 0 && rt->dst.error)
3000 err = -rt->u.dst.error; 2987 err = -rt->dst.error;
3001 } else { 2988 } else {
3002 struct flowi fl = { 2989 struct flowi fl = {
3003 .nl_u = { 2990 .nl_u = {
@@ -3008,6 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
3008 }, 2995 },
3009 }, 2996 },
3010 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, 2997 .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
2998 .mark = mark,
3011 }; 2999 };
3012 err = ip_route_output_key(net, &rt, &fl); 3000 err = ip_route_output_key(net, &rt, &fl);
3013 } 3001 }
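
With the RTA_MARK plumbing above, route-get queries can be resolved against fwmark-based policy routing (the equivalent of "ip route get ADDR mark N"). A hypothetical userspace sketch of such a query over rtnetlink; addattr32() is a local helper (not a libc or kernel API), fd is an AF_NETLINK/NETLINK_ROUTE socket, and RTA_MARK is assumed present in the installed headers:

	#include <stdint.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/rtnetlink.h>

	/* Append a 32-bit attribute to an in-memory netlink request. */
	static void addattr32(struct nlmsghdr *nh, int type, uint32_t data)
	{
		struct rtattr *rta = (struct rtattr *)
			((char *)nh + NLMSG_ALIGN(nh->nlmsg_len));

		rta->rta_type = type;
		rta->rta_len = RTA_LENGTH(sizeof(data));
		memcpy(RTA_DATA(rta), &data, sizeof(data));
		nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
	}

	/* Ask the kernel to resolve dst (network byte order) with a mark. */
	static int route_get_with_mark(int fd, uint32_t dst_be32, uint32_t mark)
	{
		struct {
			struct nlmsghdr nh;
			struct rtmsg rt;
			char attrs[64];
		} req;

		memset(&req, 0, sizeof(req));
		req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
		req.nh.nlmsg_type = RTM_GETROUTE;
		req.nh.nlmsg_flags = NLM_F_REQUEST;
		req.rt.rtm_family = AF_INET;

		addattr32(&req.nh, RTA_DST, dst_be32);
		addattr32(&req.nh, RTA_MARK, mark);

		if (send(fd, &req, req.nh.nlmsg_len, 0) < 0)
			return -1;
		/* ...then recv() and parse the RTM_NEWROUTE reply... */
		return 0;
	}
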
@@ -3015,7 +3003,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
3015 if (err) 3003 if (err)
3016 goto errout_free; 3004 goto errout_free;
3017 3005
3018 skb_dst_set(skb, &rt->u.dst); 3006 skb_dst_set(skb, &rt->dst);
3019 if (rtm->rtm_flags & RTM_F_NOTIFY) 3007 if (rtm->rtm_flags & RTM_F_NOTIFY)
3020 rt->rt_flags |= RTCF_NOTIFY; 3008 rt->rt_flags |= RTCF_NOTIFY;
3021 3009
@@ -3051,12 +3039,12 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
3051 continue; 3039 continue;
3052 rcu_read_lock_bh(); 3040 rcu_read_lock_bh();
3053 for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt; 3041 for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
3054 rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) { 3042 rt = rcu_dereference_bh(rt->dst.rt_next), idx++) {
3055 if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx) 3043 if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx)
3056 continue; 3044 continue;
3057 if (rt_is_expired(rt)) 3045 if (rt_is_expired(rt))
3058 continue; 3046 continue;
3059 skb_dst_set(skb, dst_clone(&rt->u.dst)); 3047 skb_dst_set_noref(skb, &rt->dst);
3060 if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, 3048 if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
3061 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 3049 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
3062 1, NLM_F_MULTI) <= 0) { 3050 1, NLM_F_MULTI) <= 0) {
@@ -3102,48 +3090,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
3102 return -EINVAL; 3090 return -EINVAL;
3103} 3091}
3104 3092
3105static void rt_secret_reschedule(int old)
3106{
3107 struct net *net;
3108 int new = ip_rt_secret_interval;
3109 int diff = new - old;
3110
3111 if (!diff)
3112 return;
3113
3114 rtnl_lock();
3115 for_each_net(net) {
3116 int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
3117 long time;
3118
3119 if (!new)
3120 continue;
3121
3122 if (deleted) {
3123 time = net->ipv4.rt_secret_timer.expires - jiffies;
3124
3125 if (time <= 0 || (time += diff) <= 0)
3126 time = 0;
3127 } else
3128 time = new;
3129
3130 mod_timer(&net->ipv4.rt_secret_timer, jiffies + time);
3131 }
3132 rtnl_unlock();
3133}
3134
3135static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
3136 void __user *buffer, size_t *lenp,
3137 loff_t *ppos)
3138{
3139 int old = ip_rt_secret_interval;
3140 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
3141
3142 rt_secret_reschedule(old);
3143
3144 return ret;
3145}
3146
3147static ctl_table ipv4_route_table[] = { 3093static ctl_table ipv4_route_table[] = {
3148 { 3094 {
3149 .procname = "gc_thresh", 3095 .procname = "gc_thresh",
@@ -3252,13 +3198,6 @@ static ctl_table ipv4_route_table[] = {
3252 .mode = 0644, 3198 .mode = 0644,
3253 .proc_handler = proc_dointvec, 3199 .proc_handler = proc_dointvec,
3254 }, 3200 },
3255 {
3256 .procname = "secret_interval",
3257 .data = &ip_rt_secret_interval,
3258 .maxlen = sizeof(int),
3259 .mode = 0644,
3260 .proc_handler = ipv4_sysctl_rt_secret_interval,
3261 },
3262 { } 3201 { }
3263}; 3202};
3264 3203
@@ -3337,34 +3276,15 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
3337}; 3276};
3338#endif 3277#endif
3339 3278
3340 3279static __net_init int rt_genid_init(struct net *net)
3341static __net_init int rt_secret_timer_init(struct net *net)
3342{ 3280{
3343 atomic_set(&net->ipv4.rt_genid, 3281 get_random_bytes(&net->ipv4.rt_genid,
3344 (int) ((num_physpages ^ (num_physpages>>8)) ^ 3282 sizeof(net->ipv4.rt_genid));
3345 (jiffies ^ (jiffies >> 7))));
3346
3347 net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
3348 net->ipv4.rt_secret_timer.data = (unsigned long)net;
3349 init_timer_deferrable(&net->ipv4.rt_secret_timer);
3350
3351 if (ip_rt_secret_interval) {
3352 net->ipv4.rt_secret_timer.expires =
3353 jiffies + net_random() % ip_rt_secret_interval +
3354 ip_rt_secret_interval;
3355 add_timer(&net->ipv4.rt_secret_timer);
3356 }
3357 return 0; 3283 return 0;
3358} 3284}
3359 3285
3360static __net_exit void rt_secret_timer_exit(struct net *net) 3286static __net_initdata struct pernet_operations rt_genid_ops = {
3361{ 3287 .init = rt_genid_init,
3362 del_timer_sync(&net->ipv4.rt_secret_timer);
3363}
3364
3365static __net_initdata struct pernet_operations rt_secret_timer_ops = {
3366 .init = rt_secret_timer_init,
3367 .exit = rt_secret_timer_exit,
3368}; 3288};
3369 3289
3370 3290
@@ -3425,9 +3345,6 @@ int __init ip_rt_init(void)
3425 schedule_delayed_work(&expires_work, 3345 schedule_delayed_work(&expires_work,
3426 net_random() % ip_rt_gc_interval + ip_rt_gc_interval); 3346 net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
3427 3347
3428 if (register_pernet_subsys(&rt_secret_timer_ops))
3429 printk(KERN_ERR "Unable to setup rt_secret_timer\n");
3430
3431 if (ip_rt_proc_init()) 3348 if (ip_rt_proc_init())
3432 printk(KERN_ERR "Unable to create route proc files\n"); 3349 printk(KERN_ERR "Unable to create route proc files\n");
3433#ifdef CONFIG_XFRM 3350#ifdef CONFIG_XFRM
@@ -3439,6 +3356,7 @@ int __init ip_rt_init(void)
3439#ifdef CONFIG_SYSCTL 3356#ifdef CONFIG_SYSCTL
3440 register_pernet_subsys(&sysctl_route_ops); 3357 register_pernet_subsys(&sysctl_route_ops);
3441#endif 3358#endif
3359 register_pernet_subsys(&rt_genid_ops);
3442 return rc; 3360 return rc;
3443} 3361}
3444 3362
@@ -3452,7 +3370,3 @@ void __init ip_static_sysctl_init(void)
3452 register_sysctl_paths(ipv4_path, ipv4_skeleton); 3370 register_sysctl_paths(ipv4_path, ipv4_skeleton);
3453} 3371}
3454#endif 3372#endif
3455
3456EXPORT_SYMBOL(__ip_select_ident);
3457EXPORT_SYMBOL(ip_route_input);
3458EXPORT_SYMBOL(ip_route_output_key);
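
With the secret-interval timer gone, cache invalidation rests entirely on the per-namespace generation counter seeded above by rt_genid_init(). A sketch of the mechanism, following the helpers elsewhere in route.c (simplified; rt_genid(net) reads the atomic counter):

	/* Each cached rtable records the genid current at creation; a
	 * mismatch means the entry is stale and gets skipped on lookup. */
	static inline int rt_is_expired(struct rtable *rth)
	{
		return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
	}

	/* Invalidation is just a randomized counter bump: no synchronous
	 * flush or timer-driven rehash is needed any more. */
	static void rt_cache_invalidate(struct net *net)
	{
		unsigned char shuffle;

		get_random_bytes(&shuffle, sizeof(shuffle));
		atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
	}
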
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5c24db4a3c91..650cace2180d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -18,8 +18,8 @@
18#include <net/tcp.h> 18#include <net/tcp.h>
19#include <net/route.h> 19#include <net/route.h>
20 20
21/* Timestamps: lowest 9 bits store TCP options */ 21/* Timestamps: lowest bits store TCP options */
22#define TSBITS 9 22#define TSBITS 6
23#define TSMASK (((__u32)1 << TSBITS) - 1) 23#define TSMASK (((__u32)1 << TSBITS) - 1)
24 24
25extern int sysctl_tcp_syncookies; 25extern int sysctl_tcp_syncookies;
@@ -58,7 +58,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
58 58
59/* 59/*
60 * when syncookies are in effect and tcp timestamps are enabled we encode 60 * when syncookies are in effect and tcp timestamps are enabled we encode
61 * tcp options in the lowest 9 bits of the timestamp value that will be 61 * tcp options in the lower bits of the timestamp value that will be
62 * sent in the syn-ack. 62 * sent in the syn-ack.
63 * Since subsequent timestamps use the normal tcp_time_stamp value, we 63 * Since subsequent timestamps use the normal tcp_time_stamp value, we
64 * must make sure that the resulting initial timestamp is <= tcp_time_stamp. 64 * must make sure that the resulting initial timestamp is <= tcp_time_stamp.
@@ -70,11 +70,10 @@ __u32 cookie_init_timestamp(struct request_sock *req)
70 u32 options = 0; 70 u32 options = 0;
71 71
72 ireq = inet_rsk(req); 72 ireq = inet_rsk(req);
73 if (ireq->wscale_ok) { 73
74 options = ireq->snd_wscale; 74 options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
75 options |= ireq->rcv_wscale << 4; 75 options |= ireq->sack_ok << 4;
76 } 76 options |= ireq->ecn_ok << 5;
77 options |= ireq->sack_ok << 8;
78 77
79 ts = ts_now & ~TSMASK; 78 ts = ts_now & ~TSMASK;
80 ts |= options; 79 ts |= options;
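
A compact restatement of the new encoding, for reference: with TSBITS now 6, the SYN-ACK timestamp carries the peer's window scale in bits 0-3 (0xf meaning "no window scaling"), SACK support in bit 4, and ECN support in bit 5. Sketch only; the function name is invented and kernel types are assumed:

	#define TSBITS 6
	#define TSMASK (((__u32)1 << TSBITS) - 1)

	/* Fold the negotiated options into the low 6 bits of the timestamp. */
	static __u32 encode_cookie_options(__u32 ts_now, int wscale_ok,
					   int snd_wscale, int sack_ok, int ecn_ok)
	{
		__u32 options;

		options  = wscale_ok ? ((__u32)snd_wscale & 0xf) : 0xf;
		options |= ((__u32)sack_ok & 1) << 4;
		options |= ((__u32)ecn_ok & 1) << 5;

		return (ts_now & ~TSMASK) | options;
	}
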
@@ -138,23 +137,23 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr,
138} 137}
139 138
140/* 139/*
 141 * This table has to be sorted and terminated with (__u16)-1. 140 * MSS values are taken from the 2009 paper
142 * XXX generate a better table. 141 * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson:
143 * Unresolved Issues: HIPPI with a 64k MSS is not well supported. 142 * - values 1440 to 1460 accounted for 80% of observed mss values
143 * - values outside the 536-1460 range are rare (<0.2%).
144 *
145 * Table must be sorted.
144 */ 146 */
145static __u16 const msstab[] = { 147static __u16 const msstab[] = {
146 64 - 1, 148 64,
147 256 - 1, 149 512,
148 512 - 1, 150 536,
149 536 - 1, 151 1024,
150 1024 - 1, 152 1440,
151 1440 - 1, 153 1460,
152 1460 - 1, 154 4312,
153 4312 - 1, 155 8960,
154 (__u16)-1
155}; 156};
156/* The number doesn't include the -1 terminator */
157#define NUM_MSS (ARRAY_SIZE(msstab) - 1)
158 157
159/* 158/*
160 * Generate a syncookie. mssp points to the mss, which is returned 159 * Generate a syncookie. mssp points to the mss, which is returned
@@ -169,10 +168,10 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
169 168
170 tcp_synq_overflow(sk); 169 tcp_synq_overflow(sk);
171 170
172 /* XXX sort msstab[] by probability? Binary search? */ 171 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
173 for (mssind = 0; mss > msstab[mssind + 1]; mssind++) 172 if (mss >= msstab[mssind])
174 ; 173 break;
175 *mssp = msstab[mssind] + 1; 174 *mssp = msstab[mssind];
176 175
177 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); 176 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
178 177
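
The new lookup walks msstab[] from the largest entry down and stops at the first value not exceeding the peer's advertised MSS, so the encoded value never overstates what the client offered. A worked restatement (cookie_mss_index() is illustrative, ARRAY_SIZE defined locally for self-containment):

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	static __u16 const msstab[] = { 64, 512, 536, 1024, 1440, 1460, 4312, 8960 };

	/* An advertised MSS of 1400 selects msstab[3] == 1024; anything
	 * below 64 still lands on msstab[0], since the loop terminates at
	 * index 0 without a comparison. */
	static unsigned int cookie_mss_index(__u16 mss)
	{
		unsigned int mssind;

		for (mssind = ARRAY_SIZE(msstab) - 1; mssind; mssind--)
			if (mss >= msstab[mssind])
				break;
		return mssind;		/* 8 entries, so 3 cookie bits suffice */
	}
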
@@ -202,7 +201,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
202 jiffies / (HZ * 60), 201 jiffies / (HZ * 60),
203 COUNTER_TRIES); 202 COUNTER_TRIES);
204 203
205 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; 204 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
206} 205}
207 206
208static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, 207static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
@@ -227,26 +226,38 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
227 * additional tcp options in the timestamp. 226 * additional tcp options in the timestamp.
228 * This extracts these options from the timestamp echo. 227 * This extracts these options from the timestamp echo.
229 * 228 *
230 * The lowest 4 bits are for snd_wscale 229 * The lowest 4 bits store snd_wscale.
 231 * The next 4 lsb are for rcv_wscale 230 * The next 2 bits indicate SACK and ECN support.
232 * The next lsb is for sack_ok 231 *
 232 * Return false if we decode an option that should not be there.
233 */ 233 */
234void cookie_check_timestamp(struct tcp_options_received *tcp_opt) 234bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
235{ 235{
236 /* echoed timestamp, 9 lowest bits contain options */ 236 /* echoed timestamp, lowest bits contain options */
237 u32 options = tcp_opt->rcv_tsecr & TSMASK; 237 u32 options = tcp_opt->rcv_tsecr & TSMASK;
238 238
239 tcp_opt->snd_wscale = options & 0xf; 239 if (!tcp_opt->saw_tstamp) {
240 options >>= 4; 240 tcp_clear_options(tcp_opt);
241 tcp_opt->rcv_wscale = options & 0xf; 241 return true;
242 }
243
244 if (!sysctl_tcp_timestamps)
245 return false;
242 246
243 tcp_opt->sack_ok = (options >> 4) & 0x1; 247 tcp_opt->sack_ok = (options >> 4) & 0x1;
248 *ecn_ok = (options >> 5) & 1;
249 if (*ecn_ok && !sysctl_tcp_ecn)
250 return false;
251
252 if (tcp_opt->sack_ok && !sysctl_tcp_sack)
253 return false;
244 254
245 if (tcp_opt->sack_ok) 255 if ((options & 0xf) == 0xf)
246 tcp_sack_reset(tcp_opt); 256 return true; /* no window scaling */
247 257
248 if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) 258 tcp_opt->wscale_ok = 1;
249 tcp_opt->wscale_ok = 1; 259 tcp_opt->snd_wscale = options & 0xf;
260 return sysctl_tcp_window_scaling != 0;
250} 261}
251EXPORT_SYMBOL(cookie_check_timestamp); 262EXPORT_SYMBOL(cookie_check_timestamp);
252 263
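
The decode side is now a gatekeeper as well as a parser: a cookie advertising a feature the listener has since disabled is rejected outright instead of silently honoured. A standalone sketch of the acceptance rules (function name invented, sysctls passed as booleans for clarity; the no-timestamp case handled separately above is omitted):

	/* Returns false when the echoed options conflict with current sysctls. */
	static bool cookie_options_acceptable(__u32 tsecr, bool timestamps,
					      bool sack, bool window_scaling,
					      bool ecn)
	{
		__u32 options = tsecr & TSMASK;	/* low 6 bits */
		bool sack_ok = (options >> 4) & 1;
		bool ecn_ok = (options >> 5) & 1;

		if (!timestamps)
			return false;
		if (ecn_ok && !ecn)
			return false;
		if (sack_ok && !sack)
			return false;
		if ((options & 0xf) == 0xf)
			return true;		/* no window scaling requested */
		return window_scaling;
	}
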
@@ -265,8 +276,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
265 int mss; 276 int mss;
266 struct rtable *rt; 277 struct rtable *rt;
267 __u8 rcv_wscale; 278 __u8 rcv_wscale;
279 bool ecn_ok;
268 280
269 if (!sysctl_tcp_syncookies || !th->ack) 281 if (!sysctl_tcp_syncookies || !th->ack || th->rst)
270 goto out; 282 goto out;
271 283
272 if (tcp_synq_no_recent_overflow(sk) || 284 if (tcp_synq_no_recent_overflow(sk) ||
@@ -281,8 +293,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
281 memset(&tcp_opt, 0, sizeof(tcp_opt)); 293 memset(&tcp_opt, 0, sizeof(tcp_opt));
282 tcp_parse_options(skb, &tcp_opt, &hash_location, 0); 294 tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
283 295
284 if (tcp_opt.saw_tstamp) 296 if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
285 cookie_check_timestamp(&tcp_opt); 297 goto out;
286 298
287 ret = NULL; 299 ret = NULL;
288 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */ 300 req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
@@ -298,9 +310,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
298 ireq->rmt_port = th->source; 310 ireq->rmt_port = th->source;
299 ireq->loc_addr = ip_hdr(skb)->daddr; 311 ireq->loc_addr = ip_hdr(skb)->daddr;
300 ireq->rmt_addr = ip_hdr(skb)->saddr; 312 ireq->rmt_addr = ip_hdr(skb)->saddr;
301 ireq->ecn_ok = 0; 313 ireq->ecn_ok = ecn_ok;
302 ireq->snd_wscale = tcp_opt.snd_wscale; 314 ireq->snd_wscale = tcp_opt.snd_wscale;
303 ireq->rcv_wscale = tcp_opt.rcv_wscale;
304 ireq->sack_ok = tcp_opt.sack_ok; 315 ireq->sack_ok = tcp_opt.sack_ok;
305 ireq->wscale_ok = tcp_opt.wscale_ok; 316 ireq->wscale_ok = tcp_opt.wscale_ok;
306 ireq->tstamp_ok = tcp_opt.saw_tstamp; 317 ireq->tstamp_ok = tcp_opt.saw_tstamp;
@@ -347,22 +358,22 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
347 { .sport = th->dest, 358 { .sport = th->dest,
348 .dport = th->source } } }; 359 .dport = th->source } } };
349 security_req_classify_flow(req, &fl); 360 security_req_classify_flow(req, &fl);
350 if (ip_route_output_key(&init_net, &rt, &fl)) { 361 if (ip_route_output_key(sock_net(sk), &rt, &fl)) {
351 reqsk_free(req); 362 reqsk_free(req);
352 goto out; 363 goto out;
353 } 364 }
354 } 365 }
355 366
356 /* Try to redo what tcp_v4_send_synack did. */ 367 /* Try to redo what tcp_v4_send_synack did. */
357 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW); 368 req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
358 369
359 tcp_select_initial_window(tcp_full_space(sk), req->mss, 370 tcp_select_initial_window(tcp_full_space(sk), req->mss,
360 &req->rcv_wnd, &req->window_clamp, 371 &req->rcv_wnd, &req->window_clamp,
361 ireq->wscale_ok, &rcv_wscale, 372 ireq->wscale_ok, &rcv_wscale,
362 dst_metric(&rt->u.dst, RTAX_INITRWND)); 373 dst_metric(&rt->dst, RTAX_INITRWND));
363 374
364 ireq->rcv_wscale = rcv_wscale; 375 ireq->rcv_wscale = rcv_wscale;
365 376
366 ret = get_cookie_sock(sk, skb, req, &rt->u.dst); 377 ret = get_cookie_sock(sk, skb, req, &rt->dst);
367out: return ret; 378out: return ret;
368} 379}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1cd5c15174b8..d96c1da4b17c 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -299,6 +299,13 @@ static struct ctl_table ipv4_table[] = {
299 .mode = 0644, 299 .mode = 0644,
300 .proc_handler = ipv4_local_port_range, 300 .proc_handler = ipv4_local_port_range,
301 }, 301 },
302 {
303 .procname = "ip_local_reserved_ports",
304 .data = NULL, /* initialized in sysctl_ipv4_init */
305 .maxlen = 65536,
306 .mode = 0644,
307 .proc_handler = proc_do_large_bitmap,
308 },
302#ifdef CONFIG_IP_MULTICAST 309#ifdef CONFIG_IP_MULTICAST
303 { 310 {
304 .procname = "igmp_max_memberships", 311 .procname = "igmp_max_memberships",
@@ -736,6 +743,16 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
736static __init int sysctl_ipv4_init(void) 743static __init int sysctl_ipv4_init(void)
737{ 744{
738 struct ctl_table_header *hdr; 745 struct ctl_table_header *hdr;
746 struct ctl_table *i;
747
748 for (i = ipv4_table; i->procname; i++) {
749 if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
750 i->data = sysctl_local_reserved_ports;
751 break;
752 }
753 }
754 if (!i->procname)
755 return -EINVAL;
739 756
740 hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table); 757 hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
741 if (hdr == NULL) 758 if (hdr == NULL)
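
proc_do_large_bitmap exposes sysctl_local_reserved_ports as a 65536-bit map that accepts comma-separated port ranges, e.g. writing 8080,50000-50100 to /proc/sys/net/ipv4/ip_local_reserved_ports. Local port autoselection is then expected to consult the map with a helper along these lines (the helper name is assumed from this patch series; explicit binds by applications are unaffected):

	extern unsigned long *sysctl_local_reserved_ports;	/* 65536-bit bitmap */

	/* Skip explicitly reserved ports during local port autoselection. */
	static inline int inet_is_reserved_local_port(int port)
	{
		return test_bit(port, sysctl_local_reserved_ports);
	}
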
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0f8caf64caa3..176e11aaea77 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -315,7 +315,6 @@ struct tcp_splice_state {
315 * is strict, actions are advisory and have some latency. 315 * is strict, actions are advisory and have some latency.
316 */ 316 */
317int tcp_memory_pressure __read_mostly; 317int tcp_memory_pressure __read_mostly;
318
319EXPORT_SYMBOL(tcp_memory_pressure); 318EXPORT_SYMBOL(tcp_memory_pressure);
320 319
321void tcp_enter_memory_pressure(struct sock *sk) 320void tcp_enter_memory_pressure(struct sock *sk)
@@ -325,7 +324,6 @@ void tcp_enter_memory_pressure(struct sock *sk)
325 tcp_memory_pressure = 1; 324 tcp_memory_pressure = 1;
326 } 325 }
327} 326}
328
329EXPORT_SYMBOL(tcp_enter_memory_pressure); 327EXPORT_SYMBOL(tcp_enter_memory_pressure);
330 328
331/* Convert seconds to retransmits based on initial and max timeout */ 329/* Convert seconds to retransmits based on initial and max timeout */
@@ -378,7 +376,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
378 struct sock *sk = sock->sk; 376 struct sock *sk = sock->sk;
379 struct tcp_sock *tp = tcp_sk(sk); 377 struct tcp_sock *tp = tcp_sk(sk);
380 378
381 sock_poll_wait(file, sk->sk_sleep, wait); 379 sock_poll_wait(file, sk_sleep(sk), wait);
382 if (sk->sk_state == TCP_LISTEN) 380 if (sk->sk_state == TCP_LISTEN)
383 return inet_csk_listen_poll(sk); 381 return inet_csk_listen_poll(sk);
384 382
@@ -460,6 +458,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
460 } 458 }
461 return mask; 459 return mask;
462} 460}
461EXPORT_SYMBOL(tcp_poll);
463 462
464int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) 463int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
465{ 464{
@@ -508,10 +507,11 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
508 507
509 return put_user(answ, (int __user *)arg); 508 return put_user(answ, (int __user *)arg);
510} 509}
510EXPORT_SYMBOL(tcp_ioctl);
511 511
512static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) 512static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
513{ 513{
514 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 514 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
515 tp->pushed_seq = tp->write_seq; 515 tp->pushed_seq = tp->write_seq;
516} 516}
517 517
@@ -527,7 +527,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
527 527
528 skb->csum = 0; 528 skb->csum = 0;
529 tcb->seq = tcb->end_seq = tp->write_seq; 529 tcb->seq = tcb->end_seq = tp->write_seq;
530 tcb->flags = TCPCB_FLAG_ACK; 530 tcb->flags = TCPHDR_ACK;
531 tcb->sacked = 0; 531 tcb->sacked = 0;
532 skb_header_release(skb); 532 skb_header_release(skb);
533 tcp_add_write_queue_tail(sk, skb); 533 tcp_add_write_queue_tail(sk, skb);
@@ -608,6 +608,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
608 ssize_t spliced; 608 ssize_t spliced;
609 int ret; 609 int ret;
610 610
611 sock_rps_record_flow(sk);
611 /* 612 /*
612 * We can't seek on a socket input 613 * We can't seek on a socket input
613 */ 614 */
@@ -675,6 +676,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
675 676
676 return ret; 677 return ret;
677} 678}
679EXPORT_SYMBOL(tcp_splice_read);
678 680
679struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) 681struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
680{ 682{
@@ -815,7 +817,7 @@ new_segment:
815 skb_shinfo(skb)->gso_segs = 0; 817 skb_shinfo(skb)->gso_segs = 0;
816 818
817 if (!copied) 819 if (!copied)
818 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; 820 TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
819 821
820 copied += copy; 822 copied += copy;
821 poffset += copy; 823 poffset += copy;
@@ -856,15 +858,15 @@ out_err:
856 return sk_stream_error(sk, flags, err); 858 return sk_stream_error(sk, flags, err);
857} 859}
858 860
859ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, 861int tcp_sendpage(struct sock *sk, struct page *page, int offset,
860 size_t size, int flags) 862 size_t size, int flags)
861{ 863{
862 ssize_t res; 864 ssize_t res;
863 struct sock *sk = sock->sk;
864 865
865 if (!(sk->sk_route_caps & NETIF_F_SG) || 866 if (!(sk->sk_route_caps & NETIF_F_SG) ||
866 !(sk->sk_route_caps & NETIF_F_ALL_CSUM)) 867 !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
867 return sock_no_sendpage(sock, page, offset, size, flags); 868 return sock_no_sendpage(sk->sk_socket, page, offset, size,
869 flags);
868 870
869 lock_sock(sk); 871 lock_sock(sk);
870 TCP_CHECK_TIMER(sk); 872 TCP_CHECK_TIMER(sk);
@@ -873,6 +875,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
873 release_sock(sk); 875 release_sock(sk);
874 return res; 876 return res;
875} 877}
878EXPORT_SYMBOL(tcp_sendpage);
876 879
877#define TCP_PAGE(sk) (sk->sk_sndmsg_page) 880#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
878#define TCP_OFF(sk) (sk->sk_sndmsg_off) 881#define TCP_OFF(sk) (sk->sk_sndmsg_off)
@@ -897,10 +900,9 @@ static inline int select_size(struct sock *sk, int sg)
897 return tmp; 900 return tmp;
898} 901}
899 902
900int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, 903int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
901 size_t size) 904 size_t size)
902{ 905{
903 struct sock *sk = sock->sk;
904 struct iovec *iov; 906 struct iovec *iov;
905 struct tcp_sock *tp = tcp_sk(sk); 907 struct tcp_sock *tp = tcp_sk(sk);
906 struct sk_buff *skb; 908 struct sk_buff *skb;
@@ -1061,7 +1063,7 @@ new_segment:
1061 } 1063 }
1062 1064
1063 if (!copied) 1065 if (!copied)
1064 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; 1066 TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
1065 1067
1066 tp->write_seq += copy; 1068 tp->write_seq += copy;
1067 TCP_SKB_CB(skb)->end_seq += copy; 1069 TCP_SKB_CB(skb)->end_seq += copy;
@@ -1121,6 +1123,7 @@ out_err:
1121 release_sock(sk); 1123 release_sock(sk);
1122 return err; 1124 return err;
1123} 1125}
1126EXPORT_SYMBOL(tcp_sendmsg);
1124 1127
1125/* 1128/*
1126 * Handle reading urgent data. BSD has very simple semantics for 1129 * Handle reading urgent data. BSD has very simple semantics for
@@ -1380,6 +1383,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1380 tcp_cleanup_rbuf(sk, copied); 1383 tcp_cleanup_rbuf(sk, copied);
1381 return copied; 1384 return copied;
1382} 1385}
1386EXPORT_SYMBOL(tcp_read_sock);
1383 1387
1384/* 1388/*
1385 * This routine copies from a sock struct into the user buffer. 1389 * This routine copies from a sock struct into the user buffer.
@@ -1774,6 +1778,7 @@ recv_urg:
1774 err = tcp_recv_urg(sk, msg, len, flags); 1778 err = tcp_recv_urg(sk, msg, len, flags);
1775 goto out; 1779 goto out;
1776} 1780}
1781EXPORT_SYMBOL(tcp_recvmsg);
1777 1782
1778void tcp_set_state(struct sock *sk, int state) 1783void tcp_set_state(struct sock *sk, int state)
1779{ 1784{
@@ -1866,6 +1871,7 @@ void tcp_shutdown(struct sock *sk, int how)
1866 tcp_send_fin(sk); 1871 tcp_send_fin(sk);
1867 } 1872 }
1868} 1873}
1874EXPORT_SYMBOL(tcp_shutdown);
1869 1875
1870void tcp_close(struct sock *sk, long timeout) 1876void tcp_close(struct sock *sk, long timeout)
1871{ 1877{
@@ -1898,6 +1904,10 @@ void tcp_close(struct sock *sk, long timeout)
1898 1904
1899 sk_mem_reclaim(sk); 1905 sk_mem_reclaim(sk);
1900 1906
 1907 /* If the socket has already been reset (e.g. in tcp_reset()) - kill it. */
1908 if (sk->sk_state == TCP_CLOSE)
1909 goto adjudge_to_death;
1910
1901 /* As outlined in RFC 2525, section 2.17, we send a RST here because 1911 /* As outlined in RFC 2525, section 2.17, we send a RST here because
1902 * data was lost. To witness the awful effects of the old behavior of 1912 * data was lost. To witness the awful effects of the old behavior of
1903 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk 1913 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
@@ -2025,6 +2035,7 @@ out:
2025 local_bh_enable(); 2035 local_bh_enable();
2026 sock_put(sk); 2036 sock_put(sk);
2027} 2037}
2038EXPORT_SYMBOL(tcp_close);
2028 2039
2029/* These states need RST on ABORT according to RFC793 */ 2040/* These states need RST on ABORT according to RFC793 */
2030 2041
@@ -2098,6 +2109,7 @@ int tcp_disconnect(struct sock *sk, int flags)
2098 sk->sk_error_report(sk); 2109 sk->sk_error_report(sk);
2099 return err; 2110 return err;
2100} 2111}
2112EXPORT_SYMBOL(tcp_disconnect);
2101 2113
2102/* 2114/*
2103 * Socket option code for TCP. 2115 * Socket option code for TCP.
@@ -2175,6 +2187,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2175 GFP_KERNEL); 2187 GFP_KERNEL);
2176 if (cvp == NULL) 2188 if (cvp == NULL)
2177 return -ENOMEM; 2189 return -ENOMEM;
2190
2191 kref_init(&cvp->kref);
2178 } 2192 }
2179 lock_sock(sk); 2193 lock_sock(sk);
2180 tp->rx_opt.cookie_in_always = 2194 tp->rx_opt.cookie_in_always =
@@ -2189,12 +2203,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2189 */ 2203 */
2190 kref_put(&tp->cookie_values->kref, 2204 kref_put(&tp->cookie_values->kref,
2191 tcp_cookie_values_release); 2205 tcp_cookie_values_release);
2192 kref_init(&cvp->kref);
2193 tp->cookie_values = cvp;
2194 } else { 2206 } else {
2195 cvp = tp->cookie_values; 2207 cvp = tp->cookie_values;
2196 } 2208 }
2197 } 2209 }
2210
2198 if (cvp != NULL) { 2211 if (cvp != NULL) {
2199 cvp->cookie_desired = ctd.tcpct_cookie_desired; 2212 cvp->cookie_desired = ctd.tcpct_cookie_desired;
2200 2213
@@ -2208,6 +2221,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2208 cvp->s_data_desired = ctd.tcpct_s_data_desired; 2221 cvp->s_data_desired = ctd.tcpct_s_data_desired;
2209 cvp->s_data_constant = 0; /* false */ 2222 cvp->s_data_constant = 0; /* false */
2210 } 2223 }
2224
2225 tp->cookie_values = cvp;
2211 } 2226 }
2212 release_sock(sk); 2227 release_sock(sk);
2213 return err; 2228 return err;
@@ -2215,7 +2230,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2215 default: 2230 default:
2216 /* fallthru */ 2231 /* fallthru */
2217 break; 2232 break;
2218 }; 2233 }
2219 2234
2220 if (optlen < sizeof(int)) 2235 if (optlen < sizeof(int))
2221 return -EINVAL; 2236 return -EINVAL;
@@ -2298,7 +2313,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2298 if (sock_flag(sk, SOCK_KEEPOPEN) && 2313 if (sock_flag(sk, SOCK_KEEPOPEN) &&
2299 !((1 << sk->sk_state) & 2314 !((1 << sk->sk_state) &
2300 (TCPF_CLOSE | TCPF_LISTEN))) { 2315 (TCPF_CLOSE | TCPF_LISTEN))) {
2301 __u32 elapsed = tcp_time_stamp - tp->rcv_tstamp; 2316 u32 elapsed = keepalive_time_elapsed(tp);
2302 if (tp->keepalive_time > elapsed) 2317 if (tp->keepalive_time > elapsed)
2303 elapsed = tp->keepalive_time - elapsed; 2318 elapsed = tp->keepalive_time - elapsed;
2304 else 2319 else
@@ -2396,6 +2411,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
2396 optval, optlen); 2411 optval, optlen);
2397 return do_tcp_setsockopt(sk, level, optname, optval, optlen); 2412 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2398} 2413}
2414EXPORT_SYMBOL(tcp_setsockopt);
2399 2415
2400#ifdef CONFIG_COMPAT 2416#ifdef CONFIG_COMPAT
2401int compat_tcp_setsockopt(struct sock *sk, int level, int optname, 2417int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
@@ -2406,7 +2422,6 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
2406 optval, optlen); 2422 optval, optlen);
2407 return do_tcp_setsockopt(sk, level, optname, optval, optlen); 2423 return do_tcp_setsockopt(sk, level, optname, optval, optlen);
2408} 2424}
2409
2410EXPORT_SYMBOL(compat_tcp_setsockopt); 2425EXPORT_SYMBOL(compat_tcp_setsockopt);
2411#endif 2426#endif
2412 2427
@@ -2472,7 +2487,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2472 2487
2473 info->tcpi_total_retrans = tp->total_retrans; 2488 info->tcpi_total_retrans = tp->total_retrans;
2474} 2489}
2475
2476EXPORT_SYMBOL_GPL(tcp_get_info); 2490EXPORT_SYMBOL_GPL(tcp_get_info);
2477 2491
2478static int do_tcp_getsockopt(struct sock *sk, int level, 2492static int do_tcp_getsockopt(struct sock *sk, int level,
@@ -2590,6 +2604,12 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
2590 return -EFAULT; 2604 return -EFAULT;
2591 return 0; 2605 return 0;
2592 } 2606 }
2607 case TCP_THIN_LINEAR_TIMEOUTS:
2608 val = tp->thin_lto;
2609 break;
2610 case TCP_THIN_DUPACK:
2611 val = tp->thin_dupack;
2612 break;
2593 default: 2613 default:
2594 return -ENOPROTOOPT; 2614 return -ENOPROTOOPT;
2595 } 2615 }
@@ -2611,6 +2631,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2611 optval, optlen); 2631 optval, optlen);
2612 return do_tcp_getsockopt(sk, level, optname, optval, optlen); 2632 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2613} 2633}
2634EXPORT_SYMBOL(tcp_getsockopt);
2614 2635
2615#ifdef CONFIG_COMPAT 2636#ifdef CONFIG_COMPAT
2616int compat_tcp_getsockopt(struct sock *sk, int level, int optname, 2637int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
@@ -2621,7 +2642,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
2621 optval, optlen); 2642 optval, optlen);
2622 return do_tcp_getsockopt(sk, level, optname, optval, optlen); 2643 return do_tcp_getsockopt(sk, level, optname, optval, optlen);
2623} 2644}
2624
2625EXPORT_SYMBOL(compat_tcp_getsockopt); 2645EXPORT_SYMBOL(compat_tcp_getsockopt);
2626#endif 2646#endif
2627 2647
@@ -2721,7 +2741,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2721 struct tcphdr *th2; 2741 struct tcphdr *th2;
2722 unsigned int len; 2742 unsigned int len;
2723 unsigned int thlen; 2743 unsigned int thlen;
2724 unsigned int flags; 2744 __be32 flags;
2725 unsigned int mss = 1; 2745 unsigned int mss = 1;
2726 unsigned int hlen; 2746 unsigned int hlen;
2727 unsigned int off; 2747 unsigned int off;
@@ -2771,10 +2791,10 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2771 2791
2772found: 2792found:
2773 flush = NAPI_GRO_CB(p)->flush; 2793 flush = NAPI_GRO_CB(p)->flush;
2774 flush |= flags & TCP_FLAG_CWR; 2794 flush |= (__force int)(flags & TCP_FLAG_CWR);
2775 flush |= (flags ^ tcp_flag_word(th2)) & 2795 flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
2776 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); 2796 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
2777 flush |= th->ack_seq ^ th2->ack_seq; 2797 flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
2778 for (i = sizeof(*th); i < thlen; i += 4) 2798 for (i = sizeof(*th); i < thlen; i += 4)
2779 flush |= *(u32 *)((u8 *)th + i) ^ 2799 flush |= *(u32 *)((u8 *)th + i) ^
2780 *(u32 *)((u8 *)th2 + i); 2800 *(u32 *)((u8 *)th2 + i);
@@ -2795,8 +2815,9 @@ found:
2795 2815
2796out_check_final: 2816out_check_final:
2797 flush = len < mss; 2817 flush = len < mss;
2798 flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | 2818 flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
2799 TCP_FLAG_SYN | TCP_FLAG_FIN); 2819 TCP_FLAG_RST | TCP_FLAG_SYN |
2820 TCP_FLAG_FIN));
2800 2821
2801 if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) 2822 if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
2802 pp = head; 2823 pp = head;
@@ -2839,7 +2860,6 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
2839 if (p->md5_desc.tfm) 2860 if (p->md5_desc.tfm)
2840 crypto_free_hash(p->md5_desc.tfm); 2861 crypto_free_hash(p->md5_desc.tfm);
2841 kfree(p); 2862 kfree(p);
2842 p = NULL;
2843 } 2863 }
2844 } 2864 }
2845 free_percpu(pool); 2865 free_percpu(pool);
@@ -2858,7 +2878,6 @@ void tcp_free_md5sig_pool(void)
2858 if (pool) 2878 if (pool)
2859 __tcp_free_md5sig_pool(pool); 2879 __tcp_free_md5sig_pool(pool);
2860} 2880}
2861
2862EXPORT_SYMBOL(tcp_free_md5sig_pool); 2881EXPORT_SYMBOL(tcp_free_md5sig_pool);
2863 2882
2864static struct tcp_md5sig_pool * __percpu * 2883static struct tcp_md5sig_pool * __percpu *
@@ -2934,28 +2953,42 @@ retry:
2934 } 2953 }
2935 return pool; 2954 return pool;
2936} 2955}
2937
2938EXPORT_SYMBOL(tcp_alloc_md5sig_pool); 2956EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
2939 2957
2940struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu) 2958
2959/**
2960 * tcp_get_md5sig_pool - get md5sig_pool for this user
2961 *
 2962 * We use a percpu structure, so if we succeed, we exit with preemption
 2963 * and BH disabled, to make sure another thread or softirq handler
 2964 * won't try to get the same context.
2965 */
2966struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
2941{ 2967{
2942 struct tcp_md5sig_pool * __percpu *p; 2968 struct tcp_md5sig_pool * __percpu *p;
2943 spin_lock_bh(&tcp_md5sig_pool_lock); 2969
2970 local_bh_disable();
2971
2972 spin_lock(&tcp_md5sig_pool_lock);
2944 p = tcp_md5sig_pool; 2973 p = tcp_md5sig_pool;
2945 if (p) 2974 if (p)
2946 tcp_md5sig_users++; 2975 tcp_md5sig_users++;
2947 spin_unlock_bh(&tcp_md5sig_pool_lock); 2976 spin_unlock(&tcp_md5sig_pool_lock);
2948 return (p ? *per_cpu_ptr(p, cpu) : NULL);
2949}
2950 2977
2951EXPORT_SYMBOL(__tcp_get_md5sig_pool); 2978 if (p)
2979 return *this_cpu_ptr(p);
2980
2981 local_bh_enable();
2982 return NULL;
2983}
2984EXPORT_SYMBOL(tcp_get_md5sig_pool);
2952 2985
2953void __tcp_put_md5sig_pool(void) 2986void tcp_put_md5sig_pool(void)
2954{ 2987{
2988 local_bh_enable();
2955 tcp_free_md5sig_pool(); 2989 tcp_free_md5sig_pool();
2956} 2990}
2957 2991EXPORT_SYMBOL(tcp_put_md5sig_pool);
2958EXPORT_SYMBOL(__tcp_put_md5sig_pool);
2959 2992
2960int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, 2993int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
2961 struct tcphdr *th) 2994 struct tcphdr *th)
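
With the per-cpu choice moved inside the accessor, callers now pair tcp_get_md5sig_pool() with tcp_put_md5sig_pool() and must not sleep in between, since the pool is only valid while BH stays disabled on this CPU. An abbreviated usage sketch (the function is illustrative; real callers such as tcp_v4_md5_hash_hdr also run crypto_hash_init/final around the updates):

	static int md5_hash_header_example(struct tcphdr *th,
					   struct tcp_md5sig_key *key)
	{
		struct tcp_md5sig_pool *hp;
		int err = 1;

		hp = tcp_get_md5sig_pool();	/* returns with BH disabled */
		if (!hp)
			return 1;

		if (tcp_md5_hash_header(hp, th))
			goto out;
		if (tcp_md5_hash_key(hp, key))
			goto out;
		err = 0;
	out:
		tcp_put_md5sig_pool();		/* re-enables BH */
		return err;
	}
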
@@ -2971,7 +3004,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
2971 th->check = old_checksum; 3004 th->check = old_checksum;
2972 return err; 3005 return err;
2973} 3006}
2974
2975EXPORT_SYMBOL(tcp_md5_hash_header); 3007EXPORT_SYMBOL(tcp_md5_hash_header);
2976 3008
2977int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, 3009int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
@@ -2984,6 +3016,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
2984 const unsigned head_data_len = skb_headlen(skb) > header_len ? 3016 const unsigned head_data_len = skb_headlen(skb) > header_len ?
2985 skb_headlen(skb) - header_len : 0; 3017 skb_headlen(skb) - header_len : 0;
2986 const struct skb_shared_info *shi = skb_shinfo(skb); 3018 const struct skb_shared_info *shi = skb_shinfo(skb);
3019 struct sk_buff *frag_iter;
2987 3020
2988 sg_init_table(&sg, 1); 3021 sg_init_table(&sg, 1);
2989 3022
@@ -2998,9 +3031,12 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
2998 return 1; 3031 return 1;
2999 } 3032 }
3000 3033
3034 skb_walk_frags(skb, frag_iter)
3035 if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
3036 return 1;
3037
3001 return 0; 3038 return 0;
3002} 3039}
3003
3004EXPORT_SYMBOL(tcp_md5_hash_skb_data); 3040EXPORT_SYMBOL(tcp_md5_hash_skb_data);
3005 3041
3006int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) 3042int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
@@ -3010,7 +3046,6 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
3010 sg_init_one(&sg, key->key, key->keylen); 3046 sg_init_one(&sg, key->key, key->keylen);
3011 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen); 3047 return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
3012} 3048}
3013
3014EXPORT_SYMBOL(tcp_md5_hash_key); 3049EXPORT_SYMBOL(tcp_md5_hash_key);
3015 3050
3016#endif 3051#endif
@@ -3282,16 +3317,3 @@ void __init tcp_init(void)
3282 tcp_secret_retiring = &tcp_secret_two; 3317 tcp_secret_retiring = &tcp_secret_two;
3283 tcp_secret_secondary = &tcp_secret_two; 3318 tcp_secret_secondary = &tcp_secret_two;
3284} 3319}
3285
3286EXPORT_SYMBOL(tcp_close);
3287EXPORT_SYMBOL(tcp_disconnect);
3288EXPORT_SYMBOL(tcp_getsockopt);
3289EXPORT_SYMBOL(tcp_ioctl);
3290EXPORT_SYMBOL(tcp_poll);
3291EXPORT_SYMBOL(tcp_read_sock);
3292EXPORT_SYMBOL(tcp_recvmsg);
3293EXPORT_SYMBOL(tcp_sendmsg);
3294EXPORT_SYMBOL(tcp_splice_read);
3295EXPORT_SYMBOL(tcp_sendpage);
3296EXPORT_SYMBOL(tcp_setsockopt);
3297EXPORT_SYMBOL(tcp_shutdown);
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index c209e054a634..377bc9349371 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -126,8 +126,8 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
126 * calculate 2^fract in a <<7 value. 126 * calculate 2^fract in a <<7 value.
127 */ 127 */
128 is_slowstart = 1; 128 is_slowstart = 1;
129 increment = ((1 << ca->rho) * hybla_fraction(rho_fractions)) 129 increment = ((1 << min(ca->rho, 16U)) *
130 - 128; 130 hybla_fraction(rho_fractions)) - 128;
131 } else { 131 } else {
132 /* 132 /*
133 * congestion avoidance 133 * congestion avoidance
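
The hybla clamp addresses a plain C hazard: ca->rho grows with RTT, and a shift count of 31 or more on a 32-bit int is undefined behaviour, so the exponent is capped at 16 before the multiply. A userspace sketch of the guarded computation (function name invented):

	#include <stdint.h>

	/* min(rho, 16) bounds the slow-start increment and keeps the shift
	 * well inside the width of the type. */
	static uint32_t hybla_increment(uint32_t rho, uint32_t fraction)
	{
		uint32_t exp = rho < 16 ? rho : 16;

		return ((UINT32_C(1) << exp) * fraction) - 128;
	}
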
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f240f57b2199..e663b78a2ef6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,10 +78,13 @@ int sysctl_tcp_window_scaling __read_mostly = 1;
78int sysctl_tcp_sack __read_mostly = 1; 78int sysctl_tcp_sack __read_mostly = 1;
79int sysctl_tcp_fack __read_mostly = 1; 79int sysctl_tcp_fack __read_mostly = 1;
80int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; 80int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
81EXPORT_SYMBOL(sysctl_tcp_reordering);
81int sysctl_tcp_ecn __read_mostly = 2; 82int sysctl_tcp_ecn __read_mostly = 2;
83EXPORT_SYMBOL(sysctl_tcp_ecn);
82int sysctl_tcp_dsack __read_mostly = 1; 84int sysctl_tcp_dsack __read_mostly = 1;
83int sysctl_tcp_app_win __read_mostly = 31; 85int sysctl_tcp_app_win __read_mostly = 31;
84int sysctl_tcp_adv_win_scale __read_mostly = 2; 86int sysctl_tcp_adv_win_scale __read_mostly = 2;
87EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
85 88
86int sysctl_tcp_stdurg __read_mostly; 89int sysctl_tcp_stdurg __read_mostly;
87int sysctl_tcp_rfc1337 __read_mostly; 90int sysctl_tcp_rfc1337 __read_mostly;
@@ -419,6 +422,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
419 422
420 inet_csk(sk)->icsk_ack.rcv_mss = hint; 423 inet_csk(sk)->icsk_ack.rcv_mss = hint;
421} 424}
425EXPORT_SYMBOL(tcp_initialize_rcv_mss);
422 426
423/* Receiver "autotuning" code. 427/* Receiver "autotuning" code.
424 * 428 *
@@ -2639,7 +2643,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2639 if (sk->sk_family == AF_INET) { 2643 if (sk->sk_family == AF_INET) {
2640 printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n", 2644 printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
2641 msg, 2645 msg,
2642 &inet->daddr, ntohs(inet->dport), 2646 &inet->inet_daddr, ntohs(inet->inet_dport),
2643 tp->snd_cwnd, tcp_left_out(tp), 2647 tp->snd_cwnd, tcp_left_out(tp),
2644 tp->snd_ssthresh, tp->prior_ssthresh, 2648 tp->snd_ssthresh, tp->prior_ssthresh,
2645 tp->packets_out); 2649 tp->packets_out);
@@ -2649,7 +2653,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
2649 struct ipv6_pinfo *np = inet6_sk(sk); 2653 struct ipv6_pinfo *np = inet6_sk(sk);
2650 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", 2654 printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
2651 msg, 2655 msg,
2652 &np->daddr, ntohs(inet->dport), 2656 &np->daddr, ntohs(inet->inet_dport),
2653 tp->snd_cwnd, tcp_left_out(tp), 2657 tp->snd_cwnd, tcp_left_out(tp),
2654 tp->snd_ssthresh, tp->prior_ssthresh, 2658 tp->snd_ssthresh, tp->prior_ssthresh,
2655 tp->packets_out); 2659 tp->packets_out);
@@ -2938,6 +2942,7 @@ void tcp_simple_retransmit(struct sock *sk)
2938 } 2942 }
2939 tcp_xmit_retransmit_queue(sk); 2943 tcp_xmit_retransmit_queue(sk);
2940} 2944}
2945EXPORT_SYMBOL(tcp_simple_retransmit);
2941 2946
2942/* Process an event, which can update packets-in-flight not trivially. 2947/* Process an event, which can update packets-in-flight not trivially.
2943 * Main goal of this function is to calculate new estimate for left_out, 2948 * Main goal of this function is to calculate new estimate for left_out,
@@ -3286,7 +3291,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3286 * connection startup slow start one packet too 3291 * connection startup slow start one packet too
3287 * quickly. This is severely frowned upon behavior. 3292 * quickly. This is severely frowned upon behavior.
3288 */ 3293 */
3289 if (!(scb->flags & TCPCB_FLAG_SYN)) { 3294 if (!(scb->flags & TCPHDR_SYN)) {
3290 flag |= FLAG_DATA_ACKED; 3295 flag |= FLAG_DATA_ACKED;
3291 } else { 3296 } else {
3292 flag |= FLAG_SYN_ACKED; 3297 flag |= FLAG_SYN_ACKED;
@@ -3710,7 +3715,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3710 } 3715 }
3711 3716
3712 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) 3717 if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
3713 dst_confirm(sk->sk_dst_cache); 3718 dst_confirm(__sk_dst_get(sk));
3714 3719
3715 return 1; 3720 return 1;
3716 3721
@@ -3845,18 +3850,20 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
3845 /* 16-bit multiple */ 3850 /* 16-bit multiple */
3846 opt_rx->cookie_plus = opsize; 3851 opt_rx->cookie_plus = opsize;
3847 *hvpp = ptr; 3852 *hvpp = ptr;
3853 break;
3848 default: 3854 default:
3849 /* ignore option */ 3855 /* ignore option */
3850 break; 3856 break;
3851 }; 3857 }
3852 break; 3858 break;
3853 }; 3859 }
3854 3860
3855 ptr += opsize-2; 3861 ptr += opsize-2;
3856 length -= opsize; 3862 length -= opsize;
3857 } 3863 }
3858 } 3864 }
3859} 3865}
3866EXPORT_SYMBOL(tcp_parse_options);
3860 3867
3861static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) 3868static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
3862{ 3869{
@@ -3923,13 +3930,14 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th)
3923 if (opsize < 2 || opsize > length) 3930 if (opsize < 2 || opsize > length)
3924 return NULL; 3931 return NULL;
3925 if (opcode == TCPOPT_MD5SIG) 3932 if (opcode == TCPOPT_MD5SIG)
3926 return ptr; 3933 return opsize == TCPOLEN_MD5SIG ? ptr : NULL;
3927 } 3934 }
3928 ptr += opsize - 2; 3935 ptr += opsize - 2;
3929 length -= opsize; 3936 length -= opsize;
3930 } 3937 }
3931 return NULL; 3938 return NULL;
3932} 3939}
3940EXPORT_SYMBOL(tcp_parse_md5sig_option);
3933#endif 3941#endif
3934 3942
3935static inline void tcp_store_ts_recent(struct tcp_sock *tp) 3943static inline void tcp_store_ts_recent(struct tcp_sock *tp)
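
The stricter check in tcp_parse_md5sig_option() above ties the return value to the exact option length, so a malformed or truncated option can no longer hand back a short digest pointer. The wire layout being enforced (TCPOLEN_MD5SIG == 18):

	/*
	 *   +---------+---------+------------------------------+
	 *   | Kind=19 | Len=18  |     MD5 digest (16 bytes)    |
	 *   +---------+---------+------------------------------+
	 *
	 * Kind == TCPOPT_MD5SIG with any other length is now ignored.
	 */
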
@@ -4319,7 +4327,7 @@ static void tcp_ofo_queue(struct sock *sk)
4319 } 4327 }
4320 4328
4321 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { 4329 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
4322 SOCK_DEBUG(sk, "ofo packet was already received \n"); 4330 SOCK_DEBUG(sk, "ofo packet was already received\n");
4323 __skb_unlink(skb, &tp->out_of_order_queue); 4331 __skb_unlink(skb, &tp->out_of_order_queue);
4324 __kfree_skb(skb); 4332 __kfree_skb(skb);
4325 continue; 4333 continue;
@@ -4367,6 +4375,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4367 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) 4375 if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
4368 goto drop; 4376 goto drop;
4369 4377
4378 skb_dst_drop(skb);
4370 __skb_pull(skb, th->doff * 4); 4379 __skb_pull(skb, th->doff * 4);
4371 4380
4372 TCP_ECN_accept_cwr(tp, skb); 4381 TCP_ECN_accept_cwr(tp, skb);
@@ -5430,6 +5439,7 @@ discard:
5430 __kfree_skb(skb); 5439 __kfree_skb(skb);
5431 return 0; 5440 return 0;
5432} 5441}
5442EXPORT_SYMBOL(tcp_rcv_established);
5433 5443
5434static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 5444static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5435 struct tcphdr *th, unsigned len) 5445 struct tcphdr *th, unsigned len)
@@ -5833,7 +5843,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5833 if (tp->snd_una == tp->write_seq) { 5843 if (tp->snd_una == tp->write_seq) {
5834 tcp_set_state(sk, TCP_FIN_WAIT2); 5844 tcp_set_state(sk, TCP_FIN_WAIT2);
5835 sk->sk_shutdown |= SEND_SHUTDOWN; 5845 sk->sk_shutdown |= SEND_SHUTDOWN;
5836 dst_confirm(sk->sk_dst_cache); 5846 dst_confirm(__sk_dst_get(sk));
5837 5847
5838 if (!sock_flag(sk, SOCK_DEAD)) 5848 if (!sock_flag(sk, SOCK_DEAD))
5839 /* Wake up lingering close() */ 5849 /* Wake up lingering close() */
@@ -5929,14 +5939,4 @@ discard:
5929 } 5939 }
5930 return 0; 5940 return 0;
5931} 5941}
5932
5933EXPORT_SYMBOL(sysctl_tcp_ecn);
5934EXPORT_SYMBOL(sysctl_tcp_reordering);
5935EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
5936EXPORT_SYMBOL(tcp_parse_options);
5937#ifdef CONFIG_TCP_MD5SIG
5938EXPORT_SYMBOL(tcp_parse_md5sig_option);
5939#endif
5940EXPORT_SYMBOL(tcp_rcv_established);
5941EXPORT_SYMBOL(tcp_rcv_state_process); 5942EXPORT_SYMBOL(tcp_rcv_state_process);
5942EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3c23e70885f4..020766292bb0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,6 +84,7 @@
84 84
85int sysctl_tcp_tw_reuse __read_mostly; 85int sysctl_tcp_tw_reuse __read_mostly;
86int sysctl_tcp_low_latency __read_mostly; 86int sysctl_tcp_low_latency __read_mostly;
87EXPORT_SYMBOL(sysctl_tcp_low_latency);
87 88
88 89
89#ifdef CONFIG_TCP_MD5SIG 90#ifdef CONFIG_TCP_MD5SIG
@@ -100,6 +101,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
100#endif 101#endif
101 102
102struct inet_hashinfo tcp_hashinfo; 103struct inet_hashinfo tcp_hashinfo;
104EXPORT_SYMBOL(tcp_hashinfo);
103 105
104static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) 106static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
105{ 107{
@@ -139,7 +141,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
139 141
140 return 0; 142 return 0;
141} 143}
142
143EXPORT_SYMBOL_GPL(tcp_twsk_unique); 144EXPORT_SYMBOL_GPL(tcp_twsk_unique);
144 145
145/* This will initiate an outgoing connection. */ 146/* This will initiate an outgoing connection. */
@@ -204,10 +205,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
204 * TIME-WAIT * and initialize rx_opt.ts_recent from it, 205 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
205 * when trying new connection. 206 * when trying new connection.
206 */ 207 */
207 if (peer != NULL && 208 if (peer) {
208 (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) { 209 inet_peer_refcheck(peer);
209 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; 210 if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
210 tp->rx_opt.ts_recent = peer->tcp_ts; 211 tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
212 tp->rx_opt.ts_recent = peer->tcp_ts;
213 }
211 } 214 }
212 } 215 }
213 216
@@ -237,7 +240,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
237 240
238 /* OK, now commit destination to socket. */ 241 /* OK, now commit destination to socket. */
239 sk->sk_gso_type = SKB_GSO_TCPV4; 242 sk->sk_gso_type = SKB_GSO_TCPV4;
240 sk_setup_caps(sk, &rt->u.dst); 243 sk_setup_caps(sk, &rt->dst);
241 244
242 if (!tp->write_seq) 245 if (!tp->write_seq)
243 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, 246 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
@@ -265,6 +268,7 @@ failure:
265 inet->inet_dport = 0; 268 inet->inet_dport = 0;
266 return err; 269 return err;
267} 270}
271EXPORT_SYMBOL(tcp_v4_connect);
268 272
269/* 273/*
270 * This routine does path mtu discovery as defined in RFC1191. 274 * This routine does path mtu discovery as defined in RFC1191.
@@ -519,26 +523,32 @@ out:
519 sock_put(sk); 523 sock_put(sk);
520} 524}
521 525
522/* This routine computes an IPv4 TCP checksum. */ 526static void __tcp_v4_send_check(struct sk_buff *skb,
523void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) 527 __be32 saddr, __be32 daddr)
524{ 528{
525 struct inet_sock *inet = inet_sk(sk);
526 struct tcphdr *th = tcp_hdr(skb); 529 struct tcphdr *th = tcp_hdr(skb);
527 530
528 if (skb->ip_summed == CHECKSUM_PARTIAL) { 531 if (skb->ip_summed == CHECKSUM_PARTIAL) {
529 th->check = ~tcp_v4_check(len, inet->inet_saddr, 532 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
530 inet->inet_daddr, 0);
531 skb->csum_start = skb_transport_header(skb) - skb->head; 533 skb->csum_start = skb_transport_header(skb) - skb->head;
532 skb->csum_offset = offsetof(struct tcphdr, check); 534 skb->csum_offset = offsetof(struct tcphdr, check);
533 } else { 535 } else {
534 th->check = tcp_v4_check(len, inet->inet_saddr, 536 th->check = tcp_v4_check(skb->len, saddr, daddr,
535 inet->inet_daddr,
536 csum_partial(th, 537 csum_partial(th,
537 th->doff << 2, 538 th->doff << 2,
538 skb->csum)); 539 skb->csum));
539 } 540 }
540} 541}
541 542
543/* This routine computes an IPv4 TCP checksum. */
544void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
545{
546 struct inet_sock *inet = inet_sk(sk);
547
548 __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
549}
550EXPORT_SYMBOL(tcp_v4_send_check);
551
542int tcp_v4_gso_send_check(struct sk_buff *skb) 552int tcp_v4_gso_send_check(struct sk_buff *skb)
543{ 553{
544 const struct iphdr *iph; 554 const struct iphdr *iph;
@@ -551,10 +561,8 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
551 th = tcp_hdr(skb); 561 th = tcp_hdr(skb);
552 562
553 th->check = 0; 563 th->check = 0;
554 th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
555 skb->csum_start = skb_transport_header(skb) - skb->head;
556 skb->csum_offset = offsetof(struct tcphdr, check);
557 skb->ip_summed = CHECKSUM_PARTIAL; 564 skb->ip_summed = CHECKSUM_PARTIAL;
565 __tcp_v4_send_check(skb, iph->saddr, iph->daddr);
558 return 0; 566 return 0;
559} 567}
560 568
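Note how the checksum refactor above works: __tcp_v4_send_check() is the one place that fills in the TCP checksum for a given source/destination pair. With CHECKSUM_PARTIAL it stores only the pseudo-header sum and points csum_start/csum_offset at the check field for the hardware to finish; otherwise it folds csum_partial() over header and payload in software, and both tcp_v4_send_check() and the GSO path now delegate to it. For reference, the one's-complement arithmetic behind tcp_v4_check(), as a freestanding sketch (host-order inputs assumed; the kernel uses its optimized csum primitives instead):

    #include <stdint.h>
    #include <stddef.h>

    /* RFC 1071 checksum over the IPv4 pseudo header plus TCP segment. */
    static uint16_t tcp_checksum(uint32_t saddr, uint32_t daddr,
                                 const uint8_t *seg, size_t len)
    {
            uint32_t sum = 0;
            size_t i;

            /* pseudo header: addresses, protocol 6 (TCP), TCP length */
            sum += (saddr >> 16) + (saddr & 0xffff);
            sum += (daddr >> 16) + (daddr & 0xffff);
            sum += 6;
            sum += (uint32_t)len;

            for (i = 0; i + 1 < len; i += 2)        /* 16 bits at a time */
                    sum += ((uint32_t)seg[i] << 8) | seg[i + 1];
            if (len & 1)                            /* pad a trailing byte */
                    sum += (uint32_t)seg[len - 1] << 8;

            while (sum >> 16)                       /* fold the carries */
                    sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)~sum;
    }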
@@ -763,13 +771,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
763 skb = tcp_make_synack(sk, dst, req, rvp); 771 skb = tcp_make_synack(sk, dst, req, rvp);
764 772
765 if (skb) { 773 if (skb) {
766 struct tcphdr *th = tcp_hdr(skb); 774 __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
767
768 th->check = tcp_v4_check(skb->len,
769 ireq->loc_addr,
770 ireq->rmt_addr,
771 csum_partial(th, skb->len,
772 skb->csum));
773 775
774 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, 776 err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
775 ireq->rmt_addr, 777 ireq->rmt_addr,
@@ -796,19 +798,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
796 kfree(inet_rsk(req)->opt); 798 kfree(inet_rsk(req)->opt);
797} 799}
798 800
799#ifdef CONFIG_SYN_COOKIES 801static void syn_flood_warning(const struct sk_buff *skb)
800static void syn_flood_warning(struct sk_buff *skb)
801{ 802{
802 static unsigned long warntime; 803 const char *msg;
803 804
804 if (time_after(jiffies, (warntime + HZ * 60))) { 805#ifdef CONFIG_SYN_COOKIES
805 warntime = jiffies; 806 if (sysctl_tcp_syncookies)
806 printk(KERN_INFO 807 msg = "Sending cookies";
807 "possible SYN flooding on port %d. Sending cookies.\n", 808 else
808 ntohs(tcp_hdr(skb)->dest));
809 }
810}
811#endif 809#endif
810 msg = "Dropping request";
811
812 pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
813 ntohs(tcp_hdr(skb)->dest), msg);
814}
812 815
813/* 816/*
814 * Save and compile IPv4 options into the request_sock if needed. 817 * Save and compile IPv4 options into the request_sock if needed.
@@ -860,7 +863,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
860{ 863{
861 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr); 864 return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
862} 865}
863
864EXPORT_SYMBOL(tcp_v4_md5_lookup); 866EXPORT_SYMBOL(tcp_v4_md5_lookup);
865 867
866static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, 868static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
@@ -894,7 +896,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
894 kfree(newkey); 896 kfree(newkey);
895 return -ENOMEM; 897 return -ENOMEM;
896 } 898 }
897 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 899 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
898 } 900 }
899 if (tcp_alloc_md5sig_pool(sk) == NULL) { 901 if (tcp_alloc_md5sig_pool(sk) == NULL) {
900 kfree(newkey); 902 kfree(newkey);
@@ -927,7 +929,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
927 } 929 }
928 return 0; 930 return 0;
929} 931}
930
931EXPORT_SYMBOL(tcp_v4_md5_do_add); 932EXPORT_SYMBOL(tcp_v4_md5_do_add);
932 933
933static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, 934static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
@@ -965,7 +966,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
965 } 966 }
966 return -ENOENT; 967 return -ENOENT;
967} 968}
968
969EXPORT_SYMBOL(tcp_v4_md5_do_del); 969EXPORT_SYMBOL(tcp_v4_md5_do_del);
970 970
971static void tcp_v4_clear_md5_list(struct sock *sk) 971static void tcp_v4_clear_md5_list(struct sock *sk)
@@ -1024,7 +1024,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1024 return -EINVAL; 1024 return -EINVAL;
1025 1025
1026 tp->md5sig_info = p; 1026 tp->md5sig_info = p;
1027 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 1027 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
1028 } 1028 }
1029 1029
1030 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation); 1030 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
@@ -1138,7 +1138,6 @@ clear_hash_noput:
1138 memset(md5_hash, 0, 16); 1138 memset(md5_hash, 0, 16);
1139 return 1; 1139 return 1;
1140} 1140}
1141
1142EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1141EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1143 1142
1144static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) 1143static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
@@ -1246,6 +1245,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1246 * evidently real one. 1245 * evidently real one.
1247 */ 1246 */
1248 if (inet_csk_reqsk_queue_is_full(sk) && !isn) { 1247 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1248 if (net_ratelimit())
1249 syn_flood_warning(skb);
1249#ifdef CONFIG_SYN_COOKIES 1250#ifdef CONFIG_SYN_COOKIES
1250 if (sysctl_tcp_syncookies) { 1251 if (sysctl_tcp_syncookies) {
1251 want_cookie = 1; 1252 want_cookie = 1;
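The syn_flood_warning() rework earlier in this file also changes how the message is throttled: the helper no longer keeps its own once-a-minute timestamp, the caller here simply gates it with net_ratelimit(), and the text reports whether cookies will be sent or the request dropped. The retired private throttle looked roughly like this (illustrative sketch only):

    /* One warning per minute; unsigned subtraction keeps the
     * comparison safe across jiffies wraparound. */
    static int warn_once_per_minute(unsigned long now_jiffies, unsigned long hz)
    {
            static unsigned long warntime;

            if (now_jiffies - warntime < 60 * hz)
                    return 0;       /* still inside the quiet period */
            warntime = now_jiffies;
            return 1;
    }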
@@ -1289,8 +1290,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1289 goto drop_and_release; 1290 goto drop_and_release;
1290 1291
1291 /* Secret recipe starts with IP addresses */ 1292 /* Secret recipe starts with IP addresses */
1292 *mess++ ^= daddr; 1293 *mess++ ^= (__force u32)daddr;
1293 *mess++ ^= saddr; 1294 *mess++ ^= (__force u32)saddr;
1294 1295
1295 /* plus variable length Initiator Cookie */ 1296 /* plus variable length Initiator Cookie */
1296 c = (u8 *)mess; 1297 c = (u8 *)mess;
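The (__force u32) casts above change nothing at runtime: daddr and saddr are __be32, and XOR-ing raw big-endian values into the cookie hash is fine because the result is opaque. The casts only tell sparse that the byte-order mixing is intentional. A minimal illustration of the machinery (mirroring the kernel's __be32 definitions; a plain compiler sees ordinary integers):

    #ifdef __CHECKER__                      /* defined when sparse runs */
    #define __bitwise __attribute__((bitwise))
    #define __force   __attribute__((force))
    #else
    #define __bitwise
    #define __force
    #endif

    typedef unsigned int u32;
    typedef u32 __bitwise be32;

    static u32 mix_into_hash(u32 hash, be32 addr)
    {
            /* opaque hash input: byte order is irrelevant here, so
             * the cast merely silences the checker */
            return hash ^ (__force u32)addr;
    }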
@@ -1326,15 +1327,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1326 if (security_inet_conn_request(sk, skb, req)) 1327 if (security_inet_conn_request(sk, skb, req))
1327 goto drop_and_free; 1328 goto drop_and_free;
1328 1329
1329 if (!want_cookie) 1330 if (!want_cookie || tmp_opt.tstamp_ok)
1330 TCP_ECN_create_request(req, tcp_hdr(skb)); 1331 TCP_ECN_create_request(req, tcp_hdr(skb));
1331 1332
1332 if (want_cookie) { 1333 if (want_cookie) {
1333#ifdef CONFIG_SYN_COOKIES
1334 syn_flood_warning(skb);
1335 req->cookie_ts = tmp_opt.tstamp_ok;
1336#endif
1337 isn = cookie_v4_init_sequence(sk, skb, &req->mss); 1334 isn = cookie_v4_init_sequence(sk, skb, &req->mss);
1335 req->cookie_ts = tmp_opt.tstamp_ok;
1338 } else if (!isn) { 1336 } else if (!isn) {
1339 struct inet_peer *peer = NULL; 1337 struct inet_peer *peer = NULL;
1340 1338
@@ -1352,6 +1350,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1352 (dst = inet_csk_route_req(sk, req)) != NULL && 1350 (dst = inet_csk_route_req(sk, req)) != NULL &&
1353 (peer = rt_get_peer((struct rtable *)dst)) != NULL && 1351 (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
1354 peer->v4daddr == saddr) { 1352 peer->v4daddr == saddr) {
1353 inet_peer_refcheck(peer);
1355 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && 1354 if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
1356 (s32)(peer->tcp_ts - req->ts_recent) > 1355 (s32)(peer->tcp_ts - req->ts_recent) >
1357 TCP_PAWS_WINDOW) { 1356 TCP_PAWS_WINDOW) {
@@ -1396,6 +1395,7 @@ drop_and_free:
1396drop: 1395drop:
1397 return 0; 1396 return 0;
1398} 1397}
1398EXPORT_SYMBOL(tcp_v4_conn_request);
1399 1399
1400 1400
1401/* 1401/*
@@ -1465,7 +1465,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1465 if (newkey != NULL) 1465 if (newkey != NULL)
1466 tcp_v4_md5_do_add(newsk, newinet->inet_daddr, 1466 tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
1467 newkey, key->keylen); 1467 newkey, key->keylen);
1468 newsk->sk_route_caps &= ~NETIF_F_GSO_MASK; 1468 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1469 } 1469 }
1470#endif 1470#endif
1471 1471
@@ -1481,6 +1481,7 @@ exit:
1481 dst_release(dst); 1481 dst_release(dst);
1482 return NULL; 1482 return NULL;
1483} 1483}
1484EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1484 1485
1485static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 1486static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1486{ 1487{
@@ -1507,7 +1508,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
1507 } 1508 }
1508 1509
1509#ifdef CONFIG_SYN_COOKIES 1510#ifdef CONFIG_SYN_COOKIES
1510 if (!th->rst && !th->syn && th->ack) 1511 if (!th->syn)
1511 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); 1512 sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
1512#endif 1513#endif
1513 return sk; 1514 return sk;
@@ -1558,6 +1559,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1558#endif 1559#endif
1559 1560
1560 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1561 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1562 sock_rps_save_rxhash(sk, skb->rxhash);
1561 TCP_CHECK_TIMER(sk); 1563 TCP_CHECK_TIMER(sk);
1562 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { 1564 if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
1563 rsk = sk; 1565 rsk = sk;
@@ -1582,7 +1584,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1582 } 1584 }
1583 return 0; 1585 return 0;
1584 } 1586 }
1585 } 1587 } else
1588 sock_rps_save_rxhash(sk, skb->rxhash);
1589
1586 1590
1587 TCP_CHECK_TIMER(sk); 1591 TCP_CHECK_TIMER(sk);
1588 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { 1592 if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
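sock_rps_save_rxhash() is now called on both the fast path and the non-established path above: it records the NIC-computed flow hash on the socket so Receive Flow Steering can steer later packets of this flow toward the CPU where the consuming thread last ran. The helper amounts to a compare-before-store; a toy version with hypothetical types:

    struct toy_sock {
            unsigned int rxhash;    /* last seen receive flow hash */
    };

    static void toy_rps_save_rxhash(struct toy_sock *sk, unsigned int rxhash)
    {
            if (sk->rxhash != rxhash)       /* write only on change */
                    sk->rxhash = rxhash;
    }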
@@ -1607,6 +1611,7 @@ csum_err:
1607 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); 1611 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
1608 goto discard; 1612 goto discard;
1609} 1613}
1614EXPORT_SYMBOL(tcp_v4_do_rcv);
1610 1615
1611/* 1616/*
1612 * From tcp_input.c 1617 * From tcp_input.c
@@ -1793,6 +1798,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
1793 1798
1794 return 0; 1799 return 0;
1795} 1800}
1801EXPORT_SYMBOL(tcp_v4_remember_stamp);
1796 1802
1797int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) 1803int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
1798{ 1804{
@@ -1832,6 +1838,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
1832 .compat_getsockopt = compat_ip_getsockopt, 1838 .compat_getsockopt = compat_ip_getsockopt,
1833#endif 1839#endif
1834}; 1840};
1841EXPORT_SYMBOL(ipv4_specific);
1835 1842
1836#ifdef CONFIG_TCP_MD5SIG 1843#ifdef CONFIG_TCP_MD5SIG
1837static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { 1844static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
@@ -1960,7 +1967,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
1960 1967
1961 percpu_counter_dec(&tcp_sockets_allocated); 1968 percpu_counter_dec(&tcp_sockets_allocated);
1962} 1969}
1963
1964EXPORT_SYMBOL(tcp_v4_destroy_sock); 1970EXPORT_SYMBOL(tcp_v4_destroy_sock);
1965 1971
1966#ifdef CONFIG_PROC_FS 1972#ifdef CONFIG_PROC_FS
@@ -1978,6 +1984,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
1978 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 1984 hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
1979} 1985}
1980 1986
1987/*
1988 * Get the next listener socket after cur. If cur is NULL, get the first
1989 * socket, starting from the bucket given in st->bucket; when st->bucket is
1990 * zero, the very first socket in the hash table is returned.
1991 */
1981static void *listening_get_next(struct seq_file *seq, void *cur) 1992static void *listening_get_next(struct seq_file *seq, void *cur)
1982{ 1993{
1983 struct inet_connection_sock *icsk; 1994 struct inet_connection_sock *icsk;
@@ -1988,14 +1999,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
1988 struct net *net = seq_file_net(seq); 1999 struct net *net = seq_file_net(seq);
1989 2000
1990 if (!sk) { 2001 if (!sk) {
1991 st->bucket = 0; 2002 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1992 ilb = &tcp_hashinfo.listening_hash[0];
1993 spin_lock_bh(&ilb->lock); 2003 spin_lock_bh(&ilb->lock);
1994 sk = sk_nulls_head(&ilb->head); 2004 sk = sk_nulls_head(&ilb->head);
2005 st->offset = 0;
1995 goto get_sk; 2006 goto get_sk;
1996 } 2007 }
1997 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2008 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1998 ++st->num; 2009 ++st->num;
2010 ++st->offset;
1999 2011
2000 if (st->state == TCP_SEQ_STATE_OPENREQ) { 2012 if (st->state == TCP_SEQ_STATE_OPENREQ) {
2001 struct request_sock *req = cur; 2013 struct request_sock *req = cur;
@@ -2010,6 +2022,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
2010 } 2022 }
2011 req = req->dl_next; 2023 req = req->dl_next;
2012 } 2024 }
2025 st->offset = 0;
2013 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 2026 if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
2014 break; 2027 break;
2015get_req: 2028get_req:
@@ -2045,6 +2058,7 @@ start_req:
2045 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2058 read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
2046 } 2059 }
2047 spin_unlock_bh(&ilb->lock); 2060 spin_unlock_bh(&ilb->lock);
2061 st->offset = 0;
2048 if (++st->bucket < INET_LHTABLE_SIZE) { 2062 if (++st->bucket < INET_LHTABLE_SIZE) {
2049 ilb = &tcp_hashinfo.listening_hash[st->bucket]; 2063 ilb = &tcp_hashinfo.listening_hash[st->bucket];
2050 spin_lock_bh(&ilb->lock); 2064 spin_lock_bh(&ilb->lock);
@@ -2058,7 +2072,12 @@ out:
2058 2072
2059static void *listening_get_idx(struct seq_file *seq, loff_t *pos) 2073static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
2060{ 2074{
2061 void *rc = listening_get_next(seq, NULL); 2075 struct tcp_iter_state *st = seq->private;
2076 void *rc;
2077
2078 st->bucket = 0;
2079 st->offset = 0;
2080 rc = listening_get_next(seq, NULL);
2062 2081
2063 while (rc && *pos) { 2082 while (rc && *pos) {
2064 rc = listening_get_next(seq, rc); 2083 rc = listening_get_next(seq, rc);
@@ -2073,13 +2092,18 @@ static inline int empty_bucket(struct tcp_iter_state *st)
2073 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); 2092 hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
2074} 2093}
2075 2094
2095/*
2096 * Get first established socket starting from bucket given in st->bucket.
2097 * If st->bucket is zero, the very first socket in the hash is returned.
2098 */
2076static void *established_get_first(struct seq_file *seq) 2099static void *established_get_first(struct seq_file *seq)
2077{ 2100{
2078 struct tcp_iter_state *st = seq->private; 2101 struct tcp_iter_state *st = seq->private;
2079 struct net *net = seq_file_net(seq); 2102 struct net *net = seq_file_net(seq);
2080 void *rc = NULL; 2103 void *rc = NULL;
2081 2104
2082 for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 2105 st->offset = 0;
2106 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
2083 struct sock *sk; 2107 struct sock *sk;
2084 struct hlist_nulls_node *node; 2108 struct hlist_nulls_node *node;
2085 struct inet_timewait_sock *tw; 2109 struct inet_timewait_sock *tw;
@@ -2124,6 +2148,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
2124 struct net *net = seq_file_net(seq); 2148 struct net *net = seq_file_net(seq);
2125 2149
2126 ++st->num; 2150 ++st->num;
2151 ++st->offset;
2127 2152
2128 if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 2153 if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
2129 tw = cur; 2154 tw = cur;
@@ -2140,6 +2165,7 @@ get_tw:
2140 st->state = TCP_SEQ_STATE_ESTABLISHED; 2165 st->state = TCP_SEQ_STATE_ESTABLISHED;
2141 2166
2142 /* Look for next non empty bucket */ 2167 /* Look for next non empty bucket */
2168 st->offset = 0;
2143 while (++st->bucket <= tcp_hashinfo.ehash_mask && 2169 while (++st->bucket <= tcp_hashinfo.ehash_mask &&
2144 empty_bucket(st)) 2170 empty_bucket(st))
2145 ; 2171 ;
@@ -2167,7 +2193,11 @@ out:
2167 2193
2168static void *established_get_idx(struct seq_file *seq, loff_t pos) 2194static void *established_get_idx(struct seq_file *seq, loff_t pos)
2169{ 2195{
2170 void *rc = established_get_first(seq); 2196 struct tcp_iter_state *st = seq->private;
2197 void *rc;
2198
2199 st->bucket = 0;
2200 rc = established_get_first(seq);
2171 2201
2172 while (rc && pos) { 2202 while (rc && pos) {
2173 rc = established_get_next(seq, rc); 2203 rc = established_get_next(seq, rc);
@@ -2192,24 +2222,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2192 return rc; 2222 return rc;
2193} 2223}
2194 2224
2225static void *tcp_seek_last_pos(struct seq_file *seq)
2226{
2227 struct tcp_iter_state *st = seq->private;
2228 int offset = st->offset;
2229 int orig_num = st->num;
2230 void *rc = NULL;
2231
2232 switch (st->state) {
2233 case TCP_SEQ_STATE_OPENREQ:
2234 case TCP_SEQ_STATE_LISTENING:
2235 if (st->bucket >= INET_LHTABLE_SIZE)
2236 break;
2237 st->state = TCP_SEQ_STATE_LISTENING;
2238 rc = listening_get_next(seq, NULL);
2239 while (offset-- && rc)
2240 rc = listening_get_next(seq, rc);
2241 if (rc)
2242 break;
2243 st->bucket = 0;
2244 /* Fallthrough */
2245 case TCP_SEQ_STATE_ESTABLISHED:
2246 case TCP_SEQ_STATE_TIME_WAIT:
2247 st->state = TCP_SEQ_STATE_ESTABLISHED;
2248 if (st->bucket > tcp_hashinfo.ehash_mask)
2249 break;
2250 rc = established_get_first(seq);
2251 while (offset-- && rc)
2252 rc = established_get_next(seq, rc);
2253 }
2254
2255 st->num = orig_num;
2256
2257 return rc;
2258}
2259
2195static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) 2260static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2196{ 2261{
2197 struct tcp_iter_state *st = seq->private; 2262 struct tcp_iter_state *st = seq->private;
2263 void *rc;
2264
2265 if (*pos && *pos == st->last_pos) {
2266 rc = tcp_seek_last_pos(seq);
2267 if (rc)
2268 goto out;
2269 }
2270
2198 st->state = TCP_SEQ_STATE_LISTENING; 2271 st->state = TCP_SEQ_STATE_LISTENING;
2199 st->num = 0; 2272 st->num = 0;
2200 return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2273 st->bucket = 0;
2274 st->offset = 0;
2275 rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2276
2277out:
2278 st->last_pos = *pos;
2279 return rc;
2201} 2280}
2202 2281
2203static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2282static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2204{ 2283{
2284 struct tcp_iter_state *st = seq->private;
2205 void *rc = NULL; 2285 void *rc = NULL;
2206 struct tcp_iter_state *st;
2207 2286
2208 if (v == SEQ_START_TOKEN) { 2287 if (v == SEQ_START_TOKEN) {
2209 rc = tcp_get_idx(seq, 0); 2288 rc = tcp_get_idx(seq, 0);
2210 goto out; 2289 goto out;
2211 } 2290 }
2212 st = seq->private;
2213 2291
2214 switch (st->state) { 2292 switch (st->state) {
2215 case TCP_SEQ_STATE_OPENREQ: 2293 case TCP_SEQ_STATE_OPENREQ:
@@ -2217,6 +2295,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2217 rc = listening_get_next(seq, v); 2295 rc = listening_get_next(seq, v);
2218 if (!rc) { 2296 if (!rc) {
2219 st->state = TCP_SEQ_STATE_ESTABLISHED; 2297 st->state = TCP_SEQ_STATE_ESTABLISHED;
2298 st->bucket = 0;
2299 st->offset = 0;
2220 rc = established_get_first(seq); 2300 rc = established_get_first(seq);
2221 } 2301 }
2222 break; 2302 break;
@@ -2227,6 +2307,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2227 } 2307 }
2228out: 2308out:
2229 ++*pos; 2309 ++*pos;
2310 st->last_pos = *pos;
2230 return rc; 2311 return rc;
2231} 2312}
2232 2313
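The tcp_seq_* changes in this region cache st->bucket, st->offset and st->last_pos between read(2) calls: a sequential reader of /proc/net/tcp resumes from the bucket it stopped in rather than rescanning from bucket zero on every chunk, so a full dump is linear instead of quadratic in table size. The resume pattern on a toy hash table (hypothetical structures, for illustration):

    #include <stddef.h>

    #define NBUCKETS 256

    struct node { struct node *next; };

    struct iter {
            size_t bucket;  /* bucket holding the last returned item */
            size_t offset;  /* items of that bucket already consumed */
    };

    static struct node *buckets[NBUCKETS];

    static struct node *iter_resume(struct iter *it)
    {
            for (; it->bucket < NBUCKETS; it->bucket++, it->offset = 0) {
                    struct node *n = buckets[it->bucket];
                    size_t skip = it->offset;

                    while (n && skip--)     /* skip already-emitted items */
                            n = n->next;
                    if (n)
                            return n;       /* caller bumps it->offset per item */
            }
            return NULL;
    }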
@@ -2265,6 +2346,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
2265 2346
2266 s = ((struct seq_file *)file->private_data)->private; 2347 s = ((struct seq_file *)file->private_data)->private;
2267 s->family = afinfo->family; 2348 s->family = afinfo->family;
2349 s->last_pos = 0;
2268 return 0; 2350 return 0;
2269} 2351}
2270 2352
@@ -2288,11 +2370,13 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2288 rc = -ENOMEM; 2370 rc = -ENOMEM;
2289 return rc; 2371 return rc;
2290} 2372}
2373EXPORT_SYMBOL(tcp_proc_register);
2291 2374
2292void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) 2375void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2293{ 2376{
2294 proc_net_remove(net, afinfo->name); 2377 proc_net_remove(net, afinfo->name);
2295} 2378}
2379EXPORT_SYMBOL(tcp_proc_unregister);
2296 2380
2297static void get_openreq4(struct sock *sk, struct request_sock *req, 2381static void get_openreq4(struct sock *sk, struct request_sock *req,
2298 struct seq_file *f, int i, int uid, int *len) 2382 struct seq_file *f, int i, int uid, int *len)
@@ -2516,6 +2600,8 @@ struct proto tcp_prot = {
2516 .setsockopt = tcp_setsockopt, 2600 .setsockopt = tcp_setsockopt,
2517 .getsockopt = tcp_getsockopt, 2601 .getsockopt = tcp_getsockopt,
2518 .recvmsg = tcp_recvmsg, 2602 .recvmsg = tcp_recvmsg,
2603 .sendmsg = tcp_sendmsg,
2604 .sendpage = tcp_sendpage,
2519 .backlog_rcv = tcp_v4_do_rcv, 2605 .backlog_rcv = tcp_v4_do_rcv,
2520 .hash = inet_hash, 2606 .hash = inet_hash,
2521 .unhash = inet_unhash, 2607 .unhash = inet_unhash,
@@ -2534,11 +2620,13 @@ struct proto tcp_prot = {
2534 .twsk_prot = &tcp_timewait_sock_ops, 2620 .twsk_prot = &tcp_timewait_sock_ops,
2535 .rsk_prot = &tcp_request_sock_ops, 2621 .rsk_prot = &tcp_request_sock_ops,
2536 .h.hashinfo = &tcp_hashinfo, 2622 .h.hashinfo = &tcp_hashinfo,
2623 .no_autobind = true,
2537#ifdef CONFIG_COMPAT 2624#ifdef CONFIG_COMPAT
2538 .compat_setsockopt = compat_tcp_setsockopt, 2625 .compat_setsockopt = compat_tcp_setsockopt,
2539 .compat_getsockopt = compat_tcp_getsockopt, 2626 .compat_getsockopt = compat_tcp_getsockopt,
2540#endif 2627#endif
2541}; 2628};
2629EXPORT_SYMBOL(tcp_prot);
2542 2630
2543 2631
2544static int __net_init tcp_sk_init(struct net *net) 2632static int __net_init tcp_sk_init(struct net *net)
@@ -2569,20 +2657,3 @@ void __init tcp_v4_init(void)
2569 if (register_pernet_subsys(&tcp_sk_ops)) 2657 if (register_pernet_subsys(&tcp_sk_ops))
2570 panic("Failed to create the TCP control socket.\n"); 2658 panic("Failed to create the TCP control socket.\n");
2571} 2659}
2572
2573EXPORT_SYMBOL(ipv4_specific);
2574EXPORT_SYMBOL(tcp_hashinfo);
2575EXPORT_SYMBOL(tcp_prot);
2576EXPORT_SYMBOL(tcp_v4_conn_request);
2577EXPORT_SYMBOL(tcp_v4_connect);
2578EXPORT_SYMBOL(tcp_v4_do_rcv);
2579EXPORT_SYMBOL(tcp_v4_remember_stamp);
2580EXPORT_SYMBOL(tcp_v4_send_check);
2581EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
2582
2583#ifdef CONFIG_PROC_FS
2584EXPORT_SYMBOL(tcp_proc_register);
2585EXPORT_SYMBOL(tcp_proc_unregister);
2586#endif
2587EXPORT_SYMBOL(sysctl_tcp_low_latency);
2588
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 5fabff9ac6d6..f25b56cb85cb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -47,7 +47,6 @@ struct inet_timewait_death_row tcp_death_row = {
47 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, 47 .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
48 (unsigned long)&tcp_death_row), 48 (unsigned long)&tcp_death_row),
49}; 49};
50
51EXPORT_SYMBOL_GPL(tcp_death_row); 50EXPORT_SYMBOL_GPL(tcp_death_row);
52 51
53static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) 52static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
@@ -262,6 +261,7 @@ kill:
262 inet_twsk_put(tw); 261 inet_twsk_put(tw);
263 return TCP_TW_SUCCESS; 262 return TCP_TW_SUCCESS;
264} 263}
264EXPORT_SYMBOL(tcp_timewait_state_process);
265 265
266/* 266/*
267 * Move a socket to time-wait or dead fin-wait-2 state. 267 * Move a socket to time-wait or dead fin-wait-2 state.
@@ -362,7 +362,6 @@ void tcp_twsk_destructor(struct sock *sk)
362 tcp_free_md5sig_pool(); 362 tcp_free_md5sig_pool();
363#endif 363#endif
364} 364}
365
366EXPORT_SYMBOL_GPL(tcp_twsk_destructor); 365EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
367 366
368static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, 367static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
@@ -510,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
510 } 509 }
511 return newsk; 510 return newsk;
512} 511}
512EXPORT_SYMBOL(tcp_create_openreq_child);
513 513
514/* 514/*
515 * Process an incoming packet for SYN_RECV sockets represented 515 * Process an incoming packet for SYN_RECV sockets represented
@@ -672,6 +672,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
672 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 672 if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
673 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { 673 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
674 inet_rsk(req)->acked = 1; 674 inet_rsk(req)->acked = 1;
675 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
675 return NULL; 676 return NULL;
676 } 677 }
677 678
@@ -705,6 +706,7 @@ embryonic_reset:
705 inet_csk_reqsk_queue_drop(sk, req, prev); 706 inet_csk_reqsk_queue_drop(sk, req, prev);
706 return NULL; 707 return NULL;
707} 708}
709EXPORT_SYMBOL(tcp_check_req);
708 710
709/* 711/*
710 * Queue segment on the new socket if the new socket is active, 712 * Queue segment on the new socket if the new socket is active,
@@ -736,8 +738,4 @@ int tcp_child_process(struct sock *parent, struct sock *child,
736 sock_put(child); 738 sock_put(child);
737 return ret; 739 return ret;
738} 740}
739
740EXPORT_SYMBOL(tcp_check_req);
741EXPORT_SYMBOL(tcp_child_process); 741EXPORT_SYMBOL(tcp_child_process);
742EXPORT_SYMBOL(tcp_create_openreq_child);
743EXPORT_SYMBOL(tcp_timewait_state_process);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0dda86e72ad8..de3bd8458588 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -247,6 +247,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
247 /* Set the clamp no higher than max representable value */ 247 /* Set the clamp no higher than max representable value */
248 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); 248 (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
249} 249}
250EXPORT_SYMBOL(tcp_select_initial_window);
250 251
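The clamp at the end of tcp_select_initial_window() above encodes the RFC 1323 limit: the advertised window field is 16 bits and the scale factor tops out at 14, so the largest usable window is 65535 << rcv_wscale. A quick standalone check of the attainable maxima:

    #include <stdio.h>

    int main(void)
    {
            unsigned int ws;

            /* RFC 1323 caps the window scale option at 14 */
            for (ws = 0; ws <= 14; ws++)
                    printf("wscale %2u -> max window %10u bytes\n",
                           ws, 65535u << ws);
            return 0;
    }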
251/* Choose a new window to advertise, update state in tcp_sock for the 252/* Choose a new window to advertise, update state in tcp_sock for the
252 * socket, and return result with RFC1323 scaling applied. The return 253 * socket, and return result with RFC1323 scaling applied. The return
@@ -294,9 +295,9 @@ static u16 tcp_select_window(struct sock *sk)
294/* Packet ECN state for a SYN-ACK */ 295/* Packet ECN state for a SYN-ACK */
295static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) 296static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
296{ 297{
297 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR; 298 TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR;
298 if (!(tp->ecn_flags & TCP_ECN_OK)) 299 if (!(tp->ecn_flags & TCP_ECN_OK))
299 TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE; 300 TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE;
300} 301}
301 302
302/* Packet ECN state for a SYN. */ 303/* Packet ECN state for a SYN. */
@@ -306,7 +307,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
306 307
307 tp->ecn_flags = 0; 308 tp->ecn_flags = 0;
308 if (sysctl_tcp_ecn == 1) { 309 if (sysctl_tcp_ecn == 1) {
309 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR; 310 TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR;
310 tp->ecn_flags = TCP_ECN_OK; 311 tp->ecn_flags = TCP_ECN_OK;
311 } 312 }
312} 313}
@@ -350,6 +351,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
350 */ 351 */
351static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) 352static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
352{ 353{
354 skb->ip_summed = CHECKSUM_PARTIAL;
353 skb->csum = 0; 355 skb->csum = 0;
354 356
355 TCP_SKB_CB(skb)->flags = flags; 357 TCP_SKB_CB(skb)->flags = flags;
@@ -360,7 +362,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
360 skb_shinfo(skb)->gso_type = 0; 362 skb_shinfo(skb)->gso_type = 0;
361 363
362 TCP_SKB_CB(skb)->seq = seq; 364 TCP_SKB_CB(skb)->seq = seq;
363 if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN)) 365 if (flags & (TCPHDR_SYN | TCPHDR_FIN))
364 seq++; 366 seq++;
365 TCP_SKB_CB(skb)->end_seq = seq; 367 TCP_SKB_CB(skb)->end_seq = seq;
366} 368}
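From here on the file switches from TCPCB_FLAG_* to TCPHDR_*. The rename is safe because the control-block flags always used the wire encoding of TCP header byte 13, so one set of masks now serves both tcb->flags and the on-wire header; note also that tcp_init_nondata_skb() pre-marks control skbs CHECKSUM_PARTIAL. The flag values, as introduced by this series:

    /* TCP flag bits as they appear in byte 13 of the TCP header */
    #define TCPHDR_FIN 0x01
    #define TCPHDR_SYN 0x02
    #define TCPHDR_RST 0x04
    #define TCPHDR_PSH 0x08
    #define TCPHDR_ACK 0x10
    #define TCPHDR_URG 0x20
    #define TCPHDR_ECE 0x40
    #define TCPHDR_CWR 0x80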
@@ -667,7 +669,6 @@ static unsigned tcp_synack_options(struct sock *sk,
667 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? 669 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
668 xvp->cookie_plus : 670 xvp->cookie_plus :
669 0; 671 0;
670 bool doing_ts = ireq->tstamp_ok;
671 672
672#ifdef CONFIG_TCP_MD5SIG 673#ifdef CONFIG_TCP_MD5SIG
673 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); 674 *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
@@ -680,7 +681,7 @@ static unsigned tcp_synack_options(struct sock *sk,
680 * rather than TS in order to fit in better with old, 681 * rather than TS in order to fit in better with old,
681 * buggy kernels, but that was deemed to be unnecessary. 682 * buggy kernels, but that was deemed to be unnecessary.
682 */ 683 */
683 doing_ts &= !ireq->sack_ok; 684 ireq->tstamp_ok &= !ireq->sack_ok;
684 } 685 }
685#else 686#else
686 *md5 = NULL; 687 *md5 = NULL;
@@ -695,7 +696,7 @@ static unsigned tcp_synack_options(struct sock *sk,
695 opts->options |= OPTION_WSCALE; 696 opts->options |= OPTION_WSCALE;
696 remaining -= TCPOLEN_WSCALE_ALIGNED; 697 remaining -= TCPOLEN_WSCALE_ALIGNED;
697 } 698 }
698 if (likely(doing_ts)) { 699 if (likely(ireq->tstamp_ok)) {
699 opts->options |= OPTION_TS; 700 opts->options |= OPTION_TS;
700 opts->tsval = TCP_SKB_CB(skb)->when; 701 opts->tsval = TCP_SKB_CB(skb)->when;
701 opts->tsecr = req->ts_recent; 702 opts->tsecr = req->ts_recent;
@@ -703,7 +704,7 @@ static unsigned tcp_synack_options(struct sock *sk,
703 } 704 }
704 if (likely(ireq->sack_ok)) { 705 if (likely(ireq->sack_ok)) {
705 opts->options |= OPTION_SACK_ADVERTISE; 706 opts->options |= OPTION_SACK_ADVERTISE;
706 if (unlikely(!doing_ts)) 707 if (unlikely(!ireq->tstamp_ok))
707 remaining -= TCPOLEN_SACKPERM_ALIGNED; 708 remaining -= TCPOLEN_SACKPERM_ALIGNED;
708 } 709 }
709 710
@@ -711,7 +712,7 @@ static unsigned tcp_synack_options(struct sock *sk,
711 * If the <SYN> options fit, the same options should fit now! 712 * If the <SYN> options fit, the same options should fit now!
712 */ 713 */
713 if (*md5 == NULL && 714 if (*md5 == NULL &&
714 doing_ts && 715 ireq->tstamp_ok &&
715 cookie_plus > TCPOLEN_COOKIE_BASE) { 716 cookie_plus > TCPOLEN_COOKIE_BASE) {
716 int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ 717 int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
717 718
@@ -820,7 +821,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
820 tcb = TCP_SKB_CB(skb); 821 tcb = TCP_SKB_CB(skb);
821 memset(&opts, 0, sizeof(opts)); 822 memset(&opts, 0, sizeof(opts));
822 823
823 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) 824 if (unlikely(tcb->flags & TCPHDR_SYN))
824 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); 825 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
825 else 826 else
826 tcp_options_size = tcp_established_options(sk, skb, &opts, 827 tcp_options_size = tcp_established_options(sk, skb, &opts,
@@ -843,7 +844,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
843 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 844 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
844 tcb->flags); 845 tcb->flags);
845 846
846 if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { 847 if (unlikely(tcb->flags & TCPHDR_SYN)) {
847 /* RFC1323: The window in SYN & SYN/ACK segments 848 /* RFC1323: The window in SYN & SYN/ACK segments
848 * is never scaled. 849 * is never scaled.
849 */ 850 */
@@ -860,36 +861,37 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
860 th->urg_ptr = htons(tp->snd_up - tcb->seq); 861 th->urg_ptr = htons(tp->snd_up - tcb->seq);
861 th->urg = 1; 862 th->urg = 1;
862 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { 863 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
863 th->urg_ptr = 0xFFFF; 864 th->urg_ptr = htons(0xFFFF);
864 th->urg = 1; 865 th->urg = 1;
865 } 866 }
866 } 867 }
867 868
868 tcp_options_write((__be32 *)(th + 1), tp, &opts); 869 tcp_options_write((__be32 *)(th + 1), tp, &opts);
869 if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0)) 870 if (likely((tcb->flags & TCPHDR_SYN) == 0))
870 TCP_ECN_send(sk, skb, tcp_header_size); 871 TCP_ECN_send(sk, skb, tcp_header_size);
871 872
872#ifdef CONFIG_TCP_MD5SIG 873#ifdef CONFIG_TCP_MD5SIG
873 /* Calculate the MD5 hash, as we have all we need now */ 874 /* Calculate the MD5 hash, as we have all we need now */
874 if (md5) { 875 if (md5) {
875 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 876 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
876 tp->af_specific->calc_md5_hash(opts.hash_location, 877 tp->af_specific->calc_md5_hash(opts.hash_location,
877 md5, sk, NULL, skb); 878 md5, sk, NULL, skb);
878 } 879 }
879#endif 880#endif
880 881
881 icsk->icsk_af_ops->send_check(sk, skb->len, skb); 882 icsk->icsk_af_ops->send_check(sk, skb);
882 883
883 if (likely(tcb->flags & TCPCB_FLAG_ACK)) 884 if (likely(tcb->flags & TCPHDR_ACK))
884 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 885 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
885 886
886 if (skb->len != tcp_header_size) 887 if (skb->len != tcp_header_size)
887 tcp_event_data_sent(tp, skb, sk); 888 tcp_event_data_sent(tp, skb, sk);
888 889
889 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) 890 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
890 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); 891 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
892 tcp_skb_pcount(skb));
891 893
892 err = icsk->icsk_af_ops->queue_xmit(skb, 0); 894 err = icsk->icsk_af_ops->queue_xmit(skb);
893 if (likely(err <= 0)) 895 if (likely(err <= 0))
894 return err; 896 return err;
895 897
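Switching OUTSEGS to TCP_ADD_STATS(..., tcp_skb_pcount(skb)) above makes the MIB count wire segments rather than GSO super-packets: a 64 KB skb at a 1460-byte MSS accounts for 45 segments, not one. The kernel reads the count from skb_shinfo(skb)->gso_segs; conceptually it is a ceiling division:

    /* Segments a (possibly GSO) payload becomes on the wire. */
    static unsigned int segs_on_wire(unsigned int len, unsigned int mss)
    {
            return len ? (len + mss - 1) / mss : 1;
    }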
@@ -1022,7 +1024,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1022 1024
1023 /* PSH and FIN should only be set in the second packet. */ 1025 /* PSH and FIN should only be set in the second packet. */
1024 flags = TCP_SKB_CB(skb)->flags; 1026 flags = TCP_SKB_CB(skb)->flags;
1025 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); 1027 TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1026 TCP_SKB_CB(buff)->flags = flags; 1028 TCP_SKB_CB(buff)->flags = flags;
1027 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; 1029 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1028 1030
@@ -1188,6 +1190,7 @@ void tcp_mtup_init(struct sock *sk)
1188 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); 1190 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
1189 icsk->icsk_mtup.probe_size = 0; 1191 icsk->icsk_mtup.probe_size = 0;
1190} 1192}
1193EXPORT_SYMBOL(tcp_mtup_init);
1191 1194
1192/* This function synchronizes snd mss to current pmtu/exthdr set. 1195/* This function synchronizes snd mss to current pmtu/exthdr set.
1193 1196
@@ -1231,6 +1234,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1231 1234
1232 return mss_now; 1235 return mss_now;
1233} 1236}
1237EXPORT_SYMBOL(tcp_sync_mss);
1234 1238
1235/* Compute the current effective MSS, taking SACKs and IP options, 1239/* Compute the current effective MSS, taking SACKs and IP options,
1236 * and even PMTU discovery events into account. 1240 * and even PMTU discovery events into account.
@@ -1327,8 +1331,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
1327 u32 in_flight, cwnd; 1331 u32 in_flight, cwnd;
1328 1332
1329 /* Don't be strict about the congestion window for the final FIN. */ 1333 /* Don't be strict about the congestion window for the final FIN. */
1330 if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 1334 if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1)
1331 tcp_skb_pcount(skb) == 1)
1332 return 1; 1335 return 1;
1333 1336
1334 in_flight = tcp_packets_in_flight(tp); 1337 in_flight = tcp_packets_in_flight(tp);
@@ -1397,7 +1400,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
1397 * Nagle can be ignored during F-RTO too (see RFC4138). 1400 * Nagle can be ignored during F-RTO too (see RFC4138).
1398 */ 1401 */
1399 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || 1402 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1400 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) 1403 (TCP_SKB_CB(skb)->flags & TCPHDR_FIN))
1401 return 1; 1404 return 1;
1402 1405
1403 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) 1406 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1460,7 +1463,7 @@ int tcp_may_send_now(struct sock *sk)
1460 * packet has never been sent out before (and thus is not cloned). 1463 * packet has never been sent out before (and thus is not cloned).
1461 */ 1464 */
1462static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, 1465static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1463 unsigned int mss_now) 1466 unsigned int mss_now, gfp_t gfp)
1464{ 1467{
1465 struct sk_buff *buff; 1468 struct sk_buff *buff;
1466 int nlen = skb->len - len; 1469 int nlen = skb->len - len;
@@ -1470,7 +1473,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1470 if (skb->len != skb->data_len) 1473 if (skb->len != skb->data_len)
1471 return tcp_fragment(sk, skb, len, mss_now); 1474 return tcp_fragment(sk, skb, len, mss_now);
1472 1475
1473 buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC); 1476 buff = sk_stream_alloc_skb(sk, 0, gfp);
1474 if (unlikely(buff == NULL)) 1477 if (unlikely(buff == NULL))
1475 return -ENOMEM; 1478 return -ENOMEM;
1476 1479
@@ -1486,7 +1489,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1486 1489
1487 /* PSH and FIN should only be set in the second packet. */ 1490 /* PSH and FIN should only be set in the second packet. */
1488 flags = TCP_SKB_CB(skb)->flags; 1491 flags = TCP_SKB_CB(skb)->flags;
1489 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH); 1492 TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1490 TCP_SKB_CB(buff)->flags = flags; 1493 TCP_SKB_CB(buff)->flags = flags;
1491 1494
1492 /* This packet was never sent out yet, so no SACK bits. */ 1495 /* This packet was never sent out yet, so no SACK bits. */
@@ -1517,7 +1520,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1517 const struct inet_connection_sock *icsk = inet_csk(sk); 1520 const struct inet_connection_sock *icsk = inet_csk(sk);
1518 u32 send_win, cong_win, limit, in_flight; 1521 u32 send_win, cong_win, limit, in_flight;
1519 1522
1520 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) 1523 if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
1521 goto send_now; 1524 goto send_now;
1522 1525
1523 if (icsk->icsk_ca_state != TCP_CA_Open) 1526 if (icsk->icsk_ca_state != TCP_CA_Open)
@@ -1643,7 +1646,7 @@ static int tcp_mtu_probe(struct sock *sk)
1643 1646
1644 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; 1647 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1645 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; 1648 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1646 TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; 1649 TCP_SKB_CB(nskb)->flags = TCPHDR_ACK;
1647 TCP_SKB_CB(nskb)->sacked = 0; 1650 TCP_SKB_CB(nskb)->sacked = 0;
1648 nskb->csum = 0; 1651 nskb->csum = 0;
1649 nskb->ip_summed = skb->ip_summed; 1652 nskb->ip_summed = skb->ip_summed;
@@ -1668,7 +1671,7 @@ static int tcp_mtu_probe(struct sock *sk)
1668 sk_wmem_free_skb(sk, skb); 1671 sk_wmem_free_skb(sk, skb);
1669 } else { 1672 } else {
1670 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags & 1673 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
1671 ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); 1674 ~(TCPHDR_FIN|TCPHDR_PSH);
1672 if (!skb_shinfo(skb)->nr_frags) { 1675 if (!skb_shinfo(skb)->nr_frags) {
1673 skb_pull(skb, copy); 1676 skb_pull(skb, copy);
1674 if (skb->ip_summed != CHECKSUM_PARTIAL) 1677 if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -1768,7 +1771,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1768 cwnd_quota); 1771 cwnd_quota);
1769 1772
1770 if (skb->len > limit && 1773 if (skb->len > limit &&
1771 unlikely(tso_fragment(sk, skb, limit, mss_now))) 1774 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
1772 break; 1775 break;
1773 1776
1774 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1777 TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2019,7 +2022,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2019 2022
2020 if (!sysctl_tcp_retrans_collapse) 2023 if (!sysctl_tcp_retrans_collapse)
2021 return; 2024 return;
2022 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) 2025 if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN)
2023 return; 2026 return;
2024 2027
2025 tcp_for_write_queue_from_safe(skb, tmp, sk) { 2028 tcp_for_write_queue_from_safe(skb, tmp, sk) {
@@ -2111,7 +2114,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2111 * since it is cheap to do so and saves bytes on the network. 2114 * since it is cheap to do so and saves bytes on the network.
2112 */ 2115 */
2113 if (skb->len > 0 && 2116 if (skb->len > 0 &&
2114 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && 2117 (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) &&
2115 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { 2118 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2116 if (!pskb_trim(skb, 0)) { 2119 if (!pskb_trim(skb, 0)) {
2117 /* Reuse, even though it does some unnecessary work */ 2120 /* Reuse, even though it does some unnecessary work */
@@ -2207,6 +2210,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
2207 int mib_idx; 2210 int mib_idx;
2208 int fwd_rexmitting = 0; 2211 int fwd_rexmitting = 0;
2209 2212
2213 if (!tp->packets_out)
2214 return;
2215
2210 if (!tp->lost_out) 2216 if (!tp->lost_out)
2211 tp->retransmit_high = tp->snd_una; 2217 tp->retransmit_high = tp->snd_una;
2212 2218
@@ -2300,7 +2306,7 @@ void tcp_send_fin(struct sock *sk)
2300 mss_now = tcp_current_mss(sk); 2306 mss_now = tcp_current_mss(sk);
2301 2307
2302 if (tcp_send_head(sk) != NULL) { 2308 if (tcp_send_head(sk) != NULL) {
2303 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; 2309 TCP_SKB_CB(skb)->flags |= TCPHDR_FIN;
2304 TCP_SKB_CB(skb)->end_seq++; 2310 TCP_SKB_CB(skb)->end_seq++;
2305 tp->write_seq++; 2311 tp->write_seq++;
2306 } else { 2312 } else {
@@ -2317,7 +2323,7 @@ void tcp_send_fin(struct sock *sk)
2317 skb_reserve(skb, MAX_TCP_HEADER); 2323 skb_reserve(skb, MAX_TCP_HEADER);
2318 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ 2324 /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
2319 tcp_init_nondata_skb(skb, tp->write_seq, 2325 tcp_init_nondata_skb(skb, tp->write_seq,
2320 TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); 2326 TCPHDR_ACK | TCPHDR_FIN);
2321 tcp_queue_skb(sk, skb); 2327 tcp_queue_skb(sk, skb);
2322 } 2328 }
2323 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); 2329 __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2342,7 +2348,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
2342 /* Reserve space for headers and prepare control bits. */ 2348 /* Reserve space for headers and prepare control bits. */
2343 skb_reserve(skb, MAX_TCP_HEADER); 2349 skb_reserve(skb, MAX_TCP_HEADER);
2344 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk), 2350 tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
2345 TCPCB_FLAG_ACK | TCPCB_FLAG_RST); 2351 TCPHDR_ACK | TCPHDR_RST);
2346 /* Send it off. */ 2352 /* Send it off. */
2347 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2353 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2348 if (tcp_transmit_skb(sk, skb, 0, priority)) 2354 if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2362,11 +2368,11 @@ int tcp_send_synack(struct sock *sk)
2362 struct sk_buff *skb; 2368 struct sk_buff *skb;
2363 2369
2364 skb = tcp_write_queue_head(sk); 2370 skb = tcp_write_queue_head(sk);
2365 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) { 2371 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) {
2366 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n"); 2372 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2367 return -EFAULT; 2373 return -EFAULT;
2368 } 2374 }
2369 if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) { 2375 if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) {
2370 if (skb_cloned(skb)) { 2376 if (skb_cloned(skb)) {
2371 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 2377 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2372 if (nskb == NULL) 2378 if (nskb == NULL)
@@ -2380,7 +2386,7 @@ int tcp_send_synack(struct sock *sk)
2380 skb = nskb; 2386 skb = nskb;
2381 } 2387 }
2382 2388
2383 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK; 2389 TCP_SKB_CB(skb)->flags |= TCPHDR_ACK;
2384 TCP_ECN_send_synack(tcp_sk(sk), skb); 2390 TCP_ECN_send_synack(tcp_sk(sk), skb);
2385 } 2391 }
2386 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2392 TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2459,7 +2465,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2459 * not even correctly set) 2465 * not even correctly set)
2460 */ 2466 */
2461 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, 2467 tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
2462 TCPCB_FLAG_SYN | TCPCB_FLAG_ACK); 2468 TCPHDR_SYN | TCPHDR_ACK);
2463 2469
2464 if (OPTION_COOKIE_EXTENSION & opts.options) { 2470 if (OPTION_COOKIE_EXTENSION & opts.options) {
2465 if (s_data_desired) { 2471 if (s_data_desired) {
@@ -2484,7 +2490,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2484 *tail-- ^= TCP_SKB_CB(skb)->seq + 1; 2490 *tail-- ^= TCP_SKB_CB(skb)->seq + 1;
2485 2491
2486 /* recommended */ 2492 /* recommended */
2487 *tail-- ^= ((th->dest << 16) | th->source); 2493 *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
2488 *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ 2494 *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
2489 2495
2490 sha_transform((__u32 *)&xvp->cookie_bakery[0], 2496 sha_transform((__u32 *)&xvp->cookie_bakery[0],
@@ -2502,7 +2508,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2502 th->window = htons(min(req->rcv_wnd, 65535U)); 2508 th->window = htons(min(req->rcv_wnd, 65535U));
2503 tcp_options_write((__be32 *)(th + 1), tp, &opts); 2509 tcp_options_write((__be32 *)(th + 1), tp, &opts);
2504 th->doff = (tcp_header_size >> 2); 2510 th->doff = (tcp_header_size >> 2);
2505 TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); 2511 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
2506 2512
2507#ifdef CONFIG_TCP_MD5SIG 2513#ifdef CONFIG_TCP_MD5SIG
2508 /* Okay, we have all we need - do the md5 hash if needed */ 2514 /* Okay, we have all we need - do the md5 hash if needed */
@@ -2514,6 +2520,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2514 2520
2515 return skb; 2521 return skb;
2516} 2522}
2523EXPORT_SYMBOL(tcp_make_synack);
2517 2524
2518/* Do all connect socket setups that can be done AF independent. */ 2525/* Do all connect socket setups that can be done AF independent. */
2519static void tcp_connect_init(struct sock *sk) 2526static void tcp_connect_init(struct sock *sk)
@@ -2591,7 +2598,7 @@ int tcp_connect(struct sock *sk)
2591 skb_reserve(buff, MAX_TCP_HEADER); 2598 skb_reserve(buff, MAX_TCP_HEADER);
2592 2599
2593 tp->snd_nxt = tp->write_seq; 2600 tp->snd_nxt = tp->write_seq;
2594 tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN); 2601 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
2595 TCP_ECN_send_syn(sk, buff); 2602 TCP_ECN_send_syn(sk, buff);
2596 2603
2597 /* Send it off. */ 2604 /* Send it off. */
@@ -2616,6 +2623,7 @@ int tcp_connect(struct sock *sk)
2616 inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 2623 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
2617 return 0; 2624 return 0;
2618} 2625}
2626EXPORT_SYMBOL(tcp_connect);
2619 2627
2620/* Send out a delayed ack, the caller does the policy checking 2628/* Send out a delayed ack, the caller does the policy checking
2621 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check() 2629 * to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
@@ -2697,7 +2705,7 @@ void tcp_send_ack(struct sock *sk)
2697 2705
2698 /* Reserve space for headers and prepare control bits. */ 2706 /* Reserve space for headers and prepare control bits. */
2699 skb_reserve(buff, MAX_TCP_HEADER); 2707 skb_reserve(buff, MAX_TCP_HEADER);
2700 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK); 2708 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
2701 2709
2702 /* Send it off, this clears delayed acks for us. */ 2710 /* Send it off, this clears delayed acks for us. */
2703 TCP_SKB_CB(buff)->when = tcp_time_stamp; 2711 TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2731,7 +2739,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2731 * end to send an ack. Don't queue or clone SKB, just 2739 * end to send an ack. Don't queue or clone SKB, just
2732 * send it. 2740 * send it.
2733 */ 2741 */
2734 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK); 2742 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
2735 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2743 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2736 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 2744 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2737} 2745}
@@ -2761,13 +2769,13 @@ int tcp_write_wakeup(struct sock *sk)
2761 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || 2769 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2762 skb->len > mss) { 2770 skb->len > mss) {
2763 seg_size = min(seg_size, mss); 2771 seg_size = min(seg_size, mss);
2764 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2772 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
2765 if (tcp_fragment(sk, skb, seg_size, mss)) 2773 if (tcp_fragment(sk, skb, seg_size, mss))
2766 return -1; 2774 return -1;
2767 } else if (!tcp_skb_pcount(skb)) 2775 } else if (!tcp_skb_pcount(skb))
2768 tcp_set_skb_tso_segs(sk, skb, mss); 2776 tcp_set_skb_tso_segs(sk, skb, mss);
2769 2777
2770 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2778 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
2771 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2779 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2772 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2780 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2773 if (!err) 2781 if (!err)
@@ -2820,10 +2828,3 @@ void tcp_send_probe0(struct sock *sk)
2820 TCP_RTO_MAX); 2828 TCP_RTO_MAX);
2821 } 2829 }
2822} 2830}
2823
2824EXPORT_SYMBOL(tcp_select_initial_window);
2825EXPORT_SYMBOL(tcp_connect);
2826EXPORT_SYMBOL(tcp_make_synack);
2827EXPORT_SYMBOL(tcp_simple_retransmit);
2828EXPORT_SYMBOL(tcp_sync_mss);
2829EXPORT_SYMBOL(tcp_mtup_init);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8a0ab2977f1f..808bb920c9f5 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -41,7 +41,6 @@ void tcp_init_xmit_timers(struct sock *sk)
41 inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, 41 inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
42 &tcp_keepalive_timer); 42 &tcp_keepalive_timer);
43} 43}
44
45EXPORT_SYMBOL(tcp_init_xmit_timers); 44EXPORT_SYMBOL(tcp_init_xmit_timers);
46 45
47static void tcp_write_err(struct sock *sk) 46static void tcp_write_err(struct sock *sk)
@@ -172,14 +171,14 @@ static int tcp_write_timeout(struct sock *sk)
172 171
173 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 172 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
174 if (icsk->icsk_retransmits) 173 if (icsk->icsk_retransmits)
175 dst_negative_advice(&sk->sk_dst_cache, sk); 174 dst_negative_advice(sk);
176 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; 175 retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
177 } else { 176 } else {
178 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { 177 if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
179 /* Black hole detection */ 178 /* Black hole detection */
180 tcp_mtu_probing(icsk, sk); 179 tcp_mtu_probing(icsk, sk);
181 180
182 dst_negative_advice(&sk->sk_dst_cache, sk); 181 dst_negative_advice(sk);
183 } 182 }
184 183
185 retry_until = sysctl_tcp_retries2; 184 retry_until = sysctl_tcp_retries2;
@@ -517,7 +516,7 @@ static void tcp_keepalive_timer (unsigned long data)
517 struct sock *sk = (struct sock *) data; 516 struct sock *sk = (struct sock *) data;
518 struct inet_connection_sock *icsk = inet_csk(sk); 517 struct inet_connection_sock *icsk = inet_csk(sk);
519 struct tcp_sock *tp = tcp_sk(sk); 518 struct tcp_sock *tp = tcp_sk(sk);
520 __u32 elapsed; 519 u32 elapsed;
521 520
522 /* Only process if socket is not in use. */ 521 /* Only process if socket is not in use. */
523 bh_lock_sock(sk); 522 bh_lock_sock(sk);
@@ -554,7 +553,7 @@ static void tcp_keepalive_timer (unsigned long data)
554 if (tp->packets_out || tcp_send_head(sk)) 553 if (tp->packets_out || tcp_send_head(sk))
555 goto resched; 554 goto resched;
556 555
557 elapsed = tcp_time_stamp - tp->rcv_tstamp; 556 elapsed = keepalive_time_elapsed(tp);
558 557
559 if (elapsed >= keepalive_time_when(tp)) { 558 if (elapsed >= keepalive_time_when(tp)) {
560 if (icsk->icsk_probes_out >= keepalive_probes(tp)) { 559 if (icsk->icsk_probes_out >= keepalive_probes(tp)) {
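The keepalive change replaces the open-coded tcp_time_stamp - tp->rcv_tstamp with keepalive_time_elapsed(), which also considers the arrival time of the last ACK, so a peer that is alive but sending only ACKs is not probed too early. A sketch of the helper, approximately as added to include/net/tcp.h:

	static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
	{
		const struct inet_connection_sock *icsk = &tp->inet_conn;

		/* idle time runs from the most recent of the last ACK
		 * received and the last data received */
		return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime,
				  tcp_time_stamp - tp->rcv_tstamp);
	}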
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 3b3813cc80b9..59186ca7808a 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -48,7 +48,6 @@ err:
48 48
49 return ret; 49 return ret;
50} 50}
51
52EXPORT_SYMBOL(xfrm4_tunnel_register); 51EXPORT_SYMBOL(xfrm4_tunnel_register);
53 52
54int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family) 53int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
@@ -72,7 +71,6 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
72 71
73 return ret; 72 return ret;
74} 73}
75
76EXPORT_SYMBOL(xfrm4_tunnel_deregister); 74EXPORT_SYMBOL(xfrm4_tunnel_deregister);
77 75
78static int tunnel4_rcv(struct sk_buff *skb) 76static int tunnel4_rcv(struct sk_buff *skb)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8fef859db35d..32e0bef60d0a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -233,7 +233,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
233 */ 233 */
234 do { 234 do {
235 if (low <= snum && snum <= high && 235 if (low <= snum && snum <= high &&
236 !test_bit(snum >> udptable->log, bitmap)) 236 !test_bit(snum >> udptable->log, bitmap) &&
237 !inet_is_reserved_local_port(snum))
237 goto found; 238 goto found;
238 snum += rand; 239 snum += rand;
239 } while (snum != first); 240 } while (snum != first);
@@ -307,13 +308,13 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
307static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr, 308static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
308 unsigned int port) 309 unsigned int port)
309{ 310{
310 return jhash_1word(saddr, net_hash_mix(net)) ^ port; 311 return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
311} 312}
312 313
313int udp_v4_get_port(struct sock *sk, unsigned short snum) 314int udp_v4_get_port(struct sock *sk, unsigned short snum)
314{ 315{
315 unsigned int hash2_nulladdr = 316 unsigned int hash2_nulladdr =
316 udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum); 317 udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
317 unsigned int hash2_partial = 318 unsigned int hash2_partial =
318 udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); 319 udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
319 320
@@ -466,14 +467,14 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
466 daddr, hnum, dif, 467 daddr, hnum, dif,
467 hslot2, slot2); 468 hslot2, slot2);
468 if (!result) { 469 if (!result) {
469 hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum); 470 hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
470 slot2 = hash2 & udptable->mask; 471 slot2 = hash2 & udptable->mask;
471 hslot2 = &udptable->hash2[slot2]; 472 hslot2 = &udptable->hash2[slot2];
472 if (hslot->count < hslot2->count) 473 if (hslot->count < hslot2->count)
473 goto begin; 474 goto begin;
474 475
475 result = udp4_lib_lookup2(net, saddr, sport, 476 result = udp4_lib_lookup2(net, saddr, sport,
476 INADDR_ANY, hnum, dif, 477 htonl(INADDR_ANY), hnum, dif,
477 hslot2, slot2); 478 hslot2, slot2);
478 } 479 }
479 rcu_read_unlock(); 480 rcu_read_unlock();
@@ -632,9 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
632 if (!inet->recverr) { 633 if (!inet->recverr) {
633 if (!harderr || sk->sk_state != TCP_ESTABLISHED) 634 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
634 goto out; 635 goto out;
635 } else { 636 } else
636 ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1)); 637 ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
637 } 638
638 sk->sk_err = err; 639 sk->sk_err = err;
639 sk->sk_error_report(sk); 640 sk->sk_error_report(sk);
640out: 641out:
@@ -913,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
913 !sock_flag(sk, SOCK_BROADCAST)) 914 !sock_flag(sk, SOCK_BROADCAST))
914 goto out; 915 goto out;
915 if (connected) 916 if (connected)
916 sk_dst_set(sk, dst_clone(&rt->u.dst)); 917 sk_dst_set(sk, dst_clone(&rt->dst));
917 } 918 }
918 919
919 if (msg->msg_flags&MSG_CONFIRM) 920 if (msg->msg_flags&MSG_CONFIRM)
@@ -977,7 +978,7 @@ out:
977 return err; 978 return err;
978 979
979do_confirm: 980do_confirm:
980 dst_confirm(&rt->u.dst); 981 dst_confirm(&rt->dst);
981 if (!(msg->msg_flags&MSG_PROBE) || len) 982 if (!(msg->msg_flags&MSG_PROBE) || len)
982 goto back_from_confirm; 983 goto back_from_confirm;
983 err = 0; 984 err = 0;
@@ -1062,10 +1063,11 @@ static unsigned int first_packet_length(struct sock *sk)
1062 spin_unlock_bh(&rcvq->lock); 1063 spin_unlock_bh(&rcvq->lock);
1063 1064
1064 if (!skb_queue_empty(&list_kill)) { 1065 if (!skb_queue_empty(&list_kill)) {
1065 lock_sock(sk); 1066 bool slow = lock_sock_fast(sk);
1067
1066 __skb_queue_purge(&list_kill); 1068 __skb_queue_purge(&list_kill);
1067 sk_mem_reclaim_partial(sk); 1069 sk_mem_reclaim_partial(sk);
1068 release_sock(sk); 1070 unlock_sock_fast(sk, slow);
1069 } 1071 }
1070 return res; 1072 return res;
1071} 1073}
@@ -1122,6 +1124,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1122 int peeked; 1124 int peeked;
1123 int err; 1125 int err;
1124 int is_udplite = IS_UDPLITE(sk); 1126 int is_udplite = IS_UDPLITE(sk);
1127 bool slow;
1125 1128
1126 /* 1129 /*
1127 * Check any passed addresses 1130 * Check any passed addresses
@@ -1196,10 +1199,10 @@ out:
1196 return err; 1199 return err;
1197 1200
1198csum_copy_err: 1201csum_copy_err:
1199 lock_sock(sk); 1202 slow = lock_sock_fast(sk);
1200 if (!skb_kill_datagram(sk, skb, flags)) 1203 if (!skb_kill_datagram(sk, skb, flags))
1201 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); 1204 UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
1202 release_sock(sk); 1205 unlock_sock_fast(sk, slow);
1203 1206
1204 if (noblock) 1207 if (noblock)
1205 return -EAGAIN; 1208 return -EAGAIN;
@@ -1217,6 +1220,7 @@ int udp_disconnect(struct sock *sk, int flags)
1217 sk->sk_state = TCP_CLOSE; 1220 sk->sk_state = TCP_CLOSE;
1218 inet->inet_daddr = 0; 1221 inet->inet_daddr = 0;
1219 inet->inet_dport = 0; 1222 inet->inet_dport = 0;
1223 sock_rps_save_rxhash(sk, 0);
1220 sk->sk_bound_dev_if = 0; 1224 sk->sk_bound_dev_if = 0;
1221 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 1225 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
1222 inet_reset_saddr(sk); 1226 inet_reset_saddr(sk);
@@ -1258,8 +1262,12 @@ EXPORT_SYMBOL(udp_lib_unhash);
1258 1262
1259static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1263static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1260{ 1264{
1261 int rc = sock_queue_rcv_skb(sk, skb); 1265 int rc;
1266
1267 if (inet_sk(sk)->inet_daddr)
1268 sock_rps_save_rxhash(sk, skb->rxhash);
1262 1269
1270 rc = ip_queue_rcv_skb(sk, skb);
1263 if (rc < 0) { 1271 if (rc < 0) {
1264 int is_udplite = IS_UDPLITE(sk); 1272 int is_udplite = IS_UDPLITE(sk);
1265 1273
@@ -1367,6 +1375,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1367 goto drop; 1375 goto drop;
1368 } 1376 }
1369 1377
1378
1379 if (sk_rcvqueues_full(sk, skb))
1380 goto drop;
1381
1370 rc = 0; 1382 rc = 0;
1371 1383
1372 bh_lock_sock(sk); 1384 bh_lock_sock(sk);
@@ -1527,6 +1539,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
1527 1539
1528 uh = udp_hdr(skb); 1540 uh = udp_hdr(skb);
1529 ulen = ntohs(uh->len); 1541 ulen = ntohs(uh->len);
1542 saddr = ip_hdr(skb)->saddr;
1543 daddr = ip_hdr(skb)->daddr;
1544
1530 if (ulen > skb->len) 1545 if (ulen > skb->len)
1531 goto short_packet; 1546 goto short_packet;
1532 1547
@@ -1540,9 +1555,6 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
1540 if (udp4_csum_init(skb, uh, proto)) 1555 if (udp4_csum_init(skb, uh, proto))
1541 goto csum_error; 1556 goto csum_error;
1542 1557
1543 saddr = ip_hdr(skb)->saddr;
1544 daddr = ip_hdr(skb)->daddr;
1545
1546 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) 1558 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1547 return __udp4_lib_mcast_deliver(net, skb, uh, 1559 return __udp4_lib_mcast_deliver(net, skb, uh,
1548 saddr, daddr, udptable); 1560 saddr, daddr, udptable);
@@ -1615,9 +1627,9 @@ int udp_rcv(struct sk_buff *skb)
1615 1627
1616void udp_destroy_sock(struct sock *sk) 1628void udp_destroy_sock(struct sock *sk)
1617{ 1629{
1618 lock_sock(sk); 1630 bool slow = lock_sock_fast(sk);
1619 udp_flush_pending_frames(sk); 1631 udp_flush_pending_frames(sk);
1620 release_sock(sk); 1632 unlock_sock_fast(sk, slow);
1621} 1633}
1622 1634
1623/* 1635/*
@@ -1676,8 +1688,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1676 return -ENOPROTOOPT; 1688 return -ENOPROTOOPT;
1677 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ 1689 if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
1678 val = 8; 1690 val = 8;
1679 else if (val > USHORT_MAX) 1691 else if (val > USHRT_MAX)
1680 val = USHORT_MAX; 1692 val = USHRT_MAX;
1681 up->pcslen = val; 1693 up->pcslen = val;
1682 up->pcflag |= UDPLITE_SEND_CC; 1694 up->pcflag |= UDPLITE_SEND_CC;
1683 break; 1695 break;
@@ -1690,8 +1702,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
1690 return -ENOPROTOOPT; 1702 return -ENOPROTOOPT;
1691 if (val != 0 && val < 8) /* Avoid silly minimal values. */ 1703 if (val != 0 && val < 8) /* Avoid silly minimal values. */
1692 val = 8; 1704 val = 8;
1693 else if (val > USHORT_MAX) 1705 else if (val > USHRT_MAX)
1694 val = USHORT_MAX; 1706 val = USHRT_MAX;
1695 up->pcrlen = val; 1707 up->pcrlen = val;
1696 up->pcflag |= UDPLITE_RECV_CC; 1708 up->pcflag |= UDPLITE_RECV_CC;
1697 break; 1709 break;
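Several UDP paths in this file (first_packet_length(), the csum_copy_err unwind in udp_recvmsg(), udp_destroy_sock()) move from lock_sock()/release_sock() to the fast-lock pair. lock_sock_fast() takes only the socket spinlock when no process context owns the socket and falls back to the full slow path otherwise; its return value records which path was taken so the unlock can match. Roughly, per include/net/sock.h:

	static inline void unlock_sock_fast(struct sock *sk, bool slow)
	{
		if (slow)
			release_sock(sk);	/* full lock was taken */
		else
			spin_unlock_bh(&sk->sk_lock.slock);	/* cheap path */
	}

with the caller pattern:

	bool slow = lock_sock_fast(sk);
	/* short, non-sleeping critical section */
	unlock_sock_fast(sk, slow);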
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 6610bf76369f..ab76aa928fa9 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -58,6 +58,7 @@ struct proto udplite_prot = {
58 .compat_getsockopt = compat_udp_getsockopt, 58 .compat_getsockopt = compat_udp_getsockopt,
59#endif 59#endif
60}; 60};
61EXPORT_SYMBOL(udplite_prot);
61 62
62static struct inet_protosw udplite4_protosw = { 63static struct inet_protosw udplite4_protosw = {
63 .type = SOCK_DGRAM, 64 .type = SOCK_DGRAM,
@@ -127,5 +128,3 @@ out_unregister_proto:
127out_register_err: 128out_register_err:
128 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); 129 printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__);
129} 130}
130
131EXPORT_SYMBOL(udplite_prot);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index c791bb63203f..06814b6216dc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
27 if (skb_dst(skb) == NULL) { 27 if (skb_dst(skb) == NULL) {
28 const struct iphdr *iph = ip_hdr(skb); 28 const struct iphdr *iph = ip_hdr(skb);
29 29
30 if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, 30 if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
31 skb->dev)) 31 iph->tos, skb->dev))
32 goto drop; 32 goto drop;
33 } 33 }
34 return dst_input(skb); 34 return dst_input(skb);
@@ -61,7 +61,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
61 iph->tot_len = htons(skb->len); 61 iph->tot_len = htons(skb->len);
62 ip_send_check(iph); 62 ip_send_check(iph);
63 63
64 NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, 64 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
65 xfrm4_rcv_encap_finish); 65 xfrm4_rcv_encap_finish);
66 return 0; 66 return 0;
67} 67}
@@ -163,5 +163,4 @@ int xfrm4_rcv(struct sk_buff *skb)
163{ 163{
164 return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); 164 return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
165} 165}
166
167EXPORT_SYMBOL(xfrm4_rcv); 166EXPORT_SYMBOL(xfrm4_rcv);
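The PF_INET to NFPROTO_IPV4 switch here (and in xfrm4_output.c below) is part of a tree-wide cleanup: netfilter hooks are keyed by NFPROTO_* protocol families rather than socket PF_* constants. The numeric values coincide for IPv4 and IPv6, so behaviour is unchanged. From include/linux/netfilter.h:

	enum {
		NFPROTO_UNSPEC =  0,
		NFPROTO_IPV4   =  2,
		NFPROTO_ARP    =  3,
		NFPROTO_BRIDGE =  7,
		NFPROTO_IPV6   = 10,
		NFPROTO_DECNET = 12,
		NFPROTO_NUMPROTO,
	};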
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index c908bd99bcba..571aa96a175c 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -86,7 +86,7 @@ static int xfrm4_output_finish(struct sk_buff *skb)
86 86
87int xfrm4_output(struct sk_buff *skb) 87int xfrm4_output(struct sk_buff *skb)
88{ 88{
89 return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, 89 return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb,
90 NULL, skb_dst(skb)->dev, xfrm4_output_finish, 90 NULL, skb_dst(skb)->dev, xfrm4_output_finish,
91 !(IPCB(skb)->flags & IPSKB_REROUTED)); 91 !(IPCB(skb)->flags & IPSKB_REROUTED));
92} 92}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index e4a1483fba77..869078d4eeb9 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
37 fl.fl4_src = saddr->a4; 37 fl.fl4_src = saddr->a4;
38 38
39 err = __ip_route_output_key(net, &rt, &fl); 39 err = __ip_route_output_key(net, &rt, &fl);
40 dst = &rt->u.dst; 40 dst = &rt->dst;
41 if (err) 41 if (err)
42 dst = ERR_PTR(err); 42 dst = ERR_PTR(err);
43 return dst; 43 return dst;
@@ -59,27 +59,6 @@ static int xfrm4_get_saddr(struct net *net,
59 return 0; 59 return 0;
60} 60}
61 61
62static struct dst_entry *
63__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
64{
65 struct dst_entry *dst;
66
67 read_lock_bh(&policy->lock);
68 for (dst = policy->bundles; dst; dst = dst->next) {
69 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
70 if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/
71 xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
72 xdst->u.rt.fl.fl4_src == fl->fl4_src &&
73 xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
74 xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
75 dst_clone(dst);
76 break;
77 }
78 }
79 read_unlock_bh(&policy->lock);
80 return dst;
81}
82
83static int xfrm4_get_tos(struct flowi *fl) 62static int xfrm4_get_tos(struct flowi *fl)
84{ 63{
85 return fl->fl4_tos; 64 return fl->fl4_tos;
@@ -129,6 +108,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
129 u8 *xprth = skb_network_header(skb) + iph->ihl * 4; 108 u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
130 109
131 memset(fl, 0, sizeof(struct flowi)); 110 memset(fl, 0, sizeof(struct flowi));
111 fl->mark = skb->mark;
112
132 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { 113 if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
133 switch (iph->protocol) { 114 switch (iph->protocol) {
134 case IPPROTO_UDP: 115 case IPPROTO_UDP:
@@ -259,7 +240,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
259 .dst_ops = &xfrm4_dst_ops, 240 .dst_ops = &xfrm4_dst_ops,
260 .dst_lookup = xfrm4_dst_lookup, 241 .dst_lookup = xfrm4_dst_lookup,
261 .get_saddr = xfrm4_get_saddr, 242 .get_saddr = xfrm4_get_saddr,
262 .find_bundle = __xfrm4_find_bundle,
263 .decode_session = _decode_session4, 243 .decode_session = _decode_session4,
264 .get_tos = xfrm4_get_tos, 244 .get_tos = xfrm4_get_tos,
265 .init_path = xfrm4_init_path, 245 .init_path = xfrm4_init_path,
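Two changes meet in this file: the per-policy bundle walk (__xfrm4_find_bundle) is deleted in favour of the generic flow-cache lookup, and _decode_session4() now copies skb->mark into the flow key so security policies can match on packet marks. A hypothetical sketch of the masked comparison such a lookup performs (the in-tree helper is xfrm_mark_match(); the name below is illustrative):

	/* a policy stores a (value, mask) pair; it matches when the
	 * masked flow mark equals the masked policy value */
	static inline int mark_matches(u32 flow_mark, u32 value, u32 mask)
	{
		return (flow_mark & mask) == (value & mask);
	}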
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index a578096152ab..36d7437ac054 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -229,6 +229,20 @@ config IPV6_MROUTE
229 Experimental support for IPv6 multicast forwarding. 229 Experimental support for IPv6 multicast forwarding.
230 If unsure, say N. 230 If unsure, say N.
231 231
232config IPV6_MROUTE_MULTIPLE_TABLES
233 bool "IPv6: multicast policy routing"
234 depends on IPV6_MROUTE
235 select FIB_RULES
236 help
237 Normally, a multicast router runs a userspace daemon and decides
238 what to do with a multicast packet based on the source and
239 destination addresses. If you say Y here, the multicast router
240 will also be able to take interfaces and packet marks into
241 account and run multiple instances of userspace daemons
242 simultaneously, each one handling a single table.
243
244 If unsure, say N.
245
232config IPV6_PIMSM_V2 246config IPV6_PIMSM_V2
233 bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)" 247 bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)"
234 depends on IPV6_MROUTE 248 depends on IPV6_MROUTE
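With IPV6_MROUTE_MULTIPLE_TABLES enabled, each routing daemon binds its control socket to a table before initializing multicast routing. A hypothetical userspace sketch, assuming the MRT6_TABLE socket option introduced alongside this Kconfig entry (error handling omitted; the table must be selected before MRT6_INIT):

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/mroute6.h>

	int fd = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);
	unsigned int table = 42;	/* hypothetical table id */
	setsockopt(fd, IPPROTO_IPV6, MRT6_TABLE, &table, sizeof(table));
	int on = 1;
	setsockopt(fd, IPPROTO_IPV6, MRT6_INIT, &on, sizeof(on));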
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 413054f02aab..ab70a3fbcafa 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -82,7 +82,7 @@
82#include <linux/random.h> 82#include <linux/random.h>
83#endif 83#endif
84 84
85#include <asm/uaccess.h> 85#include <linux/uaccess.h>
86#include <asm/unaligned.h> 86#include <asm/unaligned.h>
87 87
88#include <linux/proc_fs.h> 88#include <linux/proc_fs.h>
@@ -98,7 +98,11 @@
98#endif 98#endif
99 99
100#define INFINITY_LIFE_TIME 0xFFFFFFFF 100#define INFINITY_LIFE_TIME 0xFFFFFFFF
101#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) 101#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b)))
102
103#define ADDRCONF_TIMER_FUZZ_MINUS (HZ > 50 ? HZ/50 : 1)
104#define ADDRCONF_TIMER_FUZZ (HZ / 4)
105#define ADDRCONF_TIMER_FUZZ_MAX (HZ)
102 106
103#ifdef CONFIG_SYSCTL 107#ifdef CONFIG_SYSCTL
104static void addrconf_sysctl_register(struct inet6_dev *idev); 108static void addrconf_sysctl_register(struct inet6_dev *idev);
@@ -117,8 +121,6 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
117static int __ipv6_regen_rndid(struct inet6_dev *idev); 121static int __ipv6_regen_rndid(struct inet6_dev *idev);
118static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 122static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
119static void ipv6_regen_rndid(unsigned long data); 123static void ipv6_regen_rndid(unsigned long data);
120
121static int desync_factor = MAX_DESYNC_FACTOR * HZ;
122#endif 124#endif
123 125
124static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); 126static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
@@ -127,8 +129,8 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
127/* 129/*
128 * Configured unicast address hash table 130 * Configured unicast address hash table
129 */ 131 */
130static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; 132static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
131static DEFINE_RWLOCK(addrconf_hash_lock); 133static DEFINE_SPINLOCK(addrconf_hash_lock);
132 134
133static void addrconf_verify(unsigned long); 135static void addrconf_verify(unsigned long);
134 136
@@ -138,8 +140,8 @@ static DEFINE_SPINLOCK(addrconf_verify_lock);
138static void addrconf_join_anycast(struct inet6_ifaddr *ifp); 140static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
139static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); 141static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
140 142
141static void addrconf_bonding_change(struct net_device *dev, 143static void addrconf_type_change(struct net_device *dev,
142 unsigned long event); 144 unsigned long event);
143static int addrconf_ifdown(struct net_device *dev, int how); 145static int addrconf_ifdown(struct net_device *dev, int how);
144 146
145static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); 147static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
@@ -152,8 +154,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
152 154
153static void inet6_prefix_notify(int event, struct inet6_dev *idev, 155static void inet6_prefix_notify(int event, struct inet6_dev *idev,
154 struct prefix_info *pinfo); 156 struct prefix_info *pinfo);
155static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, 157static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
156 struct net_device *dev); 158 struct net_device *dev);
157 159
158static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); 160static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
159 161
@@ -250,8 +252,7 @@ static void addrconf_del_timer(struct inet6_ifaddr *ifp)
250 __in6_ifa_put(ifp); 252 __in6_ifa_put(ifp);
251} 253}
252 254
253enum addrconf_timer_t 255enum addrconf_timer_t {
254{
255 AC_NONE, 256 AC_NONE,
256 AC_DAD, 257 AC_DAD,
257 AC_RS, 258 AC_RS,
@@ -271,7 +272,8 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
271 case AC_RS: 272 case AC_RS:
272 ifp->timer.function = addrconf_rs_timer; 273 ifp->timer.function = addrconf_rs_timer;
273 break; 274 break;
274 default:; 275 default:
276 break;
275 } 277 }
276 ifp->timer.expires = jiffies + when; 278 ifp->timer.expires = jiffies + when;
277 add_timer(&ifp->timer); 279 add_timer(&ifp->timer);
@@ -280,13 +282,16 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
280static int snmp6_alloc_dev(struct inet6_dev *idev) 282static int snmp6_alloc_dev(struct inet6_dev *idev)
281{ 283{
282 if (snmp_mib_init((void __percpu **)idev->stats.ipv6, 284 if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
283 sizeof(struct ipstats_mib)) < 0) 285 sizeof(struct ipstats_mib),
286 __alignof__(struct ipstats_mib)) < 0)
284 goto err_ip; 287 goto err_ip;
285 if (snmp_mib_init((void __percpu **)idev->stats.icmpv6, 288 if (snmp_mib_init((void __percpu **)idev->stats.icmpv6,
286 sizeof(struct icmpv6_mib)) < 0) 289 sizeof(struct icmpv6_mib),
290 __alignof__(struct icmpv6_mib)) < 0)
287 goto err_icmp; 291 goto err_icmp;
288 if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg, 292 if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg,
289 sizeof(struct icmpv6msg_mib)) < 0) 293 sizeof(struct icmpv6msg_mib),
294 __alignof__(struct icmpv6msg_mib)) < 0)
290 goto err_icmpmsg; 295 goto err_icmpmsg;
291 296
292 return 0; 297 return 0;
@@ -318,7 +323,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
318{ 323{
319 struct net_device *dev = idev->dev; 324 struct net_device *dev = idev->dev;
320 325
321 WARN_ON(idev->addr_list != NULL); 326 WARN_ON(!list_empty(&idev->addr_list));
322 WARN_ON(idev->mc_list != NULL); 327 WARN_ON(idev->mc_list != NULL);
323 328
324#ifdef NET_REFCNT_DEBUG 329#ifdef NET_REFCNT_DEBUG
@@ -326,7 +331,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
326#endif 331#endif
327 dev_put(dev); 332 dev_put(dev);
328 if (!idev->dead) { 333 if (!idev->dead) {
329 printk("Freeing alive inet6 device %p\n", idev); 334 pr_warning("Freeing alive inet6 device %p\n", idev);
330 return; 335 return;
331 } 336 }
332 snmp6_free_dev(idev); 337 snmp6_free_dev(idev);
@@ -351,6 +356,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
351 356
352 rwlock_init(&ndev->lock); 357 rwlock_init(&ndev->lock);
353 ndev->dev = dev; 358 ndev->dev = dev;
359 INIT_LIST_HEAD(&ndev->addr_list);
360
354 memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); 361 memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
355 ndev->cnf.mtu6 = dev->mtu; 362 ndev->cnf.mtu6 = dev->mtu;
356 ndev->cnf.sysctl = NULL; 363 ndev->cnf.sysctl = NULL;
@@ -402,6 +409,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
402#endif 409#endif
403 410
404#ifdef CONFIG_IPV6_PRIVACY 411#ifdef CONFIG_IPV6_PRIVACY
412 INIT_LIST_HEAD(&ndev->tempaddr_list);
405 setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); 413 setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
406 if ((dev->flags&IFF_LOOPBACK) || 414 if ((dev->flags&IFF_LOOPBACK) ||
407 dev->type == ARPHRD_TUNNEL || 415 dev->type == ARPHRD_TUNNEL ||
@@ -439,8 +447,10 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
439 447
440 ASSERT_RTNL(); 448 ASSERT_RTNL();
441 449
442 if ((idev = __in6_dev_get(dev)) == NULL) { 450 idev = __in6_dev_get(dev);
443 if ((idev = ipv6_add_dev(dev)) == NULL) 451 if (!idev) {
452 idev = ipv6_add_dev(dev);
453 if (!idev)
444 return NULL; 454 return NULL;
445 } 455 }
446 456
@@ -466,7 +476,8 @@ static void dev_forward_change(struct inet6_dev *idev)
466 else 476 else
467 ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters); 477 ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
468 } 478 }
469 for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { 479
480 list_for_each_entry(ifa, &idev->addr_list, if_list) {
470 if (ifa->flags&IFA_F_TENTATIVE) 481 if (ifa->flags&IFA_F_TENTATIVE)
471 continue; 482 continue;
472 if (idev->cnf.forwarding) 483 if (idev->cnf.forwarding)
@@ -523,12 +534,16 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
523} 534}
524#endif 535#endif
525 536
526/* Nobody refers to this ifaddr, destroy it */ 537static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head)
538{
539 struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu);
540 kfree(ifp);
541}
527 542
543/* Nobody refers to this ifaddr, destroy it */
528void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) 544void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
529{ 545{
530 WARN_ON(ifp->if_next != NULL); 546 WARN_ON(!hlist_unhashed(&ifp->addr_lst));
531 WARN_ON(ifp->lst_next != NULL);
532 547
533#ifdef NET_REFCNT_DEBUG 548#ifdef NET_REFCNT_DEBUG
534 printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); 549 printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
@@ -537,54 +552,46 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
537 in6_dev_put(ifp->idev); 552 in6_dev_put(ifp->idev);
538 553
539 if (del_timer(&ifp->timer)) 554 if (del_timer(&ifp->timer))
540 printk("Timer is still running, when freeing ifa=%p\n", ifp); 555 pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
541 556
542 if (!ifp->dead) { 557 if (ifp->state != INET6_IFADDR_STATE_DEAD) {
543 printk("Freeing alive inet6 address %p\n", ifp); 558 pr_warning("Freeing alive inet6 address %p\n", ifp);
544 return; 559 return;
545 } 560 }
546 dst_release(&ifp->rt->u.dst); 561 dst_release(&ifp->rt->dst);
547 562
548 kfree(ifp); 563 call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
549} 564}
550 565
551static void 566static void
552ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) 567ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
553{ 568{
554 struct inet6_ifaddr *ifa, **ifap; 569 struct list_head *p;
555 int ifp_scope = ipv6_addr_src_scope(&ifp->addr); 570 int ifp_scope = ipv6_addr_src_scope(&ifp->addr);
556 571
557 /* 572 /*
558 * Each device address list is sorted in order of scope - 573 * Each device address list is sorted in order of scope -
559 * global before linklocal. 574 * global before linklocal.
560 */ 575 */
561 for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; 576 list_for_each(p, &idev->addr_list) {
562 ifap = &ifa->if_next) { 577 struct inet6_ifaddr *ifa
578 = list_entry(p, struct inet6_ifaddr, if_list);
563 if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) 579 if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr))
564 break; 580 break;
565 } 581 }
566 582
567 ifp->if_next = *ifap; 583 list_add_tail(&ifp->if_list, p);
568 *ifap = ifp;
569} 584}
570 585
571/* 586static u32 ipv6_addr_hash(const struct in6_addr *addr)
572 * Hash function taken from net_alias.c
573 */
574static u8 ipv6_addr_hash(const struct in6_addr *addr)
575{ 587{
576 __u32 word;
577
578 /* 588 /*
579 * We perform the hash function over the last 64 bits of the address 589 * We perform the hash function over the last 64 bits of the address
580 * This will include the IEEE address token on links that support it. 590 * This will include the IEEE address token on links that support it.
581 */ 591 */
582 592 return jhash_2words((__force u32)addr->s6_addr32[2],
583 word = (__force u32)(addr->s6_addr32[2] ^ addr->s6_addr32[3]); 593 (__force u32)addr->s6_addr32[3], 0)
584 word ^= (word >> 16); 594 & (IN6_ADDR_HSIZE - 1);
585 word ^= (word >> 8);
586
587 return ((word ^ (word >> 4)) & 0x0f);
588} 595}
589 596
 590 /* On success it returns ifp with increased reference count */ 597 /* On success it returns ifp with increased reference count */
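Freeing an inet6_ifaddr now goes through call_rcu(): lookups below walk the address hash under rcu_read_lock_bh() without taking a reference, so an entry removed from the hash must survive one grace period before kfree(). The general shape of the pattern adopted above:

	/* the object embeds a struct rcu_head; unhash first, free later */
	hlist_del_init_rcu(&ifp->addr_lst);	/* writer holds addrconf_hash_lock */
	call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);	/* kfree after grace period */

The hash function is upgraded at the same time: instead of xor-folding the low 64 bits of the address down to 4 bits, it runs jhash_2words() over them and masks the result into the full IN6_ADDR_HSIZE-sized table.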
@@ -595,7 +602,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
595{ 602{
596 struct inet6_ifaddr *ifa = NULL; 603 struct inet6_ifaddr *ifa = NULL;
597 struct rt6_info *rt; 604 struct rt6_info *rt;
598 int hash; 605 unsigned int hash;
599 int err = 0; 606 int err = 0;
600 int addr_type = ipv6_addr_type(addr); 607 int addr_type = ipv6_addr_type(addr);
601 608
@@ -616,7 +623,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
616 goto out2; 623 goto out2;
617 } 624 }
618 625
619 write_lock(&addrconf_hash_lock); 626 spin_lock(&addrconf_hash_lock);
620 627
621 /* Ignore adding duplicate addresses on an interface */ 628 /* Ignore adding duplicate addresses on an interface */
622 if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { 629 if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
@@ -642,7 +649,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
642 ipv6_addr_copy(&ifa->addr, addr); 649 ipv6_addr_copy(&ifa->addr, addr);
643 650
644 spin_lock_init(&ifa->lock); 651 spin_lock_init(&ifa->lock);
652 spin_lock_init(&ifa->state_lock);
645 init_timer(&ifa->timer); 653 init_timer(&ifa->timer);
654 INIT_HLIST_NODE(&ifa->addr_lst);
646 ifa->timer.data = (unsigned long) ifa; 655 ifa->timer.data = (unsigned long) ifa;
647 ifa->scope = scope; 656 ifa->scope = scope;
648 ifa->prefix_len = pfxlen; 657 ifa->prefix_len = pfxlen;
@@ -669,10 +678,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
669 /* Add to big hash table */ 678 /* Add to big hash table */
670 hash = ipv6_addr_hash(addr); 679 hash = ipv6_addr_hash(addr);
671 680
672 ifa->lst_next = inet6_addr_lst[hash]; 681 hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
673 inet6_addr_lst[hash] = ifa; 682 spin_unlock(&addrconf_hash_lock);
674 in6_ifa_hold(ifa);
675 write_unlock(&addrconf_hash_lock);
676 683
677 write_lock(&idev->lock); 684 write_lock(&idev->lock);
678 /* Add to inet6_dev unicast addr list. */ 685 /* Add to inet6_dev unicast addr list. */
@@ -680,8 +687,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
680 687
681#ifdef CONFIG_IPV6_PRIVACY 688#ifdef CONFIG_IPV6_PRIVACY
682 if (ifa->flags&IFA_F_TEMPORARY) { 689 if (ifa->flags&IFA_F_TEMPORARY) {
683 ifa->tmp_next = idev->tempaddr_list; 690 list_add(&ifa->tmp_list, &idev->tempaddr_list);
684 idev->tempaddr_list = ifa;
685 in6_ifa_hold(ifa); 691 in6_ifa_hold(ifa);
686 } 692 }
687#endif 693#endif
@@ -700,7 +706,7 @@ out2:
700 706
701 return ifa; 707 return ifa;
702out: 708out:
703 write_unlock(&addrconf_hash_lock); 709 spin_unlock(&addrconf_hash_lock);
704 goto out2; 710 goto out2;
705} 711}
706 712
@@ -708,52 +714,44 @@ out:
708 714
709static void ipv6_del_addr(struct inet6_ifaddr *ifp) 715static void ipv6_del_addr(struct inet6_ifaddr *ifp)
710{ 716{
711 struct inet6_ifaddr *ifa, **ifap; 717 struct inet6_ifaddr *ifa, *ifn;
712 struct inet6_dev *idev = ifp->idev; 718 struct inet6_dev *idev = ifp->idev;
719 int state;
713 int hash; 720 int hash;
714 int deleted = 0, onlink = 0; 721 int deleted = 0, onlink = 0;
715 unsigned long expires = jiffies; 722 unsigned long expires = jiffies;
716 723
717 hash = ipv6_addr_hash(&ifp->addr); 724 hash = ipv6_addr_hash(&ifp->addr);
718 725
719 ifp->dead = 1; 726 spin_lock_bh(&ifp->state_lock);
727 state = ifp->state;
728 ifp->state = INET6_IFADDR_STATE_DEAD;
729 spin_unlock_bh(&ifp->state_lock);
720 730
721 write_lock_bh(&addrconf_hash_lock); 731 if (state == INET6_IFADDR_STATE_DEAD)
722 for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL; 732 goto out;
723 ifap = &ifa->lst_next) { 733
724 if (ifa == ifp) { 734 spin_lock_bh(&addrconf_hash_lock);
725 *ifap = ifa->lst_next; 735 hlist_del_init_rcu(&ifp->addr_lst);
726 __in6_ifa_put(ifp); 736 spin_unlock_bh(&addrconf_hash_lock);
727 ifa->lst_next = NULL;
728 break;
729 }
730 }
731 write_unlock_bh(&addrconf_hash_lock);
732 737
733 write_lock_bh(&idev->lock); 738 write_lock_bh(&idev->lock);
734#ifdef CONFIG_IPV6_PRIVACY 739#ifdef CONFIG_IPV6_PRIVACY
735 if (ifp->flags&IFA_F_TEMPORARY) { 740 if (ifp->flags&IFA_F_TEMPORARY) {
736 for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL; 741 list_del(&ifp->tmp_list);
737 ifap = &ifa->tmp_next) { 742 if (ifp->ifpub) {
738 if (ifa == ifp) { 743 in6_ifa_put(ifp->ifpub);
739 *ifap = ifa->tmp_next; 744 ifp->ifpub = NULL;
740 if (ifp->ifpub) {
741 in6_ifa_put(ifp->ifpub);
742 ifp->ifpub = NULL;
743 }
744 __in6_ifa_put(ifp);
745 ifa->tmp_next = NULL;
746 break;
747 }
748 } 745 }
746 __in6_ifa_put(ifp);
749 } 747 }
750#endif 748#endif
751 749
752 for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) { 750 list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
753 if (ifa == ifp) { 751 if (ifa == ifp) {
754 *ifap = ifa->if_next; 752 list_del_init(&ifp->if_list);
755 __in6_ifa_put(ifp); 753 __in6_ifa_put(ifp);
756 ifa->if_next = NULL; 754
757 if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) 755 if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
758 break; 756 break;
759 deleted = 1; 757 deleted = 1;
@@ -786,7 +784,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
786 } 784 }
787 } 785 }
788 } 786 }
789 ifap = &ifa->if_next;
790 } 787 }
791 write_unlock_bh(&idev->lock); 788 write_unlock_bh(&idev->lock);
792 789
@@ -827,9 +824,10 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
827 rt->rt6i_flags |= RTF_EXPIRES; 824 rt->rt6i_flags |= RTF_EXPIRES;
828 } 825 }
829 } 826 }
830 dst_release(&rt->u.dst); 827 dst_release(&rt->dst);
831 } 828 }
832 829
830out:
833 in6_ifa_put(ifp); 831 in6_ifa_put(ifp);
834} 832}
835 833
@@ -893,7 +891,8 @@ retry:
893 idev->cnf.temp_valid_lft); 891 idev->cnf.temp_valid_lft);
894 tmp_prefered_lft = min_t(__u32, 892 tmp_prefered_lft = min_t(__u32,
895 ifp->prefered_lft, 893 ifp->prefered_lft,
896 idev->cnf.temp_prefered_lft - desync_factor / HZ); 894 idev->cnf.temp_prefered_lft -
895 idev->cnf.max_desync_factor);
897 tmp_plen = ifp->prefix_len; 896 tmp_plen = ifp->prefix_len;
898 max_addresses = idev->cnf.max_addresses; 897 max_addresses = idev->cnf.max_addresses;
899 tmp_cstamp = ifp->cstamp; 898 tmp_cstamp = ifp->cstamp;
@@ -1165,7 +1164,7 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
1165 continue; 1164 continue;
1166 1165
1167 read_lock_bh(&idev->lock); 1166 read_lock_bh(&idev->lock);
1168 for (score->ifa = idev->addr_list; score->ifa; score->ifa = score->ifa->if_next) { 1167 list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
1169 int i; 1168 int i;
1170 1169
1171 /* 1170 /*
@@ -1243,7 +1242,6 @@ try_nextdev:
1243 in6_ifa_put(hiscore->ifa); 1242 in6_ifa_put(hiscore->ifa);
1244 return 0; 1243 return 0;
1245} 1244}
1246
1247EXPORT_SYMBOL(ipv6_dev_get_saddr); 1245EXPORT_SYMBOL(ipv6_dev_get_saddr);
1248 1246
1249int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, 1247int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
@@ -1253,12 +1251,14 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
1253 int err = -EADDRNOTAVAIL; 1251 int err = -EADDRNOTAVAIL;
1254 1252
1255 rcu_read_lock(); 1253 rcu_read_lock();
1256 if ((idev = __in6_dev_get(dev)) != NULL) { 1254 idev = __in6_dev_get(dev);
1255 if (idev) {
1257 struct inet6_ifaddr *ifp; 1256 struct inet6_ifaddr *ifp;
1258 1257
1259 read_lock_bh(&idev->lock); 1258 read_lock_bh(&idev->lock);
1260 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { 1259 list_for_each_entry(ifp, &idev->addr_list, if_list) {
1261 if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { 1260 if (ifp->scope == IFA_LINK &&
1261 !(ifp->flags & banned_flags)) {
1262 ipv6_addr_copy(addr, &ifp->addr); 1262 ipv6_addr_copy(addr, &ifp->addr);
1263 err = 0; 1263 err = 0;
1264 break; 1264 break;
@@ -1276,7 +1276,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
1276 struct inet6_ifaddr *ifp; 1276 struct inet6_ifaddr *ifp;
1277 1277
1278 read_lock_bh(&idev->lock); 1278 read_lock_bh(&idev->lock);
1279 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) 1279 list_for_each_entry(ifp, &idev->addr_list, if_list)
1280 cnt++; 1280 cnt++;
1281 read_unlock_bh(&idev->lock); 1281 read_unlock_bh(&idev->lock);
1282 return cnt; 1282 return cnt;
@@ -1285,41 +1285,44 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
1285int ipv6_chk_addr(struct net *net, struct in6_addr *addr, 1285int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
1286 struct net_device *dev, int strict) 1286 struct net_device *dev, int strict)
1287{ 1287{
1288 struct inet6_ifaddr * ifp; 1288 struct inet6_ifaddr *ifp;
1289 u8 hash = ipv6_addr_hash(addr); 1289 struct hlist_node *node;
1290 unsigned int hash = ipv6_addr_hash(addr);
1290 1291
1291 read_lock_bh(&addrconf_hash_lock); 1292 rcu_read_lock_bh();
1292 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1293 hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
1293 if (!net_eq(dev_net(ifp->idev->dev), net)) 1294 if (!net_eq(dev_net(ifp->idev->dev), net))
1294 continue; 1295 continue;
1295 if (ipv6_addr_equal(&ifp->addr, addr) && 1296 if (ipv6_addr_equal(&ifp->addr, addr) &&
1296 !(ifp->flags&IFA_F_TENTATIVE)) { 1297 !(ifp->flags&IFA_F_TENTATIVE) &&
1297 if (dev == NULL || ifp->idev->dev == dev || 1298 (dev == NULL || ifp->idev->dev == dev ||
1298 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) 1299 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
1299 break; 1300 rcu_read_unlock_bh();
1301 return 1;
1300 } 1302 }
1301 } 1303 }
1302 read_unlock_bh(&addrconf_hash_lock); 1304
1303 return ifp != NULL; 1305 rcu_read_unlock_bh();
1306 return 0;
1304} 1307}
1305EXPORT_SYMBOL(ipv6_chk_addr); 1308EXPORT_SYMBOL(ipv6_chk_addr);
1306 1309
1307static 1310static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
1308int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, 1311 struct net_device *dev)
1309 struct net_device *dev)
1310{ 1312{
1311 struct inet6_ifaddr * ifp; 1313 unsigned int hash = ipv6_addr_hash(addr);
1312 u8 hash = ipv6_addr_hash(addr); 1314 struct inet6_ifaddr *ifp;
1315 struct hlist_node *node;
1313 1316
1314 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1317 hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
1315 if (!net_eq(dev_net(ifp->idev->dev), net)) 1318 if (!net_eq(dev_net(ifp->idev->dev), net))
1316 continue; 1319 continue;
1317 if (ipv6_addr_equal(&ifp->addr, addr)) { 1320 if (ipv6_addr_equal(&ifp->addr, addr)) {
1318 if (dev == NULL || ifp->idev->dev == dev) 1321 if (dev == NULL || ifp->idev->dev == dev)
1319 break; 1322 return true;
1320 } 1323 }
1321 } 1324 }
1322 return ifp != NULL; 1325 return false;
1323} 1326}
1324 1327
1325 int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev) 1328 int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
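After this conversion, ipv6_chk_addr() runs with no lock at all, only an RCU-BH read-side section; writers serialize on addrconf_hash_lock (now a spinlock) and unhash with hlist_del_init_rcu(). The reader pattern, using the four-argument hlist_for_each_entry_rcu of this kernel generation:

	struct hlist_node *node;
	unsigned int hash = ipv6_addr_hash(addr);

	rcu_read_lock_bh();
	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
		/* concurrently unhashed entries stay readable until the
		 * grace period ends, so this walk is always safe */
		if (ipv6_addr_equal(&ifp->addr, addr))
			break;
	}
	rcu_read_unlock_bh();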
@@ -1333,7 +1336,7 @@ int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
1333 idev = __in6_dev_get(dev); 1336 idev = __in6_dev_get(dev);
1334 if (idev) { 1337 if (idev) {
1335 read_lock_bh(&idev->lock); 1338 read_lock_bh(&idev->lock);
1336 for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { 1339 list_for_each_entry(ifa, &idev->addr_list, if_list) {
1337 onlink = ipv6_prefix_equal(addr, &ifa->addr, 1340 onlink = ipv6_prefix_equal(addr, &ifa->addr,
1338 ifa->prefix_len); 1341 ifa->prefix_len);
1339 if (onlink) 1342 if (onlink)
@@ -1350,24 +1353,26 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
1350struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, 1353struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
1351 struct net_device *dev, int strict) 1354 struct net_device *dev, int strict)
1352{ 1355{
1353 struct inet6_ifaddr * ifp; 1356 struct inet6_ifaddr *ifp, *result = NULL;
1354 u8 hash = ipv6_addr_hash(addr); 1357 unsigned int hash = ipv6_addr_hash(addr);
1358 struct hlist_node *node;
1355 1359
1356 read_lock_bh(&addrconf_hash_lock); 1360 rcu_read_lock_bh();
1357 for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { 1361 hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) {
1358 if (!net_eq(dev_net(ifp->idev->dev), net)) 1362 if (!net_eq(dev_net(ifp->idev->dev), net))
1359 continue; 1363 continue;
1360 if (ipv6_addr_equal(&ifp->addr, addr)) { 1364 if (ipv6_addr_equal(&ifp->addr, addr)) {
1361 if (dev == NULL || ifp->idev->dev == dev || 1365 if (dev == NULL || ifp->idev->dev == dev ||
1362 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { 1366 !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
1367 result = ifp;
1363 in6_ifa_hold(ifp); 1368 in6_ifa_hold(ifp);
1364 break; 1369 break;
1365 } 1370 }
1366 } 1371 }
1367 } 1372 }
1368 read_unlock_bh(&addrconf_hash_lock); 1373 rcu_read_unlock_bh();
1369 1374
1370 return ifp; 1375 return result;
1371} 1376}
1372 1377
1373/* Gets referenced address, destroys ifaddr */ 1378/* Gets referenced address, destroys ifaddr */
@@ -1403,10 +1408,27 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
1403 ipv6_del_addr(ifp); 1408 ipv6_del_addr(ifp);
1404} 1409}
1405 1410
1411static int addrconf_dad_end(struct inet6_ifaddr *ifp)
1412{
1413 int err = -ENOENT;
1414
1415 spin_lock(&ifp->state_lock);
1416 if (ifp->state == INET6_IFADDR_STATE_DAD) {
1417 ifp->state = INET6_IFADDR_STATE_POSTDAD;
1418 err = 0;
1419 }
1420 spin_unlock(&ifp->state_lock);
1421
1422 return err;
1423}
1424
1406void addrconf_dad_failure(struct inet6_ifaddr *ifp) 1425void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1407{ 1426{
1408 struct inet6_dev *idev = ifp->idev; 1427 struct inet6_dev *idev = ifp->idev;
1409 1428
1429 if (addrconf_dad_end(ifp))
1430 return;
1431
1410 if (net_ratelimit()) 1432 if (net_ratelimit())
1411 printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n", 1433 printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
1412 ifp->idev->dev->name, &ifp->addr); 1434 ifp->idev->dev->name, &ifp->addr);
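addrconf_dad_failure() and the DAD timer can now race safely with address deletion: addrconf_dad_end() advances the per-address state under ifp->state_lock and reports whether DAD was still in progress, so only one path completes or tears down the address. The states involved, approximately as defined in include/net/if_inet6.h by this series:

	enum {
		INET6_IFADDR_STATE_DAD,		/* duplicate detection running */
		INET6_IFADDR_STATE_POSTDAD,	/* DAD completed or aborted */
		INET6_IFADDR_STATE_DEAD,	/* unhashed, awaiting free */
	};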
@@ -1570,7 +1592,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
1570 struct inet6_ifaddr *ifp; 1592 struct inet6_ifaddr *ifp;
1571 1593
1572 read_lock_bh(&idev->lock); 1594 read_lock_bh(&idev->lock);
1573 for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { 1595 list_for_each_entry(ifp, &idev->addr_list, if_list) {
1574 if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { 1596 if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
1575 memcpy(eui, ifp->addr.s6_addr+8, 8); 1597 memcpy(eui, ifp->addr.s6_addr+8, 8);
1576 err = 0; 1598 err = 0;
@@ -1630,7 +1652,8 @@ static void ipv6_regen_rndid(unsigned long data)
1630 1652
1631 expires = jiffies + 1653 expires = jiffies +
1632 idev->cnf.temp_prefered_lft * HZ - 1654 idev->cnf.temp_prefered_lft * HZ -
1633 idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor; 1655 idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time -
1656 idev->cnf.max_desync_factor * HZ;
1634 if (time_before(expires, jiffies)) { 1657 if (time_before(expires, jiffies)) {
1635 printk(KERN_WARNING 1658 printk(KERN_WARNING
1636 "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n", 1659 "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
@@ -1738,8 +1761,12 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
1738 1761
1739 ASSERT_RTNL(); 1762 ASSERT_RTNL();
1740 1763
1741 if ((idev = ipv6_find_idev(dev)) == NULL) 1764 idev = ipv6_find_idev(dev);
1742 return NULL; 1765 if (!idev)
1766 return ERR_PTR(-ENOBUFS);
1767
1768 if (idev->cnf.disable_ipv6)
1769 return ERR_PTR(-EACCES);
1743 1770
1744 /* Add default multicast route */ 1771 /* Add default multicast route */
1745 addrconf_add_mroute(dev); 1772 addrconf_add_mroute(dev);
@@ -1842,7 +1869,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
1842 dev, expires, flags); 1869 dev, expires, flags);
1843 } 1870 }
1844 if (rt) 1871 if (rt)
1845 dst_release(&rt->u.dst); 1872 dst_release(&rt->dst);
1846 } 1873 }
1847 1874
1848 /* Try to figure out our local address for this prefix */ 1875 /* Try to figure out our local address for this prefix */
@@ -1971,7 +1998,7 @@ ok:
1971#ifdef CONFIG_IPV6_PRIVACY 1998#ifdef CONFIG_IPV6_PRIVACY
1972 read_lock_bh(&in6_dev->lock); 1999 read_lock_bh(&in6_dev->lock);
1973 /* update all temporary addresses in the list */ 2000 /* update all temporary addresses in the list */
1974 for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) { 2001 list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) {
1975 /* 2002 /*
1976 * When adjusting the lifetimes of an existing 2003 * When adjusting the lifetimes of an existing
1977 * temporary address, only lower the lifetimes. 2004 * temporary address, only lower the lifetimes.
@@ -2108,8 +2135,9 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
2108 if (!dev) 2135 if (!dev)
2109 return -ENODEV; 2136 return -ENODEV;
2110 2137
2111 if ((idev = addrconf_add_dev(dev)) == NULL) 2138 idev = addrconf_add_dev(dev);
2112 return -ENOBUFS; 2139 if (IS_ERR(idev))
2140 return PTR_ERR(idev);
2113 2141
2114 scope = ipv6_addr_scope(pfx); 2142 scope = ipv6_addr_scope(pfx);
2115 2143
@@ -2174,7 +2202,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
2174 return -ENXIO; 2202 return -ENXIO;
2175 2203
2176 read_lock_bh(&idev->lock); 2204 read_lock_bh(&idev->lock);
2177 for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { 2205 list_for_each_entry(ifp, &idev->addr_list, if_list) {
2178 if (ifp->prefix_len == plen && 2206 if (ifp->prefix_len == plen &&
2179 ipv6_addr_equal(pfx, &ifp->addr)) { 2207 ipv6_addr_equal(pfx, &ifp->addr)) {
2180 in6_ifa_hold(ifp); 2208 in6_ifa_hold(ifp);
@@ -2185,7 +2213,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
2185 /* If the last address is deleted administratively, 2213 /* If the last address is deleted administratively,
2186 disable IPv6 on this interface. 2214 disable IPv6 on this interface.
2187 */ 2215 */
2188 if (idev->addr_list == NULL) 2216 if (list_empty(&idev->addr_list))
2189 addrconf_ifdown(idev->dev, 1); 2217 addrconf_ifdown(idev->dev, 1);
2190 return 0; 2218 return 0;
2191 } 2219 }
@@ -2356,7 +2384,7 @@ static void addrconf_dev_config(struct net_device *dev)
2356 } 2384 }
2357 2385
2358 idev = addrconf_add_dev(dev); 2386 idev = addrconf_add_dev(dev);
2359 if (idev == NULL) 2387 if (IS_ERR(idev))
2360 return; 2388 return;
2361 2389
2362 memset(&addr, 0, sizeof(struct in6_addr)); 2390 memset(&addr, 0, sizeof(struct in6_addr));
@@ -2446,7 +2474,8 @@ static void addrconf_ip6_tnl_config(struct net_device *dev)
2446 2474
2447 ASSERT_RTNL(); 2475 ASSERT_RTNL();
2448 2476
2449 if ((idev = addrconf_add_dev(dev)) == NULL) { 2477 idev = addrconf_add_dev(dev);
2478 if (IS_ERR(idev)) {
2450 printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); 2479 printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
2451 return; 2480 return;
2452 } 2481 }
@@ -2461,7 +2490,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2461 int run_pending = 0; 2490 int run_pending = 0;
2462 int err; 2491 int err;
2463 2492
2464 switch(event) { 2493 switch (event) {
2465 case NETDEV_REGISTER: 2494 case NETDEV_REGISTER:
2466 if (!idev && dev->mtu >= IPV6_MIN_MTU) { 2495 if (!idev && dev->mtu >= IPV6_MIN_MTU) {
2467 idev = ipv6_add_dev(dev); 2496 idev = ipv6_add_dev(dev);
@@ -2469,6 +2498,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2469 return notifier_from_errno(-ENOMEM); 2498 return notifier_from_errno(-ENOMEM);
2470 } 2499 }
2471 break; 2500 break;
2501
2472 case NETDEV_UP: 2502 case NETDEV_UP:
2473 case NETDEV_CHANGE: 2503 case NETDEV_CHANGE:
2474 if (dev->flags & IFF_SLAVE) 2504 if (dev->flags & IFF_SLAVE)
@@ -2498,10 +2528,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2498 } 2528 }
2499 2529
2500 if (idev) { 2530 if (idev) {
2501 if (idev->if_flags & IF_READY) { 2531 if (idev->if_flags & IF_READY)
2502 /* device is already configured. */ 2532 /* device is already configured. */
2503 break; 2533 break;
2504 }
2505 idev->if_flags |= IF_READY; 2534 idev->if_flags |= IF_READY;
2506 } 2535 }
2507 2536
@@ -2513,7 +2542,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2513 run_pending = 1; 2542 run_pending = 1;
2514 } 2543 }
2515 2544
2516 switch(dev->type) { 2545 switch (dev->type) {
2517#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 2546#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
2518 case ARPHRD_SIT: 2547 case ARPHRD_SIT:
2519 addrconf_sit_config(dev); 2548 addrconf_sit_config(dev);
@@ -2530,25 +2559,30 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2530 addrconf_dev_config(dev); 2559 addrconf_dev_config(dev);
2531 break; 2560 break;
2532 } 2561 }
2562
2533 if (idev) { 2563 if (idev) {
2534 if (run_pending) 2564 if (run_pending)
2535 addrconf_dad_run(idev); 2565 addrconf_dad_run(idev);
2536 2566
2537 /* If the MTU changed during the interface down, when the 2567 /*
2538 interface up, the changed MTU must be reflected in the 2568 * If the MTU changed during the interface down,
2539 idev as well as routers. 2569 * when the interface up, the changed MTU must be
2570 * reflected in the idev as well as routers.
2540 */ 2571 */
2541 if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { 2572 if (idev->cnf.mtu6 != dev->mtu &&
2573 dev->mtu >= IPV6_MIN_MTU) {
2542 rt6_mtu_change(dev, dev->mtu); 2574 rt6_mtu_change(dev, dev->mtu);
2543 idev->cnf.mtu6 = dev->mtu; 2575 idev->cnf.mtu6 = dev->mtu;
2544 } 2576 }
2545 idev->tstamp = jiffies; 2577 idev->tstamp = jiffies;
2546 inet6_ifinfo_notify(RTM_NEWLINK, idev); 2578 inet6_ifinfo_notify(RTM_NEWLINK, idev);
2547 /* If the changed mtu during down is lower than IPV6_MIN_MTU 2579
2548 stop IPv6 on this interface. 2580 /*
2581 * If the changed mtu during down is lower than
2582 * IPV6_MIN_MTU stop IPv6 on this interface.
2549 */ 2583 */
2550 if (dev->mtu < IPV6_MIN_MTU) 2584 if (dev->mtu < IPV6_MIN_MTU)
2551 addrconf_ifdown(dev, event != NETDEV_DOWN); 2585 addrconf_ifdown(dev, 1);
2552 } 2586 }
2553 break; 2587 break;
2554 2588
@@ -2565,7 +2599,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2565 break; 2599 break;
2566 } 2600 }
2567 2601
2568 /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ 2602 /*
2603 * MTU falled under IPV6_MIN_MTU.
2604 * Stop IPv6 on this interface.
2605 */
2569 2606
2570 case NETDEV_DOWN: 2607 case NETDEV_DOWN:
2571 case NETDEV_UNREGISTER: 2608 case NETDEV_UNREGISTER:
@@ -2585,9 +2622,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2585 return notifier_from_errno(err); 2622 return notifier_from_errno(err);
2586 } 2623 }
2587 break; 2624 break;
2588 case NETDEV_BONDING_OLDTYPE: 2625
2589 case NETDEV_BONDING_NEWTYPE: 2626 case NETDEV_PRE_TYPE_CHANGE:
2590 addrconf_bonding_change(dev, event); 2627 case NETDEV_POST_TYPE_CHANGE:
2628 addrconf_type_change(dev, event);
2591 break; 2629 break;
2592 } 2630 }
2593 2631
@@ -2599,28 +2637,28 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
2599 */ 2637 */
2600static struct notifier_block ipv6_dev_notf = { 2638static struct notifier_block ipv6_dev_notf = {
2601 .notifier_call = addrconf_notify, 2639 .notifier_call = addrconf_notify,
2602 .priority = 0
2603}; 2640};
2604 2641
2605static void addrconf_bonding_change(struct net_device *dev, unsigned long event) 2642static void addrconf_type_change(struct net_device *dev, unsigned long event)
2606{ 2643{
2607 struct inet6_dev *idev; 2644 struct inet6_dev *idev;
2608 ASSERT_RTNL(); 2645 ASSERT_RTNL();
2609 2646
2610 idev = __in6_dev_get(dev); 2647 idev = __in6_dev_get(dev);
2611 2648
2612 if (event == NETDEV_BONDING_NEWTYPE) 2649 if (event == NETDEV_POST_TYPE_CHANGE)
2613 ipv6_mc_remap(idev); 2650 ipv6_mc_remap(idev);
2614 else if (event == NETDEV_BONDING_OLDTYPE) 2651 else if (event == NETDEV_PRE_TYPE_CHANGE)
2615 ipv6_mc_unmap(idev); 2652 ipv6_mc_unmap(idev);
2616} 2653}
2617 2654
2618static int addrconf_ifdown(struct net_device *dev, int how) 2655static int addrconf_ifdown(struct net_device *dev, int how)
2619{ 2656{
2620 struct inet6_dev *idev;
2621 struct inet6_ifaddr *ifa, *keep_list, **bifa;
2622 struct net *net = dev_net(dev); 2657 struct net *net = dev_net(dev);
2623 int i; 2658 struct inet6_dev *idev;
2659 struct inet6_ifaddr *ifa;
2660 LIST_HEAD(keep_list);
2661 int state;
2624 2662
2625 ASSERT_RTNL(); 2663 ASSERT_RTNL();
2626 2664
@@ -2631,8 +2669,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2631 if (idev == NULL) 2669 if (idev == NULL)
2632 return -ENODEV; 2670 return -ENODEV;
2633 2671
2634 /* Step 1: remove reference to ipv6 device from parent device. 2672 /*
2635 Do not dev_put! 2673 * Step 1: remove reference to ipv6 device from parent device.
2674 * Do not dev_put!
2636 */ 2675 */
2637 if (how) { 2676 if (how) {
2638 idev->dead = 1; 2677 idev->dead = 1;
@@ -2645,41 +2684,21 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2645 2684
2646 } 2685 }
2647 2686
2648 /* Step 2: clear hash table */
2649 for (i=0; i<IN6_ADDR_HSIZE; i++) {
2650 bifa = &inet6_addr_lst[i];
2651
2652 write_lock_bh(&addrconf_hash_lock);
2653 while ((ifa = *bifa) != NULL) {
2654 if (ifa->idev == idev &&
2655 (how || !(ifa->flags&IFA_F_PERMANENT) ||
2656 ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
2657 *bifa = ifa->lst_next;
2658 ifa->lst_next = NULL;
2659 __in6_ifa_put(ifa);
2660 continue;
2661 }
2662 bifa = &ifa->lst_next;
2663 }
2664 write_unlock_bh(&addrconf_hash_lock);
2665 }
2666
2667 write_lock_bh(&idev->lock); 2687 write_lock_bh(&idev->lock);
2668 2688
2669 /* Step 3: clear flags for stateless addrconf */ 2689 /* Step 2: clear flags for stateless addrconf */
2670 if (!how) 2690 if (!how)
2671 idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); 2691 idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
2672 2692
2673 /* Step 4: clear address list */
2674#ifdef CONFIG_IPV6_PRIVACY 2693#ifdef CONFIG_IPV6_PRIVACY
2675 if (how && del_timer(&idev->regen_timer)) 2694 if (how && del_timer(&idev->regen_timer))
2676 in6_dev_put(idev); 2695 in6_dev_put(idev);
2677 2696
2678 /* clear tempaddr list */ 2697 /* Step 3: clear tempaddr list */
2679 while ((ifa = idev->tempaddr_list) != NULL) { 2698 while (!list_empty(&idev->tempaddr_list)) {
2680 idev->tempaddr_list = ifa->tmp_next; 2699 ifa = list_first_entry(&idev->tempaddr_list,
2681 ifa->tmp_next = NULL; 2700 struct inet6_ifaddr, tmp_list);
2682 ifa->dead = 1; 2701 list_del(&ifa->tmp_list);
2683 write_unlock_bh(&idev->lock); 2702 write_unlock_bh(&idev->lock);
2684 spin_lock_bh(&ifa->lock); 2703 spin_lock_bh(&ifa->lock);
2685 2704
@@ -2692,23 +2711,18 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2692 write_lock_bh(&idev->lock); 2711 write_lock_bh(&idev->lock);
2693 } 2712 }
2694#endif 2713#endif
2695 keep_list = NULL;
2696 bifa = &keep_list;
2697 while ((ifa = idev->addr_list) != NULL) {
2698 idev->addr_list = ifa->if_next;
2699 ifa->if_next = NULL;
2700 2714
2715 while (!list_empty(&idev->addr_list)) {
2716 ifa = list_first_entry(&idev->addr_list,
2717 struct inet6_ifaddr, if_list);
2701 addrconf_del_timer(ifa); 2718 addrconf_del_timer(ifa);
2702 2719
2703 /* If just doing link down, and address is permanent 2720 /* If just doing link down, and address is permanent
2704 and not link-local, then retain it. */ 2721 and not link-local, then retain it. */
2705 if (how == 0 && 2722 if (!how &&
2706 (ifa->flags&IFA_F_PERMANENT) && 2723 (ifa->flags&IFA_F_PERMANENT) &&
2707 !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) { 2724 !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
2708 2725 list_move_tail(&ifa->if_list, &keep_list);
2709 /* Move to holding list */
2710 *bifa = ifa;
2711 bifa = &ifa->if_next;
2712 2726
2713 /* If not doing DAD on this address, just keep it. */ 2727 /* If not doing DAD on this address, just keep it. */
2714 if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) || 2728 if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
@@ -2722,25 +2736,45 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2722 2736
2723 /* Flag it for later restoration when link comes up */ 2737 /* Flag it for later restoration when link comes up */
2724 ifa->flags |= IFA_F_TENTATIVE; 2738 ifa->flags |= IFA_F_TENTATIVE;
2739 ifa->state = INET6_IFADDR_STATE_DAD;
2740
2741 write_unlock_bh(&idev->lock);
2742
2725 in6_ifa_hold(ifa); 2743 in6_ifa_hold(ifa);
2726 } else { 2744 } else {
2727 ifa->dead = 1; 2745 list_del(&ifa->if_list);
2746
2747 /* clear hash table */
2748 spin_lock_bh(&addrconf_hash_lock);
2749 hlist_del_init_rcu(&ifa->addr_lst);
2750 spin_unlock_bh(&addrconf_hash_lock);
2751
2752 write_unlock_bh(&idev->lock);
2753 spin_lock_bh(&ifa->state_lock);
2754 state = ifa->state;
2755 ifa->state = INET6_IFADDR_STATE_DEAD;
2756 spin_unlock_bh(&ifa->state_lock);
2757
2758 if (state == INET6_IFADDR_STATE_DEAD)
2759 goto put_ifa;
2728 } 2760 }
2729 write_unlock_bh(&idev->lock);
2730 2761
2731 __ipv6_ifa_notify(RTM_DELADDR, ifa); 2762 __ipv6_ifa_notify(RTM_DELADDR, ifa);
2732 atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa); 2763 if (ifa->state == INET6_IFADDR_STATE_DEAD)
2764 atomic_notifier_call_chain(&inet6addr_chain,
2765 NETDEV_DOWN, ifa);
2766
2767put_ifa:
2733 in6_ifa_put(ifa); 2768 in6_ifa_put(ifa);
2734 2769
2735 write_lock_bh(&idev->lock); 2770 write_lock_bh(&idev->lock);
2736 } 2771 }
2737 2772
2738 idev->addr_list = keep_list; 2773 list_splice(&keep_list, &idev->addr_list);
2739 2774
2740 write_unlock_bh(&idev->lock); 2775 write_unlock_bh(&idev->lock);
2741 2776
2742 /* Step 5: Discard multicast list */ 2777 /* Step 5: Discard multicast list */
2743
2744 if (how) 2778 if (how)
2745 ipv6_mc_destroy_dev(idev); 2779 ipv6_mc_destroy_dev(idev);
2746 else 2780 else
@@ -2748,8 +2782,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
2748 2782
2749 idev->tstamp = jiffies; 2783 idev->tstamp = jiffies;
2750 2784
2751 /* Shot the device (if unregistered) */ 2785 /* Last: Shoot the device (if unregistered) */
2752
2753 if (how) { 2786 if (how) {
2754 addrconf_sysctl_unregister(idev); 2787 addrconf_sysctl_unregister(idev);
2755 neigh_parms_release(&nd_tbl, idev->nd_parms); 2788 neigh_parms_release(&nd_tbl, idev->nd_parms);
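[The rewritten address-list teardown above drops the hand-rolled if_next pointer chase for the kernel's list_head primitives: permanent, non-link-local addresses are parked on a local keep_list with list_move_tail() and spliced back onto idev->addr_list once the walk is done. A minimal userspace sketch of that park-and-splice pattern, with hypothetical mini_list/addr types standing in for <linux/list.h>:

    #include <stddef.h>
    #include <stdio.h>

    struct mini_list { struct mini_list *prev, *next; };

    #define container_of(p, type, member) \
        ((type *)((char *)(p) - offsetof(type, member)))

    static void mini_init(struct mini_list *h) { h->prev = h->next = h; }

    static void mini_del(struct mini_list *n)
    {
        n->prev->next = n->next;
        n->next->prev = n->prev;
    }

    static void mini_add_tail(struct mini_list *n, struct mini_list *h)
    {
        n->prev = h->prev;
        n->next = h;
        h->prev->next = n;
        h->prev = n;
    }

    /* list_move_tail(): unlink from the current list, append to another */
    static void mini_move_tail(struct mini_list *n, struct mini_list *h)
    {
        mini_del(n);
        mini_add_tail(n, h);
    }

    /* list_splice(): graft all of @src onto the front of @dst */
    static void mini_splice(struct mini_list *src, struct mini_list *dst)
    {
        if (src->next == src)
            return;                         /* empty source */
        src->next->prev = dst;
        src->prev->next = dst->next;
        dst->next->prev = src->prev;
        dst->next = src->next;
    }

    struct addr { int permanent; struct mini_list link; };

    int main(void)
    {
        struct addr a[4] = { {1}, {0}, {1}, {0} };
        struct mini_list addrs, keep;
        int i;

        mini_init(&addrs);
        mini_init(&keep);
        for (i = 0; i < 4; i++)
            mini_add_tail(&a[i].link, &addrs);

        /* consume from the head: park the keepers, drop the rest */
        while (addrs.next != &addrs) {
            struct addr *p = container_of(addrs.next, struct addr, link);

            if (p->permanent)
                mini_move_tail(&p->link, &keep);
            else
                mini_del(&p->link);         /* would be freed here */
        }
        mini_splice(&keep, &addrs);

        for (struct mini_list *n = addrs.next; n != &addrs; n = n->next)
            printf("kept permanent=%d\n",
                   container_of(n, struct addr, link)->permanent);
        return 0;
    }

The temporary list is the point: entries can be unlinked and relinked in O(1) while the main list is consumed from its head, which is exactly what the while (!list_empty(...)) loop above does under idev->lock.]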
@@ -2827,10 +2860,10 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
2827 net_srandom(ifp->addr.s6_addr32[3]); 2860 net_srandom(ifp->addr.s6_addr32[3]);
2828 2861
2829 read_lock_bh(&idev->lock); 2862 read_lock_bh(&idev->lock);
2830 if (ifp->dead) 2863 spin_lock(&ifp->lock);
2864 if (ifp->state == INET6_IFADDR_STATE_DEAD)
2831 goto out; 2865 goto out;
2832 2866
2833 spin_lock(&ifp->lock);
2834 if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || 2867 if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
2835 idev->cnf.accept_dad < 1 || 2868 idev->cnf.accept_dad < 1 ||
2836 !(ifp->flags&IFA_F_TENTATIVE) || 2869 !(ifp->flags&IFA_F_TENTATIVE) ||
@@ -2860,12 +2893,12 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
2860 * Optimistic nodes can start receiving 2893 * Optimistic nodes can start receiving
2861 * Frames right away 2894 * Frames right away
2862 */ 2895 */
2863 if(ifp->flags & IFA_F_OPTIMISTIC) 2896 if (ifp->flags & IFA_F_OPTIMISTIC)
2864 ip6_ins_rt(ifp->rt); 2897 ip6_ins_rt(ifp->rt);
2865 2898
2866 addrconf_dad_kick(ifp); 2899 addrconf_dad_kick(ifp);
2867 spin_unlock(&ifp->lock);
2868out: 2900out:
2901 spin_unlock(&ifp->lock);
2869 read_unlock_bh(&idev->lock); 2902 read_unlock_bh(&idev->lock);
2870} 2903}
2871 2904
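[The addrconf_dad_start() change just above is purely about lock balance: the dead-state test now happens with ifp->lock already held, and the single spin_unlock() is hoisted below the out: label so that every path through the function releases the lock exactly once. The same single-exit shape in a userspace sketch (a pthread mutex standing in for the spinlock; state/DEAD are illustrative):

    #include <pthread.h>
    #include <stdio.h>

    enum { ALIVE, DEAD };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int state = ALIVE;

    static void start_work(void)
    {
        pthread_mutex_lock(&lock);
        if (state == DEAD)
            goto out;                /* early exit still hits the unlock */

        /* ... work done under the lock ... */
        printf("kicked\n");
    out:
        pthread_mutex_unlock(&lock); /* one unlock serves every path */
    }

    int main(void)
    {
        start_work();
        state = DEAD;
        start_work();
        return 0;
    }
]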
@@ -2875,6 +2908,9 @@ static void addrconf_dad_timer(unsigned long data)
2875 struct inet6_dev *idev = ifp->idev; 2908 struct inet6_dev *idev = ifp->idev;
2876 struct in6_addr mcaddr; 2909 struct in6_addr mcaddr;
2877 2910
2911 if (!ifp->probes && addrconf_dad_end(ifp))
2912 goto out;
2913
2878 read_lock(&idev->lock); 2914 read_lock(&idev->lock);
2879 if (idev->dead || !(idev->if_flags & IF_READY)) { 2915 if (idev->dead || !(idev->if_flags & IF_READY)) {
2880 read_unlock(&idev->lock); 2916 read_unlock(&idev->lock);
@@ -2882,6 +2918,12 @@ static void addrconf_dad_timer(unsigned long data)
2882 } 2918 }
2883 2919
2884 spin_lock(&ifp->lock); 2920 spin_lock(&ifp->lock);
2921 if (ifp->state == INET6_IFADDR_STATE_DEAD) {
2922 spin_unlock(&ifp->lock);
2923 read_unlock(&idev->lock);
2924 goto out;
2925 }
2926
2885 if (ifp->probes == 0) { 2927 if (ifp->probes == 0) {
2886 /* 2928 /*
2887 * DAD was successful 2929 * DAD was successful
@@ -2910,7 +2952,7 @@ out:
2910 2952
2911static void addrconf_dad_completed(struct inet6_ifaddr *ifp) 2953static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
2912{ 2954{
2913 struct net_device * dev = ifp->idev->dev; 2955 struct net_device *dev = ifp->idev->dev;
2914 2956
2915 /* 2957 /*
2916 * Configure the address for reception. Now it is valid. 2958 * Configure the address for reception. Now it is valid.
@@ -2941,18 +2983,17 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
2941 } 2983 }
2942} 2984}
2943 2985
2944static void addrconf_dad_run(struct inet6_dev *idev) { 2986static void addrconf_dad_run(struct inet6_dev *idev)
2987{
2945 struct inet6_ifaddr *ifp; 2988 struct inet6_ifaddr *ifp;
2946 2989
2947 read_lock_bh(&idev->lock); 2990 read_lock_bh(&idev->lock);
2948 for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { 2991 list_for_each_entry(ifp, &idev->addr_list, if_list) {
2949 spin_lock(&ifp->lock); 2992 spin_lock(&ifp->lock);
2950 if (!(ifp->flags & IFA_F_TENTATIVE)) { 2993 if (ifp->flags & IFA_F_TENTATIVE &&
2951 spin_unlock(&ifp->lock); 2994 ifp->state == INET6_IFADDR_STATE_DAD)
2952 continue; 2995 addrconf_dad_kick(ifp);
2953 }
2954 spin_unlock(&ifp->lock); 2996 spin_unlock(&ifp->lock);
2955 addrconf_dad_kick(ifp);
2956 } 2997 }
2957 read_unlock_bh(&idev->lock); 2998 read_unlock_bh(&idev->lock);
2958} 2999}
@@ -2970,36 +3011,35 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
2970 struct net *net = seq_file_net(seq); 3011 struct net *net = seq_file_net(seq);
2971 3012
2972 for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { 3013 for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
2973 ifa = inet6_addr_lst[state->bucket]; 3014 struct hlist_node *n;
2974 3015 hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket],
2975 while (ifa && !net_eq(dev_net(ifa->idev->dev), net)) 3016 addr_lst)
2976 ifa = ifa->lst_next; 3017 if (net_eq(dev_net(ifa->idev->dev), net))
2977 if (ifa) 3018 return ifa;
2978 break;
2979 } 3019 }
2980 return ifa; 3020 return NULL;
2981} 3021}
2982 3022
2983static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) 3023static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
3024 struct inet6_ifaddr *ifa)
2984{ 3025{
2985 struct if6_iter_state *state = seq->private; 3026 struct if6_iter_state *state = seq->private;
2986 struct net *net = seq_file_net(seq); 3027 struct net *net = seq_file_net(seq);
3028 struct hlist_node *n = &ifa->addr_lst;
2987 3029
2988 ifa = ifa->lst_next; 3030 hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst)
2989try_again: 3031 if (net_eq(dev_net(ifa->idev->dev), net))
2990 if (ifa) { 3032 return ifa;
2991 if (!net_eq(dev_net(ifa->idev->dev), net)) {
2992 ifa = ifa->lst_next;
2993 goto try_again;
2994 }
2995 }
2996 3033
2997 if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { 3034 while (++state->bucket < IN6_ADDR_HSIZE) {
2998 ifa = inet6_addr_lst[state->bucket]; 3035 hlist_for_each_entry_rcu_bh(ifa, n,
2999 goto try_again; 3036 &inet6_addr_lst[state->bucket], addr_lst) {
3037 if (net_eq(dev_net(ifa->idev->dev), net))
3038 return ifa;
3039 }
3000 } 3040 }
3001 3041
3002 return ifa; 3042 return NULL;
3003} 3043}
3004 3044
3005static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) 3045static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
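[if6_get_first()/if6_get_next() above now walk an array of hash buckets instead of open-coded lst_next chains. The iterator state is just a bucket index plus the current node; advancing means "continue within this chain, then fall through to the next non-empty bucket". A userspace model of that resumable cursor (node/iter types are hypothetical, and the RCU protection is omitted):

    #include <stdio.h>

    #define NBUCKETS 4

    struct node { int val; struct node *next; };

    struct iter {
        struct node *buckets[NBUCKETS];
        int bucket;
    };

    static struct node *iter_first(struct iter *it)
    {
        for (it->bucket = 0; it->bucket < NBUCKETS; it->bucket++)
            if (it->buckets[it->bucket])
                return it->buckets[it->bucket];
        return NULL;
    }

    static struct node *iter_next(struct iter *it, struct node *n)
    {
        if (n->next)                        /* continue within this chain */
            return n->next;
        while (++it->bucket < NBUCKETS)     /* then scan the later buckets */
            if (it->buckets[it->bucket])
                return it->buckets[it->bucket];
        return NULL;
    }

    int main(void)
    {
        struct node a = {1, NULL}, b = {2, &a}, c = {3, NULL};
        struct iter it = { .buckets = { &b, NULL, &c, NULL } };

        for (struct node *n = iter_first(&it); n; n = iter_next(&it, n))
            printf("%d\n", n->val);         /* prints 2 1 3 */
        return 0;
    }

The kernel version additionally filters each candidate with net_eq(), which is why both helpers wrap the bucket walk in a namespace check before returning.]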
@@ -3007,15 +3047,15 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
3007 struct inet6_ifaddr *ifa = if6_get_first(seq); 3047 struct inet6_ifaddr *ifa = if6_get_first(seq);
3008 3048
3009 if (ifa) 3049 if (ifa)
3010 while(pos && (ifa = if6_get_next(seq, ifa)) != NULL) 3050 while (pos && (ifa = if6_get_next(seq, ifa)) != NULL)
3011 --pos; 3051 --pos;
3012 return pos ? NULL : ifa; 3052 return pos ? NULL : ifa;
3013} 3053}
3014 3054
3015static void *if6_seq_start(struct seq_file *seq, loff_t *pos) 3055static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
3016 __acquires(addrconf_hash_lock) 3056 __acquires(rcu_bh)
3017{ 3057{
3018 read_lock_bh(&addrconf_hash_lock); 3058 rcu_read_lock_bh();
3019 return if6_get_idx(seq, *pos); 3059 return if6_get_idx(seq, *pos);
3020} 3060}
3021 3061
@@ -3029,9 +3069,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3029} 3069}
3030 3070
3031static void if6_seq_stop(struct seq_file *seq, void *v) 3071static void if6_seq_stop(struct seq_file *seq, void *v)
3032 __releases(addrconf_hash_lock) 3072 __releases(rcu_bh)
3033{ 3073{
3034 read_unlock_bh(&addrconf_hash_lock); 3074 rcu_read_unlock_bh();
3035} 3075}
3036 3076
3037static int if6_seq_show(struct seq_file *seq, void *v) 3077static int if6_seq_show(struct seq_file *seq, void *v)
@@ -3101,10 +3141,12 @@ void if6_proc_exit(void)
3101int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr) 3141int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
3102{ 3142{
3103 int ret = 0; 3143 int ret = 0;
3104 struct inet6_ifaddr * ifp; 3144 struct inet6_ifaddr *ifp = NULL;
3105 u8 hash = ipv6_addr_hash(addr); 3145 struct hlist_node *n;
3106 read_lock_bh(&addrconf_hash_lock); 3146 unsigned int hash = ipv6_addr_hash(addr);
3107 for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { 3147
3148 rcu_read_lock_bh();
3149 hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) {
3108 if (!net_eq(dev_net(ifp->idev->dev), net)) 3150 if (!net_eq(dev_net(ifp->idev->dev), net))
3109 continue; 3151 continue;
3110 if (ipv6_addr_equal(&ifp->addr, addr) && 3152 if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3113,7 +3155,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
3113 break; 3155 break;
3114 } 3156 }
3115 } 3157 }
3116 read_unlock_bh(&addrconf_hash_lock); 3158 rcu_read_unlock_bh();
3117 return ret; 3159 return ret;
3118} 3160}
3119#endif 3161#endif
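[Throughout this patch, inet6_addr_lst[] becomes an array of hlist_heads. The hlist trick is the pprev back-pointer: each node stores the address of whatever pointer points at it, so unlinking is O(1) without knowing which bucket head the node hangs off — exactly what hlist_del_init_rcu() relies on in the ifdown path above. A minimal non-RCU model of the data structure:

    #include <stdio.h>

    struct hnode { struct hnode *next, **pprev; };
    struct hhead { struct hnode *first; };

    static void hadd_head(struct hnode *n, struct hhead *h)
    {
        n->next = h->first;
        if (h->first)
            h->first->pprev = &n->next;
        h->first = n;
        n->pprev = &h->first;
    }

    static void hdel(struct hnode *n)
    {
        *n->pprev = n->next;        /* works for head or mid-chain alike */
        if (n->next)
            n->next->pprev = n->pprev;
        n->next = NULL;
        n->pprev = NULL;
    }

    int main(void)
    {
        struct hhead head = { NULL };
        struct hnode a, b, c;

        hadd_head(&a, &head);
        hadd_head(&b, &head);
        hadd_head(&c, &head);       /* chain: c -> b -> a */

        hdel(&b);                   /* O(1): no walk from the head */
        for (struct hnode *n = head.first; n; n = n->next)
            printf("%p\n", (void *)n);      /* c then a */
        return 0;
    }

A singly linked node with pprev is also half the size of a list_head, which matters when there are IN6_ADDR_HSIZE bucket heads sitting in memory.]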
@@ -3124,43 +3166,35 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
3124 3166
3125static void addrconf_verify(unsigned long foo) 3167static void addrconf_verify(unsigned long foo)
3126{ 3168{
3169 unsigned long now, next, next_sec, next_sched;
3127 struct inet6_ifaddr *ifp; 3170 struct inet6_ifaddr *ifp;
3128 unsigned long now, next; 3171 struct hlist_node *node;
3129 int i; 3172 int i;
3130 3173
3131 spin_lock_bh(&addrconf_verify_lock); 3174 rcu_read_lock_bh();
3175 spin_lock(&addrconf_verify_lock);
3132 now = jiffies; 3176 now = jiffies;
3133 next = now + ADDR_CHECK_FREQUENCY; 3177 next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
3134 3178
3135 del_timer(&addr_chk_timer); 3179 del_timer(&addr_chk_timer);
3136 3180
3137 for (i=0; i < IN6_ADDR_HSIZE; i++) { 3181 for (i = 0; i < IN6_ADDR_HSIZE; i++) {
3138
3139restart: 3182restart:
3140 read_lock(&addrconf_hash_lock); 3183 hlist_for_each_entry_rcu_bh(ifp, node,
3141 for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { 3184 &inet6_addr_lst[i], addr_lst) {
3142 unsigned long age; 3185 unsigned long age;
3143#ifdef CONFIG_IPV6_PRIVACY
3144 unsigned long regen_advance;
3145#endif
3146 3186
3147 if (ifp->flags & IFA_F_PERMANENT) 3187 if (ifp->flags & IFA_F_PERMANENT)
3148 continue; 3188 continue;
3149 3189
3150 spin_lock(&ifp->lock); 3190 spin_lock(&ifp->lock);
3151 age = (now - ifp->tstamp) / HZ; 3191 /* We try to batch several events at once. */
3152 3192 age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
3153#ifdef CONFIG_IPV6_PRIVACY
3154 regen_advance = ifp->idev->cnf.regen_max_retry *
3155 ifp->idev->cnf.dad_transmits *
3156 ifp->idev->nd_parms->retrans_time / HZ;
3157#endif
3158 3193
3159 if (ifp->valid_lft != INFINITY_LIFE_TIME && 3194 if (ifp->valid_lft != INFINITY_LIFE_TIME &&
3160 age >= ifp->valid_lft) { 3195 age >= ifp->valid_lft) {
3161 spin_unlock(&ifp->lock); 3196 spin_unlock(&ifp->lock);
3162 in6_ifa_hold(ifp); 3197 in6_ifa_hold(ifp);
3163 read_unlock(&addrconf_hash_lock);
3164 ipv6_del_addr(ifp); 3198 ipv6_del_addr(ifp);
3165 goto restart; 3199 goto restart;
3166 } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { 3200 } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
@@ -3182,7 +3216,6 @@ restart:
3182 3216
3183 if (deprecate) { 3217 if (deprecate) {
3184 in6_ifa_hold(ifp); 3218 in6_ifa_hold(ifp);
3185 read_unlock(&addrconf_hash_lock);
3186 3219
3187 ipv6_ifa_notify(0, ifp); 3220 ipv6_ifa_notify(0, ifp);
3188 in6_ifa_put(ifp); 3221 in6_ifa_put(ifp);
@@ -3191,6 +3224,10 @@ restart:
3191#ifdef CONFIG_IPV6_PRIVACY 3224#ifdef CONFIG_IPV6_PRIVACY
3192 } else if ((ifp->flags&IFA_F_TEMPORARY) && 3225 } else if ((ifp->flags&IFA_F_TEMPORARY) &&
3193 !(ifp->flags&IFA_F_TENTATIVE)) { 3226 !(ifp->flags&IFA_F_TENTATIVE)) {
3227 unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
3228 ifp->idev->cnf.dad_transmits *
3229 ifp->idev->nd_parms->retrans_time / HZ;
3230
3194 if (age >= ifp->prefered_lft - regen_advance) { 3231 if (age >= ifp->prefered_lft - regen_advance) {
3195 struct inet6_ifaddr *ifpub = ifp->ifpub; 3232 struct inet6_ifaddr *ifpub = ifp->ifpub;
3196 if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) 3233 if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
@@ -3200,7 +3237,7 @@ restart:
3200 in6_ifa_hold(ifp); 3237 in6_ifa_hold(ifp);
3201 in6_ifa_hold(ifpub); 3238 in6_ifa_hold(ifpub);
3202 spin_unlock(&ifp->lock); 3239 spin_unlock(&ifp->lock);
3203 read_unlock(&addrconf_hash_lock); 3240
3204 spin_lock(&ifpub->lock); 3241 spin_lock(&ifpub->lock);
3205 ifpub->regen_count = 0; 3242 ifpub->regen_count = 0;
3206 spin_unlock(&ifpub->lock); 3243 spin_unlock(&ifpub->lock);
@@ -3220,12 +3257,26 @@ restart:
3220 spin_unlock(&ifp->lock); 3257 spin_unlock(&ifp->lock);
3221 } 3258 }
3222 } 3259 }
3223 read_unlock(&addrconf_hash_lock);
3224 } 3260 }
3225 3261
3226 addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next; 3262 next_sec = round_jiffies_up(next);
3263 next_sched = next;
3264
3265 /* If rounded timeout is accurate enough, accept it. */
3266 if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
3267 next_sched = next_sec;
3268
3269 /* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
3270 if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
3271 next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
3272
3273 ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
3274 now, next, next_sec, next_sched));
3275
3276 addr_chk_timer.expires = next_sched;
3227 add_timer(&addr_chk_timer); 3277 add_timer(&addr_chk_timer);
3228 spin_unlock_bh(&addrconf_verify_lock); 3278 spin_unlock(&addrconf_verify_lock);
3279 rcu_read_unlock_bh();
3229} 3280}
3230 3281
3231static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) 3282static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
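[The rescheduling math at the end of addrconf_verify() deserves a close read: the deadline is first rounded up to a whole second (so many timers fire together and the CPU can stay idle longer), the rounded value is only accepted if it lands within ADDRCONF_TIMER_FUZZ of the exact deadline, and the result is never allowed closer than ADDRCONF_TIMER_FUZZ_MAX from now. A standalone rendition of that clamping — HZ and the fuzz constants here are illustrative values, not the kernel's:

    #include <stdio.h>

    #define HZ             100UL
    #define TIMER_FUZZ     (HZ / 4)    /* illustrative */
    #define TIMER_FUZZ_MAX (HZ)        /* illustrative */

    /* wraparound-safe comparison, as in <linux/jiffies.h> */
    static int time_before(unsigned long a, unsigned long b)
    {
        return (long)(a - b) < 0;
    }

    static unsigned long round_up_to_sec(unsigned long j)
    {
        return (j + HZ - 1) / HZ * HZ;
    }

    static unsigned long schedule_next(unsigned long now, unsigned long next)
    {
        unsigned long next_sec = round_up_to_sec(next);
        unsigned long next_sched = next;

        /* if the rounded timeout is accurate enough, accept it */
        if (time_before(next_sec, next + TIMER_FUZZ))
            next_sched = next_sec;

        /* but never schedule closer than the minimum interval */
        if (time_before(next_sched, now + TIMER_FUZZ_MAX))
            next_sched = now + TIMER_FUZZ_MAX;

        return next_sched;
    }

    int main(void)
    {
        printf("%lu\n", schedule_next(1000, 1010)); /* clamped up to 1100 */
        printf("%lu\n", schedule_next(1000, 1290)); /* rounded up to 1300 */
        return 0;
    }
]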
@@ -3448,8 +3499,12 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
3448 preferred -= tval; 3499 preferred -= tval;
3449 else 3500 else
3450 preferred = 0; 3501 preferred = 0;
3451 if (valid != INFINITY_LIFE_TIME) 3502 if (valid != INFINITY_LIFE_TIME) {
3452 valid -= tval; 3503 if (valid > tval)
3504 valid -= tval;
3505 else
3506 valid = 0;
3507 }
3453 } 3508 }
3454 } else { 3509 } else {
3455 preferred = INFINITY_LIFE_TIME; 3510 preferred = INFINITY_LIFE_TIME;
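[The inet6_fill_ifaddr() hunk above fixes an unsigned underflow: valid is a u32 lifetime, and blindly subtracting the elapsed time once the lifetime has expired wraps around to an enormous value that userspace would read as "valid for roughly 136 years". A two-line demonstration of the bug and the guard:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t valid = 5, tval = 8;   /* lifetime already expired */

        uint32_t buggy = valid - tval;  /* wraps: 4294967293 */
        uint32_t fixed = valid > tval ? valid - tval : 0;

        printf("buggy=%u fixed=%u\n", buggy, fixed);
        return 0;
    }

The preferred lifetime a few lines earlier already had exactly this guard, which is how the asymmetry was noticed.]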
@@ -3515,8 +3570,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
3515 return nlmsg_end(skb, nlh); 3570 return nlmsg_end(skb, nlh);
3516} 3571}
3517 3572
3518enum addr_type_t 3573enum addr_type_t {
3519{
3520 UNICAST_ADDR, 3574 UNICAST_ADDR,
3521 MULTICAST_ADDR, 3575 MULTICAST_ADDR,
3522 ANYCAST_ADDR, 3576 ANYCAST_ADDR,
@@ -3527,7 +3581,6 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3527 struct netlink_callback *cb, enum addr_type_t type, 3581 struct netlink_callback *cb, enum addr_type_t type,
3528 int s_ip_idx, int *p_ip_idx) 3582 int s_ip_idx, int *p_ip_idx)
3529{ 3583{
3530 struct inet6_ifaddr *ifa;
3531 struct ifmcaddr6 *ifmca; 3584 struct ifmcaddr6 *ifmca;
3532 struct ifacaddr6 *ifaca; 3585 struct ifacaddr6 *ifaca;
3533 int err = 1; 3586 int err = 1;
@@ -3535,11 +3588,12 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3535 3588
3536 read_lock_bh(&idev->lock); 3589 read_lock_bh(&idev->lock);
3537 switch (type) { 3590 switch (type) {
3538 case UNICAST_ADDR: 3591 case UNICAST_ADDR: {
3592 struct inet6_ifaddr *ifa;
3593
3539 /* unicast address incl. temp addr */ 3594 /* unicast address incl. temp addr */
3540 for (ifa = idev->addr_list; ifa; 3595 list_for_each_entry(ifa, &idev->addr_list, if_list) {
3541 ifa = ifa->if_next, ip_idx++) { 3596 if (++ip_idx < s_ip_idx)
3542 if (ip_idx < s_ip_idx)
3543 continue; 3597 continue;
3544 err = inet6_fill_ifaddr(skb, ifa, 3598 err = inet6_fill_ifaddr(skb, ifa,
3545 NETLINK_CB(cb->skb).pid, 3599 NETLINK_CB(cb->skb).pid,
@@ -3550,6 +3604,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
3550 break; 3604 break;
3551 } 3605 }
3552 break; 3606 break;
3607 }
3553 case MULTICAST_ADDR: 3608 case MULTICAST_ADDR:
3554 /* multicast address */ 3609 /* multicast address */
3555 for (ifmca = idev->mc_list; ifmca; 3610 for (ifmca = idev->mc_list; ifmca;
@@ -3614,7 +3669,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
3614 if (h > s_h || idx > s_idx) 3669 if (h > s_h || idx > s_idx)
3615 s_ip_idx = 0; 3670 s_ip_idx = 0;
3616 ip_idx = 0; 3671 ip_idx = 0;
3617 if ((idev = __in6_dev_get(dev)) == NULL) 3672 idev = __in6_dev_get(dev);
3673 if (!idev)
3618 goto cont; 3674 goto cont;
3619 3675
3620 if (in6_dump_addrs(idev, skb, cb, type, 3676 if (in6_dump_addrs(idev, skb, cb, type,
@@ -3681,12 +3737,14 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
3681 if (ifm->ifa_index) 3737 if (ifm->ifa_index)
3682 dev = __dev_get_by_index(net, ifm->ifa_index); 3738 dev = __dev_get_by_index(net, ifm->ifa_index);
3683 3739
3684 if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) { 3740 ifa = ipv6_get_ifaddr(net, addr, dev, 1);
3741 if (!ifa) {
3685 err = -EADDRNOTAVAIL; 3742 err = -EADDRNOTAVAIL;
3686 goto errout; 3743 goto errout;
3687 } 3744 }
3688 3745
3689 if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { 3746 skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL);
3747 if (!skb) {
3690 err = -ENOBUFS; 3748 err = -ENOBUFS;
3691 goto errout_ifa; 3749 goto errout_ifa;
3692 } 3750 }
@@ -3808,12 +3866,28 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
3808 memset(&stats[items], 0, pad); 3866 memset(&stats[items], 0, pad);
3809} 3867}
3810 3868
3869static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
3870 int items, int bytes, size_t syncpoff)
3871{
3872 int i;
3873 int pad = bytes - sizeof(u64) * items;
3874 BUG_ON(pad < 0);
3875
3876 /* Use put_unaligned() because stats may not be aligned for u64. */
3877 put_unaligned(items, &stats[0]);
3878 for (i = 1; i < items; i++)
3879 put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]);
3880
3881 memset(&stats[items], 0, pad);
3882}
3883
3811static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, 3884static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
3812 int bytes) 3885 int bytes)
3813{ 3886{
3814 switch(attrtype) { 3887 switch (attrtype) {
3815 case IFLA_INET6_STATS: 3888 case IFLA_INET6_STATS:
3816 __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); 3889 __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
3890 IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
3817 break; 3891 break;
3818 case IFLA_INET6_ICMP6STATS: 3892 case IFLA_INET6_ICMP6STATS:
3819 __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); 3893 __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
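[__snmp6_fill_stats64() uses put_unaligned() because the destination is a netlink attribute payload: on some platforms it may be only 4-byte aligned, and a raw u64 store there is undefined behaviour (and a hard fault on strict-alignment architectures). In portable C the same operation is a memcpy(), which the compiler lowers to an unaligned store wherever that is legal — a sketch:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* portable stand-in for the kernel's put_unaligned() */
    static void put_unaligned_u64(uint64_t val, void *p)
    {
        memcpy(p, &val, sizeof(val));   /* no alignment assumption */
    }

    int main(void)
    {
        unsigned char buf[4 + 2 * sizeof(uint64_t)];
        void *slot0 = buf + 4;          /* deliberately only 4-byte offset */
        void *slot1 = buf + 4 + sizeof(uint64_t);
        uint64_t out;

        put_unaligned_u64(1, slot0);    /* item count first */
        put_unaligned_u64(1234, slot1); /* then a counter */

        memcpy(&out, slot1, sizeof(out));
        printf("%llu\n", (unsigned long long)out);
        return 0;
    }
]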
@@ -4046,9 +4120,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
4046 if (ifp->idev->cnf.forwarding) 4120 if (ifp->idev->cnf.forwarding)
4047 addrconf_leave_anycast(ifp); 4121 addrconf_leave_anycast(ifp);
4048 addrconf_leave_solict(ifp->idev, &ifp->addr); 4122 addrconf_leave_solict(ifp->idev, &ifp->addr);
4049 dst_hold(&ifp->rt->u.dst); 4123 dst_hold(&ifp->rt->dst);
4050 if (ip6_del_rt(ifp->rt)) 4124
4051 dst_free(&ifp->rt->u.dst); 4125 if (ifp->state == INET6_IFADDR_STATE_DEAD &&
4126 ip6_del_rt(ifp->rt))
4127 dst_free(&ifp->rt->dst);
4052 break; 4128 break;
4053 } 4129 }
4054} 4130}
@@ -4163,211 +4239,211 @@ static struct addrconf_sysctl_table
4163 .sysctl_header = NULL, 4239 .sysctl_header = NULL,
4164 .addrconf_vars = { 4240 .addrconf_vars = {
4165 { 4241 {
4166 .procname = "forwarding", 4242 .procname = "forwarding",
4167 .data = &ipv6_devconf.forwarding, 4243 .data = &ipv6_devconf.forwarding,
4168 .maxlen = sizeof(int), 4244 .maxlen = sizeof(int),
4169 .mode = 0644, 4245 .mode = 0644,
4170 .proc_handler = addrconf_sysctl_forward, 4246 .proc_handler = addrconf_sysctl_forward,
4171 }, 4247 },
4172 { 4248 {
4173 .procname = "hop_limit", 4249 .procname = "hop_limit",
4174 .data = &ipv6_devconf.hop_limit, 4250 .data = &ipv6_devconf.hop_limit,
4175 .maxlen = sizeof(int), 4251 .maxlen = sizeof(int),
4176 .mode = 0644, 4252 .mode = 0644,
4177 .proc_handler = proc_dointvec, 4253 .proc_handler = proc_dointvec,
4178 }, 4254 },
4179 { 4255 {
4180 .procname = "mtu", 4256 .procname = "mtu",
4181 .data = &ipv6_devconf.mtu6, 4257 .data = &ipv6_devconf.mtu6,
4182 .maxlen = sizeof(int), 4258 .maxlen = sizeof(int),
4183 .mode = 0644, 4259 .mode = 0644,
4184 .proc_handler = proc_dointvec, 4260 .proc_handler = proc_dointvec,
4185 }, 4261 },
4186 { 4262 {
4187 .procname = "accept_ra", 4263 .procname = "accept_ra",
4188 .data = &ipv6_devconf.accept_ra, 4264 .data = &ipv6_devconf.accept_ra,
4189 .maxlen = sizeof(int), 4265 .maxlen = sizeof(int),
4190 .mode = 0644, 4266 .mode = 0644,
4191 .proc_handler = proc_dointvec, 4267 .proc_handler = proc_dointvec,
4192 }, 4268 },
4193 { 4269 {
4194 .procname = "accept_redirects", 4270 .procname = "accept_redirects",
4195 .data = &ipv6_devconf.accept_redirects, 4271 .data = &ipv6_devconf.accept_redirects,
4196 .maxlen = sizeof(int), 4272 .maxlen = sizeof(int),
4197 .mode = 0644, 4273 .mode = 0644,
4198 .proc_handler = proc_dointvec, 4274 .proc_handler = proc_dointvec,
4199 }, 4275 },
4200 { 4276 {
4201 .procname = "autoconf", 4277 .procname = "autoconf",
4202 .data = &ipv6_devconf.autoconf, 4278 .data = &ipv6_devconf.autoconf,
4203 .maxlen = sizeof(int), 4279 .maxlen = sizeof(int),
4204 .mode = 0644, 4280 .mode = 0644,
4205 .proc_handler = proc_dointvec, 4281 .proc_handler = proc_dointvec,
4206 }, 4282 },
4207 { 4283 {
4208 .procname = "dad_transmits", 4284 .procname = "dad_transmits",
4209 .data = &ipv6_devconf.dad_transmits, 4285 .data = &ipv6_devconf.dad_transmits,
4210 .maxlen = sizeof(int), 4286 .maxlen = sizeof(int),
4211 .mode = 0644, 4287 .mode = 0644,
4212 .proc_handler = proc_dointvec, 4288 .proc_handler = proc_dointvec,
4213 }, 4289 },
4214 { 4290 {
4215 .procname = "router_solicitations", 4291 .procname = "router_solicitations",
4216 .data = &ipv6_devconf.rtr_solicits, 4292 .data = &ipv6_devconf.rtr_solicits,
4217 .maxlen = sizeof(int), 4293 .maxlen = sizeof(int),
4218 .mode = 0644, 4294 .mode = 0644,
4219 .proc_handler = proc_dointvec, 4295 .proc_handler = proc_dointvec,
4220 }, 4296 },
4221 { 4297 {
4222 .procname = "router_solicitation_interval", 4298 .procname = "router_solicitation_interval",
4223 .data = &ipv6_devconf.rtr_solicit_interval, 4299 .data = &ipv6_devconf.rtr_solicit_interval,
4224 .maxlen = sizeof(int), 4300 .maxlen = sizeof(int),
4225 .mode = 0644, 4301 .mode = 0644,
4226 .proc_handler = proc_dointvec_jiffies, 4302 .proc_handler = proc_dointvec_jiffies,
4227 }, 4303 },
4228 { 4304 {
4229 .procname = "router_solicitation_delay", 4305 .procname = "router_solicitation_delay",
4230 .data = &ipv6_devconf.rtr_solicit_delay, 4306 .data = &ipv6_devconf.rtr_solicit_delay,
4231 .maxlen = sizeof(int), 4307 .maxlen = sizeof(int),
4232 .mode = 0644, 4308 .mode = 0644,
4233 .proc_handler = proc_dointvec_jiffies, 4309 .proc_handler = proc_dointvec_jiffies,
4234 }, 4310 },
4235 { 4311 {
4236 .procname = "force_mld_version", 4312 .procname = "force_mld_version",
4237 .data = &ipv6_devconf.force_mld_version, 4313 .data = &ipv6_devconf.force_mld_version,
4238 .maxlen = sizeof(int), 4314 .maxlen = sizeof(int),
4239 .mode = 0644, 4315 .mode = 0644,
4240 .proc_handler = proc_dointvec, 4316 .proc_handler = proc_dointvec,
4241 }, 4317 },
4242#ifdef CONFIG_IPV6_PRIVACY 4318#ifdef CONFIG_IPV6_PRIVACY
4243 { 4319 {
4244 .procname = "use_tempaddr", 4320 .procname = "use_tempaddr",
4245 .data = &ipv6_devconf.use_tempaddr, 4321 .data = &ipv6_devconf.use_tempaddr,
4246 .maxlen = sizeof(int), 4322 .maxlen = sizeof(int),
4247 .mode = 0644, 4323 .mode = 0644,
4248 .proc_handler = proc_dointvec, 4324 .proc_handler = proc_dointvec,
4249 }, 4325 },
4250 { 4326 {
4251 .procname = "temp_valid_lft", 4327 .procname = "temp_valid_lft",
4252 .data = &ipv6_devconf.temp_valid_lft, 4328 .data = &ipv6_devconf.temp_valid_lft,
4253 .maxlen = sizeof(int), 4329 .maxlen = sizeof(int),
4254 .mode = 0644, 4330 .mode = 0644,
4255 .proc_handler = proc_dointvec, 4331 .proc_handler = proc_dointvec,
4256 }, 4332 },
4257 { 4333 {
4258 .procname = "temp_prefered_lft", 4334 .procname = "temp_prefered_lft",
4259 .data = &ipv6_devconf.temp_prefered_lft, 4335 .data = &ipv6_devconf.temp_prefered_lft,
4260 .maxlen = sizeof(int), 4336 .maxlen = sizeof(int),
4261 .mode = 0644, 4337 .mode = 0644,
4262 .proc_handler = proc_dointvec, 4338 .proc_handler = proc_dointvec,
4263 }, 4339 },
4264 { 4340 {
4265 .procname = "regen_max_retry", 4341 .procname = "regen_max_retry",
4266 .data = &ipv6_devconf.regen_max_retry, 4342 .data = &ipv6_devconf.regen_max_retry,
4267 .maxlen = sizeof(int), 4343 .maxlen = sizeof(int),
4268 .mode = 0644, 4344 .mode = 0644,
4269 .proc_handler = proc_dointvec, 4345 .proc_handler = proc_dointvec,
4270 }, 4346 },
4271 { 4347 {
4272 .procname = "max_desync_factor", 4348 .procname = "max_desync_factor",
4273 .data = &ipv6_devconf.max_desync_factor, 4349 .data = &ipv6_devconf.max_desync_factor,
4274 .maxlen = sizeof(int), 4350 .maxlen = sizeof(int),
4275 .mode = 0644, 4351 .mode = 0644,
4276 .proc_handler = proc_dointvec, 4352 .proc_handler = proc_dointvec,
4277 }, 4353 },
4278#endif 4354#endif
4279 { 4355 {
4280 .procname = "max_addresses", 4356 .procname = "max_addresses",
4281 .data = &ipv6_devconf.max_addresses, 4357 .data = &ipv6_devconf.max_addresses,
4282 .maxlen = sizeof(int), 4358 .maxlen = sizeof(int),
4283 .mode = 0644, 4359 .mode = 0644,
4284 .proc_handler = proc_dointvec, 4360 .proc_handler = proc_dointvec,
4285 }, 4361 },
4286 { 4362 {
4287 .procname = "accept_ra_defrtr", 4363 .procname = "accept_ra_defrtr",
4288 .data = &ipv6_devconf.accept_ra_defrtr, 4364 .data = &ipv6_devconf.accept_ra_defrtr,
4289 .maxlen = sizeof(int), 4365 .maxlen = sizeof(int),
4290 .mode = 0644, 4366 .mode = 0644,
4291 .proc_handler = proc_dointvec, 4367 .proc_handler = proc_dointvec,
4292 }, 4368 },
4293 { 4369 {
4294 .procname = "accept_ra_pinfo", 4370 .procname = "accept_ra_pinfo",
4295 .data = &ipv6_devconf.accept_ra_pinfo, 4371 .data = &ipv6_devconf.accept_ra_pinfo,
4296 .maxlen = sizeof(int), 4372 .maxlen = sizeof(int),
4297 .mode = 0644, 4373 .mode = 0644,
4298 .proc_handler = proc_dointvec, 4374 .proc_handler = proc_dointvec,
4299 }, 4375 },
4300#ifdef CONFIG_IPV6_ROUTER_PREF 4376#ifdef CONFIG_IPV6_ROUTER_PREF
4301 { 4377 {
4302 .procname = "accept_ra_rtr_pref", 4378 .procname = "accept_ra_rtr_pref",
4303 .data = &ipv6_devconf.accept_ra_rtr_pref, 4379 .data = &ipv6_devconf.accept_ra_rtr_pref,
4304 .maxlen = sizeof(int), 4380 .maxlen = sizeof(int),
4305 .mode = 0644, 4381 .mode = 0644,
4306 .proc_handler = proc_dointvec, 4382 .proc_handler = proc_dointvec,
4307 }, 4383 },
4308 { 4384 {
4309 .procname = "router_probe_interval", 4385 .procname = "router_probe_interval",
4310 .data = &ipv6_devconf.rtr_probe_interval, 4386 .data = &ipv6_devconf.rtr_probe_interval,
4311 .maxlen = sizeof(int), 4387 .maxlen = sizeof(int),
4312 .mode = 0644, 4388 .mode = 0644,
4313 .proc_handler = proc_dointvec_jiffies, 4389 .proc_handler = proc_dointvec_jiffies,
4314 }, 4390 },
4315#ifdef CONFIG_IPV6_ROUTE_INFO 4391#ifdef CONFIG_IPV6_ROUTE_INFO
4316 { 4392 {
4317 .procname = "accept_ra_rt_info_max_plen", 4393 .procname = "accept_ra_rt_info_max_plen",
4318 .data = &ipv6_devconf.accept_ra_rt_info_max_plen, 4394 .data = &ipv6_devconf.accept_ra_rt_info_max_plen,
4319 .maxlen = sizeof(int), 4395 .maxlen = sizeof(int),
4320 .mode = 0644, 4396 .mode = 0644,
4321 .proc_handler = proc_dointvec, 4397 .proc_handler = proc_dointvec,
4322 }, 4398 },
4323#endif 4399#endif
4324#endif 4400#endif
4325 { 4401 {
4326 .procname = "proxy_ndp", 4402 .procname = "proxy_ndp",
4327 .data = &ipv6_devconf.proxy_ndp, 4403 .data = &ipv6_devconf.proxy_ndp,
4328 .maxlen = sizeof(int), 4404 .maxlen = sizeof(int),
4329 .mode = 0644, 4405 .mode = 0644,
4330 .proc_handler = proc_dointvec, 4406 .proc_handler = proc_dointvec,
4331 }, 4407 },
4332 { 4408 {
4333 .procname = "accept_source_route", 4409 .procname = "accept_source_route",
4334 .data = &ipv6_devconf.accept_source_route, 4410 .data = &ipv6_devconf.accept_source_route,
4335 .maxlen = sizeof(int), 4411 .maxlen = sizeof(int),
4336 .mode = 0644, 4412 .mode = 0644,
4337 .proc_handler = proc_dointvec, 4413 .proc_handler = proc_dointvec,
4338 }, 4414 },
4339#ifdef CONFIG_IPV6_OPTIMISTIC_DAD 4415#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
4340 { 4416 {
4341 .procname = "optimistic_dad", 4417 .procname = "optimistic_dad",
4342 .data = &ipv6_devconf.optimistic_dad, 4418 .data = &ipv6_devconf.optimistic_dad,
4343 .maxlen = sizeof(int), 4419 .maxlen = sizeof(int),
4344 .mode = 0644, 4420 .mode = 0644,
4345 .proc_handler = proc_dointvec, 4421 .proc_handler = proc_dointvec,
4346 4422
4347 }, 4423 },
4348#endif 4424#endif
4349#ifdef CONFIG_IPV6_MROUTE 4425#ifdef CONFIG_IPV6_MROUTE
4350 { 4426 {
4351 .procname = "mc_forwarding", 4427 .procname = "mc_forwarding",
4352 .data = &ipv6_devconf.mc_forwarding, 4428 .data = &ipv6_devconf.mc_forwarding,
4353 .maxlen = sizeof(int), 4429 .maxlen = sizeof(int),
4354 .mode = 0444, 4430 .mode = 0444,
4355 .proc_handler = proc_dointvec, 4431 .proc_handler = proc_dointvec,
4356 }, 4432 },
4357#endif 4433#endif
4358 { 4434 {
4359 .procname = "disable_ipv6", 4435 .procname = "disable_ipv6",
4360 .data = &ipv6_devconf.disable_ipv6, 4436 .data = &ipv6_devconf.disable_ipv6,
4361 .maxlen = sizeof(int), 4437 .maxlen = sizeof(int),
4362 .mode = 0644, 4438 .mode = 0644,
4363 .proc_handler = addrconf_sysctl_disable, 4439 .proc_handler = addrconf_sysctl_disable,
4364 }, 4440 },
4365 { 4441 {
4366 .procname = "accept_dad", 4442 .procname = "accept_dad",
4367 .data = &ipv6_devconf.accept_dad, 4443 .data = &ipv6_devconf.accept_dad,
4368 .maxlen = sizeof(int), 4444 .maxlen = sizeof(int),
4369 .mode = 0644, 4445 .mode = 0644,
4370 .proc_handler = proc_dointvec, 4446 .proc_handler = proc_dointvec,
4371 }, 4447 },
4372 { 4448 {
4373 .procname = "force_tllao", 4449 .procname = "force_tllao",
@@ -4403,8 +4479,8 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
4403 if (t == NULL) 4479 if (t == NULL)
4404 goto out; 4480 goto out;
4405 4481
4406 for (i=0; t->addrconf_vars[i].data; i++) { 4482 for (i = 0; t->addrconf_vars[i].data; i++) {
4407 t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; 4483 t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf;
4408 t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ 4484 t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
4409 t->addrconf_vars[i].extra2 = net; 4485 t->addrconf_vars[i].extra2 = net;
4410 } 4486 }
@@ -4541,14 +4617,12 @@ int register_inet6addr_notifier(struct notifier_block *nb)
4541{ 4617{
4542 return atomic_notifier_chain_register(&inet6addr_chain, nb); 4618 return atomic_notifier_chain_register(&inet6addr_chain, nb);
4543} 4619}
4544
4545EXPORT_SYMBOL(register_inet6addr_notifier); 4620EXPORT_SYMBOL(register_inet6addr_notifier);
4546 4621
4547int unregister_inet6addr_notifier(struct notifier_block *nb) 4622int unregister_inet6addr_notifier(struct notifier_block *nb)
4548{ 4623{
4549 return atomic_notifier_chain_unregister(&inet6addr_chain,nb); 4624 return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
4550} 4625}
4551
4552EXPORT_SYMBOL(unregister_inet6addr_notifier); 4626EXPORT_SYMBOL(unregister_inet6addr_notifier);
4553 4627
4554/* 4628/*
@@ -4557,11 +4631,12 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier);
4557 4631
4558int __init addrconf_init(void) 4632int __init addrconf_init(void)
4559{ 4633{
4560 int err; 4634 int i, err;
4561 4635
4562 if ((err = ipv6_addr_label_init()) < 0) { 4636 err = ipv6_addr_label_init();
4563 printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n", 4637 if (err < 0) {
4564 err); 4638 printk(KERN_CRIT "IPv6 Addrconf:"
4639 " cannot initialize default policy table: %d.\n", err);
4565 return err; 4640 return err;
4566 } 4641 }
4567 4642
@@ -4592,6 +4667,9 @@ int __init addrconf_init(void)
4592 if (err) 4667 if (err)
4593 goto errlo; 4668 goto errlo;
4594 4669
4670 for (i = 0; i < IN6_ADDR_HSIZE; i++)
4671 INIT_HLIST_HEAD(&inet6_addr_lst[i]);
4672
4595 register_netdevice_notifier(&ipv6_dev_notf); 4673 register_netdevice_notifier(&ipv6_dev_notf);
4596 4674
4597 addrconf_verify(0); 4675 addrconf_verify(0);
@@ -4620,7 +4698,6 @@ errlo:
4620 4698
4621void addrconf_cleanup(void) 4699void addrconf_cleanup(void)
4622{ 4700{
4623 struct inet6_ifaddr *ifa;
4624 struct net_device *dev; 4701 struct net_device *dev;
4625 int i; 4702 int i;
4626 4703
@@ -4640,20 +4717,10 @@ void addrconf_cleanup(void)
4640 /* 4717 /*
4641 * Check hash table. 4718 * Check hash table.
4642 */ 4719 */
4643 write_lock_bh(&addrconf_hash_lock); 4720 spin_lock_bh(&addrconf_hash_lock);
4644 for (i=0; i < IN6_ADDR_HSIZE; i++) { 4721 for (i = 0; i < IN6_ADDR_HSIZE; i++)
4645 for (ifa=inet6_addr_lst[i]; ifa; ) { 4722 WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
4646 struct inet6_ifaddr *bifa; 4723 spin_unlock_bh(&addrconf_hash_lock);
4647
4648 bifa = ifa;
4649 ifa = ifa->lst_next;
4650 printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
4651 /* Do not free it; something is wrong.
4652 Now we can investigate it with debugger.
4653 */
4654 }
4655 }
4656 write_unlock_bh(&addrconf_hash_lock);
4657 4724
4658 del_timer(&addr_chk_timer); 4725 del_timer(&addr_chk_timer);
4659 rtnl_unlock(); 4726 rtnl_unlock();
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index ae404c9a746c..f0e774cea386 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -53,11 +53,7 @@ static struct ip6addrlbl_table
53static inline 53static inline
54struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) 54struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
55{ 55{
56#ifdef CONFIG_NET_NS 56 return read_pnet(&lbl->lbl_net);
57 return lbl->lbl_net;
58#else
59 return &init_net;
60#endif
61} 57}
62 58
63/* 59/*
@@ -422,10 +418,6 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
422 ifal->ifal_prefixlen > 128) 418 ifal->ifal_prefixlen > 128)
423 return -EINVAL; 419 return -EINVAL;
424 420
425 if (ifal->ifal_index &&
426 !__dev_get_by_index(net, ifal->ifal_index))
427 return -EINVAL;
428
429 if (!tb[IFAL_ADDRESS]) 421 if (!tb[IFAL_ADDRESS])
430 return -EINVAL; 422 return -EINVAL;
431 423
@@ -441,6 +433,10 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
441 433
442 switch(nlh->nlmsg_type) { 434 switch(nlh->nlmsg_type) {
443 case RTM_NEWADDRLABEL: 435 case RTM_NEWADDRLABEL:
436 if (ifal->ifal_index &&
437 !__dev_get_by_index(net, ifal->ifal_index))
438 return -EINVAL;
439
444 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, 440 err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
445 ifal->ifal_index, label, 441 ifal->ifal_index, label,
446 nlh->nlmsg_flags & NLM_F_REPLACE); 442 nlh->nlmsg_flags & NLM_F_REPLACE);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f9e86b15e0d..56b9bf2516f4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -417,6 +417,9 @@ void inet6_destroy_sock(struct sock *sk)
417 if ((skb = xchg(&np->pktoptions, NULL)) != NULL) 417 if ((skb = xchg(&np->pktoptions, NULL)) != NULL)
418 kfree_skb(skb); 418 kfree_skb(skb);
419 419
420 if ((skb = xchg(&np->rxpmtu, NULL)) != NULL)
421 kfree_skb(skb);
422
420 /* Free flowlabels */ 423 /* Free flowlabels */
421 fl6_free_socklist(sk); 424 fl6_free_socklist(sk);
422 425
@@ -519,10 +522,10 @@ const struct proto_ops inet6_stream_ops = {
519 .shutdown = inet_shutdown, /* ok */ 522 .shutdown = inet_shutdown, /* ok */
520 .setsockopt = sock_common_setsockopt, /* ok */ 523 .setsockopt = sock_common_setsockopt, /* ok */
521 .getsockopt = sock_common_getsockopt, /* ok */ 524 .getsockopt = sock_common_getsockopt, /* ok */
522 .sendmsg = tcp_sendmsg, /* ok */ 525 .sendmsg = inet_sendmsg, /* ok */
523 .recvmsg = sock_common_recvmsg, /* ok */ 526 .recvmsg = inet_recvmsg, /* ok */
524 .mmap = sock_no_mmap, 527 .mmap = sock_no_mmap,
525 .sendpage = tcp_sendpage, 528 .sendpage = inet_sendpage,
526 .splice_read = tcp_splice_read, 529 .splice_read = tcp_splice_read,
527#ifdef CONFIG_COMPAT 530#ifdef CONFIG_COMPAT
528 .compat_setsockopt = compat_sock_common_setsockopt, 531 .compat_setsockopt = compat_sock_common_setsockopt,
@@ -546,7 +549,7 @@ const struct proto_ops inet6_dgram_ops = {
546 .setsockopt = sock_common_setsockopt, /* ok */ 549 .setsockopt = sock_common_setsockopt, /* ok */
547 .getsockopt = sock_common_getsockopt, /* ok */ 550 .getsockopt = sock_common_getsockopt, /* ok */
548 .sendmsg = inet_sendmsg, /* ok */ 551 .sendmsg = inet_sendmsg, /* ok */
549 .recvmsg = sock_common_recvmsg, /* ok */ 552 .recvmsg = inet_recvmsg, /* ok */
550 .mmap = sock_no_mmap, 553 .mmap = sock_no_mmap,
551 .sendpage = sock_no_sendpage, 554 .sendpage = sock_no_sendpage,
552#ifdef CONFIG_COMPAT 555#ifdef CONFIG_COMPAT
@@ -648,7 +651,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
648 651
649 if (dst == NULL) { 652 if (dst == NULL) {
650 struct inet_sock *inet = inet_sk(sk); 653 struct inet_sock *inet = inet_sk(sk);
651 struct in6_addr *final_p = NULL, final; 654 struct in6_addr *final_p, final;
652 struct flowi fl; 655 struct flowi fl;
653 656
654 memset(&fl, 0, sizeof(fl)); 657 memset(&fl, 0, sizeof(fl));
@@ -662,12 +665,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
662 fl.fl_ip_sport = inet->inet_sport; 665 fl.fl_ip_sport = inet->inet_sport;
663 security_sk_classify_flow(sk, &fl); 666 security_sk_classify_flow(sk, &fl);
664 667
665 if (np->opt && np->opt->srcrt) { 668 final_p = fl6_update_dst(&fl, np->opt, &final);
666 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
667 ipv6_addr_copy(&final, &fl.fl6_dst);
668 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
669 final_p = &final;
670 }
671 669
672 err = ip6_dst_lookup(sk, &dst, &fl); 670 err = ip6_dst_lookup(sk, &dst, &fl);
673 if (err) { 671 if (err) {
@@ -973,19 +971,24 @@ static void ipv6_packet_cleanup(void)
973static int __net_init ipv6_init_mibs(struct net *net) 971static int __net_init ipv6_init_mibs(struct net *net)
974{ 972{
975 if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, 973 if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
976 sizeof (struct udp_mib)) < 0) 974 sizeof(struct udp_mib),
975 __alignof__(struct udp_mib)) < 0)
977 return -ENOMEM; 976 return -ENOMEM;
978 if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, 977 if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
979 sizeof (struct udp_mib)) < 0) 978 sizeof(struct udp_mib),
979 __alignof__(struct udp_mib)) < 0)
980 goto err_udplite_mib; 980 goto err_udplite_mib;
981 if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, 981 if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
982 sizeof(struct ipstats_mib)) < 0) 982 sizeof(struct ipstats_mib),
983 __alignof__(struct ipstats_mib)) < 0)
983 goto err_ip_mib; 984 goto err_ip_mib;
984 if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, 985 if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
985 sizeof(struct icmpv6_mib)) < 0) 986 sizeof(struct icmpv6_mib),
987 __alignof__(struct icmpv6_mib)) < 0)
986 goto err_icmp_mib; 988 goto err_icmp_mib;
987 if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics, 989 if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics,
988 sizeof(struct icmpv6msg_mib)) < 0) 990 sizeof(struct icmpv6msg_mib),
991 __alignof__(struct icmpv6msg_mib)) < 0)
989 goto err_icmpmsg_mib; 992 goto err_icmpmsg_mib;
990 return 0; 993 return 0;
991 994
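[snmp_mib_init() grows an explicit alignment argument in the hunk above because, per the offsetof(struct ipstats_mib, syncp) usage earlier in this patch, the MIB structure now embeds a sequence counter used for 64-bit counter folding: the per-cpu allocator must hand back memory aligned for the whole struct, not merely for a long. Plain C11 expresses the same requirement with _Alignof and aligned_alloc — a sketch with an illustrative struct, not the kernel layout:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    struct mib {                 /* illustrative, not the kernel layout */
        uint64_t mibs[8];
        unsigned int syncp;
    };

    int main(void)
    {
        size_t align = _Alignof(struct mib);
        /* aligned_alloc() wants the size rounded to a multiple of align */
        size_t size = (sizeof(struct mib) + align - 1) / align * align;
        struct mib *m = aligned_alloc(align, size);

        if (!m)
            return 1;
        printf("align=%zu size=%zu\n", align, size);
        free(m);
        return 0;
    }
]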
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index b5b07054508a..0e5e943446f0 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -77,41 +77,40 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
77 pac->acl_next = NULL; 77 pac->acl_next = NULL;
78 ipv6_addr_copy(&pac->acl_addr, addr); 78 ipv6_addr_copy(&pac->acl_addr, addr);
79 79
80 rcu_read_lock();
80 if (ifindex == 0) { 81 if (ifindex == 0) {
81 struct rt6_info *rt; 82 struct rt6_info *rt;
82 83
83 rt = rt6_lookup(net, addr, NULL, 0, 0); 84 rt = rt6_lookup(net, addr, NULL, 0, 0);
84 if (rt) { 85 if (rt) {
85 dev = rt->rt6i_dev; 86 dev = rt->rt6i_dev;
86 dev_hold(dev); 87 dst_release(&rt->dst);
87 dst_release(&rt->u.dst);
88 } else if (ishost) { 88 } else if (ishost) {
89 err = -EADDRNOTAVAIL; 89 err = -EADDRNOTAVAIL;
90 goto out_free_pac; 90 goto error;
91 } else { 91 } else {
92 /* router, no matching interface: just pick one */ 92 /* router, no matching interface: just pick one */
93 93 dev = dev_get_by_flags_rcu(net, IFF_UP,
94 dev = dev_get_by_flags(net, IFF_UP, IFF_UP|IFF_LOOPBACK); 94 IFF_UP | IFF_LOOPBACK);
95 } 95 }
96 } else 96 } else
97 dev = dev_get_by_index(net, ifindex); 97 dev = dev_get_by_index_rcu(net, ifindex);
98 98
99 if (dev == NULL) { 99 if (dev == NULL) {
100 err = -ENODEV; 100 err = -ENODEV;
101 goto out_free_pac; 101 goto error;
102 } 102 }
103 103
104 idev = in6_dev_get(dev); 104 idev = __in6_dev_get(dev);
105 if (!idev) { 105 if (!idev) {
106 if (ifindex) 106 if (ifindex)
107 err = -ENODEV; 107 err = -ENODEV;
108 else 108 else
109 err = -EADDRNOTAVAIL; 109 err = -EADDRNOTAVAIL;
110 goto out_dev_put; 110 goto error;
111 } 111 }
112 /* reset ishost, now that we have a specific device */ 112 /* reset ishost, now that we have a specific device */
113 ishost = !idev->cnf.forwarding; 113 ishost = !idev->cnf.forwarding;
114 in6_dev_put(idev);
115 114
116 pac->acl_ifindex = dev->ifindex; 115 pac->acl_ifindex = dev->ifindex;
117 116
@@ -124,26 +123,22 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
124 if (ishost) 123 if (ishost)
125 err = -EADDRNOTAVAIL; 124 err = -EADDRNOTAVAIL;
126 if (err) 125 if (err)
127 goto out_dev_put; 126 goto error;
128 } 127 }
129 128
130 err = ipv6_dev_ac_inc(dev, addr); 129 err = ipv6_dev_ac_inc(dev, addr);
131 if (err) 130 if (!err) {
132 goto out_dev_put; 131 write_lock_bh(&ipv6_sk_ac_lock);
133 132 pac->acl_next = np->ipv6_ac_list;
134 write_lock_bh(&ipv6_sk_ac_lock); 133 np->ipv6_ac_list = pac;
135 pac->acl_next = np->ipv6_ac_list; 134 write_unlock_bh(&ipv6_sk_ac_lock);
136 np->ipv6_ac_list = pac; 135 pac = NULL;
137 write_unlock_bh(&ipv6_sk_ac_lock); 136 }
138
139 dev_put(dev);
140
141 return 0;
142 137
143out_dev_put: 138error:
144 dev_put(dev); 139 rcu_read_unlock();
145out_free_pac: 140 if (pac)
146 sock_kfree_s(sk, pac, sizeof(*pac)); 141 sock_kfree_s(sk, pac, sizeof(*pac));
147 return err; 142 return err;
148} 143}
149 144
@@ -176,11 +171,12 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
176 171
177 write_unlock_bh(&ipv6_sk_ac_lock); 172 write_unlock_bh(&ipv6_sk_ac_lock);
178 173
179 dev = dev_get_by_index(net, pac->acl_ifindex); 174 rcu_read_lock();
180 if (dev) { 175 dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
176 if (dev)
181 ipv6_dev_ac_dec(dev, &pac->acl_addr); 177 ipv6_dev_ac_dec(dev, &pac->acl_addr);
182 dev_put(dev); 178 rcu_read_unlock();
183 } 179
184 sock_kfree_s(sk, pac, sizeof(*pac)); 180 sock_kfree_s(sk, pac, sizeof(*pac));
185 return 0; 181 return 0;
186} 182}
@@ -199,13 +195,12 @@ void ipv6_sock_ac_close(struct sock *sk)
199 write_unlock_bh(&ipv6_sk_ac_lock); 195 write_unlock_bh(&ipv6_sk_ac_lock);
200 196
201 prev_index = 0; 197 prev_index = 0;
198 rcu_read_lock();
202 while (pac) { 199 while (pac) {
203 struct ipv6_ac_socklist *next = pac->acl_next; 200 struct ipv6_ac_socklist *next = pac->acl_next;
204 201
205 if (pac->acl_ifindex != prev_index) { 202 if (pac->acl_ifindex != prev_index) {
206 if (dev) 203 dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
207 dev_put(dev);
208 dev = dev_get_by_index(net, pac->acl_ifindex);
209 prev_index = pac->acl_ifindex; 204 prev_index = pac->acl_ifindex;
210 } 205 }
211 if (dev) 206 if (dev)
@@ -213,8 +208,7 @@ void ipv6_sock_ac_close(struct sock *sk)
213 sock_kfree_s(sk, pac, sizeof(*pac)); 208 sock_kfree_s(sk, pac, sizeof(*pac));
214 pac = next; 209 pac = next;
215 } 210 }
216 if (dev) 211 rcu_read_unlock();
217 dev_put(dev);
218} 212}
219 213
220#if 0 214#if 0
@@ -250,7 +244,7 @@ static void aca_put(struct ifacaddr6 *ac)
250{ 244{
251 if (atomic_dec_and_test(&ac->aca_refcnt)) { 245 if (atomic_dec_and_test(&ac->aca_refcnt)) {
252 in6_dev_put(ac->aca_idev); 246 in6_dev_put(ac->aca_idev);
253 dst_release(&ac->aca_rt->u.dst); 247 dst_release(&ac->aca_rt->dst);
254 kfree(ac); 248 kfree(ac);
255 } 249 }
256} 250}
@@ -356,40 +350,39 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
356 write_unlock_bh(&idev->lock); 350 write_unlock_bh(&idev->lock);
357 addrconf_leave_solict(idev, &aca->aca_addr); 351 addrconf_leave_solict(idev, &aca->aca_addr);
358 352
359 dst_hold(&aca->aca_rt->u.dst); 353 dst_hold(&aca->aca_rt->dst);
360 ip6_del_rt(aca->aca_rt); 354 ip6_del_rt(aca->aca_rt);
361 355
362 aca_put(aca); 356 aca_put(aca);
363 return 0; 357 return 0;
364} 358}
365 359
360/* called with rcu_read_lock() */
366static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) 361static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
367{ 362{
368 int ret; 363 struct inet6_dev *idev = __in6_dev_get(dev);
369 struct inet6_dev *idev = in6_dev_get(dev); 364
370 if (idev == NULL) 365 if (idev == NULL)
371 return -ENODEV; 366 return -ENODEV;
372 ret = __ipv6_dev_ac_dec(idev, addr); 367 return __ipv6_dev_ac_dec(idev, addr);
373 in6_dev_put(idev);
374 return ret;
375} 368}
376 369
377/* 370/*
378 * check if the interface has this anycast address 371 * check if the interface has this anycast address
372 * called with rcu_read_lock()
379 */ 373 */
380static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) 374static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
381{ 375{
382 struct inet6_dev *idev; 376 struct inet6_dev *idev;
383 struct ifacaddr6 *aca; 377 struct ifacaddr6 *aca;
384 378
385 idev = in6_dev_get(dev); 379 idev = __in6_dev_get(dev);
386 if (idev) { 380 if (idev) {
387 read_lock_bh(&idev->lock); 381 read_lock_bh(&idev->lock);
388 for (aca = idev->ac_list; aca; aca = aca->aca_next) 382 for (aca = idev->ac_list; aca; aca = aca->aca_next)
389 if (ipv6_addr_equal(&aca->aca_addr, addr)) 383 if (ipv6_addr_equal(&aca->aca_addr, addr))
390 break; 384 break;
391 read_unlock_bh(&idev->lock); 385 read_unlock_bh(&idev->lock);
392 in6_dev_put(idev);
393 return aca != NULL; 386 return aca != NULL;
394 } 387 }
395 return 0; 388 return 0;
@@ -403,14 +396,15 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
403{ 396{
404 int found = 0; 397 int found = 0;
405 398
406 if (dev)
407 return ipv6_chk_acast_dev(dev, addr);
408 rcu_read_lock(); 399 rcu_read_lock();
409 for_each_netdev_rcu(net, dev) 400 if (dev)
410 if (ipv6_chk_acast_dev(dev, addr)) { 401 found = ipv6_chk_acast_dev(dev, addr);
411 found = 1; 402 else
412 break; 403 for_each_netdev_rcu(net, dev)
413 } 404 if (ipv6_chk_acast_dev(dev, addr)) {
405 found = 1;
406 break;
407 }
414 rcu_read_unlock(); 408 rcu_read_unlock();
415 return found; 409 return found;
416} 410}
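[The anycast.c rework above swaps the get/put reference dance (dev_get_by_index() followed by dev_put()) for a borrow inside one RCU read-side section: the device pointer is only dereferenced while the section is held, so no refcount traffic is needed at all. The shape of the two idioms in a userspace sketch, with a plain rwlock standing in for RCU and a hypothetical lookup_dev():

    #include <pthread.h>
    #include <stdio.h>

    struct device { int refcnt; const char *name; };

    static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
    static struct device eth0 = { 1, "eth0" };

    static struct device *lookup_dev(void) { return &eth0; }

    /* style 1: take a reference, use the object, drop the reference */
    static void use_with_refcount(void)
    {
        pthread_rwlock_rdlock(&table_lock);
        struct device *dev = lookup_dev();
        if (dev)
            __sync_fetch_and_add(&dev->refcnt, 1);  /* dev_hold() */
        pthread_rwlock_unlock(&table_lock);

        if (dev) {
            printf("using %s\n", dev->name);        /* safe: ref held */
            __sync_fetch_and_sub(&dev->refcnt, 1);  /* dev_put() */
        }
    }

    /* style 2: borrow the pointer for the duration of the read section */
    static void use_borrowed(void)
    {
        pthread_rwlock_rdlock(&table_lock);   /* rcu_read_lock() analogue */
        struct device *dev = lookup_dev();
        if (dev)
            printf("using %s\n", dev->name);  /* valid only in here */
        pthread_rwlock_unlock(&table_lock);
    }

    int main(void)
    {
        use_with_refcount();
        use_borrowed();
        return 0;
    }

The borrow style is why the error paths above collapse into a single label: there is no longer a reference to undo, only a section to leave.]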
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 622dc7939a1b..7d929a22cbc2 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -38,10 +38,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
38 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 38 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
39 struct inet_sock *inet = inet_sk(sk); 39 struct inet_sock *inet = inet_sk(sk);
40 struct ipv6_pinfo *np = inet6_sk(sk); 40 struct ipv6_pinfo *np = inet6_sk(sk);
41 struct in6_addr *daddr, *final_p = NULL, final; 41 struct in6_addr *daddr, *final_p, final;
42 struct dst_entry *dst; 42 struct dst_entry *dst;
43 struct flowi fl; 43 struct flowi fl;
44 struct ip6_flowlabel *flowlabel = NULL; 44 struct ip6_flowlabel *flowlabel = NULL;
45 struct ipv6_txoptions *opt;
45 int addr_type; 46 int addr_type;
46 int err; 47 int err;
47 48
@@ -155,19 +156,8 @@ ipv4_connected:
155 156
156 security_sk_classify_flow(sk, &fl); 157 security_sk_classify_flow(sk, &fl);
157 158
158 if (flowlabel) { 159 opt = flowlabel ? flowlabel->opt : np->opt;
159 if (flowlabel->opt && flowlabel->opt->srcrt) { 160 final_p = fl6_update_dst(&fl, opt, &final);
160 struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
161 ipv6_addr_copy(&final, &fl.fl6_dst);
162 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
163 final_p = &final;
164 }
165 } else if (np->opt && np->opt->srcrt) {
166 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
167 ipv6_addr_copy(&final, &fl.fl6_dst);
168 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
169 final_p = &final;
170 }
171 161
172 err = ip6_dst_lookup(sk, &dst, &fl); 162 err = ip6_dst_lookup(sk, &dst, &fl);
173 if (err) 163 if (err)
@@ -222,6 +212,8 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
222 if (!skb) 212 if (!skb)
223 return; 213 return;
224 214
215 skb->protocol = htons(ETH_P_IPV6);
216
225 serr = SKB_EXT_ERR(skb); 217 serr = SKB_EXT_ERR(skb);
226 serr->ee.ee_errno = err; 218 serr->ee.ee_errno = err;
227 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6; 219 serr->ee.ee_origin = SO_EE_ORIGIN_ICMP6;
@@ -255,6 +247,8 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
255 if (!skb) 247 if (!skb)
256 return; 248 return;
257 249
250 skb->protocol = htons(ETH_P_IPV6);
251
258 skb_put(skb, sizeof(struct ipv6hdr)); 252 skb_put(skb, sizeof(struct ipv6hdr));
259 skb_reset_network_header(skb); 253 skb_reset_network_header(skb);
260 iph = ipv6_hdr(skb); 254 iph = ipv6_hdr(skb);
@@ -278,6 +272,45 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
278 kfree_skb(skb); 272 kfree_skb(skb);
279} 273}
280 274
275void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
276{
277 struct ipv6_pinfo *np = inet6_sk(sk);
278 struct ipv6hdr *iph;
279 struct sk_buff *skb;
280 struct ip6_mtuinfo *mtu_info;
281
282 if (!np->rxopt.bits.rxpmtu)
283 return;
284
285 skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
286 if (!skb)
287 return;
288
289 skb_put(skb, sizeof(struct ipv6hdr));
290 skb_reset_network_header(skb);
291 iph = ipv6_hdr(skb);
292 ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
293
294 mtu_info = IP6CBMTU(skb);
295 if (!mtu_info) {
296 kfree_skb(skb);
297 return;
298 }
299
300 mtu_info->ip6m_mtu = mtu;
301 mtu_info->ip6m_addr.sin6_family = AF_INET6;
302 mtu_info->ip6m_addr.sin6_port = 0;
303 mtu_info->ip6m_addr.sin6_flowinfo = 0;
304 mtu_info->ip6m_addr.sin6_scope_id = fl->oif;
305 ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
306
307 __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
308 skb_reset_transport_header(skb);
309
310 skb = xchg(&np->rxpmtu, skb);
311 kfree_skb(skb);
312}
313
281/* 314/*
282 * Handle MSG_ERRQUEUE 315 * Handle MSG_ERRQUEUE
283 */ 316 */
@@ -319,7 +352,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
319 sin->sin6_flowinfo = 0; 352 sin->sin6_flowinfo = 0;
320 sin->sin6_port = serr->port; 353 sin->sin6_port = serr->port;
321 sin->sin6_scope_id = 0; 354 sin->sin6_scope_id = 0;
322 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) { 355 if (skb->protocol == htons(ETH_P_IPV6)) {
323 ipv6_addr_copy(&sin->sin6_addr, 356 ipv6_addr_copy(&sin->sin6_addr,
324 (struct in6_addr *)(nh + serr->addr_offset)); 357 (struct in6_addr *)(nh + serr->addr_offset));
325 if (np->sndflow) 358 if (np->sndflow)
@@ -341,7 +374,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
341 sin->sin6_family = AF_INET6; 374 sin->sin6_family = AF_INET6;
342 sin->sin6_flowinfo = 0; 375 sin->sin6_flowinfo = 0;
343 sin->sin6_scope_id = 0; 376 sin->sin6_scope_id = 0;
344 if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) { 377 if (skb->protocol == htons(ETH_P_IPV6)) {
345 ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr); 378 ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
346 if (np->rxopt.all) 379 if (np->rxopt.all)
347 datagram_recv_ctl(sk, msg, skb); 380 datagram_recv_ctl(sk, msg, skb);
@@ -381,6 +414,54 @@ out:
381 return err; 414 return err;
382} 415}
383 416
417/*
418 * Handle IPV6_RECVPATHMTU
419 */
420int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
421{
422 struct ipv6_pinfo *np = inet6_sk(sk);
423 struct sk_buff *skb;
424 struct sockaddr_in6 *sin;
425 struct ip6_mtuinfo mtu_info;
426 int err;
427 int copied;
428
429 err = -EAGAIN;
430 skb = xchg(&np->rxpmtu, NULL);
431 if (skb == NULL)
432 goto out;
433
434 copied = skb->len;
435 if (copied > len) {
436 msg->msg_flags |= MSG_TRUNC;
437 copied = len;
438 }
439 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
440 if (err)
441 goto out_free_skb;
442
443 sock_recv_timestamp(msg, sk, skb);
444
445 memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info));
446
447 sin = (struct sockaddr_in6 *)msg->msg_name;
448 if (sin) {
449 sin->sin6_family = AF_INET6;
450 sin->sin6_flowinfo = 0;
451 sin->sin6_port = 0;
452 sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
453 ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr);
454 }
455
456 put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
457
458 err = copied;
459
460out_free_skb:
461 kfree_skb(skb);
462out:
463 return err;
464}
384 465
385 466
386int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) 467int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
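[ipv6_recv_rxpmtu() above follows the standard datagram-receive contract: copy min(skb->len, len) bytes, set MSG_TRUNC when the user buffer was too small, and return the number of bytes actually delivered. That contract extracted into plain C (hypothetical names; the kernel version operates on skbs and iovecs):

    #include <stdio.h>
    #include <string.h>

    #define MY_MSG_TRUNC 0x20       /* illustrative flag value */

    static int recv_datagram(const char *pkt, size_t pkt_len,
                             char *buf, size_t buf_len, int *flags)
    {
        size_t copied = pkt_len;

        if (copied > buf_len) {     /* user buffer too small */
            *flags |= MY_MSG_TRUNC;
            copied = buf_len;
        }
        memcpy(buf, pkt, copied);
        return (int)copied;         /* bytes delivered, not packet size */
    }

    int main(void)
    {
        char buf[4];
        int flags = 0;
        int n = recv_datagram("abcdefgh", 8, buf, sizeof(buf), &flags);

        printf("copied=%d truncated=%d\n", n, !!(flags & MY_MSG_TRUNC));
        return 0;
    }
]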
@@ -497,7 +578,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
497int datagram_send_ctl(struct net *net, 578int datagram_send_ctl(struct net *net,
498 struct msghdr *msg, struct flowi *fl, 579 struct msghdr *msg, struct flowi *fl,
499 struct ipv6_txoptions *opt, 580 struct ipv6_txoptions *opt,
500 int *hlimit, int *tclass) 581 int *hlimit, int *tclass, int *dontfrag)
501{ 582{
502 struct in6_pktinfo *src_info; 583 struct in6_pktinfo *src_info;
503 struct cmsghdr *cmsg; 584 struct cmsghdr *cmsg;
@@ -737,6 +818,25 @@ int datagram_send_ctl(struct net *net,
737 818
738 break; 819 break;
739 } 820 }
821
822 case IPV6_DONTFRAG:
823 {
824 int df;
825
826 err = -EINVAL;
827 if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
828 goto exit_f;
829 }
830
831 df = *(int *)CMSG_DATA(cmsg);
832 if (df < 0 || df > 1)
833 goto exit_f;
834
835 err = 0;
836 *dontfrag = df;
837
838 break;
839 }
740 default: 840 default:
741 LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", 841 LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n",
742 cmsg->cmsg_type); 842 cmsg->cmsg_type);
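[The IPV6_DONTFRAG branch added to datagram_send_ctl() is a textbook cmsg walk: verify cmsg_len against CMSG_LEN(sizeof(int)) before touching CMSG_DATA(), then range-check the value. The same validation works in userspace with the standard CMSG_* macros; a sketch that builds a control buffer by hand and parses it back (the level/type values are illustrative):

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>

    int main(void)
    {
        char ctl[CMSG_SPACE(sizeof(int))];
        struct msghdr msg = { .msg_control = ctl,
                              .msg_controllen = sizeof(ctl) };
        struct cmsghdr *cmsg;
        int one = 1;

        memset(ctl, 0, sizeof(ctl));

        /* build one int-valued option */
        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = SOL_SOCKET;
        cmsg->cmsg_type = 1;
        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
        memcpy(CMSG_DATA(cmsg), &one, sizeof(one));

        /* parse it back with the same checks the kernel applies */
        for (cmsg = CMSG_FIRSTHDR(&msg); cmsg;
             cmsg = CMSG_NXTHDR(&msg, cmsg)) {
            int df;

            if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
                return 1;           /* -EINVAL in the kernel */
            memcpy(&df, CMSG_DATA(cmsg), sizeof(df));
            if (df < 0 || df > 1)
                return 1;
            printf("dontfrag=%d\n", df);
        }
        return 0;
    }
]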
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 8a659f92d17a..262f105d23b9 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -312,6 +312,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
312 Routing header. 312 Routing header.
313 ********************************/ 313 ********************************/
314 314
315/* called with rcu_read_lock() */
315static int ipv6_rthdr_rcv(struct sk_buff *skb) 316static int ipv6_rthdr_rcv(struct sk_buff *skb)
316{ 317{
317 struct inet6_skb_parm *opt = IP6CB(skb); 318 struct inet6_skb_parm *opt = IP6CB(skb);
@@ -324,12 +325,9 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
324 struct net *net = dev_net(skb->dev); 325 struct net *net = dev_net(skb->dev);
325 int accept_source_route = net->ipv6.devconf_all->accept_source_route; 326 int accept_source_route = net->ipv6.devconf_all->accept_source_route;
326 327
327 idev = in6_dev_get(skb->dev); 328 idev = __in6_dev_get(skb->dev);
328 if (idev) { 329 if (idev && accept_source_route > idev->cnf.accept_source_route)
329 if (accept_source_route > idev->cnf.accept_source_route) 330 accept_source_route = idev->cnf.accept_source_route;
330 accept_source_route = idev->cnf.accept_source_route;
331 in6_dev_put(idev);
332 }
333 331
334 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || 332 if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
335 !pskb_may_pull(skb, (skb_transport_offset(skb) + 333 !pskb_may_pull(skb, (skb_transport_offset(skb) +
@@ -874,3 +872,27 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
874 return opt; 872 return opt;
875} 873}
876 874
875/**
876 * fl6_update_dst - update flowi destination address with info given
877 * by srcrt option, if any.
878 *
879 * @fl: flowi for which fl6_dst is to be updated
880 * @opt: struct ipv6_txoptions in which to look for srcrt opt
881 * @orig: copy of original fl6_dst address if modified
882 *
883 * Returns NULL if no txoptions or no srcrt, otherwise returns orig
884 * and initial value of fl->fl6_dst set in orig
885 */
886struct in6_addr *fl6_update_dst(struct flowi *fl,
887 const struct ipv6_txoptions *opt,
888 struct in6_addr *orig)
889{
890 if (!opt || !opt->srcrt)
891 return NULL;
892
893 ipv6_addr_copy(orig, &fl->fl6_dst);
894 ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr);
895 return orig;
896}
897
898EXPORT_SYMBOL_GPL(fl6_update_dst);
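
A sketch of the intended call pattern, pieced together from the conversions later in this diff (inet6_csk_xmit below is one of them); ip6_route_output stands in here for whatever lookup a given caller actually uses.

	struct in6_addr *final_p, final;

	/* Point fl6_dst at the first routing-header hop, remembering
	 * the real destination in 'final' if a srcrt option exists. */
	final_p = fl6_update_dst(&fl, np->opt, &final);

	dst = ip6_route_output(net, sk, &fl);	/* route toward the first hop */

	if (final_p)				/* restore the final destination */
		ipv6_addr_copy(&fl.fl6_dst, final_p);
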
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 5e463c43fcc2..b1108ede18e1 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -43,8 +43,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
43 if (arg.result) 43 if (arg.result)
44 return arg.result; 44 return arg.result;
45 45
46 dst_hold(&net->ipv6.ip6_null_entry->u.dst); 46 dst_hold(&net->ipv6.ip6_null_entry->dst);
47 return &net->ipv6.ip6_null_entry->u.dst; 47 return &net->ipv6.ip6_null_entry->dst;
48} 48}
49 49
50static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, 50static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -86,7 +86,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
86 struct in6_addr saddr; 86 struct in6_addr saddr;
87 87
88 if (ipv6_dev_get_saddr(net, 88 if (ipv6_dev_get_saddr(net,
89 ip6_dst_idev(&rt->u.dst)->dev, 89 ip6_dst_idev(&rt->dst)->dev,
90 &flp->fl6_dst, 90 &flp->fl6_dst,
91 rt6_flags2srcprefs(flags), 91 rt6_flags2srcprefs(flags),
92 &saddr)) 92 &saddr))
@@ -99,12 +99,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
99 goto out; 99 goto out;
100 } 100 }
101again: 101again:
102 dst_release(&rt->u.dst); 102 dst_release(&rt->dst);
103 rt = NULL; 103 rt = NULL;
104 goto out; 104 goto out;
105 105
106discard_pkt: 106discard_pkt:
107 dst_hold(&rt->u.dst); 107 dst_hold(&rt->dst);
108out: 108out:
109 arg->result = rt; 109 arg->result = rt;
110 return rt == NULL ? -EAGAIN : 0; 110 return rt == NULL ? -EAGAIN : 0;
@@ -208,7 +208,6 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
208{ 208{
209 struct fib6_rule *rule6 = (struct fib6_rule *) rule; 209 struct fib6_rule *rule6 = (struct fib6_rule *) rule;
210 210
211 frh->family = AF_INET6;
212 frh->dst_len = rule6->dst.plen; 211 frh->dst_len = rule6->dst.plen;
213 frh->src_len = rule6->src.plen; 212 frh->src_len = rule6->src.plen;
214 frh->tos = rule6->tclass; 213 frh->tos = rule6->tclass;
@@ -238,7 +237,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
238 + nla_total_size(16); /* src */ 237 + nla_total_size(16); /* src */
239} 238}
240 239
241static struct fib_rules_ops fib6_rules_ops_template = { 240static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = {
242 .family = AF_INET6, 241 .family = AF_INET6,
243 .rule_size = sizeof(struct fib6_rule), 242 .rule_size = sizeof(struct fib6_rule),
244 .addr_size = sizeof(struct in6_addr), 243 .addr_size = sizeof(struct in6_addr),
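
The u.dst → dst churn that recurs through the rest of this diff comes from one structural change: struct rt6_info used to wrap its dst_entry in a single-member union, which the series removes. Roughly:

	/* Old layout (fields reached as rt->u.dst.*):
	 *
	 *	struct rt6_info {
	 *		union { struct dst_entry dst; } u;
	 *		...
	 *	};
	 *
	 * New layout; keeping dst_entry first preserves the casts between
	 * struct dst_entry * and struct rt6_info * that the code relies on. */
	struct rt6_info {
		struct dst_entry	dst;
		/* ... unchanged members ... */
	};
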
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 3330a4bd6157..03e62f94ff8e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -481,8 +481,9 @@ route_done:
481 len + sizeof(struct icmp6hdr), 481 len + sizeof(struct icmp6hdr),
482 sizeof(struct icmp6hdr), hlimit, 482 sizeof(struct icmp6hdr), hlimit,
483 np->tclass, NULL, &fl, (struct rt6_info*)dst, 483 np->tclass, NULL, &fl, (struct rt6_info*)dst,
484 MSG_DONTWAIT); 484 MSG_DONTWAIT, np->dontfrag);
485 if (err) { 485 if (err) {
486 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
486 ip6_flush_pending_frames(sk); 487 ip6_flush_pending_frames(sk);
487 goto out_put; 488 goto out_put;
488 } 489 }
@@ -560,9 +561,11 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
560 561
561 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), 562 err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
562 sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl, 563 sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl,
563 (struct rt6_info*)dst, MSG_DONTWAIT); 564 (struct rt6_info*)dst, MSG_DONTWAIT,
565 np->dontfrag);
564 566
565 if (err) { 567 if (err) {
568 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
566 ip6_flush_pending_frames(sk); 569 ip6_flush_pending_frames(sk);
567 goto out_put; 570 goto out_put;
568 } 571 }
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 628db24bcf22..8a1628023bd1 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -178,14 +178,14 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
178 return dst; 178 return dst;
179} 179}
180 180
181int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) 181int inet6_csk_xmit(struct sk_buff *skb)
182{ 182{
183 struct sock *sk = skb->sk; 183 struct sock *sk = skb->sk;
184 struct inet_sock *inet = inet_sk(sk); 184 struct inet_sock *inet = inet_sk(sk);
185 struct ipv6_pinfo *np = inet6_sk(sk); 185 struct ipv6_pinfo *np = inet6_sk(sk);
186 struct flowi fl; 186 struct flowi fl;
187 struct dst_entry *dst; 187 struct dst_entry *dst;
188 struct in6_addr *final_p = NULL, final; 188 struct in6_addr *final_p, final;
189 189
190 memset(&fl, 0, sizeof(fl)); 190 memset(&fl, 0, sizeof(fl));
191 fl.proto = sk->sk_protocol; 191 fl.proto = sk->sk_protocol;
@@ -199,12 +199,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
199 fl.fl_ip_dport = inet->inet_dport; 199 fl.fl_ip_dport = inet->inet_dport;
200 security_sk_classify_flow(sk, &fl); 200 security_sk_classify_flow(sk, &fl);
201 201
202 if (np->opt && np->opt->srcrt) { 202 final_p = fl6_update_dst(&fl, np->opt, &final);
203 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
204 ipv6_addr_copy(&final, &fl.fl6_dst);
205 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
206 final_p = &final;
207 }
208 203
209 dst = __inet6_csk_dst_check(sk, np->dst_cookie); 204 dst = __inet6_csk_dst_check(sk, np->dst_cookie);
210 205
@@ -234,7 +229,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
234 /* Restore final destination back after routing done */ 229 /* Restore final destination back after routing done */
235 ipv6_addr_copy(&fl.fl6_dst, &np->daddr); 230 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
236 231
237 return ip6_xmit(sk, skb, &fl, np->opt, 0); 232 return ip6_xmit(sk, skb, &fl, np->opt);
238} 233}
239 234
240EXPORT_SYMBOL_GPL(inet6_csk_xmit); 235EXPORT_SYMBOL_GPL(inet6_csk_xmit);
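
Two things happen to this function at once: the open-coded srcrt dance collapses into fl6_update_dst() (added in exthdrs.c above), and the ipfragok parameter disappears from the whole ip6_xmit() call chain in favor of callers marking the skb directly, as the block removed from ip6_xmit() below shows. Sketched, with the caller-side usage as an assumption rather than a quote:

	/* sketch: replacement for the old ipfragok=1 argument */
	skb->local_df = 1;		/* permit local fragmentation */
	inet6_csk_xmit(skb);		/* note: no ipfragok parameter any more */
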
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 6b82e02158c6..b6a585909d35 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -128,12 +128,24 @@ static __inline__ u32 fib6_new_sernum(void)
128/* 128/*
129 * test bit 129 * test bit
130 */ 130 */
131#if defined(__LITTLE_ENDIAN)
132# define BITOP_BE32_SWIZZLE (0x1F & ~7)
133#else
134# define BITOP_BE32_SWIZZLE 0
135#endif
131 136
132static __inline__ __be32 addr_bit_set(void *token, int fn_bit) 137static __inline__ __be32 addr_bit_set(void *token, int fn_bit)
133{ 138{
134 __be32 *addr = token; 139 __be32 *addr = token;
135 140 /*
136 return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5]; 141 * Here,
142 * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
143 * is optimized version of
144 * htonl(1 << ((~fn_bit)&0x1F))
145 * See include/asm-generic/bitops/le.h.
146 */
147 return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
148 addr[fn_bit >> 5];
137} 149}
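
The BITOP_BE32_SWIZZLE identity is easy to verify offline: on a little-endian host, XOR-ing the big-endian bit index with 0x18 relocates it to the byte that htonl() would have moved it to, so the shift produces the byte-swapped mask directly. A standalone check (plain userspace C, not kernel code):

	/* Sanity check: the swizzled shift equals the htonl() form (sketch). */
	#include <arpa/inet.h>
	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		int fn_bit;

		for (fn_bit = 0; fn_bit < 128; fn_bit++) {
			uint32_t slow = htonl(1u << ((~fn_bit) & 0x1F));
			uint32_t fast = 1u << ((~fn_bit ^ (0x1F & ~7)) & 0x1F);

			assert(slow == fast);
		}
		return 0;
	}
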
138 150
139static __inline__ struct fib6_node * node_alloc(void) 151static __inline__ struct fib6_node * node_alloc(void)
@@ -153,7 +165,7 @@ static __inline__ void node_free(struct fib6_node * fn)
153static __inline__ void rt6_release(struct rt6_info *rt) 165static __inline__ void rt6_release(struct rt6_info *rt)
154{ 166{
155 if (atomic_dec_and_test(&rt->rt6i_ref)) 167 if (atomic_dec_and_test(&rt->rt6i_ref))
156 dst_free(&rt->u.dst); 168 dst_free(&rt->dst);
157} 169}
158 170
159static void fib6_link_table(struct net *net, struct fib6_table *tb) 171static void fib6_link_table(struct net *net, struct fib6_table *tb)
@@ -266,7 +278,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
266 int res; 278 int res;
267 struct rt6_info *rt; 279 struct rt6_info *rt;
268 280
269 for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { 281 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
270 res = rt6_dump_route(rt, w->args); 282 res = rt6_dump_route(rt, w->args);
271 if (res < 0) { 283 if (res < 0) {
272 /* Frame is full, suspend walking */ 284 /* Frame is full, suspend walking */
@@ -607,7 +619,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
607 619
608 ins = &fn->leaf; 620 ins = &fn->leaf;
609 621
610 for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) { 622 for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) {
611 /* 623 /*
612 * Search for duplicates 624 * Search for duplicates
613 */ 625 */
@@ -635,7 +647,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
635 if (iter->rt6i_metric > rt->rt6i_metric) 647 if (iter->rt6i_metric > rt->rt6i_metric)
636 break; 648 break;
637 649
638 ins = &iter->u.dst.rt6_next; 650 ins = &iter->dst.rt6_next;
639 } 651 }
640 652
641 /* Reset round-robin state, if necessary */ 653 /* Reset round-robin state, if necessary */
@@ -646,7 +658,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
646 * insert node 658 * insert node
647 */ 659 */
648 660
649 rt->u.dst.rt6_next = iter; 661 rt->dst.rt6_next = iter;
650 *ins = rt; 662 *ins = rt;
651 rt->rt6i_node = fn; 663 rt->rt6i_node = fn;
652 atomic_inc(&rt->rt6i_ref); 664 atomic_inc(&rt->rt6i_ref);
@@ -787,7 +799,7 @@ out:
787 atomic_inc(&pn->leaf->rt6i_ref); 799 atomic_inc(&pn->leaf->rt6i_ref);
788 } 800 }
789#endif 801#endif
790 dst_free(&rt->u.dst); 802 dst_free(&rt->dst);
791 } 803 }
792 return err; 804 return err;
793 805
@@ -798,7 +810,7 @@ out:
798st_failure: 810st_failure:
799 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) 811 if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
800 fib6_repair_tree(info->nl_net, fn); 812 fib6_repair_tree(info->nl_net, fn);
801 dst_free(&rt->u.dst); 813 dst_free(&rt->dst);
802 return err; 814 return err;
803#endif 815#endif
804} 816}
@@ -1096,7 +1108,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1096 RT6_TRACE("fib6_del_route\n"); 1108 RT6_TRACE("fib6_del_route\n");
1097 1109
1098 /* Unlink it */ 1110 /* Unlink it */
1099 *rtp = rt->u.dst.rt6_next; 1111 *rtp = rt->dst.rt6_next;
1100 rt->rt6i_node = NULL; 1112 rt->rt6i_node = NULL;
1101 net->ipv6.rt6_stats->fib_rt_entries--; 1113 net->ipv6.rt6_stats->fib_rt_entries--;
1102 net->ipv6.rt6_stats->fib_discarded_routes++; 1114 net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1110,14 +1122,14 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
1110 FOR_WALKERS(w) { 1122 FOR_WALKERS(w) {
1111 if (w->state == FWS_C && w->leaf == rt) { 1123 if (w->state == FWS_C && w->leaf == rt) {
1112 RT6_TRACE("walker %p adjusted by delroute\n", w); 1124 RT6_TRACE("walker %p adjusted by delroute\n", w);
1113 w->leaf = rt->u.dst.rt6_next; 1125 w->leaf = rt->dst.rt6_next;
1114 if (w->leaf == NULL) 1126 if (w->leaf == NULL)
1115 w->state = FWS_U; 1127 w->state = FWS_U;
1116 } 1128 }
1117 } 1129 }
1118 read_unlock(&fib6_walker_lock); 1130 read_unlock(&fib6_walker_lock);
1119 1131
1120 rt->u.dst.rt6_next = NULL; 1132 rt->dst.rt6_next = NULL;
1121 1133
1122 /* If it was last route, expunge its radix tree node */ 1134 /* If it was last route, expunge its radix tree node */
1123 if (fn->leaf == NULL) { 1135 if (fn->leaf == NULL) {
@@ -1156,7 +1168,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
1156 struct rt6_info **rtp; 1168 struct rt6_info **rtp;
1157 1169
1158#if RT6_DEBUG >= 2 1170#if RT6_DEBUG >= 2
1159 if (rt->u.dst.obsolete>0) { 1171 if (rt->dst.obsolete>0) {
1160 WARN_ON(fn != NULL); 1172 WARN_ON(fn != NULL);
1161 return -ENOENT; 1173 return -ENOENT;
1162 } 1174 }
@@ -1183,7 +1195,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
1183 * Walk the leaf entries looking for ourself 1195 * Walk the leaf entries looking for ourself
1184 */ 1196 */
1185 1197
1186 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) { 1198 for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
1187 if (*rtp == rt) { 1199 if (*rtp == rt) {
1188 fib6_del_route(fn, rtp, info); 1200 fib6_del_route(fn, rtp, info);
1189 return 0; 1201 return 0;
@@ -1322,7 +1334,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
1322 .nl_net = c->net, 1334 .nl_net = c->net,
1323 }; 1335 };
1324 1336
1325 for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) { 1337 for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
1326 res = c->func(rt, c->arg); 1338 res = c->func(rt, c->arg);
1327 if (res < 0) { 1339 if (res < 0) {
1328 w->leaf = rt; 1340 w->leaf = rt;
@@ -1436,8 +1448,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
1436 } 1448 }
1437 gc_args.more++; 1449 gc_args.more++;
1438 } else if (rt->rt6i_flags & RTF_CACHE) { 1450 } else if (rt->rt6i_flags & RTF_CACHE) {
1439 if (atomic_read(&rt->u.dst.__refcnt) == 0 && 1451 if (atomic_read(&rt->dst.__refcnt) == 0 &&
1440 time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) { 1452 time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
1441 RT6_TRACE("aging clone %p\n", rt); 1453 RT6_TRACE("aging clone %p\n", rt);
1442 return -1; 1454 return -1;
1443 } else if ((rt->rt6i_flags & RTF_GATEWAY) && 1455 } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 14e23216eb28..13654686aeab 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -360,7 +360,8 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
360 msg.msg_control = (void*)(fl->opt+1); 360 msg.msg_control = (void*)(fl->opt+1);
361 flowi.oif = 0; 361 flowi.oif = 0;
362 362
363 err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk); 363 err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk,
364 &junk, &junk);
364 if (err) 365 if (err)
365 goto done; 366 goto done;
366 err = -EINVAL; 367 err = -EINVAL;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 6aa7ee1295c2..a83e9209cecc 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -143,7 +143,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
143 /* Must drop socket now because of tproxy. */ 143 /* Must drop socket now because of tproxy. */
144 skb_orphan(skb); 144 skb_orphan(skb);
145 145
146 return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL, 146 return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, dev, NULL,
147 ip6_rcv_finish); 147 ip6_rcv_finish);
148err: 148err:
149 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); 149 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
@@ -236,7 +236,7 @@ discard:
236 236
237int ip6_input(struct sk_buff *skb) 237int ip6_input(struct sk_buff *skb)
238{ 238{
239 return NF_HOOK(PF_INET6, NF_INET_LOCAL_IN, skb, skb->dev, NULL, 239 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
240 ip6_input_finish); 240 ip6_input_finish);
241} 241}
242 242
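
The PF_INET6 → NFPROTO_IPV6 swaps in this file (and in ip6_output.c below) are cosmetic: the constants are numerically identical, NFPROTO_* merely being the namespace netfilter prefers for its hooks. From include/linux/netfilter.h, abridged:

	enum {
		NFPROTO_UNSPEC	=  0,
		NFPROTO_IPV4	=  2,	/* == AF_INET */
		NFPROTO_ARP	=  3,
		NFPROTO_BRIDGE	=  7,
		NFPROTO_IPV6	= 10,	/* == AF_INET6 */
		NFPROTO_DECNET	= 12,
	};
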
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 75d5ef830097..d40b330c0ee6 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -67,8 +67,8 @@ int __ip6_local_out(struct sk_buff *skb)
67 len = 0; 67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len); 68 ipv6_hdr(skb)->payload_len = htons(len);
69 69
70 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, 70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 dst_output); 71 skb_dst(skb)->dev, dst_output);
72} 72}
73 73
74int ip6_local_out(struct sk_buff *skb) 74int ip6_local_out(struct sk_buff *skb)
@@ -83,22 +83,6 @@ int ip6_local_out(struct sk_buff *skb)
83} 83}
84EXPORT_SYMBOL_GPL(ip6_local_out); 84EXPORT_SYMBOL_GPL(ip6_local_out);
85 85
86static int ip6_output_finish(struct sk_buff *skb)
87{
88 struct dst_entry *dst = skb_dst(skb);
89
90 if (dst->hh)
91 return neigh_hh_output(dst->hh, skb);
92 else if (dst->neighbour)
93 return dst->neighbour->output(skb);
94
95 IP6_INC_STATS_BH(dev_net(dst->dev),
96 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
97 kfree_skb(skb);
98 return -EINVAL;
99
100}
101
102/* dev_loopback_xmit for use with netfilter. */ 86/* dev_loopback_xmit for use with netfilter. */
103static int ip6_dev_loopback_xmit(struct sk_buff *newskb) 87static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
104{ 88{
@@ -112,8 +96,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
112 return 0; 96 return 0;
113} 97}
114 98
115 99static int ip6_finish_output2(struct sk_buff *skb)
116static int ip6_output2(struct sk_buff *skb)
117{ 100{
118 struct dst_entry *dst = skb_dst(skb); 101 struct dst_entry *dst = skb_dst(skb);
119 struct net_device *dev = dst->dev; 102 struct net_device *dev = dst->dev;
@@ -125,7 +108,7 @@ static int ip6_output2(struct sk_buff *skb)
125 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 108 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
126 109
127 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && 110 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
128 ((mroute6_socket(dev_net(dev)) && 111 ((mroute6_socket(dev_net(dev), skb) &&
129 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || 112 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
130 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, 113 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
131 &ipv6_hdr(skb)->saddr))) { 114 &ipv6_hdr(skb)->saddr))) {
@@ -135,8 +118,8 @@ static int ip6_output2(struct sk_buff *skb)
135 is not supported in any case. 118 is not supported in any case.
136 */ 119 */
137 if (newskb) 120 if (newskb)
138 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb, 121 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
139 NULL, newskb->dev, 122 newskb, NULL, newskb->dev,
140 ip6_dev_loopback_xmit); 123 ip6_dev_loopback_xmit);
141 124
142 if (ipv6_hdr(skb)->hop_limit == 0) { 125 if (ipv6_hdr(skb)->hop_limit == 0) {
@@ -151,8 +134,15 @@ static int ip6_output2(struct sk_buff *skb)
151 skb->len); 134 skb->len);
152 } 135 }
153 136
154 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, 137 if (dst->hh)
155 ip6_output_finish); 138 return neigh_hh_output(dst->hh, skb);
139 else if (dst->neighbour)
140 return dst->neighbour->output(skb);
141
142 IP6_INC_STATS_BH(dev_net(dst->dev),
143 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
144 kfree_skb(skb);
145 return -EINVAL;
156} 146}
157 147
158static inline int ip6_skb_dst_mtu(struct sk_buff *skb) 148static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
@@ -163,29 +153,37 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
163 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); 153 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
164} 154}
165 155
156static int ip6_finish_output(struct sk_buff *skb)
157{
158 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
159 dst_allfrag(skb_dst(skb)))
160 return ip6_fragment(skb, ip6_finish_output2);
161 else
162 return ip6_finish_output2(skb);
163}
164
166int ip6_output(struct sk_buff *skb) 165int ip6_output(struct sk_buff *skb)
167{ 166{
167 struct net_device *dev = skb_dst(skb)->dev;
168 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 168 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
169 if (unlikely(idev->cnf.disable_ipv6)) { 169 if (unlikely(idev->cnf.disable_ipv6)) {
170 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev, 170 IP6_INC_STATS(dev_net(dev), idev,
171 IPSTATS_MIB_OUTDISCARDS); 171 IPSTATS_MIB_OUTDISCARDS);
172 kfree_skb(skb); 172 kfree_skb(skb);
173 return 0; 173 return 0;
174 } 174 }
175 175
176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || 176 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
177 dst_allfrag(skb_dst(skb))) 177 ip6_finish_output,
178 return ip6_fragment(skb, ip6_output2); 178 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
179 else
180 return ip6_output2(skb);
181} 179}
182 180
183/* 181/*
184 * xmit an sk_buff (used by TCP) 182 * xmit an sk_buff (used by TCP, SCTP and DCCP)
185 */ 183 */
186 184
187int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, 185int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt, int ipfragok) 186 struct ipv6_txoptions *opt)
189{ 187{
190 struct net *net = sock_net(sk); 188 struct net *net = sock_net(sk);
191 struct ipv6_pinfo *np = inet6_sk(sk); 189 struct ipv6_pinfo *np = inet6_sk(sk);
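
The restructuring above makes ip6_output() symmetric with its IPv4 counterpart: the fragment-or-not decision moves into ip6_finish_output(), which now runs as the POST_ROUTING continuation, and NF_HOOK_COND lets skbs flagged IP6SKB_REROUTED bypass a hook they have already traversed. Its semantics, paraphrased as a sketch rather than the real macro:

	/* Approximate behavior of NF_HOOK_COND (sketch, not the actual macro). */
	static inline int nf_hook_cond_sketch(u_int8_t pf, unsigned int hook,
					      struct sk_buff *skb,
					      struct net_device *in,
					      struct net_device *out,
					      int (*okfn)(struct sk_buff *),
					      bool cond)
	{
		if (cond)
			return NF_HOOK(pf, hook, skb, in, out, okfn);
		return okfn(skb);	/* skip the hook chain entirely */
	}
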
@@ -218,8 +216,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
218 } 216 }
219 kfree_skb(skb); 217 kfree_skb(skb);
220 skb = skb2; 218 skb = skb2;
221 if (sk) 219 skb_set_owner_w(skb, sk);
222 skb_set_owner_w(skb, sk);
223 } 220 }
224 if (opt->opt_flen) 221 if (opt->opt_flen)
225 ipv6_push_frag_opts(skb, opt, &proto); 222 ipv6_push_frag_opts(skb, opt, &proto);
@@ -231,10 +228,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
231 skb_reset_network_header(skb); 228 skb_reset_network_header(skb);
232 hdr = ipv6_hdr(skb); 229 hdr = ipv6_hdr(skb);
233 230
234 /* Allow local fragmentation. */
235 if (ipfragok)
236 skb->local_df = 1;
237
238 /* 231 /*
239 * Fill in the IPv6 header 232 * Fill in the IPv6 header
240 */ 233 */
@@ -261,8 +254,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
261 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { 254 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
262 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), 255 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
263 IPSTATS_MIB_OUT, skb->len); 256 IPSTATS_MIB_OUT, skb->len);
264 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, 257 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
265 dst_output); 258 dst->dev, dst_output);
266 } 259 }
267 260
268 if (net_ratelimit()) 261 if (net_ratelimit())
@@ -514,7 +507,7 @@ int ip6_forward(struct sk_buff *skb)
514 if (mtu < IPV6_MIN_MTU) 507 if (mtu < IPV6_MIN_MTU)
515 mtu = IPV6_MIN_MTU; 508 mtu = IPV6_MIN_MTU;
516 509
517 if (skb->len > mtu) { 510 if (skb->len > mtu && !skb_is_gso(skb)) {
518 /* Again, force OUTPUT device used as source address */ 511 /* Again, force OUTPUT device used as source address */
519 skb->dev = dst->dev; 512 skb->dev = dst->dev;
520 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 513 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -538,7 +531,7 @@ int ip6_forward(struct sk_buff *skb)
538 hdr->hop_limit--; 531 hdr->hop_limit--;
539 532
540 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); 533 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
541 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, 534 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
542 ip6_forward_finish); 535 ip6_forward_finish);
543 536
544error: 537error:
@@ -705,7 +698,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
705 ipv6_hdr(skb)->payload_len = htons(first_len - 698 ipv6_hdr(skb)->payload_len = htons(first_len -
706 sizeof(struct ipv6hdr)); 699 sizeof(struct ipv6hdr));
707 700
708 dst_hold(&rt->u.dst); 701 dst_hold(&rt->dst);
709 702
710 for (;;) { 703 for (;;) {
711 /* Prepare header of the next frame, 704 /* Prepare header of the next frame,
@@ -733,7 +726,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
733 726
734 err = output(skb); 727 err = output(skb);
735 if(!err) 728 if(!err)
736 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 729 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
737 IPSTATS_MIB_FRAGCREATES); 730 IPSTATS_MIB_FRAGCREATES);
738 731
739 if (err || !frag) 732 if (err || !frag)
@@ -747,9 +740,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
747 kfree(tmp_hdr); 740 kfree(tmp_hdr);
748 741
749 if (err == 0) { 742 if (err == 0) {
750 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 743 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
751 IPSTATS_MIB_FRAGOKS); 744 IPSTATS_MIB_FRAGOKS);
752 dst_release(&rt->u.dst); 745 dst_release(&rt->dst);
753 return 0; 746 return 0;
754 } 747 }
755 748
@@ -759,9 +752,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
759 frag = skb; 752 frag = skb;
760 } 753 }
761 754
762 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), 755 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
763 IPSTATS_MIB_FRAGFAILS); 756 IPSTATS_MIB_FRAGFAILS);
764 dst_release(&rt->u.dst); 757 dst_release(&rt->dst);
765 return err; 758 return err;
766 } 759 }
767 760
@@ -792,7 +785,7 @@ slow_path:
792 * Allocate buffer. 785 * Allocate buffer.
793 */ 786 */
794 787
795 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { 788 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
796 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); 789 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
797 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 790 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
798 IPSTATS_MIB_FRAGFAILS); 791 IPSTATS_MIB_FRAGFAILS);
@@ -805,7 +798,7 @@ slow_path:
805 */ 798 */
806 799
807 ip6_copy_metadata(frag, skb); 800 ip6_copy_metadata(frag, skb);
808 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); 801 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
809 skb_put(frag, len + hlen + sizeof(struct frag_hdr)); 802 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
810 skb_reset_network_header(frag); 803 skb_reset_network_header(frag);
811 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); 804 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
@@ -1109,7 +1102,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1109 int offset, int len, int odd, struct sk_buff *skb), 1102 int offset, int len, int odd, struct sk_buff *skb),
1110 void *from, int length, int transhdrlen, 1103 void *from, int length, int transhdrlen,
1111 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, 1104 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1112 struct rt6_info *rt, unsigned int flags) 1105 struct rt6_info *rt, unsigned int flags, int dontfrag)
1113{ 1106{
1114 struct inet_sock *inet = inet_sk(sk); 1107 struct inet_sock *inet = inet_sk(sk);
1115 struct ipv6_pinfo *np = inet6_sk(sk); 1108 struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1163,24 +1156,24 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1163 1156
1164 /* need source address above miyazawa*/ 1157 /* need source address above miyazawa*/
1165 } 1158 }
1166 dst_hold(&rt->u.dst); 1159 dst_hold(&rt->dst);
1167 inet->cork.dst = &rt->u.dst; 1160 inet->cork.dst = &rt->dst;
1168 inet->cork.fl = *fl; 1161 inet->cork.fl = *fl;
1169 np->cork.hop_limit = hlimit; 1162 np->cork.hop_limit = hlimit;
1170 np->cork.tclass = tclass; 1163 np->cork.tclass = tclass;
1171 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? 1164 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1172 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); 1165 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1173 if (np->frag_size < mtu) { 1166 if (np->frag_size < mtu) {
1174 if (np->frag_size) 1167 if (np->frag_size)
1175 mtu = np->frag_size; 1168 mtu = np->frag_size;
1176 } 1169 }
1177 inet->cork.fragsize = mtu; 1170 inet->cork.fragsize = mtu;
1178 if (dst_allfrag(rt->u.dst.path)) 1171 if (dst_allfrag(rt->dst.path))
1179 inet->cork.flags |= IPCORK_ALLFRAG; 1172 inet->cork.flags |= IPCORK_ALLFRAG;
1180 inet->cork.length = 0; 1173 inet->cork.length = 0;
1181 sk->sk_sndmsg_page = NULL; 1174 sk->sk_sndmsg_page = NULL;
1182 sk->sk_sndmsg_off = 0; 1175 sk->sk_sndmsg_off = 0;
1183 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - 1176 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
1184 rt->rt6i_nfheader_len; 1177 rt->rt6i_nfheader_len;
1185 length += exthdrlen; 1178 length += exthdrlen;
1186 transhdrlen += exthdrlen; 1179 transhdrlen += exthdrlen;
@@ -1193,7 +1186,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1193 mtu = inet->cork.fragsize; 1186 mtu = inet->cork.fragsize;
1194 } 1187 }
1195 1188
1196 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); 1189 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1197 1190
1198 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + 1191 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1199 (opt ? opt->opt_nflen : 0); 1192 (opt ? opt->opt_nflen : 0);
@@ -1223,15 +1216,23 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1223 */ 1216 */
1224 1217
1225 inet->cork.length += length; 1218 inet->cork.length += length;
1226 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && 1219 if (length > mtu) {
1227 (rt->u.dst.dev->features & NETIF_F_UFO)) { 1220 int proto = sk->sk_protocol;
1221 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
1222 ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen);
1223 return -EMSGSIZE;
1224 }
1228 1225
1229 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, 1226 if (proto == IPPROTO_UDP &&
1230 fragheaderlen, transhdrlen, mtu, 1227 (rt->dst.dev->features & NETIF_F_UFO)) {
1231 flags); 1228
1232 if (err) 1229 err = ip6_ufo_append_data(sk, getfrag, from, length,
1233 goto error; 1230 hh_len, fragheaderlen,
1234 return 0; 1231 transhdrlen, mtu, flags);
1232 if (err)
1233 goto error;
1234 return 0;
1235 }
1235 } 1236 }
1236 1237
1237 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) 1238 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
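
Taken together with the receive side added in datagram.c, this gives dontfrag sockets a fast, ICMP-free path-MTU signal. A hypothetical userspace retry loop (send_df() and read_path_mtu() are the sketches shown earlier; shrink_payload() is invented purely for illustration):

	ssize_t n = send_df(fd, buf, len, &dst);
	if (n < 0 && errno == EMSGSIZE) {
		int mtu = read_path_mtu(fd);	/* drains the queued report */
		if (mtu > 0)
			len = shrink_payload(buf, len, mtu);	/* hypothetical */
		n = send_df(fd, buf, len, &dst);
	}
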
@@ -1269,7 +1270,7 @@ alloc_new_skb:
1269 1270
1270 fraglen = datalen + fragheaderlen; 1271 fraglen = datalen + fragheaderlen;
1271 if ((flags & MSG_MORE) && 1272 if ((flags & MSG_MORE) &&
1272 !(rt->u.dst.dev->features&NETIF_F_SG)) 1273 !(rt->dst.dev->features&NETIF_F_SG))
1273 alloclen = mtu; 1274 alloclen = mtu;
1274 else 1275 else
1275 alloclen = datalen + fragheaderlen; 1276 alloclen = datalen + fragheaderlen;
@@ -1280,7 +1281,7 @@ alloc_new_skb:
1280 * because we have no idea if we're the last one. 1281 * because we have no idea if we're the last one.
1281 */ 1282 */
1282 if (datalen == length + fraggap) 1283 if (datalen == length + fraggap)
1283 alloclen += rt->u.dst.trailer_len; 1284 alloclen += rt->dst.trailer_len;
1284 1285
1285 /* 1286 /*
1286 * We just reserve space for fragment header. 1287 * We just reserve space for fragment header.
@@ -1357,7 +1358,7 @@ alloc_new_skb:
1357 if (copy > length) 1358 if (copy > length)
1358 copy = length; 1359 copy = length;
1359 1360
1360 if (!(rt->u.dst.dev->features&NETIF_F_SG)) { 1361 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1361 unsigned int off; 1362 unsigned int off;
1362 1363
1363 off = skb->len; 1364 off = skb->len;
@@ -1502,7 +1503,7 @@ int ip6_push_pending_frames(struct sock *sk)
1502 skb->priority = sk->sk_priority; 1503 skb->priority = sk->sk_priority;
1503 skb->mark = sk->sk_mark; 1504 skb->mark = sk->sk_mark;
1504 1505
1505 skb_dst_set(skb, dst_clone(&rt->u.dst)); 1506 skb_dst_set(skb, dst_clone(&rt->dst));
1506 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 1507 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1507 if (proto == IPPROTO_ICMPV6) { 1508 if (proto == IPPROTO_ICMPV6) {
1508 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); 1509 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2599870747ec..0fd027f3f47e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -552,7 +552,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
552 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) 552 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
553 goto out; 553 goto out;
554 554
555 skb2->dev = rt->u.dst.dev; 555 skb2->dev = rt->dst.dev;
556 556
557 /* route "incoming" packet */ 557 /* route "incoming" packet */
558 if (rt->rt_flags & RTCF_LOCAL) { 558 if (rt->rt_flags & RTCF_LOCAL) {
@@ -562,7 +562,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
562 fl.fl4_src = eiph->saddr; 562 fl.fl4_src = eiph->saddr;
563 fl.fl4_tos = eiph->tos; 563 fl.fl4_tos = eiph->tos;
564 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || 564 if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
565 rt->u.dst.dev->type != ARPHRD_TUNNEL) { 565 rt->dst.dev->type != ARPHRD_TUNNEL) {
566 ip_rt_put(rt); 566 ip_rt_put(rt);
567 goto out; 567 goto out;
568 } 568 }
@@ -626,7 +626,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
626 icmpv6_send(skb2, rel_type, rel_code, rel_info); 626 icmpv6_send(skb2, rel_type, rel_code, rel_info);
627 627
628 if (rt) 628 if (rt)
629 dst_release(&rt->u.dst); 629 dst_release(&rt->dst);
630 630
631 kfree_skb(skb2); 631 kfree_skb(skb2);
632 } 632 }
@@ -723,14 +723,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
723 skb->protocol = htons(protocol); 723 skb->protocol = htons(protocol);
724 skb->pkt_type = PACKET_HOST; 724 skb->pkt_type = PACKET_HOST;
725 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 725 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
726 skb->dev = t->dev;
727 skb_dst_drop(skb);
728 nf_reset(skb);
729 726
730 dscp_ecn_decapsulate(t, ipv6h, skb); 727 skb_tunnel_rx(skb, t->dev);
731 728
732 t->dev->stats.rx_packets++; 729 dscp_ecn_decapsulate(t, ipv6h, skb);
733 t->dev->stats.rx_bytes += skb->len;
734 netif_rx(skb); 730 netif_rx(skb);
735 rcu_read_unlock(); 731 rcu_read_unlock();
736 return 0; 732 return 0;
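
The open-coded receive boilerplate deleted above moves into the new skb_tunnel_rx() helper; its effect is approximately the following (see include/net/dst.h for the real inline):

	/* Approximation of skb_tunnel_rx() (sketch). */
	static inline void skb_tunnel_rx_sketch(struct sk_buff *skb,
						struct net_device *dev)
	{
		dev->stats.rx_packets++;
		dev->stats.rx_bytes += skb->len;
		skb->dev = dev;		/* packet now "arrives" on the tunnel dev */
		skb_dst_drop(skb);	/* decapsulated: the outer route is stale */
		nf_reset(skb);		/* ditto any conntrack state */
	}
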
@@ -1139,7 +1135,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
1139 if (dev->mtu < IPV6_MIN_MTU) 1135 if (dev->mtu < IPV6_MIN_MTU)
1140 dev->mtu = IPV6_MIN_MTU; 1136 dev->mtu = IPV6_MIN_MTU;
1141 } 1137 }
1142 dst_release(&rt->u.dst); 1138 dst_release(&rt->dst);
1143 } 1139 }
1144} 1140}
1145 1141
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 3e333268db89..66078dad7fe8 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -42,6 +42,7 @@
42#include <linux/if_arp.h> 42#include <linux/if_arp.h>
43#include <net/checksum.h> 43#include <net/checksum.h>
44#include <net/netlink.h> 44#include <net/netlink.h>
45#include <net/fib_rules.h>
45 46
46#include <net/ipv6.h> 47#include <net/ipv6.h>
47#include <net/ip6_route.h> 48#include <net/ip6_route.h>
@@ -51,6 +52,34 @@
51#include <linux/netfilter_ipv6.h> 52#include <linux/netfilter_ipv6.h>
52#include <net/ip6_checksum.h> 53#include <net/ip6_checksum.h>
53 54
55struct mr6_table {
56 struct list_head list;
57#ifdef CONFIG_NET_NS
58 struct net *net;
59#endif
60 u32 id;
61 struct sock *mroute6_sk;
62 struct timer_list ipmr_expire_timer;
63 struct list_head mfc6_unres_queue;
64 struct list_head mfc6_cache_array[MFC6_LINES];
65 struct mif_device vif6_table[MAXMIFS];
66 int maxvif;
67 atomic_t cache_resolve_queue_len;
68 int mroute_do_assert;
69 int mroute_do_pim;
70#ifdef CONFIG_IPV6_PIMSM_V2
71 int mroute_reg_vif_num;
72#endif
73};
74
75struct ip6mr_rule {
76 struct fib_rule common;
77};
78
79struct ip6mr_result {
80 struct mr6_table *mrt;
81};
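
These three structures carry the whole multiple-tables design: state that previously lived once per namespace in struct netns_ipv6 now lives in one mr6_table per table, and a fib-rules lookup keyed on the packet's flow picks the table. The selection step, as the converted call sites below perform it (sketch assembled from pim6_rcv()):

	struct flowi fl = {
		.iif	= skb->dev->ifindex,
		.mark	= skb->mark,
	};
	struct mr6_table *mrt;

	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
		goto drop;	/* no rule matched; the default rule maps to RT6_TABLE_DFLT */
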
82
54/* Big lock, protecting vif table, mrt cache and mroute socket state. 83/* Big lock, protecting vif table, mrt cache and mroute socket state.
55 Note that the changes are semaphored via rtnl_lock. 84 Note that the changes are semaphored via rtnl_lock.
56 */ 85 */
@@ -61,9 +90,7 @@ static DEFINE_RWLOCK(mrt_lock);
61 * Multicast router control variables 90 * Multicast router control variables
62 */ 91 */
63 92
64#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL) 93#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
65
66static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
67 94
68/* Special spinlock for queue of unresolved entries */ 95/* Special spinlock for queue of unresolved entries */
69static DEFINE_SPINLOCK(mfc_unres_lock); 96static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -78,20 +105,235 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
78 105
79static struct kmem_cache *mrt_cachep __read_mostly; 106static struct kmem_cache *mrt_cachep __read_mostly;
80 107
81static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache); 108static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
82static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, 109static void ip6mr_free_table(struct mr6_table *mrt);
110
111static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
112 struct sk_buff *skb, struct mfc6_cache *cache);
113static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
83 mifi_t mifi, int assert); 114 mifi_t mifi, int assert);
84static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); 115static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
85static void mroute_clean_tables(struct net *net); 116 struct mfc6_cache *c, struct rtmsg *rtm);
117static int ip6mr_rtm_dumproute(struct sk_buff *skb,
118 struct netlink_callback *cb);
119static void mroute_clean_tables(struct mr6_table *mrt);
120static void ipmr_expire_process(unsigned long arg);
121
122#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
123#define ip6mr_for_each_table(mrt, net) \
124 list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
125
126static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
127{
128 struct mr6_table *mrt;
129
130 ip6mr_for_each_table(mrt, net) {
131 if (mrt->id == id)
132 return mrt;
133 }
134 return NULL;
135}
136
137static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
138 struct mr6_table **mrt)
139{
140 struct ip6mr_result res;
141 struct fib_lookup_arg arg = { .result = &res, };
142 int err;
143
144 err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg);
145 if (err < 0)
146 return err;
147 *mrt = res.mrt;
148 return 0;
149}
150
151static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
152 int flags, struct fib_lookup_arg *arg)
153{
154 struct ip6mr_result *res = arg->result;
155 struct mr6_table *mrt;
156
157 switch (rule->action) {
158 case FR_ACT_TO_TBL:
159 break;
160 case FR_ACT_UNREACHABLE:
161 return -ENETUNREACH;
162 case FR_ACT_PROHIBIT:
163 return -EACCES;
164 case FR_ACT_BLACKHOLE:
165 default:
166 return -EINVAL;
167 }
168
169 mrt = ip6mr_get_table(rule->fr_net, rule->table);
170 if (mrt == NULL)
171 return -EAGAIN;
172 res->mrt = mrt;
173 return 0;
174}
175
176static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
177{
178 return 1;
179}
180
181static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
182 FRA_GENERIC_POLICY,
183};
184
185static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
186 struct fib_rule_hdr *frh, struct nlattr **tb)
187{
188 return 0;
189}
190
191static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
192 struct nlattr **tb)
193{
194 return 1;
195}
196
197static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
198 struct fib_rule_hdr *frh)
199{
200 frh->dst_len = 0;
201 frh->src_len = 0;
202 frh->tos = 0;
203 return 0;
204}
205
206static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
207 .family = RTNL_FAMILY_IP6MR,
208 .rule_size = sizeof(struct ip6mr_rule),
209 .addr_size = sizeof(struct in6_addr),
210 .action = ip6mr_rule_action,
211 .match = ip6mr_rule_match,
212 .configure = ip6mr_rule_configure,
213 .compare = ip6mr_rule_compare,
214 .default_pref = fib_default_rule_pref,
215 .fill = ip6mr_rule_fill,
216 .nlgroup = RTNLGRP_IPV6_RULE,
217 .policy = ip6mr_rule_policy,
218 .owner = THIS_MODULE,
219};
220
221static int __net_init ip6mr_rules_init(struct net *net)
222{
223 struct fib_rules_ops *ops;
224 struct mr6_table *mrt;
225 int err;
226
227 ops = fib_rules_register(&ip6mr_rules_ops_template, net);
228 if (IS_ERR(ops))
229 return PTR_ERR(ops);
230
231 INIT_LIST_HEAD(&net->ipv6.mr6_tables);
232
233 mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
234 if (mrt == NULL) {
235 err = -ENOMEM;
236 goto err1;
237 }
238
239 err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
240 if (err < 0)
241 goto err2;
242
243 net->ipv6.mr6_rules_ops = ops;
244 return 0;
245
246err2:
247 kfree(mrt);
248err1:
249 fib_rules_unregister(ops);
250 return err;
251}
252
253static void __net_exit ip6mr_rules_exit(struct net *net)
254{
255 struct mr6_table *mrt, *next;
256
257 list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
258 list_del(&mrt->list);
259 ip6mr_free_table(mrt);
260 }
261 fib_rules_unregister(net->ipv6.mr6_rules_ops);
262}
263#else
264#define ip6mr_for_each_table(mrt, net) \
265 for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
266
267static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
268{
269 return net->ipv6.mrt6;
270}
271
272static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
273 struct mr6_table **mrt)
274{
275 *mrt = net->ipv6.mrt6;
276 return 0;
277}
278
279static int __net_init ip6mr_rules_init(struct net *net)
280{
281 net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
282 return net->ipv6.mrt6 ? 0 : -ENOMEM;
283}
284
285static void __net_exit ip6mr_rules_exit(struct net *net)
286{
287 ip6mr_free_table(net->ipv6.mrt6);
288}
289#endif
290
291static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
292{
293 struct mr6_table *mrt;
294 unsigned int i;
86 295
87static struct timer_list ipmr_expire_timer; 296 mrt = ip6mr_get_table(net, id);
297 if (mrt != NULL)
298 return mrt;
88 299
300 mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
301 if (mrt == NULL)
302 return NULL;
303 mrt->id = id;
304 write_pnet(&mrt->net, net);
305
306 /* Forwarding cache */
307 for (i = 0; i < MFC6_LINES; i++)
308 INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
309
310 INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
311
312 setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
313 (unsigned long)mrt);
314
315#ifdef CONFIG_IPV6_PIMSM_V2
316 mrt->mroute_reg_vif_num = -1;
317#endif
318#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
319 list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
320#endif
321 return mrt;
322}
323
324static void ip6mr_free_table(struct mr6_table *mrt)
325{
326 del_timer(&mrt->ipmr_expire_timer);
327 mroute_clean_tables(mrt);
328 kfree(mrt);
329}
89 330
90#ifdef CONFIG_PROC_FS 331#ifdef CONFIG_PROC_FS
91 332
92struct ipmr_mfc_iter { 333struct ipmr_mfc_iter {
93 struct seq_net_private p; 334 struct seq_net_private p;
94 struct mfc6_cache **cache; 335 struct mr6_table *mrt;
336 struct list_head *cache;
95 int ct; 337 int ct;
96}; 338};
97 339
@@ -99,22 +341,22 @@ struct ipmr_mfc_iter {
99static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net, 341static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
100 struct ipmr_mfc_iter *it, loff_t pos) 342 struct ipmr_mfc_iter *it, loff_t pos)
101{ 343{
344 struct mr6_table *mrt = it->mrt;
102 struct mfc6_cache *mfc; 345 struct mfc6_cache *mfc;
103 346
104 it->cache = net->ipv6.mfc6_cache_array;
105 read_lock(&mrt_lock); 347 read_lock(&mrt_lock);
106 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) 348 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
107 for (mfc = net->ipv6.mfc6_cache_array[it->ct]; 349 it->cache = &mrt->mfc6_cache_array[it->ct];
108 mfc; mfc = mfc->next) 350 list_for_each_entry(mfc, it->cache, list)
109 if (pos-- == 0) 351 if (pos-- == 0)
110 return mfc; 352 return mfc;
353 }
111 read_unlock(&mrt_lock); 354 read_unlock(&mrt_lock);
112 355
113 it->cache = &mfc_unres_queue;
114 spin_lock_bh(&mfc_unres_lock); 356 spin_lock_bh(&mfc_unres_lock);
115 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) 357 it->cache = &mrt->mfc6_unres_queue;
116 if (net_eq(mfc6_net(mfc), net) && 358 list_for_each_entry(mfc, it->cache, list)
117 pos-- == 0) 359 if (pos-- == 0)
118 return mfc; 360 return mfc;
119 spin_unlock_bh(&mfc_unres_lock); 361 spin_unlock_bh(&mfc_unres_lock);
120 362
@@ -122,15 +364,13 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
122 return NULL; 364 return NULL;
123} 365}
124 366
125
126
127
128/* 367/*
129 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif 368 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
130 */ 369 */
131 370
132struct ipmr_vif_iter { 371struct ipmr_vif_iter {
133 struct seq_net_private p; 372 struct seq_net_private p;
373 struct mr6_table *mrt;
134 int ct; 374 int ct;
135}; 375};
136 376
@@ -138,11 +378,13 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
138 struct ipmr_vif_iter *iter, 378 struct ipmr_vif_iter *iter,
139 loff_t pos) 379 loff_t pos)
140{ 380{
141 for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) { 381 struct mr6_table *mrt = iter->mrt;
142 if (!MIF_EXISTS(net, iter->ct)) 382
383 for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
384 if (!MIF_EXISTS(mrt, iter->ct))
143 continue; 385 continue;
144 if (pos-- == 0) 386 if (pos-- == 0)
145 return &net->ipv6.vif6_table[iter->ct]; 387 return &mrt->vif6_table[iter->ct];
146 } 388 }
147 return NULL; 389 return NULL;
148} 390}
@@ -150,7 +392,15 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
150static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) 392static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
151 __acquires(mrt_lock) 393 __acquires(mrt_lock)
152{ 394{
395 struct ipmr_vif_iter *iter = seq->private;
153 struct net *net = seq_file_net(seq); 396 struct net *net = seq_file_net(seq);
397 struct mr6_table *mrt;
398
399 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
400 if (mrt == NULL)
401 return ERR_PTR(-ENOENT);
402
403 iter->mrt = mrt;
154 404
155 read_lock(&mrt_lock); 405 read_lock(&mrt_lock);
156 return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1) 406 return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
@@ -161,15 +411,16 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
161{ 411{
162 struct ipmr_vif_iter *iter = seq->private; 412 struct ipmr_vif_iter *iter = seq->private;
163 struct net *net = seq_file_net(seq); 413 struct net *net = seq_file_net(seq);
414 struct mr6_table *mrt = iter->mrt;
164 415
165 ++*pos; 416 ++*pos;
166 if (v == SEQ_START_TOKEN) 417 if (v == SEQ_START_TOKEN)
167 return ip6mr_vif_seq_idx(net, iter, 0); 418 return ip6mr_vif_seq_idx(net, iter, 0);
168 419
169 while (++iter->ct < net->ipv6.maxvif) { 420 while (++iter->ct < mrt->maxvif) {
170 if (!MIF_EXISTS(net, iter->ct)) 421 if (!MIF_EXISTS(mrt, iter->ct))
171 continue; 422 continue;
172 return &net->ipv6.vif6_table[iter->ct]; 423 return &mrt->vif6_table[iter->ct];
173 } 424 }
174 return NULL; 425 return NULL;
175} 426}
@@ -182,7 +433,8 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
182 433
183static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) 434static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
184{ 435{
185 struct net *net = seq_file_net(seq); 436 struct ipmr_vif_iter *iter = seq->private;
437 struct mr6_table *mrt = iter->mrt;
186 438
187 if (v == SEQ_START_TOKEN) { 439 if (v == SEQ_START_TOKEN) {
188 seq_puts(seq, 440 seq_puts(seq,
@@ -193,7 +445,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
193 445
194 seq_printf(seq, 446 seq_printf(seq,
195 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", 447 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
196 vif - net->ipv6.vif6_table, 448 vif - mrt->vif6_table,
197 name, vif->bytes_in, vif->pkt_in, 449 name, vif->bytes_in, vif->pkt_in,
198 vif->bytes_out, vif->pkt_out, 450 vif->bytes_out, vif->pkt_out,
199 vif->flags); 451 vif->flags);
@@ -224,8 +476,15 @@ static const struct file_operations ip6mr_vif_fops = {
224 476
225static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 477static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
226{ 478{
479 struct ipmr_mfc_iter *it = seq->private;
227 struct net *net = seq_file_net(seq); 480 struct net *net = seq_file_net(seq);
481 struct mr6_table *mrt;
482
483 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
484 if (mrt == NULL)
485 return ERR_PTR(-ENOENT);
228 486
487 it->mrt = mrt;
229 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) 488 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
230 : SEQ_START_TOKEN; 489 : SEQ_START_TOKEN;
231} 490}
@@ -235,35 +494,36 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
235 struct mfc6_cache *mfc = v; 494 struct mfc6_cache *mfc = v;
236 struct ipmr_mfc_iter *it = seq->private; 495 struct ipmr_mfc_iter *it = seq->private;
237 struct net *net = seq_file_net(seq); 496 struct net *net = seq_file_net(seq);
497 struct mr6_table *mrt = it->mrt;
238 498
239 ++*pos; 499 ++*pos;
240 500
241 if (v == SEQ_START_TOKEN) 501 if (v == SEQ_START_TOKEN)
242 return ipmr_mfc_seq_idx(net, seq->private, 0); 502 return ipmr_mfc_seq_idx(net, seq->private, 0);
243 503
244 if (mfc->next) 504 if (mfc->list.next != it->cache)
245 return mfc->next; 505 return list_entry(mfc->list.next, struct mfc6_cache, list);
246 506
247 if (it->cache == &mfc_unres_queue) 507 if (it->cache == &mrt->mfc6_unres_queue)
248 goto end_of_list; 508 goto end_of_list;
249 509
250 BUG_ON(it->cache != net->ipv6.mfc6_cache_array); 510 BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
251 511
252 while (++it->ct < MFC6_LINES) { 512 while (++it->ct < MFC6_LINES) {
253 mfc = net->ipv6.mfc6_cache_array[it->ct]; 513 it->cache = &mrt->mfc6_cache_array[it->ct];
254 if (mfc) 514 if (list_empty(it->cache))
255 return mfc; 515 continue;
516 return list_first_entry(it->cache, struct mfc6_cache, list);
256 } 517 }
257 518
258 /* exhausted cache_array, show unresolved */ 519 /* exhausted cache_array, show unresolved */
259 read_unlock(&mrt_lock); 520 read_unlock(&mrt_lock);
260 it->cache = &mfc_unres_queue; 521 it->cache = &mrt->mfc6_unres_queue;
261 it->ct = 0; 522 it->ct = 0;
262 523
263 spin_lock_bh(&mfc_unres_lock); 524 spin_lock_bh(&mfc_unres_lock);
264 mfc = mfc_unres_queue; 525 if (!list_empty(it->cache))
265 if (mfc) 526 return list_first_entry(it->cache, struct mfc6_cache, list);
266 return mfc;
267 527
268 end_of_list: 528 end_of_list:
269 spin_unlock_bh(&mfc_unres_lock); 529 spin_unlock_bh(&mfc_unres_lock);
@@ -275,18 +535,17 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
275static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 535static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
276{ 536{
277 struct ipmr_mfc_iter *it = seq->private; 537 struct ipmr_mfc_iter *it = seq->private;
278 struct net *net = seq_file_net(seq); 538 struct mr6_table *mrt = it->mrt;
279 539
280 if (it->cache == &mfc_unres_queue) 540 if (it->cache == &mrt->mfc6_unres_queue)
281 spin_unlock_bh(&mfc_unres_lock); 541 spin_unlock_bh(&mfc_unres_lock);
282 else if (it->cache == net->ipv6.mfc6_cache_array) 542 else if (it->cache == mrt->mfc6_cache_array)
283 read_unlock(&mrt_lock); 543 read_unlock(&mrt_lock);
284} 544}
285 545
286static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 546static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
287{ 547{
288 int n; 548 int n;
289 struct net *net = seq_file_net(seq);
290 549
291 if (v == SEQ_START_TOKEN) { 550 if (v == SEQ_START_TOKEN) {
292 seq_puts(seq, 551 seq_puts(seq,
@@ -296,19 +555,20 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
296 } else { 555 } else {
297 const struct mfc6_cache *mfc = v; 556 const struct mfc6_cache *mfc = v;
298 const struct ipmr_mfc_iter *it = seq->private; 557 const struct ipmr_mfc_iter *it = seq->private;
558 struct mr6_table *mrt = it->mrt;
299 559
300 seq_printf(seq, "%pI6 %pI6 %-3hd", 560 seq_printf(seq, "%pI6 %pI6 %-3hd",
301 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, 561 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
302 mfc->mf6c_parent); 562 mfc->mf6c_parent);
303 563
304 if (it->cache != &mfc_unres_queue) { 564 if (it->cache != &mrt->mfc6_unres_queue) {
305 seq_printf(seq, " %8lu %8lu %8lu", 565 seq_printf(seq, " %8lu %8lu %8lu",
306 mfc->mfc_un.res.pkt, 566 mfc->mfc_un.res.pkt,
307 mfc->mfc_un.res.bytes, 567 mfc->mfc_un.res.bytes,
308 mfc->mfc_un.res.wrong_if); 568 mfc->mfc_un.res.wrong_if);
309 for (n = mfc->mfc_un.res.minvif; 569 for (n = mfc->mfc_un.res.minvif;
310 n < mfc->mfc_un.res.maxvif; n++) { 570 n < mfc->mfc_un.res.maxvif; n++) {
311 if (MIF_EXISTS(net, n) && 571 if (MIF_EXISTS(mrt, n) &&
312 mfc->mfc_un.res.ttls[n] < 255) 572 mfc->mfc_un.res.ttls[n] < 255)
313 seq_printf(seq, 573 seq_printf(seq,
314 " %2d:%-3d", 574 " %2d:%-3d",
@@ -355,7 +615,12 @@ static int pim6_rcv(struct sk_buff *skb)
355 struct ipv6hdr *encap; 615 struct ipv6hdr *encap;
356 struct net_device *reg_dev = NULL; 616 struct net_device *reg_dev = NULL;
357 struct net *net = dev_net(skb->dev); 617 struct net *net = dev_net(skb->dev);
358 int reg_vif_num = net->ipv6.mroute_reg_vif_num; 618 struct mr6_table *mrt;
619 struct flowi fl = {
620 .iif = skb->dev->ifindex,
621 .mark = skb->mark,
622 };
623 int reg_vif_num;
359 624
360 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) 625 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
361 goto drop; 626 goto drop;
@@ -378,9 +643,13 @@ static int pim6_rcv(struct sk_buff *skb)
378 ntohs(encap->payload_len) + sizeof(*pim) > skb->len) 643 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
379 goto drop; 644 goto drop;
380 645
646 if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
647 goto drop;
648 reg_vif_num = mrt->mroute_reg_vif_num;
649
381 read_lock(&mrt_lock); 650 read_lock(&mrt_lock);
382 if (reg_vif_num >= 0) 651 if (reg_vif_num >= 0)
383 reg_dev = net->ipv6.vif6_table[reg_vif_num].dev; 652 reg_dev = mrt->vif6_table[reg_vif_num].dev;
384 if (reg_dev) 653 if (reg_dev)
385 dev_hold(reg_dev); 654 dev_hold(reg_dev);
386 read_unlock(&mrt_lock); 655 read_unlock(&mrt_lock);
@@ -391,14 +660,12 @@ static int pim6_rcv(struct sk_buff *skb)
391 skb->mac_header = skb->network_header; 660 skb->mac_header = skb->network_header;
392 skb_pull(skb, (u8 *)encap - skb->data); 661 skb_pull(skb, (u8 *)encap - skb->data);
393 skb_reset_network_header(skb); 662 skb_reset_network_header(skb);
394 skb->dev = reg_dev;
395 skb->protocol = htons(ETH_P_IPV6); 663 skb->protocol = htons(ETH_P_IPV6);
396 skb->ip_summed = 0; 664 skb->ip_summed = 0;
397 skb->pkt_type = PACKET_HOST; 665 skb->pkt_type = PACKET_HOST;
398 skb_dst_drop(skb); 666
399 reg_dev->stats.rx_bytes += skb->len; 667 skb_tunnel_rx(skb, reg_dev);
400 reg_dev->stats.rx_packets++; 668
401 nf_reset(skb);
402 netif_rx(skb); 669 netif_rx(skb);
403 dev_put(reg_dev); 670 dev_put(reg_dev);
404 return 0; 671 return 0;
@@ -417,12 +684,22 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
417 struct net_device *dev) 684 struct net_device *dev)
418{ 685{
419 struct net *net = dev_net(dev); 686 struct net *net = dev_net(dev);
687 struct mr6_table *mrt;
688 struct flowi fl = {
689 .oif = dev->ifindex,
690 .iif = skb->skb_iif,
691 .mark = skb->mark,
692 };
693 int err;
694
695 err = ip6mr_fib_lookup(net, &fl, &mrt);
696 if (err < 0)
697 return err;
420 698
421 read_lock(&mrt_lock); 699 read_lock(&mrt_lock);
422 dev->stats.tx_bytes += skb->len; 700 dev->stats.tx_bytes += skb->len;
423 dev->stats.tx_packets++; 701 dev->stats.tx_packets++;
424 ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num, 702 ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
425 MRT6MSG_WHOLEPKT);
426 read_unlock(&mrt_lock); 703 read_unlock(&mrt_lock);
427 kfree_skb(skb); 704 kfree_skb(skb);
428 return NETDEV_TX_OK; 705 return NETDEV_TX_OK;
@@ -442,11 +719,17 @@ static void reg_vif_setup(struct net_device *dev)
442 dev->features |= NETIF_F_NETNS_LOCAL; 719 dev->features |= NETIF_F_NETNS_LOCAL;
443} 720}
444 721
445static struct net_device *ip6mr_reg_vif(struct net *net) 722static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
446{ 723{
447 struct net_device *dev; 724 struct net_device *dev;
725 char name[IFNAMSIZ];
448 726
449 dev = alloc_netdev(0, "pim6reg", reg_vif_setup); 727 if (mrt->id == RT6_TABLE_DFLT)
728 sprintf(name, "pim6reg");
729 else
730 sprintf(name, "pim6reg%u", mrt->id);
731
732 dev = alloc_netdev(0, name, reg_vif_setup);
450 if (dev == NULL) 733 if (dev == NULL)
451 return NULL; 734 return NULL;
452 735
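Each table now gets its own register interface: pim6reg for RT6_TABLE_DFLT, pim6reg<id> otherwise. One caveat worth flagging: sprintf() into a char name[IFNAMSIZ] cannot truncate, and a 32-bit table id can take ten digits, so "pim6reg%u" can in principle exceed IFNAMSIZ (16) bytes. A bounded variant, offered as an editorial sketch rather than as part of the patch:

        if (mrt->id == RT6_TABLE_DFLT)
                snprintf(name, sizeof(name), "pim6reg");
        else
                snprintf(name, sizeof(name), "pim6reg%u", mrt->id);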
@@ -478,15 +761,16 @@ failure:
478 * Delete a VIF entry 761 * Delete a VIF entry
479 */ 762 */
480 763
481static int mif6_delete(struct net *net, int vifi, struct list_head *head) 764static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
482{ 765{
483 struct mif_device *v; 766 struct mif_device *v;
484 struct net_device *dev; 767 struct net_device *dev;
485 struct inet6_dev *in6_dev; 768 struct inet6_dev *in6_dev;
486 if (vifi < 0 || vifi >= net->ipv6.maxvif) 769
770 if (vifi < 0 || vifi >= mrt->maxvif)
487 return -EADDRNOTAVAIL; 771 return -EADDRNOTAVAIL;
488 772
489 v = &net->ipv6.vif6_table[vifi]; 773 v = &mrt->vif6_table[vifi];
490 774
491 write_lock_bh(&mrt_lock); 775 write_lock_bh(&mrt_lock);
492 dev = v->dev; 776 dev = v->dev;
@@ -498,17 +782,17 @@ static int mif6_delete(struct net *net, int vifi, struct list_head *head)
498 } 782 }
499 783
500#ifdef CONFIG_IPV6_PIMSM_V2 784#ifdef CONFIG_IPV6_PIMSM_V2
501 if (vifi == net->ipv6.mroute_reg_vif_num) 785 if (vifi == mrt->mroute_reg_vif_num)
502 net->ipv6.mroute_reg_vif_num = -1; 786 mrt->mroute_reg_vif_num = -1;
503#endif 787#endif
504 788
505 if (vifi + 1 == net->ipv6.maxvif) { 789 if (vifi + 1 == mrt->maxvif) {
506 int tmp; 790 int tmp;
507 for (tmp = vifi - 1; tmp >= 0; tmp--) { 791 for (tmp = vifi - 1; tmp >= 0; tmp--) {
508 if (MIF_EXISTS(net, tmp)) 792 if (MIF_EXISTS(mrt, tmp))
509 break; 793 break;
510 } 794 }
511 net->ipv6.maxvif = tmp + 1; 795 mrt->maxvif = tmp + 1;
512 } 796 }
513 797
514 write_unlock_bh(&mrt_lock); 798 write_unlock_bh(&mrt_lock);
@@ -528,7 +812,6 @@ static int mif6_delete(struct net *net, int vifi, struct list_head *head)
528 812
529static inline void ip6mr_cache_free(struct mfc6_cache *c) 813static inline void ip6mr_cache_free(struct mfc6_cache *c)
530{ 814{
531 release_net(mfc6_net(c));
532 kmem_cache_free(mrt_cachep, c); 815 kmem_cache_free(mrt_cachep, c);
533} 816}
534 817
@@ -536,12 +819,12 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c)
536 and reporting error to netlink readers. 819 and reporting error to netlink readers.
537 */ 820 */
538 821
539static void ip6mr_destroy_unres(struct mfc6_cache *c) 822static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
540{ 823{
824 struct net *net = read_pnet(&mrt->net);
541 struct sk_buff *skb; 825 struct sk_buff *skb;
542 struct net *net = mfc6_net(c);
543 826
544 atomic_dec(&net->ipv6.cache_resolve_queue_len); 827 atomic_dec(&mrt->cache_resolve_queue_len);
545 828
546 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { 829 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
547 if (ipv6_hdr(skb)->version == 0) { 830 if (ipv6_hdr(skb)->version == 0) {
@@ -559,60 +842,59 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c)
559} 842}
560 843
561 844
562/* Single timer process for all the unresolved queue. */ 845/* Timer process for all the unresolved queue. */
563 846
564static void ipmr_do_expire_process(unsigned long dummy) 847static void ipmr_do_expire_process(struct mr6_table *mrt)
565{ 848{
566 unsigned long now = jiffies; 849 unsigned long now = jiffies;
567 unsigned long expires = 10 * HZ; 850 unsigned long expires = 10 * HZ;
568 struct mfc6_cache *c, **cp; 851 struct mfc6_cache *c, *next;
569
570 cp = &mfc_unres_queue;
571 852
572 while ((c = *cp) != NULL) { 853 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
573 if (time_after(c->mfc_un.unres.expires, now)) { 854 if (time_after(c->mfc_un.unres.expires, now)) {
574 /* not yet... */ 855 /* not yet... */
575 unsigned long interval = c->mfc_un.unres.expires - now; 856 unsigned long interval = c->mfc_un.unres.expires - now;
576 if (interval < expires) 857 if (interval < expires)
577 expires = interval; 858 expires = interval;
578 cp = &c->next;
579 continue; 859 continue;
580 } 860 }
581 861
582 *cp = c->next; 862 list_del(&c->list);
583 ip6mr_destroy_unres(c); 863 ip6mr_destroy_unres(mrt, c);
584 } 864 }
585 865
586 if (mfc_unres_queue != NULL) 866 if (!list_empty(&mrt->mfc6_unres_queue))
587 mod_timer(&ipmr_expire_timer, jiffies + expires); 867 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
588} 868}
589 869
590static void ipmr_expire_process(unsigned long dummy) 870static void ipmr_expire_process(unsigned long arg)
591{ 871{
872 struct mr6_table *mrt = (struct mr6_table *)arg;
873
592 if (!spin_trylock(&mfc_unres_lock)) { 874 if (!spin_trylock(&mfc_unres_lock)) {
593 mod_timer(&ipmr_expire_timer, jiffies + 1); 875 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
594 return; 876 return;
595 } 877 }
596 878
597 if (mfc_unres_queue != NULL) 879 if (!list_empty(&mrt->mfc6_unres_queue))
598 ipmr_do_expire_process(dummy); 880 ipmr_do_expire_process(mrt);
599 881
600 spin_unlock(&mfc_unres_lock); 882 spin_unlock(&mfc_unres_lock);
601} 883}
602 884
603/* Fill oifs list. It is called under write locked mrt_lock. */ 885/* Fill oifs list. It is called under write locked mrt_lock. */
604 886
605static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls) 887static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
888 unsigned char *ttls)
606{ 889{
607 int vifi; 890 int vifi;
608 struct net *net = mfc6_net(cache);
609 891
610 cache->mfc_un.res.minvif = MAXMIFS; 892 cache->mfc_un.res.minvif = MAXMIFS;
611 cache->mfc_un.res.maxvif = 0; 893 cache->mfc_un.res.maxvif = 0;
612 memset(cache->mfc_un.res.ttls, 255, MAXMIFS); 894 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
613 895
614 for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) { 896 for (vifi = 0; vifi < mrt->maxvif; vifi++) {
615 if (MIF_EXISTS(net, vifi) && 897 if (MIF_EXISTS(mrt, vifi) &&
616 ttls[vifi] && ttls[vifi] < 255) { 898 ttls[vifi] && ttls[vifi] < 255) {
617 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 899 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
618 if (cache->mfc_un.res.minvif > vifi) 900 if (cache->mfc_un.res.minvif > vifi)
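Two structural changes land in the expiry code above: the unresolved queue becomes a per-table list walked with list_for_each_entry_safe(), which tolerates deleting the entry under the cursor, and the single global ipmr_expire_timer becomes a per-table timer whose mr6_table travels through the timer's unsigned long argument. The idiom, reduced to a generic sketch against the timer API of this kernel:

        struct my_table {
                struct list_head unres_queue;
                struct timer_list expire_timer;
        };

        static void my_expire(unsigned long arg)
        {
                struct my_table *t = (struct my_table *)arg;
                /* walk t->unres_queue; re-arm t->expire_timer if non-empty */
        }

        static void my_table_init(struct my_table *t)
        {
                INIT_LIST_HEAD(&t->unres_queue);
                setup_timer(&t->expire_timer, my_expire, (unsigned long)t);
        }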
@@ -623,16 +905,17 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl
623 } 905 }
624} 906}
625 907
626static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) 908static int mif6_add(struct net *net, struct mr6_table *mrt,
909 struct mif6ctl *vifc, int mrtsock)
627{ 910{
628 int vifi = vifc->mif6c_mifi; 911 int vifi = vifc->mif6c_mifi;
629 struct mif_device *v = &net->ipv6.vif6_table[vifi]; 912 struct mif_device *v = &mrt->vif6_table[vifi];
630 struct net_device *dev; 913 struct net_device *dev;
631 struct inet6_dev *in6_dev; 914 struct inet6_dev *in6_dev;
632 int err; 915 int err;
633 916
634 /* Is vif busy ? */ 917 /* Is vif busy ? */
635 if (MIF_EXISTS(net, vifi)) 918 if (MIF_EXISTS(mrt, vifi))
636 return -EADDRINUSE; 919 return -EADDRINUSE;
637 920
638 switch (vifc->mif6c_flags) { 921 switch (vifc->mif6c_flags) {
@@ -642,9 +925,9 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
642 * Special Purpose VIF in PIM 925 * Special Purpose VIF in PIM
643 * All the packets will be sent to the daemon 926 * All the packets will be sent to the daemon
644 */ 927 */
645 if (net->ipv6.mroute_reg_vif_num >= 0) 928 if (mrt->mroute_reg_vif_num >= 0)
646 return -EADDRINUSE; 929 return -EADDRINUSE;
647 dev = ip6mr_reg_vif(net); 930 dev = ip6mr_reg_vif(net, mrt);
648 if (!dev) 931 if (!dev)
649 return -ENOBUFS; 932 return -ENOBUFS;
650 err = dev_set_allmulti(dev, 1); 933 err = dev_set_allmulti(dev, 1);
@@ -694,50 +977,48 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
694 v->dev = dev; 977 v->dev = dev;
695#ifdef CONFIG_IPV6_PIMSM_V2 978#ifdef CONFIG_IPV6_PIMSM_V2
696 if (v->flags & MIFF_REGISTER) 979 if (v->flags & MIFF_REGISTER)
697 net->ipv6.mroute_reg_vif_num = vifi; 980 mrt->mroute_reg_vif_num = vifi;
698#endif 981#endif
699 if (vifi + 1 > net->ipv6.maxvif) 982 if (vifi + 1 > mrt->maxvif)
700 net->ipv6.maxvif = vifi + 1; 983 mrt->maxvif = vifi + 1;
701 write_unlock_bh(&mrt_lock); 984 write_unlock_bh(&mrt_lock);
702 return 0; 985 return 0;
703} 986}
704 987
705static struct mfc6_cache *ip6mr_cache_find(struct net *net, 988static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
706 struct in6_addr *origin, 989 struct in6_addr *origin,
707 struct in6_addr *mcastgrp) 990 struct in6_addr *mcastgrp)
708{ 991{
709 int line = MFC6_HASH(mcastgrp, origin); 992 int line = MFC6_HASH(mcastgrp, origin);
710 struct mfc6_cache *c; 993 struct mfc6_cache *c;
711 994
712 for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) { 995 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
713 if (ipv6_addr_equal(&c->mf6c_origin, origin) && 996 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
714 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) 997 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
715 break; 998 return c;
716 } 999 }
717 return c; 1000 return NULL;
718} 1001}
719 1002
720/* 1003/*
721 * Allocate a multicast cache entry 1004 * Allocate a multicast cache entry
722 */ 1005 */
723static struct mfc6_cache *ip6mr_cache_alloc(struct net *net) 1006static struct mfc6_cache *ip6mr_cache_alloc(void)
724{ 1007{
725 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 1008 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
726 if (c == NULL) 1009 if (c == NULL)
727 return NULL; 1010 return NULL;
728 c->mfc_un.res.minvif = MAXMIFS; 1011 c->mfc_un.res.minvif = MAXMIFS;
729 mfc6_net_set(c, net);
730 return c; 1012 return c;
731} 1013}
732 1014
733static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net) 1015static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
734{ 1016{
735 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 1017 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
736 if (c == NULL) 1018 if (c == NULL)
737 return NULL; 1019 return NULL;
738 skb_queue_head_init(&c->mfc_un.unres.unresolved); 1020 skb_queue_head_init(&c->mfc_un.unres.unresolved);
739 c->mfc_un.unres.expires = jiffies + 10 * HZ; 1021 c->mfc_un.unres.expires = jiffies + 10 * HZ;
740 mfc6_net_set(c, net);
741 return c; 1022 return c;
742} 1023}
743 1024
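The ip6mr_cache_find() conversion shows a subtle point of the list_head API: after list_for_each_entry() falls off the end, the cursor aliases the head sentinel rather than NULL, so a hit has to be returned from inside the loop. Sketched in isolation:

        static struct mfc6_cache *find_entry(struct list_head *bucket,
                                             const struct in6_addr *o,
                                             const struct in6_addr *g)
        {
                struct mfc6_cache *c;

                list_for_each_entry(c, bucket, list) {
                        if (ipv6_addr_equal(&c->mf6c_origin, o) &&
                            ipv6_addr_equal(&c->mf6c_mcastgrp, g))
                                return c;  /* must return from inside the loop */
                }
                return NULL;  /* c is not a valid entry here */
        }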
@@ -745,7 +1026,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
745 * A cache entry has gone into a resolved state from queued 1026 * A cache entry has gone into a resolved state from queued
746 */ 1027 */
747 1028
748static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) 1029static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1030 struct mfc6_cache *uc, struct mfc6_cache *c)
749{ 1031{
750 struct sk_buff *skb; 1032 struct sk_buff *skb;
751 1033
@@ -758,7 +1040,7 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
758 int err; 1040 int err;
759 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); 1041 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
760 1042
761 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { 1043 if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
762 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; 1044 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
763 } else { 1045 } else {
764 nlh->nlmsg_type = NLMSG_ERROR; 1046 nlh->nlmsg_type = NLMSG_ERROR;
@@ -766,9 +1048,9 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
766 skb_trim(skb, nlh->nlmsg_len); 1048 skb_trim(skb, nlh->nlmsg_len);
767 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; 1049 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
768 } 1050 }
769 err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid); 1051 err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
770 } else 1052 } else
771 ip6_mr_forward(skb, c); 1053 ip6_mr_forward(net, mrt, skb, c);
772 } 1054 }
773} 1055}
774 1056
@@ -779,8 +1061,8 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
779 * Called under mrt_lock. 1061 * Called under mrt_lock.
780 */ 1062 */
781 1063
782static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, 1064static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
783 int assert) 1065 mifi_t mifi, int assert)
784{ 1066{
785 struct sk_buff *skb; 1067 struct sk_buff *skb;
786 struct mrt6msg *msg; 1068 struct mrt6msg *msg;
@@ -816,7 +1098,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
816 msg = (struct mrt6msg *)skb_transport_header(skb); 1098 msg = (struct mrt6msg *)skb_transport_header(skb);
817 msg->im6_mbz = 0; 1099 msg->im6_mbz = 0;
818 msg->im6_msgtype = MRT6MSG_WHOLEPKT; 1100 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
819 msg->im6_mif = net->ipv6.mroute_reg_vif_num; 1101 msg->im6_mif = mrt->mroute_reg_vif_num;
820 msg->im6_pad = 0; 1102 msg->im6_pad = 0;
821 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); 1103 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
822 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); 1104 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
@@ -851,7 +1133,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
851 skb->ip_summed = CHECKSUM_UNNECESSARY; 1133 skb->ip_summed = CHECKSUM_UNNECESSARY;
852 } 1134 }
853 1135
854 if (net->ipv6.mroute6_sk == NULL) { 1136 if (mrt->mroute6_sk == NULL) {
855 kfree_skb(skb); 1137 kfree_skb(skb);
856 return -EINVAL; 1138 return -EINVAL;
857 } 1139 }
@@ -859,7 +1141,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
859 /* 1141 /*
860 * Deliver to user space multicast routing algorithms 1142 * Deliver to user space multicast routing algorithms
861 */ 1143 */
862 ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb); 1144 ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
863 if (ret < 0) { 1145 if (ret < 0) {
864 if (net_ratelimit()) 1146 if (net_ratelimit())
865 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); 1147 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
@@ -874,26 +1156,28 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
874 */ 1156 */
875 1157
876static int 1158static int
877ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) 1159ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
878{ 1160{
1161 bool found = false;
879 int err; 1162 int err;
880 struct mfc6_cache *c; 1163 struct mfc6_cache *c;
881 1164
882 spin_lock_bh(&mfc_unres_lock); 1165 spin_lock_bh(&mfc_unres_lock);
883 for (c = mfc_unres_queue; c; c = c->next) { 1166 list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
884 if (net_eq(mfc6_net(c), net) && 1167 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
885 ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && 1168 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
886 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) 1169 found = true;
887 break; 1170 break;
1171 }
888 } 1172 }
889 1173
890 if (c == NULL) { 1174 if (!found) {
891 /* 1175 /*
892 * Create a new entry if allowable 1176 * Create a new entry if allowable
893 */ 1177 */
894 1178
895 if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 || 1179 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
896 (c = ip6mr_cache_alloc_unres(net)) == NULL) { 1180 (c = ip6mr_cache_alloc_unres()) == NULL) {
897 spin_unlock_bh(&mfc_unres_lock); 1181 spin_unlock_bh(&mfc_unres_lock);
898 1182
899 kfree_skb(skb); 1183 kfree_skb(skb);
@@ -910,7 +1194,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
910 /* 1194 /*
911 * Reflect first query at pim6sd 1195 * Reflect first query at pim6sd
912 */ 1196 */
913 err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE); 1197 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
914 if (err < 0) { 1198 if (err < 0) {
915 /* If the report failed throw the cache entry 1199 /* If the report failed throw the cache entry
916 out - Brad Parker 1200 out - Brad Parker
@@ -922,11 +1206,10 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
922 return err; 1206 return err;
923 } 1207 }
924 1208
925 atomic_inc(&net->ipv6.cache_resolve_queue_len); 1209 atomic_inc(&mrt->cache_resolve_queue_len);
926 c->next = mfc_unres_queue; 1210 list_add(&c->list, &mrt->mfc6_unres_queue);
927 mfc_unres_queue = c;
928 1211
929 ipmr_do_expire_process(1); 1212 ipmr_do_expire_process(mrt);
930 } 1213 }
931 1214
932 /* 1215 /*
@@ -948,19 +1231,18 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
948 * MFC6 cache manipulation by user space 1231 * MFC6 cache manipulation by user space
949 */ 1232 */
950 1233
951static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc) 1234static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
952{ 1235{
953 int line; 1236 int line;
954 struct mfc6_cache *c, **cp; 1237 struct mfc6_cache *c, *next;
955 1238
956 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); 1239 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
957 1240
958 for (cp = &net->ipv6.mfc6_cache_array[line]; 1241 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
959 (c = *cp) != NULL; cp = &c->next) {
960 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && 1242 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
961 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { 1243 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
962 write_lock_bh(&mrt_lock); 1244 write_lock_bh(&mrt_lock);
963 *cp = c->next; 1245 list_del(&c->list);
964 write_unlock_bh(&mrt_lock); 1246 write_unlock_bh(&mrt_lock);
965 1247
966 ip6mr_cache_free(c); 1248 ip6mr_cache_free(c);
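ip6mr_mfc_delete() keeps the old locking discipline across the list conversion: lookups run under read_lock(&mrt_lock), so the unlink must take the write side, and the entry is freed only after the lock is released, when no reader can still hold a pointer obtained from the bucket:

        write_lock_bh(&mrt_lock);
        list_del(&c->list);             /* unlink while readers are excluded */
        write_unlock_bh(&mrt_lock);

        ip6mr_cache_free(c);            /* safe: the entry is unreachable now */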
@@ -975,6 +1257,7 @@ static int ip6mr_device_event(struct notifier_block *this,
975{ 1257{
976 struct net_device *dev = ptr; 1258 struct net_device *dev = ptr;
977 struct net *net = dev_net(dev); 1259 struct net *net = dev_net(dev);
1260 struct mr6_table *mrt;
978 struct mif_device *v; 1261 struct mif_device *v;
979 int ct; 1262 int ct;
980 LIST_HEAD(list); 1263 LIST_HEAD(list);
@@ -982,10 +1265,12 @@ static int ip6mr_device_event(struct notifier_block *this,
982 if (event != NETDEV_UNREGISTER) 1265 if (event != NETDEV_UNREGISTER)
983 return NOTIFY_DONE; 1266 return NOTIFY_DONE;
984 1267
985 v = &net->ipv6.vif6_table[0]; 1268 ip6mr_for_each_table(mrt, net) {
986 for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) { 1269 v = &mrt->vif6_table[0];
987 if (v->dev == dev) 1270 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
988 mif6_delete(net, ct, &list); 1271 if (v->dev == dev)
1272 mif6_delete(mrt, ct, &list);
1273 }
989 } 1274 }
990 unregister_netdevice_many(&list); 1275 unregister_netdevice_many(&list);
991 1276
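Events with no flow to hash on, such as NETDEV_UNREGISTER, have to visit every table, hence ip6mr_for_each_table(). Its definition sits outside these hunks; judging from the usage, something like the following shape is expected, where the multi-table variant walks a per-namespace table list and the single-table build degenerates to one iteration (a reconstruction, not quoted from the patch):

        #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
        #define ip6mr_for_each_table(mrt, net) \
                list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
        #else
        #define ip6mr_for_each_table(mrt, net) \
                for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
        #endif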
@@ -1002,26 +1287,11 @@ static struct notifier_block ip6_mr_notifier = {
1002 1287
1003static int __net_init ip6mr_net_init(struct net *net) 1288static int __net_init ip6mr_net_init(struct net *net)
1004{ 1289{
1005 int err = 0; 1290 int err;
1006 net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
1007 GFP_KERNEL);
1008 if (!net->ipv6.vif6_table) {
1009 err = -ENOMEM;
1010 goto fail;
1011 }
1012
1013 /* Forwarding cache */
1014 net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
1015 sizeof(struct mfc6_cache *),
1016 GFP_KERNEL);
1017 if (!net->ipv6.mfc6_cache_array) {
1018 err = -ENOMEM;
1019 goto fail_mfc6_cache;
1020 }
1021 1291
1022#ifdef CONFIG_IPV6_PIMSM_V2 1292 err = ip6mr_rules_init(net);
1023 net->ipv6.mroute_reg_vif_num = -1; 1293 if (err < 0)
1024#endif 1294 goto fail;
1025 1295
1026#ifdef CONFIG_PROC_FS 1296#ifdef CONFIG_PROC_FS
1027 err = -ENOMEM; 1297 err = -ENOMEM;
@@ -1030,16 +1300,15 @@ static int __net_init ip6mr_net_init(struct net *net)
1030 if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops)) 1300 if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
1031 goto proc_cache_fail; 1301 goto proc_cache_fail;
1032#endif 1302#endif
1303
1033 return 0; 1304 return 0;
1034 1305
1035#ifdef CONFIG_PROC_FS 1306#ifdef CONFIG_PROC_FS
1036proc_cache_fail: 1307proc_cache_fail:
1037 proc_net_remove(net, "ip6_mr_vif"); 1308 proc_net_remove(net, "ip6_mr_vif");
1038proc_vif_fail: 1309proc_vif_fail:
1039 kfree(net->ipv6.mfc6_cache_array); 1310 ip6mr_rules_exit(net);
1040#endif 1311#endif
1041fail_mfc6_cache:
1042 kfree(net->ipv6.vif6_table);
1043fail: 1312fail:
1044 return err; 1313 return err;
1045} 1314}
@@ -1050,9 +1319,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
1050 proc_net_remove(net, "ip6_mr_cache"); 1319 proc_net_remove(net, "ip6_mr_cache");
1051 proc_net_remove(net, "ip6_mr_vif"); 1320 proc_net_remove(net, "ip6_mr_vif");
1052#endif 1321#endif
1053 mroute_clean_tables(net); 1322 ip6mr_rules_exit(net);
1054 kfree(net->ipv6.mfc6_cache_array);
1055 kfree(net->ipv6.vif6_table);
1056} 1323}
1057 1324
1058static struct pernet_operations ip6mr_net_ops = { 1325static struct pernet_operations ip6mr_net_ops = {
@@ -1075,7 +1342,6 @@ int __init ip6_mr_init(void)
1075 if (err) 1342 if (err)
1076 goto reg_pernet_fail; 1343 goto reg_pernet_fail;
1077 1344
1078 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1079 err = register_netdevice_notifier(&ip6_mr_notifier); 1345 err = register_netdevice_notifier(&ip6_mr_notifier);
1080 if (err) 1346 if (err)
1081 goto reg_notif_fail; 1347 goto reg_notif_fail;
@@ -1086,13 +1352,13 @@ int __init ip6_mr_init(void)
1086 goto add_proto_fail; 1352 goto add_proto_fail;
1087 } 1353 }
1088#endif 1354#endif
1355 rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
1089 return 0; 1356 return 0;
1090#ifdef CONFIG_IPV6_PIMSM_V2 1357#ifdef CONFIG_IPV6_PIMSM_V2
1091add_proto_fail: 1358add_proto_fail:
1092 unregister_netdevice_notifier(&ip6_mr_notifier); 1359 unregister_netdevice_notifier(&ip6_mr_notifier);
1093#endif 1360#endif
1094reg_notif_fail: 1361reg_notif_fail:
1095 del_timer(&ipmr_expire_timer);
1096 unregister_pernet_subsys(&ip6mr_net_ops); 1362 unregister_pernet_subsys(&ip6mr_net_ops);
1097reg_pernet_fail: 1363reg_pernet_fail:
1098 kmem_cache_destroy(mrt_cachep); 1364 kmem_cache_destroy(mrt_cachep);
@@ -1102,15 +1368,16 @@ reg_pernet_fail:
1102void ip6_mr_cleanup(void) 1368void ip6_mr_cleanup(void)
1103{ 1369{
1104 unregister_netdevice_notifier(&ip6_mr_notifier); 1370 unregister_netdevice_notifier(&ip6_mr_notifier);
1105 del_timer(&ipmr_expire_timer);
1106 unregister_pernet_subsys(&ip6mr_net_ops); 1371 unregister_pernet_subsys(&ip6mr_net_ops);
1107 kmem_cache_destroy(mrt_cachep); 1372 kmem_cache_destroy(mrt_cachep);
1108} 1373}
1109 1374
1110static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) 1375static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1376 struct mf6cctl *mfc, int mrtsock)
1111{ 1377{
1378 bool found = false;
1112 int line; 1379 int line;
1113 struct mfc6_cache *uc, *c, **cp; 1380 struct mfc6_cache *uc, *c;
1114 unsigned char ttls[MAXMIFS]; 1381 unsigned char ttls[MAXMIFS];
1115 int i; 1382 int i;
1116 1383
@@ -1126,17 +1393,18 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
1126 1393
1127 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); 1394 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1128 1395
1129 for (cp = &net->ipv6.mfc6_cache_array[line]; 1396 list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1130 (c = *cp) != NULL; cp = &c->next) {
1131 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && 1397 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1132 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) 1398 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
1399 found = true;
1133 break; 1400 break;
1401 }
1134 } 1402 }
1135 1403
1136 if (c != NULL) { 1404 if (found) {
1137 write_lock_bh(&mrt_lock); 1405 write_lock_bh(&mrt_lock);
1138 c->mf6c_parent = mfc->mf6cc_parent; 1406 c->mf6c_parent = mfc->mf6cc_parent;
1139 ip6mr_update_thresholds(c, ttls); 1407 ip6mr_update_thresholds(mrt, c, ttls);
1140 if (!mrtsock) 1408 if (!mrtsock)
1141 c->mfc_flags |= MFC_STATIC; 1409 c->mfc_flags |= MFC_STATIC;
1142 write_unlock_bh(&mrt_lock); 1410 write_unlock_bh(&mrt_lock);
@@ -1146,43 +1414,42 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
1146 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) 1414 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1147 return -EINVAL; 1415 return -EINVAL;
1148 1416
1149 c = ip6mr_cache_alloc(net); 1417 c = ip6mr_cache_alloc();
1150 if (c == NULL) 1418 if (c == NULL)
1151 return -ENOMEM; 1419 return -ENOMEM;
1152 1420
1153 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; 1421 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1154 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr; 1422 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1155 c->mf6c_parent = mfc->mf6cc_parent; 1423 c->mf6c_parent = mfc->mf6cc_parent;
1156 ip6mr_update_thresholds(c, ttls); 1424 ip6mr_update_thresholds(mrt, c, ttls);
1157 if (!mrtsock) 1425 if (!mrtsock)
1158 c->mfc_flags |= MFC_STATIC; 1426 c->mfc_flags |= MFC_STATIC;
1159 1427
1160 write_lock_bh(&mrt_lock); 1428 write_lock_bh(&mrt_lock);
1161 c->next = net->ipv6.mfc6_cache_array[line]; 1429 list_add(&c->list, &mrt->mfc6_cache_array[line]);
1162 net->ipv6.mfc6_cache_array[line] = c;
1163 write_unlock_bh(&mrt_lock); 1430 write_unlock_bh(&mrt_lock);
1164 1431
1165 /* 1432 /*
1166 * Check to see if we resolved a queued list. If so we 1433 * Check to see if we resolved a queued list. If so we
1167 * need to send on the frames and tidy up. 1434 * need to send on the frames and tidy up.
1168 */ 1435 */
1436 found = false;
1169 spin_lock_bh(&mfc_unres_lock); 1437 spin_lock_bh(&mfc_unres_lock);
1170 for (cp = &mfc_unres_queue; (uc = *cp) != NULL; 1438 list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1171 cp = &uc->next) { 1439 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1172 if (net_eq(mfc6_net(uc), net) &&
1173 ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1174 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { 1440 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1175 *cp = uc->next; 1441 list_del(&uc->list);
1176 atomic_dec(&net->ipv6.cache_resolve_queue_len); 1442 atomic_dec(&mrt->cache_resolve_queue_len);
1443 found = true;
1177 break; 1444 break;
1178 } 1445 }
1179 } 1446 }
1180 if (mfc_unres_queue == NULL) 1447 if (list_empty(&mrt->mfc6_unres_queue))
1181 del_timer(&ipmr_expire_timer); 1448 del_timer(&mrt->ipmr_expire_timer);
1182 spin_unlock_bh(&mfc_unres_lock); 1449 spin_unlock_bh(&mfc_unres_lock);
1183 1450
1184 if (uc) { 1451 if (found) {
1185 ip6mr_cache_resolve(uc, c); 1452 ip6mr_cache_resolve(net, mrt, uc, c);
1186 ip6mr_cache_free(uc); 1453 ip6mr_cache_free(uc);
1187 } 1454 }
1188 return 0; 1455 return 0;
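Installing a resolved (S,G) entry must also drain packets queued while it was unresolved. With list_head the post-loop "uc != NULL" test no longer works, hence the found flag; the queued entry is claimed under mfc_unres_lock, the per-table timer is stopped if the queue drained, and the replay plus free happen outside the lock. The skeleton, with match() standing in for the two ipv6_addr_equal() checks:

        found = false;
        spin_lock_bh(&mfc_unres_lock);
        list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
                if (match(uc, c)) {
                        list_del(&uc->list);    /* claim it under the lock */
                        found = true;
                        break;
                }
        }
        if (list_empty(&mrt->mfc6_unres_queue))
                del_timer(&mrt->ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);

        if (found) {
                ip6mr_cache_resolve(net, mrt, uc, c);   /* replay queued skbs */
                ip6mr_cache_free(uc);
        }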
@@ -1192,17 +1459,18 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
1192 * Close the multicast socket, and clear the vif tables etc 1459 * Close the multicast socket, and clear the vif tables etc
1193 */ 1460 */
1194 1461
1195static void mroute_clean_tables(struct net *net) 1462static void mroute_clean_tables(struct mr6_table *mrt)
1196{ 1463{
1197 int i; 1464 int i;
1198 LIST_HEAD(list); 1465 LIST_HEAD(list);
1466 struct mfc6_cache *c, *next;
1199 1467
1200 /* 1468 /*
1201 * Shut down all active vif entries 1469 * Shut down all active vif entries
1202 */ 1470 */
1203 for (i = 0; i < net->ipv6.maxvif; i++) { 1471 for (i = 0; i < mrt->maxvif; i++) {
1204 if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC)) 1472 if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1205 mif6_delete(net, i, &list); 1473 mif6_delete(mrt, i, &list);
1206 } 1474 }
1207 unregister_netdevice_many(&list); 1475 unregister_netdevice_many(&list);
1208 1476
@@ -1210,48 +1478,36 @@ static void mroute_clean_tables(struct net *net)
1210 * Wipe the cache 1478 * Wipe the cache
1211 */ 1479 */
1212 for (i = 0; i < MFC6_LINES; i++) { 1480 for (i = 0; i < MFC6_LINES; i++) {
1213 struct mfc6_cache *c, **cp; 1481 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1214 1482 if (c->mfc_flags & MFC_STATIC)
1215 cp = &net->ipv6.mfc6_cache_array[i];
1216 while ((c = *cp) != NULL) {
1217 if (c->mfc_flags & MFC_STATIC) {
1218 cp = &c->next;
1219 continue; 1483 continue;
1220 }
1221 write_lock_bh(&mrt_lock); 1484 write_lock_bh(&mrt_lock);
1222 *cp = c->next; 1485 list_del(&c->list);
1223 write_unlock_bh(&mrt_lock); 1486 write_unlock_bh(&mrt_lock);
1224 1487
1225 ip6mr_cache_free(c); 1488 ip6mr_cache_free(c);
1226 } 1489 }
1227 } 1490 }
1228 1491
1229 if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) { 1492 if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1230 struct mfc6_cache *c, **cp;
1231
1232 spin_lock_bh(&mfc_unres_lock); 1493 spin_lock_bh(&mfc_unres_lock);
1233 cp = &mfc_unres_queue; 1494 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1234 while ((c = *cp) != NULL) { 1495 list_del(&c->list);
1235 if (!net_eq(mfc6_net(c), net)) { 1496 ip6mr_destroy_unres(mrt, c);
1236 cp = &c->next;
1237 continue;
1238 }
1239 *cp = c->next;
1240 ip6mr_destroy_unres(c);
1241 } 1497 }
1242 spin_unlock_bh(&mfc_unres_lock); 1498 spin_unlock_bh(&mfc_unres_lock);
1243 } 1499 }
1244} 1500}
1245 1501
1246static int ip6mr_sk_init(struct sock *sk) 1502static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1247{ 1503{
1248 int err = 0; 1504 int err = 0;
1249 struct net *net = sock_net(sk); 1505 struct net *net = sock_net(sk);
1250 1506
1251 rtnl_lock(); 1507 rtnl_lock();
1252 write_lock_bh(&mrt_lock); 1508 write_lock_bh(&mrt_lock);
1253 if (likely(net->ipv6.mroute6_sk == NULL)) { 1509 if (likely(mrt->mroute6_sk == NULL)) {
1254 net->ipv6.mroute6_sk = sk; 1510 mrt->mroute6_sk = sk;
1255 net->ipv6.devconf_all->mc_forwarding++; 1511 net->ipv6.devconf_all->mc_forwarding++;
1256 } 1512 }
1257 else 1513 else
@@ -1265,24 +1521,43 @@ static int ip6mr_sk_init(struct sock *sk)
1265 1521
1266int ip6mr_sk_done(struct sock *sk) 1522int ip6mr_sk_done(struct sock *sk)
1267{ 1523{
1268 int err = 0; 1524 int err = -EACCES;
1269 struct net *net = sock_net(sk); 1525 struct net *net = sock_net(sk);
1526 struct mr6_table *mrt;
1270 1527
1271 rtnl_lock(); 1528 rtnl_lock();
1272 if (sk == net->ipv6.mroute6_sk) { 1529 ip6mr_for_each_table(mrt, net) {
1273 write_lock_bh(&mrt_lock); 1530 if (sk == mrt->mroute6_sk) {
1274 net->ipv6.mroute6_sk = NULL; 1531 write_lock_bh(&mrt_lock);
1275 net->ipv6.devconf_all->mc_forwarding--; 1532 mrt->mroute6_sk = NULL;
1276 write_unlock_bh(&mrt_lock); 1533 net->ipv6.devconf_all->mc_forwarding--;
1534 write_unlock_bh(&mrt_lock);
1277 1535
1278 mroute_clean_tables(net); 1536 mroute_clean_tables(mrt);
1279 } else 1537 err = 0;
1280 err = -EACCES; 1538 break;
1539 }
1540 }
1281 rtnl_unlock(); 1541 rtnl_unlock();
1282 1542
1283 return err; 1543 return err;
1284} 1544}
1285 1545
1546struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1547{
1548 struct mr6_table *mrt;
1549 struct flowi fl = {
1550 .iif = skb->skb_iif,
1551 .oif = skb->dev->ifindex,
1552 .mark = skb->mark,
1553 };
1554
1555 if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
1556 return NULL;
1557
1558 return mrt->mroute6_sk;
1559}
1560
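mroute6_socket() is the output-path twin of the lookups above: given an outgoing skb it keys the fib-rules lookup on oif, iif and mark and hands back the owning table's daemon socket, or NULL when no table claims the flow. A hedged sketch of a caller, modelled on the multicast output check that previously read net->ipv6.mroute6_sk directly (the surrounding condition and helper name are illustrative):

        struct sock *msk = mroute6_socket(net, skb);

        /* loop a copy of locally generated multicast back to the daemon,
         * unless the packet was already forwarded by ip6mr itself */
        if (msk && !(IP6CB(skb)->flags & IP6SKB_FORWARDED))
                deliver_copy_to_daemon(skb);    /* hypothetical helper */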
1286/* 1561/*
1287 * Socket options and virtual interface manipulation. The whole 1562 * Socket options and virtual interface manipulation. The whole
1288 * virtual interface system is a complete heap, but unfortunately 1563 * virtual interface system is a complete heap, but unfortunately
@@ -1297,9 +1572,14 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1297 struct mf6cctl mfc; 1572 struct mf6cctl mfc;
1298 mifi_t mifi; 1573 mifi_t mifi;
1299 struct net *net = sock_net(sk); 1574 struct net *net = sock_net(sk);
1575 struct mr6_table *mrt;
1576
1577 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1578 if (mrt == NULL)
1579 return -ENOENT;
1300 1580
1301 if (optname != MRT6_INIT) { 1581 if (optname != MRT6_INIT) {
1302 if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN)) 1582 if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
1303 return -EACCES; 1583 return -EACCES;
1304 } 1584 }
1305 1585
@@ -1311,7 +1591,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1311 if (optlen < sizeof(int)) 1591 if (optlen < sizeof(int))
1312 return -EINVAL; 1592 return -EINVAL;
1313 1593
1314 return ip6mr_sk_init(sk); 1594 return ip6mr_sk_init(mrt, sk);
1315 1595
1316 case MRT6_DONE: 1596 case MRT6_DONE:
1317 return ip6mr_sk_done(sk); 1597 return ip6mr_sk_done(sk);
@@ -1324,7 +1604,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1324 if (vif.mif6c_mifi >= MAXMIFS) 1604 if (vif.mif6c_mifi >= MAXMIFS)
1325 return -ENFILE; 1605 return -ENFILE;
1326 rtnl_lock(); 1606 rtnl_lock();
1327 ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk); 1607 ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1328 rtnl_unlock(); 1608 rtnl_unlock();
1329 return ret; 1609 return ret;
1330 1610
@@ -1334,7 +1614,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1334 if (copy_from_user(&mifi, optval, sizeof(mifi_t))) 1614 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1335 return -EFAULT; 1615 return -EFAULT;
1336 rtnl_lock(); 1616 rtnl_lock();
1337 ret = mif6_delete(net, mifi, NULL); 1617 ret = mif6_delete(mrt, mifi, NULL);
1338 rtnl_unlock(); 1618 rtnl_unlock();
1339 return ret; 1619 return ret;
1340 1620
@@ -1350,10 +1630,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1350 return -EFAULT; 1630 return -EFAULT;
1351 rtnl_lock(); 1631 rtnl_lock();
1352 if (optname == MRT6_DEL_MFC) 1632 if (optname == MRT6_DEL_MFC)
1353 ret = ip6mr_mfc_delete(net, &mfc); 1633 ret = ip6mr_mfc_delete(mrt, &mfc);
1354 else 1634 else
1355 ret = ip6mr_mfc_add(net, &mfc, 1635 ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
1356 sk == net->ipv6.mroute6_sk);
1357 rtnl_unlock(); 1636 rtnl_unlock();
1358 return ret; 1637 return ret;
1359 1638
@@ -1365,7 +1644,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1365 int v; 1644 int v;
1366 if (get_user(v, (int __user *)optval)) 1645 if (get_user(v, (int __user *)optval))
1367 return -EFAULT; 1646 return -EFAULT;
1368 net->ipv6.mroute_do_assert = !!v; 1647 mrt->mroute_do_assert = !!v;
1369 return 0; 1648 return 0;
1370 } 1649 }
1371 1650
@@ -1378,15 +1657,36 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
1378 v = !!v; 1657 v = !!v;
1379 rtnl_lock(); 1658 rtnl_lock();
1380 ret = 0; 1659 ret = 0;
1381 if (v != net->ipv6.mroute_do_pim) { 1660 if (v != mrt->mroute_do_pim) {
1382 net->ipv6.mroute_do_pim = v; 1661 mrt->mroute_do_pim = v;
1383 net->ipv6.mroute_do_assert = v; 1662 mrt->mroute_do_assert = v;
1384 } 1663 }
1385 rtnl_unlock(); 1664 rtnl_unlock();
1386 return ret; 1665 return ret;
1387 } 1666 }
1388 1667
1389#endif 1668#endif
1669#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1670 case MRT6_TABLE:
1671 {
1672 u32 v;
1673
1674 if (optlen != sizeof(u32))
1675 return -EINVAL;
1676 if (get_user(v, (u32 __user *)optval))
1677 return -EFAULT;
1678 if (sk == mrt->mroute6_sk)
1679 return -EBUSY;
1680
1681 rtnl_lock();
1682 ret = 0;
1683 if (!ip6mr_new_table(net, v))
1684 ret = -ENOMEM;
1685 raw6_sk(sk)->ip6mr_table = v;
1686 rtnl_unlock();
1687 return ret;
1688 }
1689#endif
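From userspace, MRT6_TABLE must be issued before MRT6_INIT: once the socket has become a table's mroute6_sk, the hunk above returns -EBUSY. A hypothetical pim6sd-style fragment; the numeric values mirror MRT6_BASE == 200 from <linux/mroute6.h> with MRT6_TABLE == MRT6_BASE + 9 as added by this series, but treat them (and table id 100) as assumptions:

        #include <stdio.h>
        #include <sys/socket.h>
        #include <netinet/in.h>

        #define MRT6_INIT   200         /* MRT6_BASE */
        #define MRT6_TABLE  209         /* MRT6_BASE + 9; assumption */

        int main(void)
        {
                unsigned int table = 100;       /* illustrative table id */
                int one = 1;
                int s = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6);

                if (s < 0)
                        return 1;
                /* order matters: select the table first (needs CAP_NET_ADMIN) */
                if (setsockopt(s, IPPROTO_IPV6, MRT6_TABLE, &table, sizeof(table)))
                        perror("MRT6_TABLE");
                if (setsockopt(s, IPPROTO_IPV6, MRT6_INIT, &one, sizeof(one)))
                        perror("MRT6_INIT");
                return 0;
        }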
1390 /* 1690 /*
1391 * Spurious command, or MRT6_VERSION which you cannot 1691 * Spurious command, or MRT6_VERSION which you cannot
1392 * set. 1692 * set.
@@ -1406,6 +1706,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1406 int olr; 1706 int olr;
1407 int val; 1707 int val;
1408 struct net *net = sock_net(sk); 1708 struct net *net = sock_net(sk);
1709 struct mr6_table *mrt;
1710
1711 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1712 if (mrt == NULL)
1713 return -ENOENT;
1409 1714
1410 switch (optname) { 1715 switch (optname) {
1411 case MRT6_VERSION: 1716 case MRT6_VERSION:
@@ -1413,11 +1718,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1413 break; 1718 break;
1414#ifdef CONFIG_IPV6_PIMSM_V2 1719#ifdef CONFIG_IPV6_PIMSM_V2
1415 case MRT6_PIM: 1720 case MRT6_PIM:
1416 val = net->ipv6.mroute_do_pim; 1721 val = mrt->mroute_do_pim;
1417 break; 1722 break;
1418#endif 1723#endif
1419 case MRT6_ASSERT: 1724 case MRT6_ASSERT:
1420 val = net->ipv6.mroute_do_assert; 1725 val = mrt->mroute_do_assert;
1421 break; 1726 break;
1422 default: 1727 default:
1423 return -ENOPROTOOPT; 1728 return -ENOPROTOOPT;
@@ -1448,16 +1753,21 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1448 struct mif_device *vif; 1753 struct mif_device *vif;
1449 struct mfc6_cache *c; 1754 struct mfc6_cache *c;
1450 struct net *net = sock_net(sk); 1755 struct net *net = sock_net(sk);
1756 struct mr6_table *mrt;
1757
1758 mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1759 if (mrt == NULL)
1760 return -ENOENT;
1451 1761
1452 switch (cmd) { 1762 switch (cmd) {
1453 case SIOCGETMIFCNT_IN6: 1763 case SIOCGETMIFCNT_IN6:
1454 if (copy_from_user(&vr, arg, sizeof(vr))) 1764 if (copy_from_user(&vr, arg, sizeof(vr)))
1455 return -EFAULT; 1765 return -EFAULT;
1456 if (vr.mifi >= net->ipv6.maxvif) 1766 if (vr.mifi >= mrt->maxvif)
1457 return -EINVAL; 1767 return -EINVAL;
1458 read_lock(&mrt_lock); 1768 read_lock(&mrt_lock);
1459 vif = &net->ipv6.vif6_table[vr.mifi]; 1769 vif = &mrt->vif6_table[vr.mifi];
1460 if (MIF_EXISTS(net, vr.mifi)) { 1770 if (MIF_EXISTS(mrt, vr.mifi)) {
1461 vr.icount = vif->pkt_in; 1771 vr.icount = vif->pkt_in;
1462 vr.ocount = vif->pkt_out; 1772 vr.ocount = vif->pkt_out;
1463 vr.ibytes = vif->bytes_in; 1773 vr.ibytes = vif->bytes_in;
@@ -1475,7 +1785,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1475 return -EFAULT; 1785 return -EFAULT;
1476 1786
1477 read_lock(&mrt_lock); 1787 read_lock(&mrt_lock);
1478 c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr); 1788 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1479 if (c) { 1789 if (c) {
1480 sr.pktcnt = c->mfc_un.res.pkt; 1790 sr.pktcnt = c->mfc_un.res.pkt;
1481 sr.bytecnt = c->mfc_un.res.bytes; 1791 sr.bytecnt = c->mfc_un.res.bytes;
@@ -1505,11 +1815,11 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1505 * Processing handlers for ip6mr_forward 1815 * Processing handlers for ip6mr_forward
1506 */ 1816 */
1507 1817
1508static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) 1818static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1819 struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1509{ 1820{
1510 struct ipv6hdr *ipv6h; 1821 struct ipv6hdr *ipv6h;
1511 struct net *net = mfc6_net(c); 1822 struct mif_device *vif = &mrt->vif6_table[vifi];
1512 struct mif_device *vif = &net->ipv6.vif6_table[vifi];
1513 struct net_device *dev; 1823 struct net_device *dev;
1514 struct dst_entry *dst; 1824 struct dst_entry *dst;
1515 struct flowi fl; 1825 struct flowi fl;
@@ -1523,7 +1833,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1523 vif->bytes_out += skb->len; 1833 vif->bytes_out += skb->len;
1524 vif->dev->stats.tx_bytes += skb->len; 1834 vif->dev->stats.tx_bytes += skb->len;
1525 vif->dev->stats.tx_packets++; 1835 vif->dev->stats.tx_packets++;
1526 ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT); 1836 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
1527 goto out_free; 1837 goto out_free;
1528 } 1838 }
1529#endif 1839#endif
@@ -1570,7 +1880,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1570 1880
1571 IP6CB(skb)->flags |= IP6SKB_FORWARDED; 1881 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1572 1882
1573 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev, 1883 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
1574 ip6mr_forward2_finish); 1884 ip6mr_forward2_finish);
1575 1885
1576out_free: 1886out_free:
@@ -1578,22 +1888,22 @@ out_free:
1578 return 0; 1888 return 0;
1579} 1889}
1580 1890
1581static int ip6mr_find_vif(struct net_device *dev) 1891static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
1582{ 1892{
1583 struct net *net = dev_net(dev);
1584 int ct; 1893 int ct;
1585 for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) { 1894
1586 if (net->ipv6.vif6_table[ct].dev == dev) 1895 for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
1896 if (mrt->vif6_table[ct].dev == dev)
1587 break; 1897 break;
1588 } 1898 }
1589 return ct; 1899 return ct;
1590} 1900}
1591 1901
1592static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) 1902static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
1903 struct sk_buff *skb, struct mfc6_cache *cache)
1593{ 1904{
1594 int psend = -1; 1905 int psend = -1;
1595 int vif, ct; 1906 int vif, ct;
1596 struct net *net = mfc6_net(cache);
1597 1907
1598 vif = cache->mf6c_parent; 1908 vif = cache->mf6c_parent;
1599 cache->mfc_un.res.pkt++; 1909 cache->mfc_un.res.pkt++;
@@ -1602,30 +1912,30 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1602 /* 1912 /*
1603 * Wrong interface: drop packet and (maybe) send PIM assert. 1913 * Wrong interface: drop packet and (maybe) send PIM assert.
1604 */ 1914 */
1605 if (net->ipv6.vif6_table[vif].dev != skb->dev) { 1915 if (mrt->vif6_table[vif].dev != skb->dev) {
1606 int true_vifi; 1916 int true_vifi;
1607 1917
1608 cache->mfc_un.res.wrong_if++; 1918 cache->mfc_un.res.wrong_if++;
1609 true_vifi = ip6mr_find_vif(skb->dev); 1919 true_vifi = ip6mr_find_vif(mrt, skb->dev);
1610 1920
1611 if (true_vifi >= 0 && net->ipv6.mroute_do_assert && 1921 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1612 /* pimsm uses asserts, when switching from RPT to SPT, 1922 /* pimsm uses asserts, when switching from RPT to SPT,
1613 so that we cannot check that packet arrived on an oif. 1923 so that we cannot check that packet arrived on an oif.
1614 It is bad, but otherwise we would need to move pretty 1924 It is bad, but otherwise we would need to move pretty
1615 large chunk of pimd to kernel. Ough... --ANK 1925 large chunk of pimd to kernel. Ough... --ANK
1616 */ 1926 */
1617 (net->ipv6.mroute_do_pim || 1927 (mrt->mroute_do_pim ||
1618 cache->mfc_un.res.ttls[true_vifi] < 255) && 1928 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1619 time_after(jiffies, 1929 time_after(jiffies,
1620 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1930 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1621 cache->mfc_un.res.last_assert = jiffies; 1931 cache->mfc_un.res.last_assert = jiffies;
1622 ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF); 1932 ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
1623 } 1933 }
1624 goto dont_forward; 1934 goto dont_forward;
1625 } 1935 }
1626 1936
1627 net->ipv6.vif6_table[vif].pkt_in++; 1937 mrt->vif6_table[vif].pkt_in++;
1628 net->ipv6.vif6_table[vif].bytes_in += skb->len; 1938 mrt->vif6_table[vif].bytes_in += skb->len;
1629 1939
1630 /* 1940 /*
1631 * Forward the frame 1941 * Forward the frame
@@ -1635,13 +1945,13 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1635 if (psend != -1) { 1945 if (psend != -1) {
1636 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1946 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1637 if (skb2) 1947 if (skb2)
1638 ip6mr_forward2(skb2, cache, psend); 1948 ip6mr_forward2(net, mrt, skb2, cache, psend);
1639 } 1949 }
1640 psend = ct; 1950 psend = ct;
1641 } 1951 }
1642 } 1952 }
1643 if (psend != -1) { 1953 if (psend != -1) {
1644 ip6mr_forward2(skb, cache, psend); 1954 ip6mr_forward2(net, mrt, skb, cache, psend);
1645 return 0; 1955 return 0;
1646 } 1956 }
1647 1957
@@ -1659,9 +1969,19 @@ int ip6_mr_input(struct sk_buff *skb)
1659{ 1969{
1660 struct mfc6_cache *cache; 1970 struct mfc6_cache *cache;
1661 struct net *net = dev_net(skb->dev); 1971 struct net *net = dev_net(skb->dev);
1972 struct mr6_table *mrt;
1973 struct flowi fl = {
1974 .iif = skb->dev->ifindex,
1975 .mark = skb->mark,
1976 };
1977 int err;
1978
1979 err = ip6mr_fib_lookup(net, &fl, &mrt);
1980 if (err < 0)
1981 return err;
1662 1982
1663 read_lock(&mrt_lock); 1983 read_lock(&mrt_lock);
1664 cache = ip6mr_cache_find(net, 1984 cache = ip6mr_cache_find(mrt,
1665 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 1985 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1666 1986
1667 /* 1987 /*
@@ -1670,9 +1990,9 @@ int ip6_mr_input(struct sk_buff *skb)
1670 if (cache == NULL) { 1990 if (cache == NULL) {
1671 int vif; 1991 int vif;
1672 1992
1673 vif = ip6mr_find_vif(skb->dev); 1993 vif = ip6mr_find_vif(mrt, skb->dev);
1674 if (vif >= 0) { 1994 if (vif >= 0) {
1675 int err = ip6mr_cache_unresolved(net, vif, skb); 1995 int err = ip6mr_cache_unresolved(mrt, vif, skb);
1676 read_unlock(&mrt_lock); 1996 read_unlock(&mrt_lock);
1677 1997
1678 return err; 1998 return err;
@@ -1682,7 +2002,7 @@ int ip6_mr_input(struct sk_buff *skb)
1682 return -ENODEV; 2002 return -ENODEV;
1683 } 2003 }
1684 2004
1685 ip6_mr_forward(skb, cache); 2005 ip6_mr_forward(net, mrt, skb, cache);
1686 2006
1687 read_unlock(&mrt_lock); 2007 read_unlock(&mrt_lock);
1688 2008
@@ -1690,32 +2010,31 @@ int ip6_mr_input(struct sk_buff *skb)
1690} 2010}
1691 2011
1692 2012
1693static int 2013static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
1694ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) 2014 struct mfc6_cache *c, struct rtmsg *rtm)
1695{ 2015{
1696 int ct; 2016 int ct;
1697 struct rtnexthop *nhp; 2017 struct rtnexthop *nhp;
1698 struct net *net = mfc6_net(c);
1699 u8 *b = skb_tail_pointer(skb); 2018 u8 *b = skb_tail_pointer(skb);
1700 struct rtattr *mp_head; 2019 struct rtattr *mp_head;
1701 2020
1702 /* If cache is unresolved, don't try to parse IIF and OIF */ 2021 /* If cache is unresolved, don't try to parse IIF and OIF */
1703 if (c->mf6c_parent > MAXMIFS) 2022 if (c->mf6c_parent >= MAXMIFS)
1704 return -ENOENT; 2023 return -ENOENT;
1705 2024
1706 if (MIF_EXISTS(net, c->mf6c_parent)) 2025 if (MIF_EXISTS(mrt, c->mf6c_parent))
1707 RTA_PUT(skb, RTA_IIF, 4, &net->ipv6.vif6_table[c->mf6c_parent].dev->ifindex); 2026 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
1708 2027
1709 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0)); 2028 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1710 2029
1711 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { 2030 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1712 if (MIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) { 2031 if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1713 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) 2032 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1714 goto rtattr_failure; 2033 goto rtattr_failure;
1715 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 2034 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1716 nhp->rtnh_flags = 0; 2035 nhp->rtnh_flags = 0;
1717 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 2036 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1718 nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex; 2037 nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
1719 nhp->rtnh_len = sizeof(*nhp); 2038 nhp->rtnh_len = sizeof(*nhp);
1720 } 2039 }
1721 } 2040 }
@@ -1733,11 +2052,16 @@ int ip6mr_get_route(struct net *net,
1733 struct sk_buff *skb, struct rtmsg *rtm, int nowait) 2052 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1734{ 2053{
1735 int err; 2054 int err;
2055 struct mr6_table *mrt;
1736 struct mfc6_cache *cache; 2056 struct mfc6_cache *cache;
1737 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); 2057 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1738 2058
2059 mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2060 if (mrt == NULL)
2061 return -ENOENT;
2062
1739 read_lock(&mrt_lock); 2063 read_lock(&mrt_lock);
1740 cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); 2064 cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1741 2065
1742 if (!cache) { 2066 if (!cache) {
1743 struct sk_buff *skb2; 2067 struct sk_buff *skb2;
@@ -1751,7 +2075,7 @@ int ip6mr_get_route(struct net *net,
1751 } 2075 }
1752 2076
1753 dev = skb->dev; 2077 dev = skb->dev;
1754 if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) { 2078 if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
1755 read_unlock(&mrt_lock); 2079 read_unlock(&mrt_lock);
1756 return -ENODEV; 2080 return -ENODEV;
1757 } 2081 }
@@ -1780,7 +2104,7 @@ int ip6mr_get_route(struct net *net,
1780 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr); 2104 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1781 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr); 2105 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1782 2106
1783 err = ip6mr_cache_unresolved(net, vif, skb2); 2107 err = ip6mr_cache_unresolved(mrt, vif, skb2);
1784 read_unlock(&mrt_lock); 2108 read_unlock(&mrt_lock);
1785 2109
1786 return err; 2110 return err;
@@ -1789,8 +2113,88 @@ int ip6mr_get_route(struct net *net,
1789 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 2113 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1790 cache->mfc_flags |= MFC_NOTIFY; 2114 cache->mfc_flags |= MFC_NOTIFY;
1791 2115
1792 err = ip6mr_fill_mroute(skb, cache, rtm); 2116 err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
1793 read_unlock(&mrt_lock); 2117 read_unlock(&mrt_lock);
1794 return err; 2118 return err;
1795} 2119}
1796 2120
2121static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2122 u32 pid, u32 seq, struct mfc6_cache *c)
2123{
2124 struct nlmsghdr *nlh;
2125 struct rtmsg *rtm;
2126
2127 nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2128 if (nlh == NULL)
2129 return -EMSGSIZE;
2130
2131 rtm = nlmsg_data(nlh);
2132 rtm->rtm_family = RTNL_FAMILY_IPMR;
2133 rtm->rtm_dst_len = 128;
2134 rtm->rtm_src_len = 128;
2135 rtm->rtm_tos = 0;
2136 rtm->rtm_table = mrt->id;
2137 NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2138 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2139 rtm->rtm_protocol = RTPROT_UNSPEC;
2140 rtm->rtm_flags = 0;
2141
2142 NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
2143 NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
2144
2145 if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
2146 goto nla_put_failure;
2147
2148 return nlmsg_end(skb, nlh);
2149
2150nla_put_failure:
2151 nlmsg_cancel(skb, nlh);
2152 return -EMSGSIZE;
2153}
2154
2155static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2156{
2157 struct net *net = sock_net(skb->sk);
2158 struct mr6_table *mrt;
2159 struct mfc6_cache *mfc;
2160 unsigned int t = 0, s_t;
2161 unsigned int h = 0, s_h;
2162 unsigned int e = 0, s_e;
2163
2164 s_t = cb->args[0];
2165 s_h = cb->args[1];
2166 s_e = cb->args[2];
2167
2168 read_lock(&mrt_lock);
2169 ip6mr_for_each_table(mrt, net) {
2170 if (t < s_t)
2171 goto next_table;
2172 if (t > s_t)
2173 s_h = 0;
2174 for (h = s_h; h < MFC6_LINES; h++) {
2175 list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2176 if (e < s_e)
2177 goto next_entry;
2178 if (ip6mr_fill_mroute(mrt, skb,
2179 NETLINK_CB(cb->skb).pid,
2180 cb->nlh->nlmsg_seq,
2181 mfc) < 0)
2182 goto done;
2183next_entry:
2184 e++;
2185 }
2186 e = s_e = 0;
2187 }
2188 s_h = 0;
2189next_table:
2190 t++;
2191 }
2192done:
2193 read_unlock(&mrt_lock);
2194
2195 cb->args[2] = e;
2196 cb->args[1] = h;
2197 cb->args[0] = t;
2198
2199 return skb->len;
2200}
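The dump walks tables (t), hash chains (h) and entries (e), checkpointing all three in cb->args[] so a dump that fills one skb resumes exactly where it left off on the next read. Triggering it from userspace is an ordinary rtnetlink NLM_F_DUMP request; a minimal sketch, where the RTNL_FAMILY_IP6MR value is an assumption taken from this series:

        #include <string.h>
        #include <sys/socket.h>
        #include <linux/netlink.h>
        #include <linux/rtnetlink.h>

        #define RTNL_FAMILY_IP6MR 129   /* assumption: RTNL_FAMILY_IPMR + 1 */

        static int send_ip6mr_dump(int nlfd)    /* nlfd: NETLINK_ROUTE socket */
        {
                struct {
                        struct nlmsghdr nlh;
                        struct rtgenmsg g;
                } req;

                memset(&req, 0, sizeof(req));
                req.nlh.nlmsg_len   = NLMSG_LENGTH(sizeof(req.g));
                req.nlh.nlmsg_type  = RTM_GETROUTE;
                req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
                req.g.rtgen_family  = RTNL_FAMILY_IP6MR; /* selects this dumper */

                return send(nlfd, &req, req.nlh.nlmsg_len, 0);
        }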
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 33f60fca7aa7..a7f66bc8f0b0 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -55,8 +55,6 @@
55 55
56#include <asm/uaccess.h> 56#include <asm/uaccess.h>
57 57
58DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
59
60struct ip6_ra_chain *ip6_ra_chain; 58struct ip6_ra_chain *ip6_ra_chain;
61DEFINE_RWLOCK(ip6_ra_lock); 59DEFINE_RWLOCK(ip6_ra_lock);
62 60
@@ -114,9 +112,9 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
114 } 112 }
115 opt = xchg(&inet6_sk(sk)->opt, opt); 113 opt = xchg(&inet6_sk(sk)->opt, opt);
116 } else { 114 } else {
117 write_lock(&sk->sk_dst_lock); 115 spin_lock(&sk->sk_dst_lock);
118 opt = xchg(&inet6_sk(sk)->opt, opt); 116 opt = xchg(&inet6_sk(sk)->opt, opt);
119 write_unlock(&sk->sk_dst_lock); 117 spin_unlock(&sk->sk_dst_lock);
120 } 118 }
121 sk_dst_reset(sk); 119 sk_dst_reset(sk);
122 120
@@ -337,6 +335,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
337 retv = 0; 335 retv = 0;
338 break; 336 break;
339 337
338 case IPV6_RECVPATHMTU:
339 if (optlen < sizeof(int))
340 goto e_inval;
341 np->rxopt.bits.rxpmtu = valbool;
342 retv = 0;
343 break;
344
340 case IPV6_HOPOPTS: 345 case IPV6_HOPOPTS:
341 case IPV6_RTHDRDSTOPTS: 346 case IPV6_RTHDRDSTOPTS:
342 case IPV6_RTHDR: 347 case IPV6_RTHDR:
@@ -451,7 +456,8 @@ sticky_done:
451 msg.msg_controllen = optlen; 456 msg.msg_controllen = optlen;
452 msg.msg_control = (void*)(opt+1); 457 msg.msg_control = (void*)(opt+1);
453 458
454 retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk); 459 retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk,
460 &junk);
455 if (retv) 461 if (retv)
456 goto done; 462 goto done;
457update: 463update:
@@ -767,6 +773,17 @@ pref_skip_coa:
767 773
768 break; 774 break;
769 } 775 }
776 case IPV6_MINHOPCOUNT:
777 if (optlen < sizeof(int))
778 goto e_inval;
779 if (val < 0 || val > 255)
780 goto e_inval;
781 np->min_hopcount = val;
782 break;
783 case IPV6_DONTFRAG:
784 np->dontfrag = valbool;
785 retv = 0;
786 break;
770 } 787 }
771 788
772 release_sock(sk); 789 release_sock(sk);
@@ -971,14 +988,13 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
971 case IPV6_MTU: 988 case IPV6_MTU:
972 { 989 {
973 struct dst_entry *dst; 990 struct dst_entry *dst;
991
974 val = 0; 992 val = 0;
975 lock_sock(sk); 993 rcu_read_lock();
976 dst = sk_dst_get(sk); 994 dst = __sk_dst_get(sk);
977 if (dst) { 995 if (dst)
978 val = dst_mtu(dst); 996 val = dst_mtu(dst);
979 dst_release(dst); 997 rcu_read_unlock();
980 }
981 release_sock(sk);
982 if (!val) 998 if (!val)
983 return -ENOTCONN; 999 return -ENOTCONN;
984 break; 1000 break;
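The IPV6_MTU getter above trades lock_sock()/sk_dst_get() for an RCU read section: __sk_dst_get() returns the cached route without taking a reference, which is safe as long as the pointer is only dereferenced between rcu_read_lock() and rcu_read_unlock(). The pattern in isolation (kernel-side, not standalone):

        rcu_read_lock();
        dst = __sk_dst_get(sk);         /* no refcount, unlike sk_dst_get() */
        if (dst)
                val = dst_mtu(dst);
        rcu_read_unlock();              /* dst must not be used past here */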
@@ -1056,6 +1072,38 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1056 val = np->rxopt.bits.rxflow; 1072 val = np->rxopt.bits.rxflow;
1057 break; 1073 break;
1058 1074
1075 case IPV6_RECVPATHMTU:
1076 val = np->rxopt.bits.rxpmtu;
1077 break;
1078
1079 case IPV6_PATHMTU:
1080 {
1081 struct dst_entry *dst;
1082 struct ip6_mtuinfo mtuinfo;
1083
1084 if (len < sizeof(mtuinfo))
1085 return -EINVAL;
1086
1087 len = sizeof(mtuinfo);
1088 memset(&mtuinfo, 0, sizeof(mtuinfo));
1089
1090 rcu_read_lock();
1091 dst = __sk_dst_get(sk);
1092 if (dst)
1093 mtuinfo.ip6m_mtu = dst_mtu(dst);
1094 rcu_read_unlock();
1095 if (!mtuinfo.ip6m_mtu)
1096 return -ENOTCONN;
1097
1098 if (put_user(len, optlen))
1099 return -EFAULT;
1100 if (copy_to_user(optval, &mtuinfo, len))
1101 return -EFAULT;
1102
1103 return 0;
1104 break;
1105 }
1106
1059 case IPV6_UNICAST_HOPS: 1107 case IPV6_UNICAST_HOPS:
1060 case IPV6_MULTICAST_HOPS: 1108 case IPV6_MULTICAST_HOPS:
1061 { 1109 {
@@ -1066,12 +1114,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1066 else 1114 else
1067 val = np->mcast_hops; 1115 val = np->mcast_hops;
1068 1116
1069 dst = sk_dst_get(sk); 1117 if (val < 0) {
1070 if (dst) { 1118 rcu_read_lock();
1071 if (val < 0) 1119 dst = __sk_dst_get(sk);
1120 if (dst)
1072 val = ip6_dst_hoplimit(dst); 1121 val = ip6_dst_hoplimit(dst);
1073 dst_release(dst); 1122 rcu_read_unlock();
1074 } 1123 }
1124
1075 if (val < 0) 1125 if (val < 0)
1076 val = sock_net(sk)->ipv6.devconf_all->hop_limit; 1126 val = sock_net(sk)->ipv6.devconf_all->hop_limit;
1077 break; 1127 break;
@@ -1115,6 +1165,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1115 val |= IPV6_PREFER_SRC_HOME; 1165 val |= IPV6_PREFER_SRC_HOME;
1116 break; 1166 break;
1117 1167
1168 case IPV6_MINHOPCOUNT:
1169 val = np->min_hopcount;
1170 break;
1171
1172 case IPV6_DONTFRAG:
1173 val = np->dontfrag;
1174 break;
1175
1118 default: 1176 default:
1119 return -ENOPROTOOPT; 1177 return -ENOPROTOOPT;
1120 } 1178 }
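Together the new options give a userspace datagram application RFC 3542-style path MTU handling: IPV6_DONTFRAG refuses local fragmentation, IPV6_RECVPATHMTU asks for MTU updates as ancillary data, and IPV6_PATHMTU reads the MTU cached on a connected socket. A hedged sketch using the glibc definitions of these constants and struct ip6_mtuinfo:

        #include <stdio.h>
        #include <sys/socket.h>
        #include <netinet/in.h>

        static void show_pmtu(int s)    /* s: connected AF_INET6 UDP socket */
        {
                struct ip6_mtuinfo mi;
                socklen_t len = sizeof(mi);
                int on = 1;

                setsockopt(s, IPPROTO_IPV6, IPV6_DONTFRAG, &on, sizeof(on));
                setsockopt(s, IPPROTO_IPV6, IPV6_RECVPATHMTU, &on, sizeof(on));
                if (getsockopt(s, IPPROTO_IPV6, IPV6_PATHMTU, &mi, &len) == 0)
                        printf("path mtu: %u\n", mi.ip6m_mtu);
                else
                        perror("IPV6_PATHMTU"); /* ENOTCONN without a cached route */
        }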
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index c483ab9fd67b..d1444b95ad7e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -44,6 +44,7 @@
44#include <linux/proc_fs.h> 44#include <linux/proc_fs.h>
45#include <linux/seq_file.h> 45#include <linux/seq_file.h>
46#include <linux/slab.h> 46#include <linux/slab.h>
47#include <net/mld.h>
47 48
48#include <linux/netfilter.h> 49#include <linux/netfilter.h>
49#include <linux/netfilter_ipv6.h> 50#include <linux/netfilter_ipv6.h>
@@ -71,54 +72,11 @@
71#define MDBG(x) 72#define MDBG(x)
72#endif 73#endif
73 74
 74/* 75/* Ensure that struct in6_addr is aligned on a 32-bit word. */
75 * These header formats should be in a separate include file, but icmpv6.h 76static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
76 * doesn't have in6_addr defined in all cases, there is no __u128, and no 77 BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4),
77 * other files reference these. 78 BUILD_BUG_ON_NULL(offsetof(struct mld2_report, mld2r_grec) % 4),
78 * 79 BUILD_BUG_ON_NULL(offsetof(struct mld2_grec, grec_mca) % 4)
79 * +-DLS 4/14/03
80 */
81
82/* Multicast Listener Discovery version 2 headers */
83
84struct mld2_grec {
85 __u8 grec_type;
86 __u8 grec_auxwords;
87 __be16 grec_nsrcs;
88 struct in6_addr grec_mca;
89 struct in6_addr grec_src[0];
90};
91
92struct mld2_report {
93 __u8 type;
94 __u8 resv1;
95 __sum16 csum;
96 __be16 resv2;
97 __be16 ngrec;
98 struct mld2_grec grec[0];
99};
100
101struct mld2_query {
102 __u8 type;
103 __u8 code;
104 __sum16 csum;
105 __be16 mrc;
106 __be16 resv1;
107 struct in6_addr mca;
108#if defined(__LITTLE_ENDIAN_BITFIELD)
109 __u8 qrv:3,
110 suppress:1,
111 resv2:4;
112#elif defined(__BIG_ENDIAN_BITFIELD)
113 __u8 resv2:4,
114 suppress:1,
115 qrv:3;
116#else
117#error "Please fix <asm/byteorder.h>"
118#endif
119 __u8 qqic;
120 __be16 nsrcs;
121 struct in6_addr srcs[0];
122}; 80};
123 81
124static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; 82static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
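
The __mld2_query_bugs[] array above is a compile-time assertion: BUILD_BUG_ON_NULL() wraps its argument in a struct whose unnamed bitfield gets a negative width when the expression is nonzero, so a misaligned field breaks the build rather than faulting at run time. A self-contained sketch of the same trick, using a hypothetical macro name to avoid clashing with the kernel's:

#include <stddef.h>

/* Evaluates to a harmless void pointer when e == 0; a nonzero e gives
 * the anonymous struct a negative-width bitfield and compilation fails.
 */
#define ASSERT_ZERO_P(e) ((void *)sizeof(struct { int:-!!(e); }))

struct rec { unsigned char type, len; unsigned short n; int payload; };

static void *alignment_checks[] __attribute__((__unused__)) = {
	ASSERT_ZERO_P(offsetof(struct rec, payload) % 4),
};
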
@@ -157,14 +115,6 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
157 ((idev)->mc_v1_seen && \ 115 ((idev)->mc_v1_seen && \
158 time_before(jiffies, (idev)->mc_v1_seen))) 116 time_before(jiffies, (idev)->mc_v1_seen)))
159 117
160#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value))
161#define MLDV2_EXP(thresh, nbmant, nbexp, value) \
162 ((value) < (thresh) ? (value) : \
163 ((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \
164 (MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp))))
165
166#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
167
168#define IPV6_MLD_MAX_MSF 64 118#define IPV6_MLD_MAX_MSF 64
169 119
170int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; 120int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
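
The removed MLDV2_MASK/MLDV2_EXP/MLDV2_MRC macros move to <net/mld.h> along with the message structures; the code below still uses MLDV2_MRC(). For reference, what they compute is the exponential Maximum Response Code decoding of RFC 3810, section 5.1.3; a plain-C equivalent of MLDV2_MRC(value):

/* Values below 0x8000 are literal milliseconds; above that, bits 12-14
 * hold an exponent and bits 0-11 a mantissa: (mant | 0x1000) << (exp + 3).
 */
static unsigned int mldv2_mrc_ms(unsigned int mrc)
{
	if (mrc < 0x8000)
		return mrc;
	return ((mrc & 0x0fff) | 0x1000) << (((mrc >> 12) & 0x7) + 3);
}
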
@@ -202,18 +152,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
202 mc_lst->next = NULL; 152 mc_lst->next = NULL;
203 ipv6_addr_copy(&mc_lst->addr, addr); 153 ipv6_addr_copy(&mc_lst->addr, addr);
204 154
155 rcu_read_lock();
205 if (ifindex == 0) { 156 if (ifindex == 0) {
206 struct rt6_info *rt; 157 struct rt6_info *rt;
207 rt = rt6_lookup(net, addr, NULL, 0, 0); 158 rt = rt6_lookup(net, addr, NULL, 0, 0);
208 if (rt) { 159 if (rt) {
209 dev = rt->rt6i_dev; 160 dev = rt->rt6i_dev;
210 dev_hold(dev); 161 dst_release(&rt->dst);
211 dst_release(&rt->u.dst);
212 } 162 }
213 } else 163 } else
214 dev = dev_get_by_index(net, ifindex); 164 dev = dev_get_by_index_rcu(net, ifindex);
215 165
216 if (dev == NULL) { 166 if (dev == NULL) {
167 rcu_read_unlock();
217 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); 168 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
218 return -ENODEV; 169 return -ENODEV;
219 } 170 }
@@ -230,8 +181,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
230 err = ipv6_dev_mc_inc(dev, addr); 181 err = ipv6_dev_mc_inc(dev, addr);
231 182
232 if (err) { 183 if (err) {
184 rcu_read_unlock();
233 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); 185 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
234 dev_put(dev);
235 return err; 186 return err;
236 } 187 }
237 188
@@ -240,7 +191,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
240 np->ipv6_mc_list = mc_lst; 191 np->ipv6_mc_list = mc_lst;
241 write_unlock_bh(&ipv6_sk_mc_lock); 192 write_unlock_bh(&ipv6_sk_mc_lock);
242 193
243 dev_put(dev); 194 rcu_read_unlock();
244 195
245 return 0; 196 return 0;
246} 197}
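
The conversion in ipv6_sock_mc_join() (and in the functions below) follows the standard RCU device-lookup pattern: dev_get_by_index_rcu() inside an RCU read-side section replaces the reference-counted dev_get_by_index()/dev_put() pair, so the device pointer is valid only until rcu_read_unlock() and no refcount traffic is needed on the fast path. A minimal sketch, with do_stuff() as a hypothetical non-sleeping callee:

static int with_dev_rcu(struct net *net, int ifindex)
{
	struct net_device *dev;
	int err;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev)
		err = -ENODEV;
	else
		err = do_stuff(dev);	/* must not sleep or stash dev */
	rcu_read_unlock();		/* dev may go away after this */
	return err;
}
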
@@ -263,18 +214,17 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
263 *lnk = mc_lst->next; 214 *lnk = mc_lst->next;
264 write_unlock_bh(&ipv6_sk_mc_lock); 215 write_unlock_bh(&ipv6_sk_mc_lock);
265 216
266 dev = dev_get_by_index(net, mc_lst->ifindex); 217 rcu_read_lock();
218 dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
267 if (dev != NULL) { 219 if (dev != NULL) {
268 struct inet6_dev *idev = in6_dev_get(dev); 220 struct inet6_dev *idev = __in6_dev_get(dev);
269 221
270 (void) ip6_mc_leave_src(sk, mc_lst, idev); 222 (void) ip6_mc_leave_src(sk, mc_lst, idev);
271 if (idev) { 223 if (idev)
272 __ipv6_dev_mc_dec(idev, &mc_lst->addr); 224 __ipv6_dev_mc_dec(idev, &mc_lst->addr);
273 in6_dev_put(idev);
274 }
275 dev_put(dev);
276 } else 225 } else
277 (void) ip6_mc_leave_src(sk, mc_lst, NULL); 226 (void) ip6_mc_leave_src(sk, mc_lst, NULL);
227 rcu_read_unlock();
278 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); 228 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
279 return 0; 229 return 0;
280 } 230 }
@@ -284,43 +234,36 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
284 return -EADDRNOTAVAIL; 234 return -EADDRNOTAVAIL;
285} 235}
286 236
287static struct inet6_dev *ip6_mc_find_dev(struct net *net, 237/* called with rcu_read_lock() */
288 struct in6_addr *group, 238static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
289 int ifindex) 239 struct in6_addr *group,
240 int ifindex)
290{ 241{
291 struct net_device *dev = NULL; 242 struct net_device *dev = NULL;
292 struct inet6_dev *idev = NULL; 243 struct inet6_dev *idev = NULL;
293 244
294 if (ifindex == 0) { 245 if (ifindex == 0) {
295 struct rt6_info *rt; 246 struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
296 247
297 rt = rt6_lookup(net, group, NULL, 0, 0);
298 if (rt) { 248 if (rt) {
299 dev = rt->rt6i_dev; 249 dev = rt->rt6i_dev;
300 dev_hold(dev); 250 dev_hold(dev);
301 dst_release(&rt->u.dst); 251 dst_release(&rt->dst);
302 } 252 }
303 } else 253 } else
304 dev = dev_get_by_index(net, ifindex); 254 dev = dev_get_by_index_rcu(net, ifindex);
305 255
306 if (!dev) 256 if (!dev)
307 goto nodev; 257 return NULL;
308 idev = in6_dev_get(dev); 258 idev = __in6_dev_get(dev);
309 if (!idev) 259 if (!idev)
 310 goto release; 260 return NULL;
311 read_lock_bh(&idev->lock); 261 read_lock_bh(&idev->lock);
312 if (idev->dead) 262 if (idev->dead) {
313 goto unlock_release; 263 read_unlock_bh(&idev->lock);
314 264 return NULL;
265 }
315 return idev; 266 return idev;
316
317unlock_release:
318 read_unlock_bh(&idev->lock);
319 in6_dev_put(idev);
320release:
321 dev_put(dev);
322nodev:
323 return NULL;
324} 267}
325 268
326void ipv6_sock_mc_close(struct sock *sk) 269void ipv6_sock_mc_close(struct sock *sk)
@@ -336,19 +279,17 @@ void ipv6_sock_mc_close(struct sock *sk)
336 np->ipv6_mc_list = mc_lst->next; 279 np->ipv6_mc_list = mc_lst->next;
337 write_unlock_bh(&ipv6_sk_mc_lock); 280 write_unlock_bh(&ipv6_sk_mc_lock);
338 281
339 dev = dev_get_by_index(net, mc_lst->ifindex); 282 rcu_read_lock();
283 dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
340 if (dev) { 284 if (dev) {
341 struct inet6_dev *idev = in6_dev_get(dev); 285 struct inet6_dev *idev = __in6_dev_get(dev);
342 286
343 (void) ip6_mc_leave_src(sk, mc_lst, idev); 287 (void) ip6_mc_leave_src(sk, mc_lst, idev);
344 if (idev) { 288 if (idev)
345 __ipv6_dev_mc_dec(idev, &mc_lst->addr); 289 __ipv6_dev_mc_dec(idev, &mc_lst->addr);
346 in6_dev_put(idev);
347 }
348 dev_put(dev);
349 } else 290 } else
350 (void) ip6_mc_leave_src(sk, mc_lst, NULL); 291 (void) ip6_mc_leave_src(sk, mc_lst, NULL);
351 292 rcu_read_unlock();
352 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); 293 sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
353 294
354 write_lock_bh(&ipv6_sk_mc_lock); 295 write_lock_bh(&ipv6_sk_mc_lock);
@@ -377,14 +318,17 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
377 if (!ipv6_addr_is_multicast(group)) 318 if (!ipv6_addr_is_multicast(group))
378 return -EINVAL; 319 return -EINVAL;
379 320
380 idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); 321 rcu_read_lock();
381 if (!idev) 322 idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface);
323 if (!idev) {
324 rcu_read_unlock();
382 return -ENODEV; 325 return -ENODEV;
326 }
383 dev = idev->dev; 327 dev = idev->dev;
384 328
385 err = -EADDRNOTAVAIL; 329 err = -EADDRNOTAVAIL;
386 330
387 read_lock_bh(&ipv6_sk_mc_lock); 331 read_lock(&ipv6_sk_mc_lock);
388 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { 332 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
389 if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) 333 if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
390 continue; 334 continue;
@@ -408,7 +352,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
408 pmc->sfmode = omode; 352 pmc->sfmode = omode;
409 } 353 }
410 354
411 write_lock_bh(&pmc->sflock); 355 write_lock(&pmc->sflock);
412 pmclocked = 1; 356 pmclocked = 1;
413 357
414 psl = pmc->sflist; 358 psl = pmc->sflist;
@@ -483,11 +427,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
483 ip6_mc_add_src(idev, group, omode, 1, source, 1); 427 ip6_mc_add_src(idev, group, omode, 1, source, 1);
484done: 428done:
485 if (pmclocked) 429 if (pmclocked)
486 write_unlock_bh(&pmc->sflock); 430 write_unlock(&pmc->sflock);
487 read_unlock_bh(&ipv6_sk_mc_lock); 431 read_unlock(&ipv6_sk_mc_lock);
488 read_unlock_bh(&idev->lock); 432 read_unlock_bh(&idev->lock);
489 in6_dev_put(idev); 433 rcu_read_unlock();
490 dev_put(dev);
491 if (leavegroup) 434 if (leavegroup)
492 return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); 435 return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
493 return err; 436 return err;
@@ -513,14 +456,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
513 gsf->gf_fmode != MCAST_EXCLUDE) 456 gsf->gf_fmode != MCAST_EXCLUDE)
514 return -EINVAL; 457 return -EINVAL;
515 458
516 idev = ip6_mc_find_dev(net, group, gsf->gf_interface); 459 rcu_read_lock();
460 idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
517 461
518 if (!idev) 462 if (!idev) {
463 rcu_read_unlock();
519 return -ENODEV; 464 return -ENODEV;
465 }
520 dev = idev->dev; 466 dev = idev->dev;
521 467
522 err = 0; 468 err = 0;
523 read_lock_bh(&ipv6_sk_mc_lock); 469 read_lock(&ipv6_sk_mc_lock);
524 470
525 if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { 471 if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
526 leavegroup = 1; 472 leavegroup = 1;
@@ -562,7 +508,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
562 (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0); 508 (void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
563 } 509 }
564 510
565 write_lock_bh(&pmc->sflock); 511 write_lock(&pmc->sflock);
566 psl = pmc->sflist; 512 psl = pmc->sflist;
567 if (psl) { 513 if (psl) {
568 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 514 (void) ip6_mc_del_src(idev, group, pmc->sfmode,
@@ -572,13 +518,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
572 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); 518 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
573 pmc->sflist = newpsl; 519 pmc->sflist = newpsl;
574 pmc->sfmode = gsf->gf_fmode; 520 pmc->sfmode = gsf->gf_fmode;
575 write_unlock_bh(&pmc->sflock); 521 write_unlock(&pmc->sflock);
576 err = 0; 522 err = 0;
577done: 523done:
578 read_unlock_bh(&ipv6_sk_mc_lock); 524 read_unlock(&ipv6_sk_mc_lock);
579 read_unlock_bh(&idev->lock); 525 read_unlock_bh(&idev->lock);
580 in6_dev_put(idev); 526 rcu_read_unlock();
581 dev_put(dev);
582 if (leavegroup) 527 if (leavegroup)
583 err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group); 528 err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group);
584 return err; 529 return err;
@@ -601,11 +546,13 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
601 if (!ipv6_addr_is_multicast(group)) 546 if (!ipv6_addr_is_multicast(group))
602 return -EINVAL; 547 return -EINVAL;
603 548
604 idev = ip6_mc_find_dev(net, group, gsf->gf_interface); 549 rcu_read_lock();
550 idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
605 551
606 if (!idev) 552 if (!idev) {
553 rcu_read_unlock();
607 return -ENODEV; 554 return -ENODEV;
608 555 }
609 dev = idev->dev; 556 dev = idev->dev;
610 557
611 err = -EADDRNOTAVAIL; 558 err = -EADDRNOTAVAIL;
@@ -627,8 +574,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
627 psl = pmc->sflist; 574 psl = pmc->sflist;
628 count = psl ? psl->sl_count : 0; 575 count = psl ? psl->sl_count : 0;
629 read_unlock_bh(&idev->lock); 576 read_unlock_bh(&idev->lock);
630 in6_dev_put(idev); 577 rcu_read_unlock();
631 dev_put(dev);
632 578
633 copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc; 579 copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
634 gsf->gf_numsrc = count; 580 gsf->gf_numsrc = count;
@@ -654,8 +600,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
654 return 0; 600 return 0;
655done: 601done:
656 read_unlock_bh(&idev->lock); 602 read_unlock_bh(&idev->lock);
657 in6_dev_put(idev); 603 rcu_read_unlock();
658 dev_put(dev);
659 return err; 604 return err;
660} 605}
661 606
@@ -715,7 +660,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
715 if (!(mc->mca_flags&MAF_LOADED)) { 660 if (!(mc->mca_flags&MAF_LOADED)) {
716 mc->mca_flags |= MAF_LOADED; 661 mc->mca_flags |= MAF_LOADED;
717 if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) 662 if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
718 dev_mc_add(dev, buf, dev->addr_len, 0); 663 dev_mc_add(dev, buf);
719 } 664 }
720 spin_unlock_bh(&mc->mca_lock); 665 spin_unlock_bh(&mc->mca_lock);
721 666
@@ -741,7 +686,7 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
741 if (mc->mca_flags&MAF_LOADED) { 686 if (mc->mca_flags&MAF_LOADED) {
742 mc->mca_flags &= ~MAF_LOADED; 687 mc->mca_flags &= ~MAF_LOADED;
743 if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0) 688 if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
744 dev_mc_delete(dev, buf, dev->addr_len, 0); 689 dev_mc_del(dev, buf);
745 } 690 }
746 691
747 if (mc->mca_flags & MAF_NOREPORT) 692 if (mc->mca_flags & MAF_NOREPORT)
@@ -872,6 +817,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
872 struct ifmcaddr6 *mc; 817 struct ifmcaddr6 *mc;
873 struct inet6_dev *idev; 818 struct inet6_dev *idev;
874 819
820 /* we need to take a reference on idev */
875 idev = in6_dev_get(dev); 821 idev = in6_dev_get(dev);
876 822
877 if (idev == NULL) 823 if (idev == NULL)
@@ -910,7 +856,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
910 setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); 856 setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
911 857
912 ipv6_addr_copy(&mc->mca_addr, addr); 858 ipv6_addr_copy(&mc->mca_addr, addr);
913 mc->idev = idev; 859 mc->idev = idev; /* (reference taken) */
914 mc->mca_users = 1; 860 mc->mca_users = 1;
915 /* mca_stamp should be updated upon changes */ 861 /* mca_stamp should be updated upon changes */
916 mc->mca_cstamp = mc->mca_tstamp = jiffies; 862 mc->mca_cstamp = mc->mca_tstamp = jiffies;
@@ -965,16 +911,18 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
965 911
966int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr) 912int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
967{ 913{
968 struct inet6_dev *idev = in6_dev_get(dev); 914 struct inet6_dev *idev;
969 int err; 915 int err;
970 916
971 if (!idev) 917 rcu_read_lock();
972 return -ENODEV;
973
974 err = __ipv6_dev_mc_dec(idev, addr);
975 918
976 in6_dev_put(idev); 919 idev = __in6_dev_get(dev);
920 if (!idev)
921 err = -ENODEV;
922 else
923 err = __ipv6_dev_mc_dec(idev, addr);
977 924
925 rcu_read_unlock();
978 return err; 926 return err;
979} 927}
980 928
@@ -1015,7 +963,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
1015 struct ifmcaddr6 *mc; 963 struct ifmcaddr6 *mc;
1016 int rv = 0; 964 int rv = 0;
1017 965
1018 idev = in6_dev_get(dev); 966 rcu_read_lock();
967 idev = __in6_dev_get(dev);
1019 if (idev) { 968 if (idev) {
1020 read_lock_bh(&idev->lock); 969 read_lock_bh(&idev->lock);
1021 for (mc = idev->mc_list; mc; mc=mc->next) { 970 for (mc = idev->mc_list; mc; mc=mc->next) {
@@ -1042,8 +991,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
1042 rv = 1; /* don't filter unspecified source */ 991 rv = 1; /* don't filter unspecified source */
1043 } 992 }
1044 read_unlock_bh(&idev->lock); 993 read_unlock_bh(&idev->lock);
1045 in6_dev_put(idev);
1046 } 994 }
995 rcu_read_unlock();
1047 return rv; 996 return rv;
1048} 997}
1049 998
@@ -1154,6 +1103,7 @@ static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
1154 return 1; 1103 return 1;
1155} 1104}
1156 1105
1106/* called with rcu_read_lock() */
1157int igmp6_event_query(struct sk_buff *skb) 1107int igmp6_event_query(struct sk_buff *skb)
1158{ 1108{
1159 struct mld2_query *mlh2 = NULL; 1109 struct mld2_query *mlh2 = NULL;
@@ -1161,7 +1111,7 @@ int igmp6_event_query(struct sk_buff *skb)
1161 struct in6_addr *group; 1111 struct in6_addr *group;
1162 unsigned long max_delay; 1112 unsigned long max_delay;
1163 struct inet6_dev *idev; 1113 struct inet6_dev *idev;
1164 struct icmp6hdr *hdr; 1114 struct mld_msg *mld;
1165 int group_type; 1115 int group_type;
1166 int mark = 0; 1116 int mark = 0;
1167 int len; 1117 int len;
@@ -1177,27 +1127,25 @@ int igmp6_event_query(struct sk_buff *skb)
1177 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) 1127 if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
1178 return -EINVAL; 1128 return -EINVAL;
1179 1129
1180 idev = in6_dev_get(skb->dev); 1130 idev = __in6_dev_get(skb->dev);
1181 1131
1182 if (idev == NULL) 1132 if (idev == NULL)
1183 return 0; 1133 return 0;
1184 1134
1185 hdr = icmp6_hdr(skb); 1135 mld = (struct mld_msg *)icmp6_hdr(skb);
1186 group = (struct in6_addr *) (hdr + 1); 1136 group = &mld->mld_mca;
1187 group_type = ipv6_addr_type(group); 1137 group_type = ipv6_addr_type(group);
1188 1138
1189 if (group_type != IPV6_ADDR_ANY && 1139 if (group_type != IPV6_ADDR_ANY &&
1190 !(group_type&IPV6_ADDR_MULTICAST)) { 1140 !(group_type&IPV6_ADDR_MULTICAST))
1191 in6_dev_put(idev);
1192 return -EINVAL; 1141 return -EINVAL;
1193 }
1194 1142
1195 if (len == 24) { 1143 if (len == 24) {
1196 int switchback; 1144 int switchback;
1197 /* MLDv1 router present */ 1145 /* MLDv1 router present */
1198 1146
1199 /* Translate milliseconds to jiffies */ 1147 /* Translate milliseconds to jiffies */
1200 max_delay = (ntohs(hdr->icmp6_maxdelay)*HZ)/1000; 1148 max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000;
1201 1149
1202 switchback = (idev->mc_qrv + 1) * max_delay; 1150 switchback = (idev->mc_qrv + 1) * max_delay;
1203 idev->mc_v1_seen = jiffies + switchback; 1151 idev->mc_v1_seen = jiffies + switchback;
@@ -1211,40 +1159,34 @@ int igmp6_event_query(struct sk_buff *skb)
1211 } else if (len >= 28) { 1159 } else if (len >= 28) {
1212 int srcs_offset = sizeof(struct mld2_query) - 1160 int srcs_offset = sizeof(struct mld2_query) -
1213 sizeof(struct icmp6hdr); 1161 sizeof(struct icmp6hdr);
1214 if (!pskb_may_pull(skb, srcs_offset)) { 1162 if (!pskb_may_pull(skb, srcs_offset))
1215 in6_dev_put(idev);
1216 return -EINVAL; 1163 return -EINVAL;
1217 } 1164
1218 mlh2 = (struct mld2_query *)skb_transport_header(skb); 1165 mlh2 = (struct mld2_query *)skb_transport_header(skb);
1219 max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000; 1166 max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000;
1220 if (!max_delay) 1167 if (!max_delay)
1221 max_delay = 1; 1168 max_delay = 1;
1222 idev->mc_maxdelay = max_delay; 1169 idev->mc_maxdelay = max_delay;
1223 if (mlh2->qrv) 1170 if (mlh2->mld2q_qrv)
1224 idev->mc_qrv = mlh2->qrv; 1171 idev->mc_qrv = mlh2->mld2q_qrv;
1225 if (group_type == IPV6_ADDR_ANY) { /* general query */ 1172 if (group_type == IPV6_ADDR_ANY) { /* general query */
1226 if (mlh2->nsrcs) { 1173 if (mlh2->mld2q_nsrcs)
1227 in6_dev_put(idev);
1228 return -EINVAL; /* no sources allowed */ 1174 return -EINVAL; /* no sources allowed */
1229 } 1175
1230 mld_gq_start_timer(idev); 1176 mld_gq_start_timer(idev);
1231 in6_dev_put(idev);
1232 return 0; 1177 return 0;
1233 } 1178 }
1234 /* mark sources to include, if group & source-specific */ 1179 /* mark sources to include, if group & source-specific */
1235 if (mlh2->nsrcs != 0) { 1180 if (mlh2->mld2q_nsrcs != 0) {
1236 if (!pskb_may_pull(skb, srcs_offset + 1181 if (!pskb_may_pull(skb, srcs_offset +
1237 ntohs(mlh2->nsrcs) * sizeof(struct in6_addr))) { 1182 ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr)))
1238 in6_dev_put(idev);
1239 return -EINVAL; 1183 return -EINVAL;
1240 } 1184
1241 mlh2 = (struct mld2_query *)skb_transport_header(skb); 1185 mlh2 = (struct mld2_query *)skb_transport_header(skb);
1242 mark = 1; 1186 mark = 1;
1243 } 1187 }
1244 } else { 1188 } else
1245 in6_dev_put(idev);
1246 return -EINVAL; 1189 return -EINVAL;
1247 }
1248 1190
1249 read_lock_bh(&idev->lock); 1191 read_lock_bh(&idev->lock);
1250 if (group_type == IPV6_ADDR_ANY) { 1192 if (group_type == IPV6_ADDR_ANY) {
@@ -1270,25 +1212,23 @@ int igmp6_event_query(struct sk_buff *skb)
1270 ma->mca_flags &= ~MAF_GSQUERY; 1212 ma->mca_flags &= ~MAF_GSQUERY;
1271 } 1213 }
1272 if (!(ma->mca_flags & MAF_GSQUERY) || 1214 if (!(ma->mca_flags & MAF_GSQUERY) ||
1273 mld_marksources(ma, ntohs(mlh2->nsrcs), mlh2->srcs)) 1215 mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs))
1274 igmp6_group_queried(ma, max_delay); 1216 igmp6_group_queried(ma, max_delay);
1275 spin_unlock_bh(&ma->mca_lock); 1217 spin_unlock_bh(&ma->mca_lock);
1276 break; 1218 break;
1277 } 1219 }
1278 } 1220 }
1279 read_unlock_bh(&idev->lock); 1221 read_unlock_bh(&idev->lock);
1280 in6_dev_put(idev);
1281 1222
1282 return 0; 1223 return 0;
1283} 1224}
1284 1225
1285 1226/* called with rcu_read_lock() */
1286int igmp6_event_report(struct sk_buff *skb) 1227int igmp6_event_report(struct sk_buff *skb)
1287{ 1228{
1288 struct ifmcaddr6 *ma; 1229 struct ifmcaddr6 *ma;
1289 struct in6_addr *addrp;
1290 struct inet6_dev *idev; 1230 struct inet6_dev *idev;
1291 struct icmp6hdr *hdr; 1231 struct mld_msg *mld;
1292 int addr_type; 1232 int addr_type;
1293 1233
1294 /* Our own report looped back. Ignore it. */ 1234 /* Our own report looped back. Ignore it. */
@@ -1300,10 +1240,10 @@ int igmp6_event_report(struct sk_buff *skb)
1300 skb->pkt_type != PACKET_BROADCAST) 1240 skb->pkt_type != PACKET_BROADCAST)
1301 return 0; 1241 return 0;
1302 1242
1303 if (!pskb_may_pull(skb, sizeof(struct in6_addr))) 1243 if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr)))
1304 return -EINVAL; 1244 return -EINVAL;
1305 1245
1306 hdr = icmp6_hdr(skb); 1246 mld = (struct mld_msg *)icmp6_hdr(skb);
1307 1247
1308 /* Drop reports with not link local source */ 1248 /* Drop reports with not link local source */
1309 addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr); 1249 addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
@@ -1311,9 +1251,7 @@ int igmp6_event_report(struct sk_buff *skb)
1311 !(addr_type&IPV6_ADDR_LINKLOCAL)) 1251 !(addr_type&IPV6_ADDR_LINKLOCAL))
1312 return -EINVAL; 1252 return -EINVAL;
1313 1253
1314 addrp = (struct in6_addr *) (hdr + 1); 1254 idev = __in6_dev_get(skb->dev);
1315
1316 idev = in6_dev_get(skb->dev);
1317 if (idev == NULL) 1255 if (idev == NULL)
1318 return -ENODEV; 1256 return -ENODEV;
1319 1257
@@ -1323,7 +1261,7 @@ int igmp6_event_report(struct sk_buff *skb)
1323 1261
1324 read_lock_bh(&idev->lock); 1262 read_lock_bh(&idev->lock);
1325 for (ma = idev->mc_list; ma; ma=ma->next) { 1263 for (ma = idev->mc_list; ma; ma=ma->next) {
1326 if (ipv6_addr_equal(&ma->mca_addr, addrp)) { 1264 if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
1327 spin_lock(&ma->mca_lock); 1265 spin_lock(&ma->mca_lock);
1328 if (del_timer(&ma->mca_timer)) 1266 if (del_timer(&ma->mca_timer))
1329 atomic_dec(&ma->mca_refcnt); 1267 atomic_dec(&ma->mca_refcnt);
@@ -1333,7 +1271,6 @@ int igmp6_event_report(struct sk_buff *skb)
1333 } 1271 }
1334 } 1272 }
1335 read_unlock_bh(&idev->lock); 1273 read_unlock_bh(&idev->lock);
1336 in6_dev_put(idev);
1337 return 0; 1274 return 0;
1338} 1275}
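
The struct mld_msg casts above work because an MLDv1 message is exactly an ICMPv6 header followed by the multicast address, letting a single cast replace the old "(struct in6_addr *)(hdr + 1)" arithmetic. A sketch of the layout the code assumes (the real definition lives in <net/mld.h>):

struct mld_msg {			/* MLDv1 query/report/done */
	struct icmp6hdr	mld_hdr;	/* mld_type, mld_maxdelay, ... */
	struct in6_addr	mld_mca;	/* multicast address under report */
};

This is also why the pskb_may_pull() above asks for sizeof(*mld) - sizeof(struct icmp6hdr) bytes: the ICMPv6 header itself has already been pulled, so only the address remains to be validated.
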
1339 1276
@@ -1409,7 +1346,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1409 IPV6_TLV_PADN, 0 }; 1346 IPV6_TLV_PADN, 0 };
1410 1347
1411 /* we assume size > sizeof(ra) here */ 1348 /* we assume size > sizeof(ra) here */
1412 skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err); 1349 size += LL_ALLOCATED_SPACE(dev);
1350 /* limit our allocations to order-0 page */
1351 size = min_t(int, size, SKB_MAX_ORDER(0, 0));
1352 skb = sock_alloc_send_skb(sk, size, 1, &err);
1413 1353
1414 if (!skb) 1354 if (!skb)
1415 return NULL; 1355 return NULL;
@@ -1432,11 +1372,11 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
1432 skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data); 1372 skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
1433 skb_put(skb, sizeof(*pmr)); 1373 skb_put(skb, sizeof(*pmr));
1434 pmr = (struct mld2_report *)skb_transport_header(skb); 1374 pmr = (struct mld2_report *)skb_transport_header(skb);
1435 pmr->type = ICMPV6_MLD2_REPORT; 1375 pmr->mld2r_type = ICMPV6_MLD2_REPORT;
1436 pmr->resv1 = 0; 1376 pmr->mld2r_resv1 = 0;
1437 pmr->csum = 0; 1377 pmr->mld2r_cksum = 0;
1438 pmr->resv2 = 0; 1378 pmr->mld2r_resv2 = 0;
1439 pmr->ngrec = 0; 1379 pmr->mld2r_ngrec = 0;
1440 return skb; 1380 return skb;
1441} 1381}
1442 1382
@@ -1446,21 +1386,24 @@ static void mld_sendpack(struct sk_buff *skb)
1446 struct mld2_report *pmr = 1386 struct mld2_report *pmr =
1447 (struct mld2_report *)skb_transport_header(skb); 1387 (struct mld2_report *)skb_transport_header(skb);
1448 int payload_len, mldlen; 1388 int payload_len, mldlen;
1449 struct inet6_dev *idev = in6_dev_get(skb->dev); 1389 struct inet6_dev *idev;
1450 struct net *net = dev_net(skb->dev); 1390 struct net *net = dev_net(skb->dev);
1451 int err; 1391 int err;
1452 struct flowi fl; 1392 struct flowi fl;
1453 struct dst_entry *dst; 1393 struct dst_entry *dst;
1454 1394
1395 rcu_read_lock();
1396 idev = __in6_dev_get(skb->dev);
1455 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 1397 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
1456 1398
1457 payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); 1399 payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
1458 mldlen = skb->tail - skb->transport_header; 1400 mldlen = skb->tail - skb->transport_header;
1459 pip6->payload_len = htons(payload_len); 1401 pip6->payload_len = htons(payload_len);
1460 1402
1461 pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, 1403 pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
1462 IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb), 1404 IPPROTO_ICMPV6,
1463 mldlen, 0)); 1405 csum_partial(skb_transport_header(skb),
1406 mldlen, 0));
1464 1407
1465 dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); 1408 dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
1466 1409
@@ -1480,7 +1423,7 @@ static void mld_sendpack(struct sk_buff *skb)
1480 1423
1481 payload_len = skb->len; 1424 payload_len = skb->len;
1482 1425
1483 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, 1426 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
1484 dst_output); 1427 dst_output);
1485out: 1428out:
1486 if (!err) { 1429 if (!err) {
@@ -1490,8 +1433,7 @@ out:
1490 } else 1433 } else
1491 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); 1434 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
1492 1435
1493 if (likely(idev != NULL)) 1436 rcu_read_unlock();
1494 in6_dev_put(idev);
1495 return; 1437 return;
1496 1438
1497err_out: 1439err_out:
@@ -1521,7 +1463,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1521 pgr->grec_nsrcs = 0; 1463 pgr->grec_nsrcs = 0;
1522 pgr->grec_mca = pmc->mca_addr; /* structure copy */ 1464 pgr->grec_mca = pmc->mca_addr; /* structure copy */
1523 pmr = (struct mld2_report *)skb_transport_header(skb); 1465 pmr = (struct mld2_report *)skb_transport_header(skb);
1524 pmr->ngrec = htons(ntohs(pmr->ngrec)+1); 1466 pmr->mld2r_ngrec = htons(ntohs(pmr->mld2r_ngrec)+1);
1525 *ppgr = pgr; 1467 *ppgr = pgr;
1526 return skb; 1468 return skb;
1527} 1469}
@@ -1557,7 +1499,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
1557 1499
1558 /* EX and TO_EX get a fresh packet, if needed */ 1500 /* EX and TO_EX get a fresh packet, if needed */
1559 if (truncate) { 1501 if (truncate) {
1560 if (pmr && pmr->ngrec && 1502 if (pmr && pmr->mld2r_ngrec &&
1561 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { 1503 AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
1562 if (skb) 1504 if (skb)
1563 mld_sendpack(skb); 1505 mld_sendpack(skb);
@@ -1770,9 +1712,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1770 struct sock *sk = net->ipv6.igmp_sk; 1712 struct sock *sk = net->ipv6.igmp_sk;
1771 struct inet6_dev *idev; 1713 struct inet6_dev *idev;
1772 struct sk_buff *skb; 1714 struct sk_buff *skb;
1773 struct icmp6hdr *hdr; 1715 struct mld_msg *hdr;
1774 const struct in6_addr *snd_addr, *saddr; 1716 const struct in6_addr *snd_addr, *saddr;
1775 struct in6_addr *addrp;
1776 struct in6_addr addr_buf; 1717 struct in6_addr addr_buf;
1777 int err, len, payload_len, full_len; 1718 int err, len, payload_len, full_len;
1778 u8 ra[8] = { IPPROTO_ICMPV6, 0, 1719 u8 ra[8] = { IPPROTO_ICMPV6, 0,
@@ -1820,18 +1761,17 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1820 1761
1821 memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra)); 1762 memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
1822 1763
1823 hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr)); 1764 hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg));
1824 memset(hdr, 0, sizeof(struct icmp6hdr)); 1765 memset(hdr, 0, sizeof(struct mld_msg));
1825 hdr->icmp6_type = type; 1766 hdr->mld_type = type;
1826 1767 ipv6_addr_copy(&hdr->mld_mca, addr);
1827 addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr));
1828 ipv6_addr_copy(addrp, addr);
1829 1768
1830 hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len, 1769 hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len,
1831 IPPROTO_ICMPV6, 1770 IPPROTO_ICMPV6,
1832 csum_partial(hdr, len, 0)); 1771 csum_partial(hdr, len, 0));
1833 1772
1834 idev = in6_dev_get(skb->dev); 1773 rcu_read_lock();
1774 idev = __in6_dev_get(skb->dev);
1835 1775
1836 dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr); 1776 dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
1837 if (!dst) { 1777 if (!dst) {
@@ -1848,7 +1788,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
1848 goto err_out; 1788 goto err_out;
1849 1789
1850 skb_dst_set(skb, dst); 1790 skb_dst_set(skb, dst);
1851 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, 1791 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
1852 dst_output); 1792 dst_output);
1853out: 1793out:
1854 if (!err) { 1794 if (!err) {
@@ -1858,8 +1798,7 @@ out:
1858 } else 1798 } else
1859 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); 1799 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
1860 1800
1861 if (likely(idev != NULL)) 1801 rcu_read_unlock();
1862 in6_dev_put(idev);
1863 return; 1802 return;
1864 1803
1865err_out: 1804err_out:
@@ -2050,8 +1989,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
2050 &psf->sf_addr)) 1989 &psf->sf_addr))
2051 break; 1990 break;
2052 if (!dpsf) { 1991 if (!dpsf) {
2053 dpsf = (struct ip6_sf_list *) 1992 dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
2054 kmalloc(sizeof(*dpsf), GFP_ATOMIC);
2055 if (!dpsf) 1993 if (!dpsf)
2056 continue; 1994 continue;
2057 *dpsf = *psf; 1995 *dpsf = *psf;
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 2794b6002836..d6e9599d0705 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -347,11 +347,12 @@ static const struct xfrm_type mip6_destopt_type =
347 347
348static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) 348static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
349{ 349{
350 struct ipv6hdr *iph = ipv6_hdr(skb);
350 struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; 351 struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
351 int err = rt2->rt_hdr.nexthdr; 352 int err = rt2->rt_hdr.nexthdr;
352 353
353 spin_lock(&x->lock); 354 spin_lock(&x->lock);
354 if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && 355 if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
355 !ipv6_addr_any((struct in6_addr *)x->coaddr)) 356 !ipv6_addr_any((struct in6_addr *)x->coaddr))
356 err = -ENOENT; 357 err = -ENOENT;
357 spin_unlock(&x->lock); 358 spin_unlock(&x->lock);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index da0a4d2adc69..58841c4ae947 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -536,7 +536,7 @@ void ndisc_send_skb(struct sk_buff *skb,
536 idev = in6_dev_get(dst->dev); 536 idev = in6_dev_get(dst->dev);
537 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 537 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
538 538
539 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, 539 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
540 dst_output); 540 dst_output);
541 if (!err) { 541 if (!err) {
542 ICMP6MSGOUT_INC_STATS(net, idev, type); 542 ICMP6MSGOUT_INC_STATS(net, idev, type);
@@ -586,6 +586,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
586 src_addr = solicited_addr; 586 src_addr = solicited_addr;
587 if (ifp->flags & IFA_F_OPTIMISTIC) 587 if (ifp->flags & IFA_F_OPTIMISTIC)
588 override = 0; 588 override = 0;
589 inc_opt |= ifp->idev->cnf.force_tllao;
589 in6_ifa_put(ifp); 590 in6_ifa_put(ifp);
590 } else { 591 } else {
591 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, 592 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
@@ -599,7 +600,6 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
599 icmp6h.icmp6_solicited = solicited; 600 icmp6h.icmp6_solicited = solicited;
600 icmp6h.icmp6_override = override; 601 icmp6h.icmp6_override = override;
601 602
602 inc_opt |= ifp->idev->cnf.force_tllao;
603 __ndisc_send(dev, neigh, daddr, src_addr, 603 __ndisc_send(dev, neigh, daddr, src_addr,
604 &icmp6h, solicited_addr, 604 &icmp6h, solicited_addr,
605 inc_opt ? ND_OPT_TARGET_LL_ADDR : 0); 605 inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
@@ -890,8 +890,6 @@ out:
890 in6_ifa_put(ifp); 890 in6_ifa_put(ifp);
891 else 891 else
892 in6_dev_put(idev); 892 in6_dev_put(idev);
893
894 return;
895} 893}
896 894
897static void ndisc_recv_na(struct sk_buff *skb) 895static void ndisc_recv_na(struct sk_buff *skb)
@@ -1231,7 +1229,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1231 ND_PRINTK0(KERN_ERR 1229 ND_PRINTK0(KERN_ERR
1232 "ICMPv6 RA: %s() got default router without neighbour.\n", 1230 "ICMPv6 RA: %s() got default router without neighbour.\n",
1233 __func__); 1231 __func__);
1234 dst_release(&rt->u.dst); 1232 dst_release(&rt->dst);
1235 in6_dev_put(in6_dev); 1233 in6_dev_put(in6_dev);
1236 return; 1234 return;
1237 } 1235 }
@@ -1246,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1246 if (ra_msg->icmph.icmp6_hop_limit) { 1244 if (ra_msg->icmph.icmp6_hop_limit) {
1247 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; 1245 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1248 if (rt) 1246 if (rt)
1249 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; 1247 rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
1250 } 1248 }
1251 1249
1252skip_defrtr: 1250skip_defrtr:
@@ -1365,7 +1363,7 @@ skip_linkparms:
1365 in6_dev->cnf.mtu6 = mtu; 1363 in6_dev->cnf.mtu6 = mtu;
1366 1364
1367 if (rt) 1365 if (rt)
1368 rt->u.dst.metrics[RTAX_MTU-1] = mtu; 1366 rt->dst.metrics[RTAX_MTU-1] = mtu;
1369 1367
1370 rt6_mtu_change(skb->dev, mtu); 1368 rt6_mtu_change(skb->dev, mtu);
1371 } 1369 }
@@ -1386,7 +1384,7 @@ skip_linkparms:
1386 } 1384 }
1387out: 1385out:
1388 if (rt) 1386 if (rt)
1389 dst_release(&rt->u.dst); 1387 dst_release(&rt->dst);
1390 else if (neigh) 1388 else if (neigh)
1391 neigh_release(neigh); 1389 neigh_release(neigh);
1392 in6_dev_put(in6_dev); 1390 in6_dev_put(in6_dev);
@@ -1618,7 +1616,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1618 skb_dst_set(buff, dst); 1616 skb_dst_set(buff, dst);
1619 idev = in6_dev_get(dst->dev); 1617 idev = in6_dev_get(dst->dev);
1620 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); 1618 IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
1621 err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, 1619 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1622 dst_output); 1620 dst_output);
1623 if (!err) { 1621 if (!err) {
1624 ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT); 1622 ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d5ed92b14346..7155b2451d7c 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -25,20 +25,6 @@ int ip6_route_me_harder(struct sk_buff *skb)
25 }; 25 };
26 26
27 dst = ip6_route_output(net, skb->sk, &fl); 27 dst = ip6_route_output(net, skb->sk, &fl);
28
29#ifdef CONFIG_XFRM
30 if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
31 xfrm_decode_session(skb, &fl, AF_INET6) == 0) {
32 struct dst_entry *dst2 = skb_dst(skb);
33
34 if (xfrm_lookup(net, &dst2, &fl, skb->sk, 0)) {
35 skb_dst_set(skb, NULL);
36 return -1;
37 }
38 skb_dst_set(skb, dst2);
39 }
40#endif
41
42 if (dst->error) { 28 if (dst->error) {
43 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); 29 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
44 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); 30 LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
@@ -50,6 +36,17 @@ int ip6_route_me_harder(struct sk_buff *skb)
50 skb_dst_drop(skb); 36 skb_dst_drop(skb);
51 37
52 skb_dst_set(skb, dst); 38 skb_dst_set(skb, dst);
39
40#ifdef CONFIG_XFRM
41 if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
42 xfrm_decode_session(skb, &fl, AF_INET6) == 0) {
43 skb_dst_set(skb, NULL);
44 if (xfrm_lookup(net, &dst, &fl, skb->sk, 0))
45 return -1;
46 skb_dst_set(skb, dst);
47 }
48#endif
49
53 return 0; 50 return 0;
54} 51}
55EXPORT_SYMBOL(ip6_route_me_harder); 52EXPORT_SYMBOL(ip6_route_me_harder);
@@ -154,9 +151,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
154 protocol, 151 protocol,
155 csum_sub(0, hsum))); 152 csum_sub(0, hsum)));
156 skb->ip_summed = CHECKSUM_NONE; 153 skb->ip_summed = CHECKSUM_NONE;
157 csum = __skb_checksum_complete_head(skb, dataoff + len); 154 return __skb_checksum_complete_head(skb, dataoff + len);
158 if (!csum)
159 skb->ip_summed = CHECKSUM_UNNECESSARY;
160 } 155 }
161 return csum; 156 return csum;
162}; 157};
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 6a68a74d14a3..413ab0754e1f 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -43,7 +43,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
43 43
44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; 44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; 45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
46static DEFINE_RWLOCK(queue_lock); 46static DEFINE_SPINLOCK(queue_lock);
47static int peer_pid __read_mostly; 47static int peer_pid __read_mostly;
48static unsigned int copy_range __read_mostly; 48static unsigned int copy_range __read_mostly;
49static unsigned int queue_total; 49static unsigned int queue_total;
@@ -73,10 +73,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
73 break; 73 break;
74 74
75 case IPQ_COPY_PACKET: 75 case IPQ_COPY_PACKET:
76 copy_mode = mode; 76 if (range > 0xFFFF)
77 range = 0xFFFF;
77 copy_range = range; 78 copy_range = range;
78 if (copy_range > 0xFFFF) 79 copy_mode = mode;
79 copy_range = 0xFFFF;
80 break; 80 break;
81 81
82 default: 82 default:
@@ -102,7 +102,7 @@ ipq_find_dequeue_entry(unsigned long id)
102{ 102{
103 struct nf_queue_entry *entry = NULL, *i; 103 struct nf_queue_entry *entry = NULL, *i;
104 104
105 write_lock_bh(&queue_lock); 105 spin_lock_bh(&queue_lock);
106 106
107 list_for_each_entry(i, &queue_list, list) { 107 list_for_each_entry(i, &queue_list, list) {
108 if ((unsigned long)i == id) { 108 if ((unsigned long)i == id) {
@@ -116,7 +116,7 @@ ipq_find_dequeue_entry(unsigned long id)
116 queue_total--; 116 queue_total--;
117 } 117 }
118 118
119 write_unlock_bh(&queue_lock); 119 spin_unlock_bh(&queue_lock);
120 return entry; 120 return entry;
121} 121}
122 122
@@ -137,9 +137,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
137static void 137static void
138ipq_flush(ipq_cmpfn cmpfn, unsigned long data) 138ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
139{ 139{
140 write_lock_bh(&queue_lock); 140 spin_lock_bh(&queue_lock);
141 __ipq_flush(cmpfn, data); 141 __ipq_flush(cmpfn, data);
142 write_unlock_bh(&queue_lock); 142 spin_unlock_bh(&queue_lock);
143} 143}
144 144
145static struct sk_buff * 145static struct sk_buff *
@@ -153,37 +153,29 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
153 struct nlmsghdr *nlh; 153 struct nlmsghdr *nlh;
154 struct timeval tv; 154 struct timeval tv;
155 155
156 read_lock_bh(&queue_lock); 156 switch (ACCESS_ONCE(copy_mode)) {
157
158 switch (copy_mode) {
159 case IPQ_COPY_META: 157 case IPQ_COPY_META:
160 case IPQ_COPY_NONE: 158 case IPQ_COPY_NONE:
161 size = NLMSG_SPACE(sizeof(*pmsg)); 159 size = NLMSG_SPACE(sizeof(*pmsg));
162 break; 160 break;
163 161
164 case IPQ_COPY_PACKET: 162 case IPQ_COPY_PACKET:
165 if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || 163 if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
166 entry->skb->ip_summed == CHECKSUM_COMPLETE) && 164 (*errp = skb_checksum_help(entry->skb)))
167 (*errp = skb_checksum_help(entry->skb))) {
168 read_unlock_bh(&queue_lock);
169 return NULL; 165 return NULL;
170 } 166
171 if (copy_range == 0 || copy_range > entry->skb->len) 167 data_len = ACCESS_ONCE(copy_range);
168 if (data_len == 0 || data_len > entry->skb->len)
172 data_len = entry->skb->len; 169 data_len = entry->skb->len;
173 else
174 data_len = copy_range;
175 170
176 size = NLMSG_SPACE(sizeof(*pmsg) + data_len); 171 size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
177 break; 172 break;
178 173
179 default: 174 default:
180 *errp = -EINVAL; 175 *errp = -EINVAL;
181 read_unlock_bh(&queue_lock);
182 return NULL; 176 return NULL;
183 } 177 }
184 178
185 read_unlock_bh(&queue_lock);
186
187 skb = alloc_skb(size, GFP_ATOMIC); 179 skb = alloc_skb(size, GFP_ATOMIC);
188 if (!skb) 180 if (!skb)
189 goto nlmsg_failure; 181 goto nlmsg_failure;
@@ -243,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
243 if (nskb == NULL) 235 if (nskb == NULL)
244 return status; 236 return status;
245 237
246 write_lock_bh(&queue_lock); 238 spin_lock_bh(&queue_lock);
247 239
248 if (!peer_pid) 240 if (!peer_pid)
249 goto err_out_free_nskb; 241 goto err_out_free_nskb;
@@ -267,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
267 259
268 __ipq_enqueue_entry(entry); 260 __ipq_enqueue_entry(entry);
269 261
270 write_unlock_bh(&queue_lock); 262 spin_unlock_bh(&queue_lock);
271 return status; 263 return status;
272 264
273err_out_free_nskb: 265err_out_free_nskb:
274 kfree_skb(nskb); 266 kfree_skb(nskb);
275 267
276err_out_unlock: 268err_out_unlock:
277 write_unlock_bh(&queue_lock); 269 spin_unlock_bh(&queue_lock);
278 return status; 270 return status;
279} 271}
280 272
@@ -343,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range)
343{ 335{
344 int status; 336 int status;
345 337
346 write_lock_bh(&queue_lock); 338 spin_lock_bh(&queue_lock);
347 status = __ipq_set_mode(mode, range); 339 status = __ipq_set_mode(mode, range);
348 write_unlock_bh(&queue_lock); 340 spin_unlock_bh(&queue_lock);
349 return status; 341 return status;
350} 342}
351 343
@@ -442,11 +434,11 @@ __ipq_rcv_skb(struct sk_buff *skb)
442 if (security_netlink_recv(skb, CAP_NET_ADMIN)) 434 if (security_netlink_recv(skb, CAP_NET_ADMIN))
443 RCV_SKB_FAIL(-EPERM); 435 RCV_SKB_FAIL(-EPERM);
444 436
445 write_lock_bh(&queue_lock); 437 spin_lock_bh(&queue_lock);
446 438
447 if (peer_pid) { 439 if (peer_pid) {
448 if (peer_pid != pid) { 440 if (peer_pid != pid) {
449 write_unlock_bh(&queue_lock); 441 spin_unlock_bh(&queue_lock);
450 RCV_SKB_FAIL(-EBUSY); 442 RCV_SKB_FAIL(-EBUSY);
451 } 443 }
452 } else { 444 } else {
@@ -454,7 +446,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
454 peer_pid = pid; 446 peer_pid = pid;
455 } 447 }
456 448
457 write_unlock_bh(&queue_lock); 449 spin_unlock_bh(&queue_lock);
458 450
459 status = ipq_receive_peer(NLMSG_DATA(nlh), type, 451 status = ipq_receive_peer(NLMSG_DATA(nlh), type,
460 nlmsglen - NLMSG_LENGTH(0)); 452 nlmsglen - NLMSG_LENGTH(0));
@@ -463,7 +455,6 @@ __ipq_rcv_skb(struct sk_buff *skb)
463 455
464 if (flags & NLM_F_ACK) 456 if (flags & NLM_F_ACK)
465 netlink_ack(skb, nlh, 0); 457 netlink_ack(skb, nlh, 0);
466 return;
467} 458}
468 459
469static void 460static void
@@ -500,10 +491,10 @@ ipq_rcv_nl_event(struct notifier_block *this,
500 struct netlink_notify *n = ptr; 491 struct netlink_notify *n = ptr;
501 492
502 if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) { 493 if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
503 write_lock_bh(&queue_lock); 494 spin_lock_bh(&queue_lock);
504 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) 495 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
505 __ipq_reset(); 496 __ipq_reset();
506 write_unlock_bh(&queue_lock); 497 spin_unlock_bh(&queue_lock);
507 } 498 }
508 return NOTIFY_DONE; 499 return NOTIFY_DONE;
509} 500}
@@ -530,7 +521,7 @@ static ctl_table ipq_table[] = {
530#ifdef CONFIG_PROC_FS 521#ifdef CONFIG_PROC_FS
531static int ip6_queue_show(struct seq_file *m, void *v) 522static int ip6_queue_show(struct seq_file *m, void *v)
532{ 523{
533 read_lock_bh(&queue_lock); 524 spin_lock_bh(&queue_lock);
534 525
535 seq_printf(m, 526 seq_printf(m,
536 "Peer PID : %d\n" 527 "Peer PID : %d\n"
@@ -548,7 +539,7 @@ static int ip6_queue_show(struct seq_file *m, void *v)
548 queue_dropped, 539 queue_dropped,
549 queue_user_dropped); 540 queue_user_dropped);
550 541
551 read_unlock_bh(&queue_lock); 542 spin_unlock_bh(&queue_lock);
552 return 0; 543 return 0;
553} 544}
554 545
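
The rwlock can shrink to a plain spinlock because the packet path no longer takes it at all: ipq_build_packet_message() now snapshots copy_mode and copy_range once each with ACCESS_ONCE() and tolerates a momentarily stale pair, while writers still serialize on queue_lock. Note how __ipq_set_mode() clamps the range before publishing it, so lock-free readers never observe an unclamped value. A minimal sketch of the idiom, with illustrative names:

static unsigned int cfg_limit;		/* written only under some_lock */

static unsigned int frame_len(unsigned int pkt_len)
{
	/* one racy-but-atomic read; never re-read, never locked */
	unsigned int limit = ACCESS_ONCE(cfg_limit);

	return (limit == 0 || limit > pkt_len) ? pkt_len : limit;
}
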
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 9210e312edf1..5359ef4daac5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -40,24 +40,19 @@ MODULE_DESCRIPTION("IPv6 packet filter");
40/*#define DEBUG_IP_FIREWALL_USER*/ 40/*#define DEBUG_IP_FIREWALL_USER*/
41 41
42#ifdef DEBUG_IP_FIREWALL 42#ifdef DEBUG_IP_FIREWALL
43#define dprintf(format, args...) printk(format , ## args) 43#define dprintf(format, args...) pr_info(format , ## args)
44#else 44#else
45#define dprintf(format, args...) 45#define dprintf(format, args...)
46#endif 46#endif
47 47
48#ifdef DEBUG_IP_FIREWALL_USER 48#ifdef DEBUG_IP_FIREWALL_USER
49#define duprintf(format, args...) printk(format , ## args) 49#define duprintf(format, args...) pr_info(format , ## args)
50#else 50#else
51#define duprintf(format, args...) 51#define duprintf(format, args...)
52#endif 52#endif
53 53
54#ifdef CONFIG_NETFILTER_DEBUG 54#ifdef CONFIG_NETFILTER_DEBUG
55#define IP_NF_ASSERT(x) \ 55#define IP_NF_ASSERT(x) WARN_ON(!(x))
56do { \
57 if (!(x)) \
58 printk("IP_NF_ASSERT: %s:%s:%u\n", \
59 __func__, __FILE__, __LINE__); \
60} while(0)
61#else 56#else
62#define IP_NF_ASSERT(x) 57#define IP_NF_ASSERT(x)
63#endif 58#endif
@@ -197,30 +192,14 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6)
197} 192}
198 193
199static unsigned int 194static unsigned int
200ip6t_error(struct sk_buff *skb, const struct xt_target_param *par) 195ip6t_error(struct sk_buff *skb, const struct xt_action_param *par)
201{ 196{
202 if (net_ratelimit()) 197 if (net_ratelimit())
203 printk("ip6_tables: error: `%s'\n", 198 pr_info("error: `%s'\n", (const char *)par->targinfo);
204 (const char *)par->targinfo);
205 199
206 return NF_DROP; 200 return NF_DROP;
207} 201}
208 202
209/* Performance critical - called for every packet */
210static inline bool
211do_match(const struct ip6t_entry_match *m, const struct sk_buff *skb,
212 struct xt_match_param *par)
213{
214 par->match = m->u.kernel.match;
215 par->matchinfo = m->data;
216
217 /* Stop iteration if it doesn't match */
218 if (!m->u.kernel.match->match(skb, par))
219 return true;
220 else
221 return false;
222}
223
224static inline struct ip6t_entry * 203static inline struct ip6t_entry *
225get_entry(const void *base, unsigned int offset) 204get_entry(const void *base, unsigned int offset)
226{ 205{
@@ -352,18 +331,15 @@ ip6t_do_table(struct sk_buff *skb,
352 const struct net_device *out, 331 const struct net_device *out,
353 struct xt_table *table) 332 struct xt_table *table)
354{ 333{
355#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
356
357 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 334 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
358 bool hotdrop = false;
359 /* Initializing verdict to NF_DROP keeps gcc happy. */ 335 /* Initializing verdict to NF_DROP keeps gcc happy. */
360 unsigned int verdict = NF_DROP; 336 unsigned int verdict = NF_DROP;
361 const char *indev, *outdev; 337 const char *indev, *outdev;
362 const void *table_base; 338 const void *table_base;
363 struct ip6t_entry *e, *back; 339 struct ip6t_entry *e, **jumpstack;
340 unsigned int *stackptr, origptr, cpu;
364 const struct xt_table_info *private; 341 const struct xt_table_info *private;
365 struct xt_match_param mtpar; 342 struct xt_action_param acpar;
366 struct xt_target_param tgpar;
367 343
368 /* Initialization */ 344 /* Initialization */
369 indev = in ? in->name : nulldevname; 345 indev = in ? in->name : nulldevname;
@@ -374,43 +350,44 @@ ip6t_do_table(struct sk_buff *skb,
374 * things we don't know, ie. tcp syn flag or ports). If the 350 * things we don't know, ie. tcp syn flag or ports). If the
375 * rule is also a fragment-specific rule, non-fragments won't 351 * rule is also a fragment-specific rule, non-fragments won't
376 * match it. */ 352 * match it. */
377 mtpar.hotdrop = &hotdrop; 353 acpar.hotdrop = false;
378 mtpar.in = tgpar.in = in; 354 acpar.in = in;
379 mtpar.out = tgpar.out = out; 355 acpar.out = out;
380 mtpar.family = tgpar.family = NFPROTO_IPV6; 356 acpar.family = NFPROTO_IPV6;
381 mtpar.hooknum = tgpar.hooknum = hook; 357 acpar.hooknum = hook;
382 358
383 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 359 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
384 360
385 xt_info_rdlock_bh(); 361 xt_info_rdlock_bh();
386 private = table->private; 362 private = table->private;
387 table_base = private->entries[smp_processor_id()]; 363 cpu = smp_processor_id();
364 table_base = private->entries[cpu];
365 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
366 stackptr = per_cpu_ptr(private->stackptr, cpu);
367 origptr = *stackptr;
388 368
389 e = get_entry(table_base, private->hook_entry[hook]); 369 e = get_entry(table_base, private->hook_entry[hook]);
390 370
391 /* For return from builtin chain */
392 back = get_entry(table_base, private->underflow[hook]);
393
394 do { 371 do {
395 const struct ip6t_entry_target *t; 372 const struct ip6t_entry_target *t;
396 const struct xt_entry_match *ematch; 373 const struct xt_entry_match *ematch;
397 374
398 IP_NF_ASSERT(e); 375 IP_NF_ASSERT(e);
399 IP_NF_ASSERT(back);
400 if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, 376 if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
401 &mtpar.thoff, &mtpar.fragoff, &hotdrop)) { 377 &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
402 no_match: 378 no_match:
403 e = ip6t_next_entry(e); 379 e = ip6t_next_entry(e);
404 continue; 380 continue;
405 } 381 }
406 382
407 xt_ematch_foreach(ematch, e) 383 xt_ematch_foreach(ematch, e) {
408 if (do_match(ematch, skb, &mtpar) != 0) 384 acpar.match = ematch->u.kernel.match;
385 acpar.matchinfo = ematch->data;
386 if (!acpar.match->match(skb, &acpar))
409 goto no_match; 387 goto no_match;
388 }
410 389
411 ADD_COUNTER(e->counters, 390 ADD_COUNTER(e->counters, skb->len, 1);
412 ntohs(ipv6_hdr(skb)->payload_len) +
413 sizeof(struct ipv6hdr), 1);
414 391
415 t = ip6t_get_target_c(e); 392 t = ip6t_get_target_c(e);
416 IP_NF_ASSERT(t->u.kernel.target); 393 IP_NF_ASSERT(t->u.kernel.target);
@@ -433,62 +410,47 @@ ip6t_do_table(struct sk_buff *skb,
433 verdict = (unsigned)(-v) - 1; 410 verdict = (unsigned)(-v) - 1;
434 break; 411 break;
435 } 412 }
436 e = back; 413 if (*stackptr == 0)
437 back = get_entry(table_base, back->comefrom); 414 e = get_entry(table_base,
415 private->underflow[hook]);
416 else
417 e = ip6t_next_entry(jumpstack[--*stackptr]);
438 continue; 418 continue;
439 } 419 }
440 if (table_base + v != ip6t_next_entry(e) && 420 if (table_base + v != ip6t_next_entry(e) &&
441 !(e->ipv6.flags & IP6T_F_GOTO)) { 421 !(e->ipv6.flags & IP6T_F_GOTO)) {
442 /* Save old back ptr in next entry */ 422 if (*stackptr >= private->stacksize) {
443 struct ip6t_entry *next = ip6t_next_entry(e); 423 verdict = NF_DROP;
444 next->comefrom = (void *)back - table_base; 424 break;
445 /* set back pointer to next entry */ 425 }
446 back = next; 426 jumpstack[(*stackptr)++] = e;
447 } 427 }
448 428
449 e = get_entry(table_base, v); 429 e = get_entry(table_base, v);
450 continue; 430 continue;
451 } 431 }
452 432
453 /* Targets which reenter must return 433 acpar.target = t->u.kernel.target;
454 abs. verdicts */ 434 acpar.targinfo = t->data;
455 tgpar.target = t->u.kernel.target;
456 tgpar.targinfo = t->data;
457 435
458#ifdef CONFIG_NETFILTER_DEBUG 436 verdict = t->u.kernel.target->target(skb, &acpar);
459 tb_comefrom = 0xeeeeeeec;
460#endif
461 verdict = t->u.kernel.target->target(skb, &tgpar);
462
463#ifdef CONFIG_NETFILTER_DEBUG
464 if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
465 printk("Target %s reentered!\n",
466 t->u.kernel.target->name);
467 verdict = NF_DROP;
468 }
469 tb_comefrom = 0x57acc001;
470#endif
471 if (verdict == IP6T_CONTINUE) 437 if (verdict == IP6T_CONTINUE)
472 e = ip6t_next_entry(e); 438 e = ip6t_next_entry(e);
473 else 439 else
474 /* Verdict */ 440 /* Verdict */
475 break; 441 break;
476 } while (!hotdrop); 442 } while (!acpar.hotdrop);
477 443
478#ifdef CONFIG_NETFILTER_DEBUG
479 tb_comefrom = NETFILTER_LINK_POISON;
480#endif
481 xt_info_rdunlock_bh(); 444 xt_info_rdunlock_bh();
445 *stackptr = origptr;
482 446
483#ifdef DEBUG_ALLOW_ALL 447#ifdef DEBUG_ALLOW_ALL
484 return NF_ACCEPT; 448 return NF_ACCEPT;
485#else 449#else
486 if (hotdrop) 450 if (acpar.hotdrop)
487 return NF_DROP; 451 return NF_DROP;
488 else return verdict; 452 else return verdict;
489#endif 453#endif
490
491#undef tb_comefrom
492} 454}
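
The rewritten ip6t_do_table() drops the old scheme of threading a "comefrom" back pointer through the rules in favor of an explicit per-cpu jump stack: entering a user-defined chain pushes the current rule, RETURN pops it (falling back to the hook's underflow rule when the stack is empty), and exceeding stacksize becomes NF_DROP. A simplified model of that traversal, with hypothetical types standing in for ip6t_entry offsets:

struct rule {
	int is_return;		/* RETURN verdict */
	struct rule *jump;	/* user-chain target, or NULL */
	struct rule *next;	/* fall-through successor */
};

static struct rule *step(struct rule *e, struct rule **stack,
			 unsigned int *sp, unsigned int size,
			 struct rule *underflow)
{
	if (e->is_return)	/* pop, or leave via the builtin chain */
		return *sp ? stack[--(*sp)]->next : underflow;
	if (e->jump) {
		if (*sp >= size)
			return NULL;	/* stack overflow: drop the packet */
		stack[(*sp)++] = e;	/* resume after e when chain returns */
		return e->jump;
	}
	return e->next;
}
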
493 455
494/* Figures out from what hook each rule can be called: returns 0 if 456/* Figures out from what hook each rule can be called: returns 0 if
@@ -517,7 +479,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
517 int visited = e->comefrom & (1 << hook); 479 int visited = e->comefrom & (1 << hook);
518 480
519 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { 481 if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
520 printk("iptables: loop hook %u pos %u %08X.\n", 482 pr_err("iptables: loop hook %u pos %u %08X.\n",
521 hook, pos, e->comefrom); 483 hook, pos, e->comefrom);
522 return 0; 484 return 0;
523 } 485 }
@@ -661,12 +623,11 @@ find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par)
661 struct xt_match *match; 623 struct xt_match *match;
662 int ret; 624 int ret;
663 625
664 match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, 626 match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
665 m->u.user.revision), 627 m->u.user.revision);
666 "ip6t_%s", m->u.user.name); 628 if (IS_ERR(match)) {
667 if (IS_ERR(match) || !match) {
668 duprintf("find_check_match: `%s' not found\n", m->u.user.name); 629 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
669 return match ? PTR_ERR(match) : -ENOENT; 630 return PTR_ERR(match);
670 } 631 }
671 m->u.kernel.match = match; 632 m->u.kernel.match = match;
672 633
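xt_request_find_match() and its target twin fold the old try_then_request_module() dance into a single helper that loads the module on demand and returns ERR_PTR() on failure, never NULL, so each call site above shrinks to one IS_ERR() check. The calling convention in isolation:

	struct xt_match *match;

	match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
				      m->u.user.revision);
	if (IS_ERR(match))		/* never NULL: the errno lives in the pointer */
		return PTR_ERR(match);	/* e.g. -ENOENT when no module provides it */
	m->u.kernel.match = match;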
@@ -734,13 +695,11 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
734 } 695 }
735 696
736 t = ip6t_get_target(e); 697 t = ip6t_get_target(e);
737 target = try_then_request_module(xt_find_target(AF_INET6, 698 target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
738 t->u.user.name, 699 t->u.user.revision);
739 t->u.user.revision), 700 if (IS_ERR(target)) {
740 "ip6t_%s", t->u.user.name);
741 if (IS_ERR(target) || !target) {
742 duprintf("find_check_entry: `%s' not found\n", t->u.user.name); 701 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
743 ret = target ? PTR_ERR(target) : -ENOENT; 702 ret = PTR_ERR(target);
744 goto cleanup_matches; 703 goto cleanup_matches;
745 } 704 }
746 t->u.kernel.target = target; 705 t->u.kernel.target = target;
@@ -873,6 +832,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
873 if (ret != 0) 832 if (ret != 0)
874 return ret; 833 return ret;
875 ++i; 834 ++i;
835 if (strcmp(ip6t_get_target(iter)->u.user.name,
836 XT_ERROR_TARGET) == 0)
837 ++newinfo->stacksize;
876 } 838 }
877 839
878 if (i != repl->num_entries) { 840 if (i != repl->num_entries) {
@@ -935,7 +897,7 @@ get_counters(const struct xt_table_info *t,
935 struct ip6t_entry *iter; 897 struct ip6t_entry *iter;
936 unsigned int cpu; 898 unsigned int cpu;
937 unsigned int i; 899 unsigned int i;
938 unsigned int curcpu; 900 unsigned int curcpu = get_cpu();
939 901
940 /* Instead of clearing (by a previous call to memset()) 902 /* Instead of clearing (by a previous call to memset())
941 * the counters and using adds, we set the counters 903 * the counters and using adds, we set the counters
@@ -945,14 +907,16 @@ get_counters(const struct xt_table_info *t,
945 * if new softirq were to run and call ipt_do_table 907 * if new softirq were to run and call ipt_do_table
946 */ 908 */
947 local_bh_disable(); 909 local_bh_disable();
948 curcpu = smp_processor_id();
949
950 i = 0; 910 i = 0;
951 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 911 xt_entry_foreach(iter, t->entries[curcpu], t->size) {
952 SET_COUNTER(counters[i], iter->counters.bcnt, 912 SET_COUNTER(counters[i], iter->counters.bcnt,
953 iter->counters.pcnt); 913 iter->counters.pcnt);
954 ++i; 914 ++i;
955 } 915 }
916 local_bh_enable();
 917 /* Processing counters from other cpus, we can leave bottom halves
 918 * enabled (preemption is disabled).
 919 */
956 920
957 for_each_possible_cpu(cpu) { 921 for_each_possible_cpu(cpu) {
958 if (cpu == curcpu) 922 if (cpu == curcpu)
@@ -966,7 +930,7 @@ get_counters(const struct xt_table_info *t,
966 } 930 }
967 xt_info_wrunlock(cpu); 931 xt_info_wrunlock(cpu);
968 } 932 }
969 local_bh_enable(); 933 put_cpu();
970} 934}
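get_counters() now pins itself to a CPU with get_cpu() for the whole walk and disables bottom halves only while snapshotting the local CPU's counters, which a softirq could otherwise update mid-read; each remote CPU's counters are read under that CPU's xt_info write lock instead, so no long BH-off section is needed. The locking shape, with the two walk helpers as hypothetical stand-ins for the SET_COUNTER/ADD_COUNTER loops:

	unsigned int curcpu = get_cpu();	/* disables preemption */

	local_bh_disable();			/* local counters move in softirq context */
	snapshot_local_counters(t, counters, curcpu);
	local_bh_enable();

	for_each_possible_cpu(cpu) {
		if (cpu == curcpu)
			continue;
		xt_info_wrlock(cpu);		/* excludes that cpu's rule-eval writers */
		add_remote_counters(t, counters, cpu);
		xt_info_wrunlock(cpu);
	}
	put_cpu();				/* re-enables preemption */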
971 935
972static struct xt_counters *alloc_counters(const struct xt_table *table) 936static struct xt_counters *alloc_counters(const struct xt_table *table)
@@ -979,7 +943,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
979 (other than comefrom, which userspace doesn't care 943 (other than comefrom, which userspace doesn't care
980 about). */ 944 about). */
981 countersize = sizeof(struct xt_counters) * private->number; 945 countersize = sizeof(struct xt_counters) * private->number;
982 counters = vmalloc_node(countersize, numa_node_id()); 946 counters = vmalloc(countersize);
983 947
984 if (counters == NULL) 948 if (counters == NULL)
985 return ERR_PTR(-ENOMEM); 949 return ERR_PTR(-ENOMEM);
@@ -1249,8 +1213,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1249 struct ip6t_entry *iter; 1213 struct ip6t_entry *iter;
1250 1214
1251 ret = 0; 1215 ret = 0;
1252 counters = vmalloc_node(num_counters * sizeof(struct xt_counters), 1216 counters = vmalloc(num_counters * sizeof(struct xt_counters));
1253 numa_node_id());
1254 if (!counters) { 1217 if (!counters) {
1255 ret = -ENOMEM; 1218 ret = -ENOMEM;
1256 goto out; 1219 goto out;
@@ -1404,7 +1367,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1404 if (len != size + num_counters * sizeof(struct xt_counters)) 1367 if (len != size + num_counters * sizeof(struct xt_counters))
1405 return -EINVAL; 1368 return -EINVAL;
1406 1369
1407 paddc = vmalloc_node(len - size, numa_node_id()); 1370 paddc = vmalloc(len - size);
1408 if (!paddc) 1371 if (!paddc)
1409 return -ENOMEM; 1372 return -ENOMEM;
1410 1373
@@ -1509,13 +1472,12 @@ compat_find_calc_match(struct ip6t_entry_match *m,
1509{ 1472{
1510 struct xt_match *match; 1473 struct xt_match *match;
1511 1474
1512 match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name, 1475 match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
1513 m->u.user.revision), 1476 m->u.user.revision);
1514 "ip6t_%s", m->u.user.name); 1477 if (IS_ERR(match)) {
1515 if (IS_ERR(match) || !match) {
1516 duprintf("compat_check_calc_match: `%s' not found\n", 1478 duprintf("compat_check_calc_match: `%s' not found\n",
1517 m->u.user.name); 1479 m->u.user.name);
1518 return match ? PTR_ERR(match) : -ENOENT; 1480 return PTR_ERR(match);
1519 } 1481 }
1520 m->u.kernel.match = match; 1482 m->u.kernel.match = match;
1521 *size += xt_compat_match_offset(match); 1483 *size += xt_compat_match_offset(match);
@@ -1582,14 +1544,12 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1582 } 1544 }
1583 1545
1584 t = compat_ip6t_get_target(e); 1546 t = compat_ip6t_get_target(e);
1585 target = try_then_request_module(xt_find_target(AF_INET6, 1547 target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
1586 t->u.user.name, 1548 t->u.user.revision);
1587 t->u.user.revision), 1549 if (IS_ERR(target)) {
1588 "ip6t_%s", t->u.user.name);
1589 if (IS_ERR(target) || !target) {
1590 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", 1550 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1591 t->u.user.name); 1551 t->u.user.name);
1592 ret = target ? PTR_ERR(target) : -ENOENT; 1552 ret = PTR_ERR(target);
1593 goto release_matches; 1553 goto release_matches;
1594 } 1554 }
1595 t->u.kernel.target = target; 1555 t->u.kernel.target = target;
@@ -2127,8 +2087,7 @@ struct xt_table *ip6t_register_table(struct net *net,
2127{ 2087{
2128 int ret; 2088 int ret;
2129 struct xt_table_info *newinfo; 2089 struct xt_table_info *newinfo;
2130 struct xt_table_info bootstrap 2090 struct xt_table_info bootstrap = {0};
2131 = { 0, 0, 0, { 0 }, { 0 }, { } };
2132 void *loc_cpu_entry; 2091 void *loc_cpu_entry;
2133 struct xt_table *new_table; 2092 struct xt_table *new_table;
2134 2093
@@ -2188,7 +2147,7 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
2188} 2147}
2189 2148
2190static bool 2149static bool
2191icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par) 2150icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
2192{ 2151{
2193 const struct icmp6hdr *ic; 2152 const struct icmp6hdr *ic;
2194 struct icmp6hdr _icmph; 2153 struct icmp6hdr _icmph;
@@ -2204,7 +2163,7 @@ icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par)
2204 * can't. Hence, no choice but to drop. 2163 * can't. Hence, no choice but to drop.
2205 */ 2164 */
2206 duprintf("Dropping evil ICMP tinygram.\n"); 2165 duprintf("Dropping evil ICMP tinygram.\n");
2207 *par->hotdrop = true; 2166 par->hotdrop = true;
2208 return false; 2167 return false;
2209 } 2168 }
2210 2169
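struct xt_action_param merges the old xt_match_param and xt_target_param into one block handed to matches and targets alike, and hotdrop becomes a plain member rather than a bool pointer, which is why *par->hotdrop = true turns into par->hotdrop = true throughout the rest of this series. A simplified view of the layout (the authoritative definition is in include/linux/netfilter/x_tables.h):

	struct xt_action_param {
		union {
			const struct xt_match *match;
			const struct xt_target *target;
		};
		union {
			const void *matchinfo, *targinfo;
		};
		const struct net_device *in, *out;
		int fragoff;			/* meaningful for matches */
		unsigned int thoff;		/* meaningful for matches */
		unsigned int hooknum;
		u_int8_t family;
		bool hotdrop;			/* was: bool *hotdrop */
	};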
@@ -2216,31 +2175,32 @@ icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par)
2216} 2175}
2217 2176
2218/* Called when user tries to insert an entry of this type. */ 2177/* Called when user tries to insert an entry of this type. */
2219static bool icmp6_checkentry(const struct xt_mtchk_param *par) 2178static int icmp6_checkentry(const struct xt_mtchk_param *par)
2220{ 2179{
2221 const struct ip6t_icmp *icmpinfo = par->matchinfo; 2180 const struct ip6t_icmp *icmpinfo = par->matchinfo;
2222 2181
2223 /* Must specify no unknown invflags */ 2182 /* Must specify no unknown invflags */
2224 return !(icmpinfo->invflags & ~IP6T_ICMP_INV); 2183 return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0;
2225} 2184}
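checkentry callbacks likewise switch from bool to int: validation can now surface a specific negative errno to userspace instead of collapsing every failure into the caller's generic -EINVAL, and 0 means success. The icmp6 version above compresses to a conditional expression, but the general shape is:

	static int example_mt6_check(const struct xt_mtchk_param *par)
	{
		const struct ip6t_icmp *info = par->matchinfo;

		if (info->invflags & ~IP6T_ICMP_INV)
			return -EINVAL;		/* propagated verbatim to userspace */
		return 0;
	}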
2226 2185
2227/* The built-in targets: standard (NULL) and error. */ 2186/* The built-in targets: standard (NULL) and error. */
2228static struct xt_target ip6t_standard_target __read_mostly = { 2187static struct xt_target ip6t_builtin_tg[] __read_mostly = {
2229 .name = IP6T_STANDARD_TARGET, 2188 {
2230 .targetsize = sizeof(int), 2189 .name = IP6T_STANDARD_TARGET,
2231 .family = NFPROTO_IPV6, 2190 .targetsize = sizeof(int),
2191 .family = NFPROTO_IPV6,
2232#ifdef CONFIG_COMPAT 2192#ifdef CONFIG_COMPAT
2233 .compatsize = sizeof(compat_int_t), 2193 .compatsize = sizeof(compat_int_t),
2234 .compat_from_user = compat_standard_from_user, 2194 .compat_from_user = compat_standard_from_user,
2235 .compat_to_user = compat_standard_to_user, 2195 .compat_to_user = compat_standard_to_user,
2236#endif 2196#endif
2237}; 2197 },
2238 2198 {
2239static struct xt_target ip6t_error_target __read_mostly = { 2199 .name = IP6T_ERROR_TARGET,
2240 .name = IP6T_ERROR_TARGET, 2200 .target = ip6t_error,
2241 .target = ip6t_error, 2201 .targetsize = IP6T_FUNCTION_MAXNAMELEN,
2242 .targetsize = IP6T_FUNCTION_MAXNAMELEN, 2202 .family = NFPROTO_IPV6,
2243 .family = NFPROTO_IPV6, 2203 },
2244}; 2204};
2245 2205
2246static struct nf_sockopt_ops ip6t_sockopts = { 2206static struct nf_sockopt_ops ip6t_sockopts = {
@@ -2260,13 +2220,15 @@ static struct nf_sockopt_ops ip6t_sockopts = {
2260 .owner = THIS_MODULE, 2220 .owner = THIS_MODULE,
2261}; 2221};
2262 2222
2263static struct xt_match icmp6_matchstruct __read_mostly = { 2223static struct xt_match ip6t_builtin_mt[] __read_mostly = {
2264 .name = "icmp6", 2224 {
2265 .match = icmp6_match, 2225 .name = "icmp6",
2266 .matchsize = sizeof(struct ip6t_icmp), 2226 .match = icmp6_match,
2267 .checkentry = icmp6_checkentry, 2227 .matchsize = sizeof(struct ip6t_icmp),
2268 .proto = IPPROTO_ICMPV6, 2228 .checkentry = icmp6_checkentry,
2269 .family = NFPROTO_IPV6, 2229 .proto = IPPROTO_ICMPV6,
2230 .family = NFPROTO_IPV6,
2231 },
2270}; 2232};
2271 2233
2272static int __net_init ip6_tables_net_init(struct net *net) 2234static int __net_init ip6_tables_net_init(struct net *net)
@@ -2293,13 +2255,10 @@ static int __init ip6_tables_init(void)
2293 goto err1; 2255 goto err1;
2294 2256
2295 /* No one else will be downing sem now, so we won't sleep */ 2257
2296 ret = xt_register_target(&ip6t_standard_target); 2258 ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
2297 if (ret < 0) 2259 if (ret < 0)
2298 goto err2; 2260 goto err2;
2299 ret = xt_register_target(&ip6t_error_target); 2261 ret = xt_register_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
2300 if (ret < 0)
2301 goto err3;
2302 ret = xt_register_match(&icmp6_matchstruct);
2303 if (ret < 0) 2262 if (ret < 0)
2304 goto err4; 2263 goto err4;
2305 2264
@@ -2308,15 +2267,13 @@ static int __init ip6_tables_init(void)
2308 if (ret < 0) 2267 if (ret < 0)
2309 goto err5; 2268 goto err5;
2310 2269
2311 printk(KERN_INFO "ip6_tables: (C) 2000-2006 Netfilter Core Team\n"); 2270 pr_info("(C) 2000-2006 Netfilter Core Team\n");
2312 return 0; 2271 return 0;
2313 2272
2314err5: 2273err5:
2315 xt_unregister_match(&icmp6_matchstruct); 2274 xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
2316err4: 2275err4:
2317 xt_unregister_target(&ip6t_error_target); 2276 xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
2318err3:
2319 xt_unregister_target(&ip6t_standard_target);
2320err2: 2277err2:
2321 unregister_pernet_subsys(&ip6_tables_net_ops); 2278 unregister_pernet_subsys(&ip6_tables_net_ops);
2322err1: 2279err1:
@@ -2327,10 +2284,8 @@ static void __exit ip6_tables_fini(void)
2327{ 2284{
2328 nf_unregister_sockopt(&ip6t_sockopts); 2285 nf_unregister_sockopt(&ip6t_sockopts);
2329 2286
2330 xt_unregister_match(&icmp6_matchstruct); 2287 xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
2331 xt_unregister_target(&ip6t_error_target); 2288 xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
2332 xt_unregister_target(&ip6t_standard_target);
2333
2334 unregister_pernet_subsys(&ip6_tables_net_ops); 2289 unregister_pernet_subsys(&ip6_tables_net_ops);
2335} 2290}
2336 2291
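Folding the standard and error targets (and the icmp6 match) into arrays lets one xt_register_targets()/xt_register_matches() call replace three separate registrations and collapses the err2..err5 unwind ladder: the batched helpers unregister any already-registered prefix themselves when a later element fails, so each goto label only has to undo whole batches. The resulting init pattern:

	ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
	if (ret < 0)
		goto err2;			/* nothing is left half-registered */
	ret = xt_register_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
	if (ret < 0)
		goto err4;			/* targets still need unwinding */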
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index b285fdf19050..0a07ae7b933f 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -9,9 +9,8 @@
9 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
11 */ 11 */
12 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/moduleparam.h>
15#include <linux/skbuff.h> 14#include <linux/skbuff.h>
16#include <linux/if_arp.h> 15#include <linux/if_arp.h>
17#include <linux/ip.h> 16#include <linux/ip.h>
@@ -374,11 +373,61 @@ static void dump_packet(const struct nf_loginfo *info,
374 printk("MARK=0x%x ", skb->mark); 373 printk("MARK=0x%x ", skb->mark);
375} 374}
376 375
376static void dump_mac_header(const struct nf_loginfo *info,
377 const struct sk_buff *skb)
378{
379 struct net_device *dev = skb->dev;
380 unsigned int logflags = 0;
381
382 if (info->type == NF_LOG_TYPE_LOG)
383 logflags = info->u.log.logflags;
384
385 if (!(logflags & IP6T_LOG_MACDECODE))
386 goto fallback;
387
388 switch (dev->type) {
389 case ARPHRD_ETHER:
390 printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
391 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
392 ntohs(eth_hdr(skb)->h_proto));
393 return;
394 default:
395 break;
396 }
397
398fallback:
399 printk("MAC=");
400 if (dev->hard_header_len &&
401 skb->mac_header != skb->network_header) {
402 const unsigned char *p = skb_mac_header(skb);
403 unsigned int len = dev->hard_header_len;
404 unsigned int i;
405
406 if (dev->type == ARPHRD_SIT &&
407 (p -= ETH_HLEN) < skb->head)
408 p = NULL;
409
410 if (p != NULL) {
411 printk("%02x", *p++);
412 for (i = 1; i < len; i++)
413 printk(":%02x", p[i]);
414 }
415 printk(" ");
416
417 if (dev->type == ARPHRD_SIT) {
418 const struct iphdr *iph =
419 (struct iphdr *)skb_mac_header(skb);
420 printk("TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr);
421 }
422 } else
423 printk(" ");
424}
425
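The new dump_mac_header() keeps the colon-separated hex dump as the fallback but, when a rule sets IP6T_LOG_MACDECODE, decodes an Ethernet header into named fields. Illustrative output for an IPv6-over-Ethernet frame, with made-up addresses (the raw form dumps the on-wire header, destination bytes first, ending in the 86:dd ethertype):

	MAC=00:11:22:33:44:55:66:77:88:99:aa:bb:86:dd                    (default)
	MACSRC=66:77:88:99:aa:bb MACDST=00:11:22:33:44:55 MACPROTO=86dd  (MACDECODE)

Note also that default_loginfo's level changes from 0 (KERN_EMERG) to 5 (KERN_NOTICE) just below, a saner default for packet logging.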
377static struct nf_loginfo default_loginfo = { 426static struct nf_loginfo default_loginfo = {
378 .type = NF_LOG_TYPE_LOG, 427 .type = NF_LOG_TYPE_LOG,
379 .u = { 428 .u = {
380 .log = { 429 .log = {
381 .level = 0, 430 .level = 5,
382 .logflags = NF_LOG_MASK, 431 .logflags = NF_LOG_MASK,
383 }, 432 },
384 }, 433 },
@@ -401,35 +450,10 @@ ip6t_log_packet(u_int8_t pf,
401 prefix, 450 prefix,
402 in ? in->name : "", 451 in ? in->name : "",
403 out ? out->name : ""); 452 out ? out->name : "");
404 if (in && !out) {
405 unsigned int len;
406 /* MAC logging for input chain only. */
407 printk("MAC=");
408 if (skb->dev && (len = skb->dev->hard_header_len) &&
409 skb->mac_header != skb->network_header) {
410 const unsigned char *p = skb_mac_header(skb);
411 int i;
412
413 if (skb->dev->type == ARPHRD_SIT &&
414 (p -= ETH_HLEN) < skb->head)
415 p = NULL;
416
417 if (p != NULL) {
418 for (i = 0; i < len; i++)
419 printk("%02x%s", p[i],
420 i == len - 1 ? "" : ":");
421 }
422 printk(" ");
423 453
424 if (skb->dev->type == ARPHRD_SIT) { 454 /* MAC logging for input path only. */
425 const struct iphdr *iph = 455 if (in && !out)
426 (struct iphdr *)skb_mac_header(skb); 456 dump_mac_header(loginfo, skb);
427 printk("TUNNEL=%pI4->%pI4 ",
428 &iph->saddr, &iph->daddr);
429 }
430 } else
431 printk(" ");
432 }
433 457
434 dump_packet(loginfo, skb, skb_network_offset(skb), 1); 458 dump_packet(loginfo, skb, skb_network_offset(skb), 1);
435 printk("\n"); 459 printk("\n");
@@ -437,7 +461,7 @@ ip6t_log_packet(u_int8_t pf,
437} 461}
438 462
439static unsigned int 463static unsigned int
440log_tg6(struct sk_buff *skb, const struct xt_target_param *par) 464log_tg6(struct sk_buff *skb, const struct xt_action_param *par)
441{ 465{
442 const struct ip6t_log_info *loginfo = par->targinfo; 466 const struct ip6t_log_info *loginfo = par->targinfo;
443 struct nf_loginfo li; 467 struct nf_loginfo li;
@@ -452,20 +476,19 @@ log_tg6(struct sk_buff *skb, const struct xt_target_param *par)
452} 476}
453 477
454 478
455static bool log_tg6_check(const struct xt_tgchk_param *par) 479static int log_tg6_check(const struct xt_tgchk_param *par)
456{ 480{
457 const struct ip6t_log_info *loginfo = par->targinfo; 481 const struct ip6t_log_info *loginfo = par->targinfo;
458 482
459 if (loginfo->level >= 8) { 483 if (loginfo->level >= 8) {
460 pr_debug("LOG: level %u >= 8\n", loginfo->level); 484 pr_debug("level %u >= 8\n", loginfo->level);
461 return false; 485 return -EINVAL;
462 } 486 }
463 if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { 487 if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
464 pr_debug("LOG: prefix term %i\n", 488 pr_debug("prefix not null-terminated\n");
465 loginfo->prefix[sizeof(loginfo->prefix)-1]); 489 return -EINVAL;
466 return false;
467 } 490 }
468 return true; 491 return 0;
469} 492}
470 493
471static struct xt_target log_tg6_reg __read_mostly = { 494static struct xt_target log_tg6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 39b50c3768e8..2933396e0281 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -14,6 +14,7 @@
14 * as published by the Free Software Foundation; either version 14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version. 15 * 2 of the License, or (at your option) any later version.
16 */ 16 */
17#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
17 18
18#include <linux/gfp.h> 19#include <linux/gfp.h>
19#include <linux/module.h> 20#include <linux/module.h>
@@ -50,7 +51,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
50 51
51 if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || 52 if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
52 (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { 53 (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
53 pr_debug("ip6t_REJECT: addr is not unicast.\n"); 54 pr_debug("addr is not unicast.\n");
54 return; 55 return;
55 } 56 }
56 57
@@ -58,7 +59,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
58 tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto); 59 tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto);
59 60
60 if ((tcphoff < 0) || (tcphoff > oldskb->len)) { 61 if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
61 pr_debug("ip6t_REJECT: Can't get TCP header.\n"); 62 pr_debug("Cannot get TCP header.\n");
62 return; 63 return;
63 } 64 }
64 65
@@ -66,7 +67,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
66 67
67 /* IP header checks: fragment, too short. */ 68 /* IP header checks: fragment, too short. */
68 if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { 69 if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
69 pr_debug("ip6t_REJECT: proto(%d) != IPPROTO_TCP, " 70 pr_debug("proto(%d) != IPPROTO_TCP, "
70 "or too short. otcplen = %d\n", 71 "or too short. otcplen = %d\n",
71 proto, otcplen); 72 proto, otcplen);
72 return; 73 return;
@@ -77,14 +78,14 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
77 78
78 /* No RST for RST. */ 79 /* No RST for RST. */
79 if (otcph.rst) { 80 if (otcph.rst) {
80 pr_debug("ip6t_REJECT: RST is set\n"); 81 pr_debug("RST is set\n");
81 return; 82 return;
82 } 83 }
83 84
84 /* Check checksum. */ 85 /* Check checksum. */
85 if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, 86 if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP,
86 skb_checksum(oldskb, tcphoff, otcplen, 0))) { 87 skb_checksum(oldskb, tcphoff, otcplen, 0))) {
87 pr_debug("ip6t_REJECT: TCP checksum is invalid\n"); 88 pr_debug("TCP checksum is invalid\n");
88 return; 89 return;
89 } 90 }
90 91
@@ -96,9 +97,11 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
96 fl.fl_ip_dport = otcph.source; 97 fl.fl_ip_dport = otcph.source;
97 security_skb_classify_flow(oldskb, &fl); 98 security_skb_classify_flow(oldskb, &fl);
98 dst = ip6_route_output(net, NULL, &fl); 99 dst = ip6_route_output(net, NULL, &fl);
99 if (dst == NULL) 100 if (dst == NULL || dst->error) {
101 dst_release(dst);
100 return; 102 return;
101 if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) 103 }
104 if (xfrm_lookup(net, &dst, &fl, NULL, 0))
102 return; 105 return;
103 106
104 hh_len = (dst->dev->hard_header_len + 15)&~15; 107 hh_len = (dst->dev->hard_header_len + 15)&~15;
@@ -108,7 +111,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
108 111
109 if (!nskb) { 112 if (!nskb) {
110 if (net_ratelimit()) 113 if (net_ratelimit())
111 printk("ip6t_REJECT: Can't alloc skb\n"); 114 pr_debug("cannot alloc skb\n");
112 dst_release(dst); 115 dst_release(dst);
113 return; 116 return;
114 } 117 }
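The reordering in send_reset() also plugs a dst leak: ip6_route_output() practically never returns NULL, but it can return an entry with ->error set, and the old code bailed out without dropping that reference. Folding both failure modes into one branch works because dst_release() tolerates a NULL argument:

	dst = ip6_route_output(net, NULL, &fl);
	if (dst == NULL || dst->error) {
		dst_release(dst);	/* NULL-safe; drops the ->error dst's ref */
		return;
	}
	if (xfrm_lookup(net, &dst, &fl, NULL, 0))
		return;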
@@ -174,15 +177,12 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
174} 177}
175 178
176static unsigned int 179static unsigned int
177reject_tg6(struct sk_buff *skb, const struct xt_target_param *par) 180reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
178{ 181{
179 const struct ip6t_reject_info *reject = par->targinfo; 182 const struct ip6t_reject_info *reject = par->targinfo;
180 struct net *net = dev_net((par->in != NULL) ? par->in : par->out); 183 struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
181 184
182 pr_debug("%s: medium point\n", __func__); 185 pr_debug("%s: medium point\n", __func__);
183 /* WARNING: This code causes reentry within ip6tables.
184 This means that the ip6tables jump stack is now crap. We
185 must return an absolute verdict. --RR */
186 switch (reject->with) { 186 switch (reject->with) {
187 case IP6T_ICMP6_NO_ROUTE: 187 case IP6T_ICMP6_NO_ROUTE:
188 send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); 188 send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum);
@@ -207,30 +207,30 @@ reject_tg6(struct sk_buff *skb, const struct xt_target_param *par)
207 break; 207 break;
208 default: 208 default:
209 if (net_ratelimit()) 209 if (net_ratelimit())
210 printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with); 210 pr_info("case %u not handled yet\n", reject->with);
211 break; 211 break;
212 } 212 }
213 213
214 return NF_DROP; 214 return NF_DROP;
215} 215}
216 216
217static bool reject_tg6_check(const struct xt_tgchk_param *par) 217static int reject_tg6_check(const struct xt_tgchk_param *par)
218{ 218{
219 const struct ip6t_reject_info *rejinfo = par->targinfo; 219 const struct ip6t_reject_info *rejinfo = par->targinfo;
220 const struct ip6t_entry *e = par->entryinfo; 220 const struct ip6t_entry *e = par->entryinfo;
221 221
222 if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { 222 if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
223 printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); 223 pr_info("ECHOREPLY is not supported.\n");
224 return false; 224 return -EINVAL;
225 } else if (rejinfo->with == IP6T_TCP_RESET) { 225 } else if (rejinfo->with == IP6T_TCP_RESET) {
226 /* Must specify that it's a TCP packet */ 226 /* Must specify that it's a TCP packet */
227 if (e->ipv6.proto != IPPROTO_TCP || 227 if (e->ipv6.proto != IPPROTO_TCP ||
228 (e->ipv6.invflags & XT_INV_PROTO)) { 228 (e->ipv6.invflags & XT_INV_PROTO)) {
229 printk("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); 229 pr_info("TCP_RESET illegal for non-tcp\n");
230 return false; 230 return -EINVAL;
231 } 231 }
232 } 232 }
233 return true; 233 return 0;
234} 234}
235 235
236static struct xt_target reject_tg6_reg __read_mostly = { 236static struct xt_target reject_tg6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index ac0b7c629d78..89cccc5a9c92 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -6,7 +6,7 @@
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/ip.h> 12#include <linux/ip.h>
@@ -29,14 +29,14 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
29{ 29{
30 bool r; 30 bool r;
31 31
32 pr_debug("ah spi_match:%c 0x%x <= 0x%x <= 0x%x", 32 pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
33 invert ? '!' : ' ', min, spi, max); 33 invert ? '!' : ' ', min, spi, max);
34 r = (spi >= min && spi <= max) ^ invert; 34 r = (spi >= min && spi <= max) ^ invert;
35 pr_debug(" result %s\n", r ? "PASS" : "FAILED"); 35 pr_debug(" result %s\n", r ? "PASS" : "FAILED");
36 return r; 36 return r;
37} 37}
38 38
39static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 39static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
40{ 40{
41 struct ip_auth_hdr _ah; 41 struct ip_auth_hdr _ah;
42 const struct ip_auth_hdr *ah; 42 const struct ip_auth_hdr *ah;
@@ -48,13 +48,13 @@ static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
48 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL); 48 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL);
49 if (err < 0) { 49 if (err < 0) {
50 if (err != -ENOENT) 50 if (err != -ENOENT)
51 *par->hotdrop = true; 51 par->hotdrop = true;
52 return false; 52 return false;
53 } 53 }
54 54
55 ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); 55 ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
56 if (ah == NULL) { 56 if (ah == NULL) {
57 *par->hotdrop = true; 57 par->hotdrop = true;
58 return false; 58 return false;
59 } 59 }
60 60
@@ -87,15 +87,15 @@ static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
87 !(ahinfo->hdrres && ah->reserved); 87 !(ahinfo->hdrres && ah->reserved);
88} 88}
89 89
90static bool ah_mt6_check(const struct xt_mtchk_param *par) 90static int ah_mt6_check(const struct xt_mtchk_param *par)
91{ 91{
92 const struct ip6t_ah *ahinfo = par->matchinfo; 92 const struct ip6t_ah *ahinfo = par->matchinfo;
93 93
94 if (ahinfo->invflags & ~IP6T_AH_INV_MASK) { 94 if (ahinfo->invflags & ~IP6T_AH_INV_MASK) {
95 pr_debug("ip6t_ah: unknown flags %X\n", ahinfo->invflags); 95 pr_debug("unknown flags %X\n", ahinfo->invflags);
96 return false; 96 return -EINVAL;
97 } 97 }
98 return true; 98 return 0;
99} 99}
100 100
101static struct xt_match ah_mt6_reg __read_mostly = { 101static struct xt_match ah_mt6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index ca287f6d2bce..aab0706908c5 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -20,14 +20,14 @@ MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 20MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
21 21
22static bool 22static bool
23eui64_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 23eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par)
24{ 24{
25 unsigned char eui64[8]; 25 unsigned char eui64[8];
26 26
27 if (!(skb_mac_header(skb) >= skb->head && 27 if (!(skb_mac_header(skb) >= skb->head &&
28 skb_mac_header(skb) + ETH_HLEN <= skb->data) && 28 skb_mac_header(skb) + ETH_HLEN <= skb->data) &&
29 par->fragoff != 0) { 29 par->fragoff != 0) {
30 *par->hotdrop = true; 30 par->hotdrop = true;
31 return false; 31 return false;
32 } 32 }
33 33
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 7b91c2598ed5..eda898fda6ca 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -6,7 +6,7 @@
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/ipv6.h> 12#include <linux/ipv6.h>
@@ -27,7 +27,7 @@ static inline bool
27id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) 27id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
28{ 28{
29 bool r; 29 bool r;
30 pr_debug("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', 30 pr_debug("id_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ',
31 min, id, max); 31 min, id, max);
32 r = (id >= min && id <= max) ^ invert; 32 r = (id >= min && id <= max) ^ invert;
33 pr_debug(" result %s\n", r ? "PASS" : "FAILED"); 33 pr_debug(" result %s\n", r ? "PASS" : "FAILED");
@@ -35,7 +35,7 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
35} 35}
36 36
37static bool 37static bool
38frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 38frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
39{ 39{
40 struct frag_hdr _frag; 40 struct frag_hdr _frag;
41 const struct frag_hdr *fh; 41 const struct frag_hdr *fh;
@@ -46,13 +46,13 @@ frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
46 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL); 46 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL);
47 if (err < 0) { 47 if (err < 0) {
48 if (err != -ENOENT) 48 if (err != -ENOENT)
49 *par->hotdrop = true; 49 par->hotdrop = true;
50 return false; 50 return false;
51 } 51 }
52 52
53 fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); 53 fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
54 if (fh == NULL) { 54 if (fh == NULL) {
55 *par->hotdrop = true; 55 par->hotdrop = true;
56 return false; 56 return false;
57 } 57 }
58 58
@@ -102,15 +102,15 @@ frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
102 (ntohs(fh->frag_off) & IP6_MF)); 102 (ntohs(fh->frag_off) & IP6_MF));
103} 103}
104 104
105static bool frag_mt6_check(const struct xt_mtchk_param *par) 105static int frag_mt6_check(const struct xt_mtchk_param *par)
106{ 106{
107 const struct ip6t_frag *fraginfo = par->matchinfo; 107 const struct ip6t_frag *fraginfo = par->matchinfo;
108 108
109 if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { 109 if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
110 pr_debug("ip6t_frag: unknown flags %X\n", fraginfo->invflags); 110 pr_debug("unknown flags %X\n", fraginfo->invflags);
111 return false; 111 return -EINVAL;
112 } 112 }
113 return true; 113 return 0;
114} 114}
115 115
116static struct xt_match frag_mt6_reg __read_mostly = { 116static struct xt_match frag_mt6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index cbe8dec9744b..59df051eaef6 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -6,7 +6,7 @@
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/ipv6.h> 12#include <linux/ipv6.h>
@@ -41,8 +41,10 @@ MODULE_ALIAS("ip6t_dst");
41 * 5 -> RTALERT 2 x x 41 * 5 -> RTALERT 2 x x
42 */ 42 */
43 43
44static struct xt_match hbh_mt6_reg[] __read_mostly;
45
44static bool 46static bool
45hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 47hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
46{ 48{
47 struct ipv6_opt_hdr _optsh; 49 struct ipv6_opt_hdr _optsh;
48 const struct ipv6_opt_hdr *oh; 50 const struct ipv6_opt_hdr *oh;
@@ -58,16 +60,18 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
58 unsigned int optlen; 60 unsigned int optlen;
59 int err; 61 int err;
60 62
61 err = ipv6_find_hdr(skb, &ptr, par->match->data, NULL); 63 err = ipv6_find_hdr(skb, &ptr,
64 (par->match == &hbh_mt6_reg[0]) ?
65 NEXTHDR_HOP : NEXTHDR_DEST, NULL);
62 if (err < 0) { 66 if (err < 0) {
63 if (err != -ENOENT) 67 if (err != -ENOENT)
64 *par->hotdrop = true; 68 par->hotdrop = true;
65 return false; 69 return false;
66 } 70 }
67 71
68 oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); 72 oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
69 if (oh == NULL) { 73 if (oh == NULL) {
70 *par->hotdrop = true; 74 par->hotdrop = true;
71 return false; 75 return false;
72 } 76 }
73 77
@@ -141,11 +145,11 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
141 } 145 }
142 146
143 /* Step to the next */ 147 /* Step to the next */
144 pr_debug("len%04X \n", optlen); 148 pr_debug("len%04X\n", optlen);
145 149
146 if ((ptr > skb->len - optlen || hdrlen < optlen) && 150 if ((ptr > skb->len - optlen || hdrlen < optlen) &&
147 temp < optinfo->optsnr - 1) { 151 temp < optinfo->optsnr - 1) {
148 pr_debug("new pointer is too large! \n"); 152 pr_debug("new pointer is too large!\n");
149 break; 153 break;
150 } 154 }
151 ptr += optlen; 155 ptr += optlen;
@@ -160,32 +164,32 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
160 return false; 164 return false;
161} 165}
162 166
163static bool hbh_mt6_check(const struct xt_mtchk_param *par) 167static int hbh_mt6_check(const struct xt_mtchk_param *par)
164{ 168{
165 const struct ip6t_opts *optsinfo = par->matchinfo; 169 const struct ip6t_opts *optsinfo = par->matchinfo;
166 170
167 if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { 171 if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
168 pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags); 172 pr_debug("unknown flags %X\n", optsinfo->invflags);
169 return false; 173 return -EINVAL;
170 } 174 }
171 175
172 if (optsinfo->flags & IP6T_OPTS_NSTRICT) { 176 if (optsinfo->flags & IP6T_OPTS_NSTRICT) {
173 pr_debug("ip6t_opts: Not strict - not implemented"); 177 pr_debug("Not strict - not implemented");
174 return false; 178 return -EINVAL;
175 } 179 }
176 180
177 return true; 181 return 0;
178} 182}
179 183
180static struct xt_match hbh_mt6_reg[] __read_mostly = { 184static struct xt_match hbh_mt6_reg[] __read_mostly = {
181 { 185 {
186 /* Note, hbh_mt6 relies on the order of hbh_mt6_reg */
182 .name = "hbh", 187 .name = "hbh",
183 .family = NFPROTO_IPV6, 188 .family = NFPROTO_IPV6,
184 .match = hbh_mt6, 189 .match = hbh_mt6,
185 .matchsize = sizeof(struct ip6t_opts), 190 .matchsize = sizeof(struct ip6t_opts),
186 .checkentry = hbh_mt6_check, 191 .checkentry = hbh_mt6_check,
187 .me = THIS_MODULE, 192 .me = THIS_MODULE,
188 .data = NEXTHDR_HOP,
189 }, 193 },
190 { 194 {
191 .name = "dst", 195 .name = "dst",
@@ -194,7 +198,6 @@ static struct xt_match hbh_mt6_reg[] __read_mostly = {
194 .matchsize = sizeof(struct ip6t_opts), 198 .matchsize = sizeof(struct ip6t_opts),
195 .checkentry = hbh_mt6_check, 199 .checkentry = hbh_mt6_check,
196 .me = THIS_MODULE, 200 .me = THIS_MODULE,
197 .data = NEXTHDR_DEST,
198 }, 201 },
199}; 202};
200 203
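With the per-match .data cookie gone, hbh_mt6() distinguishes its two registrations by pointer identity, which is what the new ordering comment in the array is guarding: slot 0 must remain "hbh" and slot 1 "dst". The trick leans on the forward declaration added at the top of the file:

	static struct xt_match hbh_mt6_reg[] __read_mostly;	/* forward declaration */

	/* inside hbh_mt6(): choose the extension header by which slot matched */
	err = ipv6_find_hdr(skb, &ptr,
			    (par->match == &hbh_mt6_reg[0]) ?
			    NEXTHDR_HOP : NEXTHDR_DEST, NULL);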
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 91490ad9302c..54bd9790603f 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match");
27MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); 27MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
28 28
29static bool 29static bool
30ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 30ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
31{ 31{
32 const struct ip6t_ipv6header_info *info = par->matchinfo; 32 const struct ip6t_ipv6header_info *info = par->matchinfo;
33 unsigned int temp; 33 unsigned int temp;
@@ -118,16 +118,16 @@ ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
118 } 118 }
119} 119}
120 120
121static bool ipv6header_mt6_check(const struct xt_mtchk_param *par) 121static int ipv6header_mt6_check(const struct xt_mtchk_param *par)
122{ 122{
123 const struct ip6t_ipv6header_info *info = par->matchinfo; 123 const struct ip6t_ipv6header_info *info = par->matchinfo;
124 124
125 /* invflags is 0 or 0xff in hard mode */ 125 /* invflags is 0 or 0xff in hard mode */
126 if ((!info->modeflag) && info->invflags != 0x00 && 126 if ((!info->modeflag) && info->invflags != 0x00 &&
127 info->invflags != 0xFF) 127 info->invflags != 0xFF)
128 return false; 128 return -EINVAL;
129 129
130 return true; 130 return 0;
131} 131}
132 132
133static struct xt_match ipv6header_mt6_reg __read_mostly = { 133static struct xt_match ipv6header_mt6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index aafe4e66577b..0c90c66b1992 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -11,6 +11,7 @@
11 * Based on net/netfilter/xt_tcpudp.c 11 * Based on net/netfilter/xt_tcpudp.c
12 * 12 *
13 */ 13 */
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14#include <linux/types.h> 15#include <linux/types.h>
15#include <linux/module.h> 16#include <linux/module.h>
16#include <net/ip.h> 17#include <net/ip.h>
@@ -24,12 +25,6 @@
24MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match"); 25MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match");
25MODULE_LICENSE("GPL"); 26MODULE_LICENSE("GPL");
26 27
27#ifdef DEBUG_IP_FIREWALL_USER
28#define duprintf(format, args...) printk(format , ## args)
29#else
30#define duprintf(format, args...)
31#endif
32
33/* Returns 1 if the type is matched by the range, 0 otherwise */ 28/* Returns 1 if the type is matched by the range, 0 otherwise */
34static inline bool 29static inline bool
35type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) 30type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
@@ -37,7 +32,7 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
37 return (type >= min && type <= max) ^ invert; 32 return (type >= min && type <= max) ^ invert;
38} 33}
39 34
40static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 35static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
41{ 36{
42 struct ip6_mh _mh; 37 struct ip6_mh _mh;
43 const struct ip6_mh *mh; 38 const struct ip6_mh *mh;
@@ -51,15 +46,15 @@ static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
51 if (mh == NULL) { 46 if (mh == NULL) {
52 /* We've been asked to examine this packet, and we 47 /* We've been asked to examine this packet, and we
53 can't. Hence, no choice but to drop. */ 48 can't. Hence, no choice but to drop. */
54 duprintf("Dropping evil MH tinygram.\n"); 49 pr_debug("Dropping evil MH tinygram.\n");
55 *par->hotdrop = true; 50 par->hotdrop = true;
56 return false; 51 return false;
57 } 52 }
58 53
59 if (mh->ip6mh_proto != IPPROTO_NONE) { 54 if (mh->ip6mh_proto != IPPROTO_NONE) {
60 duprintf("Dropping invalid MH Payload Proto: %u\n", 55 pr_debug("Dropping invalid MH Payload Proto: %u\n",
61 mh->ip6mh_proto); 56 mh->ip6mh_proto);
62 *par->hotdrop = true; 57 par->hotdrop = true;
63 return false; 58 return false;
64 } 59 }
65 60
@@ -67,12 +62,12 @@ static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
67 !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); 62 !!(mhinfo->invflags & IP6T_MH_INV_TYPE));
68} 63}
69 64
70static bool mh_mt6_check(const struct xt_mtchk_param *par) 65static int mh_mt6_check(const struct xt_mtchk_param *par)
71{ 66{
72 const struct ip6t_mh *mhinfo = par->matchinfo; 67 const struct ip6t_mh *mhinfo = par->matchinfo;
73 68
74 /* Must specify no unknown invflags */ 69 /* Must specify no unknown invflags */
75 return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); 70 return (mhinfo->invflags & ~IP6T_MH_INV_MASK) ? -EINVAL : 0;
76} 71}
77 72
78static struct xt_match mh_mt6_reg __read_mostly = { 73static struct xt_match mh_mt6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index b77307fc8743..d8488c50a8e0 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -6,7 +6,7 @@
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/ipv6.h> 12#include <linux/ipv6.h>
@@ -29,14 +29,14 @@ static inline bool
29segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) 29segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
30{ 30{
31 bool r; 31 bool r;
32 pr_debug("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x", 32 pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n",
33 invert ? '!' : ' ', min, id, max); 33 invert ? '!' : ' ', min, id, max);
34 r = (id >= min && id <= max) ^ invert; 34 r = (id >= min && id <= max) ^ invert;
35 pr_debug(" result %s\n", r ? "PASS" : "FAILED"); 35 pr_debug(" result %s\n", r ? "PASS" : "FAILED");
36 return r; 36 return r;
37} 37}
38 38
39static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 39static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
40{ 40{
41 struct ipv6_rt_hdr _route; 41 struct ipv6_rt_hdr _route;
42 const struct ipv6_rt_hdr *rh; 42 const struct ipv6_rt_hdr *rh;
@@ -52,13 +52,13 @@ static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
52 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL); 52 err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL);
53 if (err < 0) { 53 if (err < 0) {
54 if (err != -ENOENT) 54 if (err != -ENOENT)
55 *par->hotdrop = true; 55 par->hotdrop = true;
56 return false; 56 return false;
57 } 57 }
58 58
59 rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); 59 rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
60 if (rh == NULL) { 60 if (rh == NULL) {
61 *par->hotdrop = true; 61 par->hotdrop = true;
62 return false; 62 return false;
63 } 63 }
64 64
@@ -183,23 +183,23 @@ static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
183 return false; 183 return false;
184} 184}
185 185
186static bool rt_mt6_check(const struct xt_mtchk_param *par) 186static int rt_mt6_check(const struct xt_mtchk_param *par)
187{ 187{
188 const struct ip6t_rt *rtinfo = par->matchinfo; 188 const struct ip6t_rt *rtinfo = par->matchinfo;
189 189
190 if (rtinfo->invflags & ~IP6T_RT_INV_MASK) { 190 if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
191 pr_debug("ip6t_rt: unknown flags %X\n", rtinfo->invflags); 191 pr_debug("unknown flags %X\n", rtinfo->invflags);
192 return false; 192 return -EINVAL;
193 } 193 }
194 if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) && 194 if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
195 (!(rtinfo->flags & IP6T_RT_TYP) || 195 (!(rtinfo->flags & IP6T_RT_TYP) ||
196 (rtinfo->rt_type != 0) || 196 (rtinfo->rt_type != 0) ||
197 (rtinfo->invflags & IP6T_RT_INV_TYP))) { 197 (rtinfo->invflags & IP6T_RT_INV_TYP))) {
198 pr_debug("`--rt-type 0' required before `--rt-0-*'"); 198 pr_debug("`--rt-type 0' required before `--rt-0-*'");
199 return false; 199 return -EINVAL;
200 } 200 }
201 201
202 return true; 202 return 0;
203} 203}
204 204
205static struct xt_match rt_mt6_reg __read_mostly = { 205static struct xt_match rt_mt6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index d6fc9aff3163..c9e37c8fd62c 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -81,7 +81,7 @@ static int __init ip6table_filter_init(void)
81 int ret; 81 int ret;
82 82
83 if (forward < 0 || forward > NF_MAX_VERDICT) { 83 if (forward < 0 || forward > NF_MAX_VERDICT) {
84 printk("iptables forward must be 0 or 1\n"); 84 pr_err("iptables forward must be 0 or 1\n");
85 return -EINVAL; 85 return -EINVAL;
86 } 86 }
87 87
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 6a102b57f356..679a0a3b7b3c 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -43,7 +43,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
43 if (skb->len < sizeof(struct iphdr) || 43 if (skb->len < sizeof(struct iphdr) ||
44 ip_hdrlen(skb) < sizeof(struct iphdr)) { 44 ip_hdrlen(skb) < sizeof(struct iphdr)) {
45 if (net_ratelimit()) 45 if (net_ratelimit())
46 printk("ip6t_hook: happy cracking.\n"); 46 pr_warning("ip6t_hook: happy cracking.\n");
47 return NF_ACCEPT; 47 return NF_ACCEPT;
48 } 48 }
49#endif 49#endif
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 996c3f41fecd..ff43461704be 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -280,7 +280,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
280 /* root is playing with raw sockets. */ 280 /* root is playing with raw sockets. */
281 if (skb->len < sizeof(struct ipv6hdr)) { 281 if (skb->len < sizeof(struct ipv6hdr)) {
282 if (net_ratelimit()) 282 if (net_ratelimit())
283 printk("ipv6_conntrack_local: packet too short\n"); 283 pr_notice("ipv6_conntrack_local: packet too short\n");
284 return NF_ACCEPT; 284 return NF_ACCEPT;
285 } 285 }
286 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); 286 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn);
@@ -406,37 +406,37 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
406 406
407 ret = nf_ct_frag6_init(); 407 ret = nf_ct_frag6_init();
408 if (ret < 0) { 408 if (ret < 0) {
409 printk("nf_conntrack_ipv6: can't initialize frag6.\n"); 409 pr_err("nf_conntrack_ipv6: can't initialize frag6.\n");
410 return ret; 410 return ret;
411 } 411 }
412 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6); 412 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
413 if (ret < 0) { 413 if (ret < 0) {
414 printk("nf_conntrack_ipv6: can't register tcp.\n"); 414 pr_err("nf_conntrack_ipv6: can't register tcp.\n");
415 goto cleanup_frag6; 415 goto cleanup_frag6;
416 } 416 }
417 417
418 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6); 418 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
419 if (ret < 0) { 419 if (ret < 0) {
420 printk("nf_conntrack_ipv6: can't register udp.\n"); 420 pr_err("nf_conntrack_ipv6: can't register udp.\n");
421 goto cleanup_tcp; 421 goto cleanup_tcp;
422 } 422 }
423 423
424 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6); 424 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6);
425 if (ret < 0) { 425 if (ret < 0) {
426 printk("nf_conntrack_ipv6: can't register icmpv6.\n"); 426 pr_err("nf_conntrack_ipv6: can't register icmpv6.\n");
427 goto cleanup_udp; 427 goto cleanup_udp;
428 } 428 }
429 429
430 ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6); 430 ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
431 if (ret < 0) { 431 if (ret < 0) {
432 printk("nf_conntrack_ipv6: can't register ipv6\n"); 432 pr_err("nf_conntrack_ipv6: can't register ipv6\n");
433 goto cleanup_icmpv6; 433 goto cleanup_icmpv6;
434 } 434 }
435 435
436 ret = nf_register_hooks(ipv6_conntrack_ops, 436 ret = nf_register_hooks(ipv6_conntrack_ops,
437 ARRAY_SIZE(ipv6_conntrack_ops)); 437 ARRAY_SIZE(ipv6_conntrack_ops));
438 if (ret < 0) { 438 if (ret < 0) {
439 printk("nf_conntrack_ipv6: can't register pre-routing defrag " 439 pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
440 "hook.\n"); 440 "hook.\n");
441 goto cleanup_ipv6; 441 goto cleanup_ipv6;
442 } 442 }
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 9be81776415e..1df3c8b6bf47 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
208 type = icmp6h->icmp6_type - 130; 208 type = icmp6h->icmp6_type - 130;
209 if (type >= 0 && type < sizeof(noct_valid_new) && 209 if (type >= 0 && type < sizeof(noct_valid_new) &&
210 noct_valid_new[type]) { 210 noct_valid_new[type]) {
211 skb->nfct = &nf_conntrack_untracked.ct_general; 211 skb->nfct = &nf_ct_untracked_get()->ct_general;
212 skb->nfctinfo = IP_CT_NEW; 212 skb->nfctinfo = IP_CT_NEW;
213 nf_conntrack_get(skb->nfct); 213 nf_conntrack_get(skb->nfct);
214 return NF_ACCEPT; 214 return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index dd5b9bd61c62..13ef5bc05cf5 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -114,10 +114,8 @@ static void nf_skb_free(struct sk_buff *skb)
114} 114}
115 115
116/* Memory Tracking Functions. */ 116/* Memory Tracking Functions. */
117static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) 117static void frag_kfree_skb(struct sk_buff *skb)
118{ 118{
119 if (work)
120 *work -= skb->truesize;
121 atomic_sub(skb->truesize, &nf_init_frags.mem); 119 atomic_sub(skb->truesize, &nf_init_frags.mem);
122 nf_skb_free(skb); 120 nf_skb_free(skb);
123 kfree_skb(skb); 121 kfree_skb(skb);
@@ -201,7 +199,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
201 int offset, end; 199 int offset, end;
202 200
203 if (fq->q.last_in & INET_FRAG_COMPLETE) { 201 if (fq->q.last_in & INET_FRAG_COMPLETE) {
204 pr_debug("Allready completed\n"); 202 pr_debug("Already completed\n");
205 goto err; 203 goto err;
206 } 204 }
207 205
@@ -271,6 +269,11 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
271 * in the chain of fragments so far. We must know where to put 269 * in the chain of fragments so far. We must know where to put
272 * this fragment, right? 270 * this fragment, right?
273 */ 271 */
272 prev = fq->q.fragments_tail;
273 if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
274 next = NULL;
275 goto found;
276 }
274 prev = NULL; 277 prev = NULL;
275 for (next = fq->q.fragments; next != NULL; next = next->next) { 278 for (next = fq->q.fragments; next != NULL; next = next->next) {
276 if (NFCT_FRAG6_CB(next)->offset >= offset) 279 if (NFCT_FRAG6_CB(next)->offset >= offset)
@@ -278,6 +281,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
278 prev = next; 281 prev = next;
279 } 282 }
280 283
284found:
281 /* We found where to put this one. Check for overlap with 285 /* We found where to put this one. Check for overlap with
282 * preceding fragment, and, if needed, align things so that 286 * preceding fragment, and, if needed, align things so that
283 * any overlaps are eliminated. 287 * any overlaps are eliminated.
@@ -335,7 +339,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
335 fq->q.fragments = next; 339 fq->q.fragments = next;
336 340
337 fq->q.meat -= free_it->len; 341 fq->q.meat -= free_it->len;
338 frag_kfree_skb(free_it, NULL); 342 frag_kfree_skb(free_it);
339 } 343 }
340 } 344 }
341 345
@@ -343,6 +347,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
343 347
344 /* Insert this fragment in the chain of fragments. */ 348 /* Insert this fragment in the chain of fragments. */
345 skb->next = next; 349 skb->next = next;
350 if (!next)
351 fq->q.fragments_tail = skb;
346 if (prev) 352 if (prev)
347 prev->next = skb; 353 prev->next = skb;
348 else 354 else
@@ -442,7 +448,6 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
442 skb_shinfo(head)->frag_list = head->next; 448 skb_shinfo(head)->frag_list = head->next;
443 skb_reset_transport_header(head); 449 skb_reset_transport_header(head);
444 skb_push(head, head->data - skb_network_header(head)); 450 skb_push(head, head->data - skb_network_header(head));
445 atomic_sub(head->truesize, &nf_init_frags.mem);
446 451
447 for (fp=head->next; fp; fp = fp->next) { 452 for (fp=head->next; fp; fp = fp->next) {
448 head->data_len += fp->len; 453 head->data_len += fp->len;
@@ -452,8 +457,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
452 else if (head->ip_summed == CHECKSUM_COMPLETE) 457 else if (head->ip_summed == CHECKSUM_COMPLETE)
453 head->csum = csum_add(head->csum, fp->csum); 458 head->csum = csum_add(head->csum, fp->csum);
454 head->truesize += fp->truesize; 459 head->truesize += fp->truesize;
455 atomic_sub(fp->truesize, &nf_init_frags.mem);
456 } 460 }
461 atomic_sub(head->truesize, &nf_init_frags.mem);
457 462
458 head->next = NULL; 463 head->next = NULL;
459 head->dev = dev; 464 head->dev = dev;
@@ -467,6 +472,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
467 head->csum); 472 head->csum);
468 473
469 fq->q.fragments = NULL; 474 fq->q.fragments = NULL;
475 fq->q.fragments_tail = NULL;
470 476
471 /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ 477 /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
472 fp = skb_shinfo(head)->frag_list; 478 fp = skb_shinfo(head)->frag_list;
@@ -644,7 +650,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
644 s2 = s->next; 650 s2 = s->next;
645 s->next = NULL; 651 s->next = NULL;
646 652
647 NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn, 653 NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn,
648 NF_IP6_PRI_CONNTRACK_DEFRAG + 1); 654 NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
649 s = s2; 655 s = s2;
650 } 656 }
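The reassembly changes above teach the fragment queue to remember its tail: fragments overwhelmingly arrive in order, so checking fq->q.fragments_tail first makes the common-case insert O(1) and skips the ordered walk from the head. Whatever lands at the end must update the tail, and nf_ct_frag6_reasm() clears it together with the head. Condensed from the two hunks:

	/* fast path: an in-order fragment belongs at the tail */
	prev = fq->q.fragments_tail;
	if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
		next = NULL;
		goto found;
	}
	/* slow path: walk fq->q.fragments from the head to find the slot */
found:
	skb->next = next;
	if (!next)				/* appended at the end */
		fq->q.fragments_tail = skb;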
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 58344c0fbd13..d082eaeefa25 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -97,6 +97,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
97 SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), 97 SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS),
98 SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), 98 SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
99 SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS), 99 SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
100 SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
100 SNMP_MIB_SENTINEL 101 SNMP_MIB_SENTINEL
101}; 102};
102 103
@@ -167,24 +168,34 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **mib)
167 i & 0x100 ? "Out" : "In", i & 0xff); 168 i & 0x100 ? "Out" : "In", i & 0xff);
168 seq_printf(seq, "%-32s\t%lu\n", name, val); 169 seq_printf(seq, "%-32s\t%lu\n", name, val);
169 } 170 }
170 return;
171} 171}
172 172
173static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib, 173static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib,
174 const struct snmp_mib *itemlist) 174 const struct snmp_mib *itemlist)
175{ 175{
176 int i; 176 int i;
177 for (i=0; itemlist[i].name; i++) 177
178 for (i = 0; itemlist[i].name; i++)
178 seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, 179 seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
179 snmp_fold_field(mib, itemlist[i].entry)); 180 snmp_fold_field(mib, itemlist[i].entry));
180} 181}
181 182
183static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
184 const struct snmp_mib *itemlist, size_t syncpoff)
185{
186 int i;
187
188 for (i = 0; itemlist[i].name; i++)
189 seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
190 snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
191}
192
182static int snmp6_seq_show(struct seq_file *seq, void *v) 193static int snmp6_seq_show(struct seq_file *seq, void *v)
183{ 194{
184 struct net *net = (struct net *)seq->private; 195 struct net *net = (struct net *)seq->private;
185 196
186 snmp6_seq_show_item(seq, (void __percpu **)net->mib.ipv6_statistics, 197 snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
187 snmp6_ipstats_list); 198 snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
188 snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, 199 snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
189 snmp6_icmp6_list); 200 snmp6_icmp6_list);
190 snmp6_seq_show_icmpv6msg(seq, 201 snmp6_seq_show_icmpv6msg(seq,
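
The new snmp6_seq_show_item64() prints 64-bit MIB counters; snmp_fold_field64() takes the offset of the per-CPU syncp member so that, on hosts where a 64-bit load is not atomic, the reader can detect a racing writer and retry. The folding itself is just a per-CPU sum; here is a self-contained sketch of that part only (NCPUS, the layout, and the names are assumptions of this illustration, not the kernel API):

#include <stdint.h>
#include <stdio.h>

#define NCPUS 4				/* assumption for the sketch */

struct mib { uint64_t entries[4]; };	/* one block per CPU */

/* Toy fold: sum entry 'idx' across all per-CPU blocks. The real
 * snmp_fold_field64() additionally brackets each per-CPU read with a
 * sequence-counter retry loop found via the syncp offset. */
static uint64_t fold_field64(const struct mib percpu[NCPUS], int idx)
{
	uint64_t sum = 0;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		sum += percpu[cpu].entries[idx];
	return sum;
}

int main(void)
{
	struct mib stats[NCPUS] = { {{1}}, {{2}}, {{3}}, {{4}} };

	printf("Ip6InReceives\t%llu\n",
	       (unsigned long long)fold_field64(stats, 0));	/* prints 10 */
	return 0;
}
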
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8763b1a0814a..e677937a07fc 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -381,7 +381,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
 	}

 	/* Charge it to the socket. */
-	if (sock_queue_rcv_skb(sk, skb) < 0) {
+	if (ip_queue_rcv_skb(sk, skb) < 0) {
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
@@ -461,6 +461,9 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
461 if (flags & MSG_ERRQUEUE) 461 if (flags & MSG_ERRQUEUE)
462 return ipv6_recv_error(sk, msg, len); 462 return ipv6_recv_error(sk, msg, len);
463 463
464 if (np->rxpmtu && np->rxopt.bits.rxpmtu)
465 return ipv6_recv_rxpmtu(sk, msg, len);
466
464 skb = skb_recv_datagram(sk, flags, noblock, &err); 467 skb = skb_recv_datagram(sk, flags, noblock, &err);
465 if (!skb) 468 if (!skb)
466 goto out; 469 goto out;
@@ -599,31 +602,33 @@ out:
 }

 static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
-			struct flowi *fl, struct rt6_info *rt,
+			struct flowi *fl, struct dst_entry **dstp,
 			unsigned int flags)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6hdr *iph;
 	struct sk_buff *skb;
 	int err;
+	struct rt6_info *rt = (struct rt6_info *)*dstp;

-	if (length > rt->u.dst.dev->mtu) {
-		ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
+	if (length > rt->dst.dev->mtu) {
+		ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu);
 		return -EMSGSIZE;
 	}
 	if (flags&MSG_PROBE)
 		goto out;

 	skb = sock_alloc_send_skb(sk,
-				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
 				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));

 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set(skb, &rt->dst);
+	*dstp = NULL;

 	skb_put(skb, length);
 	skb_reset_network_header(skb);
@@ -637,8 +642,8 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 		goto error_fault;

 	IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
-	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		      dst_output);
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+		      rt->dst.dev, dst_output);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
@@ -722,7 +727,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 {
 	struct ipv6_txoptions opt_space;
 	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
-	struct in6_addr *daddr, *final_p = NULL, final;
+	struct in6_addr *daddr, *final_p, final;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct raw6_sock *rp = raw6_sk(sk);
@@ -733,6 +738,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	int addr_len = msg->msg_namelen;
 	int hlimit = -1;
 	int tclass = -1;
+	int dontfrag = -1;
 	u16 proto;
 	int err;

@@ -811,7 +817,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);

-		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit,
+					&tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -842,13 +849,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
842 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) 849 if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
843 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 850 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
844 851
845 /* merge ip6_build_xmit from ip6_output */ 852 final_p = fl6_update_dst(&fl, opt, &final);
846 if (opt && opt->srcrt) {
847 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
848 ipv6_addr_copy(&final, &fl.fl6_dst);
849 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
850 final_p = &final;
851 }
852 853
853 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) 854 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
854 fl.oif = np->mcast_oif; 855 fl.oif = np->mcast_oif;
@@ -880,17 +881,20 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
880 if (tclass < 0) 881 if (tclass < 0)
881 tclass = np->tclass; 882 tclass = np->tclass;
882 883
884 if (dontfrag < 0)
885 dontfrag = np->dontfrag;
886
883 if (msg->msg_flags&MSG_CONFIRM) 887 if (msg->msg_flags&MSG_CONFIRM)
884 goto do_confirm; 888 goto do_confirm;
885 889
886back_from_confirm: 890back_from_confirm:
887 if (inet->hdrincl) { 891 if (inet->hdrincl)
888 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); 892 err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags);
889 } else { 893 else {
890 lock_sock(sk); 894 lock_sock(sk);
891 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, 895 err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
892 len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, 896 len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst,
893 msg->msg_flags); 897 msg->msg_flags, dontfrag);
894 898
895 if (err) 899 if (err)
896 ip6_flush_pending_frames(sk); 900 ip6_flush_pending_frames(sk);
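
This file (and net/ipv6/syncookies.c further down) used to open-code the routing-header fixup that fl6_update_dst() now centralizes: when a type 0 routing header is present, the flow must target the first listed hop, while the real destination is remembered as the "final" hop for the last segment. A sketch of the equivalent logic with simplified stand-in types, not the kernel's:

struct in6addr_s { unsigned char s6_addr[16]; };
struct rt0_hdr_s { struct in6addr_s addr[1]; };
struct txopt_s { struct rt0_hdr_s *srcrt; };
struct flow_s { struct in6addr_s dst; };

/* Returns NULL when there is no source route, otherwise stores the
 * real destination in *final and redirects the flow to the first hop,
 * mirroring the removed open-coded block above. */
static struct in6addr_s *
update_dst(struct flow_s *fl, const struct txopt_s *opt,
	   struct in6addr_s *final)
{
	if (!opt || !opt->srcrt)
		return NULL;		/* no source route: no final hop */

	*final = fl->dst;		/* remember the real destination */
	fl->dst = opt->srcrt->addr[0];	/* route via the first hop */
	return final;
}
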
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6d4292ff5854..545c4141b755 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -150,11 +150,8 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a)
 EXPORT_SYMBOL(ip6_frag_match);

 /* Memory Tracking Functions. */
-static inline void frag_kfree_skb(struct netns_frags *nf,
-		struct sk_buff *skb, int *work)
+static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
 {
-	if (work)
-		*work -= skb->truesize;
 	atomic_sub(skb->truesize, &nf->mem);
 	kfree_skb(skb);
 }
@@ -336,6 +333,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	 * in the chain of fragments so far. We must know where to put
 	 * this fragment, right?
 	 */
+	prev = fq->q.fragments_tail;
+	if (!prev || FRAG6_CB(prev)->offset < offset) {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
 	for(next = fq->q.fragments; next != NULL; next = next->next) {
 		if (FRAG6_CB(next)->offset >= offset)
@@ -343,6 +345,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		prev = next;
 	}

+found:
 	/* We found where to put this one. Check for overlap with
 	 * preceding fragment, and, if needed, align things so that
 	 * any overlaps are eliminated.
@@ -392,7 +395,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 				fq->q.fragments = next;

 			fq->q.meat -= free_it->len;
-			frag_kfree_skb(fq->q.net, free_it, NULL);
+			frag_kfree_skb(fq->q.net, free_it);
 		}
 	}
@@ -400,6 +403,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,

 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		fq->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
@@ -466,6 +471,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			goto out_oom;

 		fp->next = head->next;
+		if (!fp->next)
+			fq->q.fragments_tail = fp;
 		prev->next = fp;

 		skb_morph(head, fq->q.fragments);
@@ -524,7 +531,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &fq->q.net->mem);

 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -534,8 +540,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &fq->q.net->mem);
 	}
+	atomic_sub(head->truesize, &fq->q.net->mem);

 	head->next = NULL;
 	head->dev = dev;
@@ -553,6 +559,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
 	fq->q.fragments = NULL;
+	fq->q.fragments_tail = NULL;
 	return 1;

 out_oversize:
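
The reassembly.c hunks above add a fq->q.fragments_tail pointer so that fragments arriving in order (the common case) are appended in O(1) instead of walking the whole chain, and they keep the tail consistent at every insertion and reset. A minimal sketch of the fast path with toy types, not the kernel's:

#include <stddef.h>

struct frag { struct frag *next; int offset; };
struct fragq { struct frag *fragments, *fragments_tail; };

static void frag_queue_insert(struct fragq *q, struct frag *skb)
{
	struct frag *prev = q->fragments_tail, *next = NULL;

	if (!prev || prev->offset < skb->offset)
		goto found;			/* in order: append at the tail */

	for (prev = NULL, next = q->fragments; next; next = next->next) {
		if (next->offset >= skb->offset)
			break;			/* insert before 'next' */
		prev = next;
	}
found:
	skb->next = next;
	if (!next)
		q->fragments_tail = skb;	/* new last element */
	if (prev)
		prev->next = skb;
	else
		q->fragments = skb;
}
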
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 05ebd7833043..8f2d0400cf8a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -126,16 +126,14 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 };

 static struct rt6_info ip6_null_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt = ATOMIC_INIT(1),
-			.__use = 1,
-			.obsolete = -1,
-			.error = -ENETUNREACH,
-			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
-			.input = ip6_pkt_discard,
-			.output = ip6_pkt_discard_out,
-		}
+	.dst = {
+		.__refcnt = ATOMIC_INIT(1),
+		.__use = 1,
+		.obsolete = -1,
+		.error = -ENETUNREACH,
+		.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
+		.input = ip6_pkt_discard,
+		.output = ip6_pkt_discard_out,
 	},
 	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol = RTPROT_KERNEL,
@@ -149,16 +147,14 @@ static int ip6_pkt_prohibit(struct sk_buff *skb);
 static int ip6_pkt_prohibit_out(struct sk_buff *skb);

 static struct rt6_info ip6_prohibit_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt = ATOMIC_INIT(1),
-			.__use = 1,
-			.obsolete = -1,
-			.error = -EACCES,
-			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
-			.input = ip6_pkt_prohibit,
-			.output = ip6_pkt_prohibit_out,
-		}
+	.dst = {
+		.__refcnt = ATOMIC_INIT(1),
+		.__use = 1,
+		.obsolete = -1,
+		.error = -EACCES,
+		.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
+		.input = ip6_pkt_prohibit,
+		.output = ip6_pkt_prohibit_out,
 	},
 	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol = RTPROT_KERNEL,
@@ -167,16 +163,14 @@ static struct rt6_info ip6_prohibit_entry_template = {
 };

 static struct rt6_info ip6_blk_hole_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt = ATOMIC_INIT(1),
-			.__use = 1,
-			.obsolete = -1,
-			.error = -EINVAL,
-			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
-			.input = dst_discard,
-			.output = dst_discard,
-		}
+	.dst = {
+		.__refcnt = ATOMIC_INIT(1),
+		.__use = 1,
+		.obsolete = -1,
+		.error = -EINVAL,
+		.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
+		.input = dst_discard,
+		.output = dst_discard,
 	},
 	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol = RTPROT_KERNEL,
@@ -249,7 +243,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 	if (!oif && ipv6_addr_any(saddr))
 		goto out;

-	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
+	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 		struct net_device *dev = sprt->rt6i_dev;

 		if (oif) {
@@ -316,7 +310,6 @@ static void rt6_probe(struct rt6_info *rt)
 #else
 static inline void rt6_probe(struct rt6_info *rt)
 {
-	return;
 }
 #endif

@@ -408,10 +401,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,

 	match = NULL;
 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
-	     rt = rt->u.dst.rt6_next)
+	     rt = rt->dst.rt6_next)
 		match = find_match(rt, oif, strict, &mpri, match);
 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
-	     rt = rt->u.dst.rt6_next)
+	     rt = rt->dst.rt6_next)
 		match = find_match(rt, oif, strict, &mpri, match);

 	return match;
@@ -433,7 +426,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)

 	if (!match &&
 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
-		struct rt6_info *next = rt0->u.dst.rt6_next;
+		struct rt6_info *next = rt0->dst.rt6_next;

 		/* no entries matched; do round-robin */
 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -518,7 +511,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 			rt->rt6i_expires = jiffies + HZ * lifetime;
 			rt->rt6i_flags |= RTF_EXPIRES;
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	}
 	return 0;
 }
@@ -556,7 +549,7 @@ restart:
 	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
 	BACKTRACK(net, &fl->fl6_src);
 out:
-	dst_use(&rt->u.dst, jiffies);
+	dst_use(&rt->dst, jiffies);
 	read_unlock_bh(&table->tb6_lock);
 	return rt;

@@ -644,7 +637,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
-		rt->u.dst.flags |= DST_HOST;
+		rt->dst.flags |= DST_HOST;

 #ifdef CONFIG_IPV6_SUBTREES
 		if (rt->rt6i_src.plen && saddr) {
@@ -678,7 +671,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "Neighbour table overflow.\n");
-			dst_free(&rt->u.dst);
+			dst_free(&rt->dst);
 			return NULL;
 		}
 		rt->rt6i_nexthop = neigh;
@@ -695,7 +688,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
-		rt->u.dst.flags |= DST_HOST;
+		rt->dst.flags |= DST_HOST;
 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
 	}
 	return rt;
@@ -727,7 +720,7 @@ restart:
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;

-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);

 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
@@ -740,10 +733,10 @@ restart:
 #endif
 	}

-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	rt = nrt ? : net->ipv6.ip6_null_entry;

-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	if (nrt) {
 		err = ip6_ins_rt(nrt);
 		if (!err)
@@ -757,7 +750,7 @@ restart:
 	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route. Relookup.
 	 */
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto relookup;

 out:
@@ -765,11 +758,11 @@ out:
 		reachable = 0;
 		goto restart_2;
 	}
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 out2:
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.__use++;
+	rt->dst.lastuse = jiffies;
+	rt->dst.__use++;

 	return rt;
 }
@@ -815,7 +808,7 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
 {
 	int flags = 0;

-	if (fl->oif || rt6_need_strict(&fl->fl6_dst))
+	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
 		flags |= RT6_LOOKUP_F_IFACE;

 	if (!ipv6_addr_any(&fl->fl6_src))
@@ -836,15 +829,15 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
 	struct dst_entry *new = NULL;

 	if (rt) {
-		new = &rt->u.dst;
+		new = &rt->dst;

 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;

-		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
-		new->dev = ort->u.dst.dev;
+		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		new->dev = ort->dst.dev;
 		if (new->dev)
 			dev_hold(new->dev);
 		rt->rt6i_idev = ort->rt6i_idev;
@@ -913,7 +906,7 @@ static void ip6_link_failure(struct sk_buff *skb)
 	rt = (struct rt6_info *) skb_dst(skb);
 	if (rt) {
 		if (rt->rt6i_flags&RTF_CACHE) {
-			dst_set_expires(&rt->u.dst, 0);
+			dst_set_expires(&rt->dst, 0);
 			rt->rt6i_flags |= RTF_EXPIRES;
 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
 			rt->rt6i_node->fn_sernum = -1;
@@ -987,14 +980,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	rt->rt6i_dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_nexthop = neigh;
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
-	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.output = ip6_output;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
+	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.output = ip6_output;

 #if 0	/* there's no chance to use these for ndisc */
-	rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
+	rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
 					? DST_HOST
 					: 0;
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
@@ -1002,14 +995,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 #endif

 	spin_lock_bh(&icmp6_dst_lock);
-	rt->u.dst.next = icmp6_dst_gc_list;
-	icmp6_dst_gc_list = &rt->u.dst;
+	rt->dst.next = icmp6_dst_gc_list;
+	icmp6_dst_gc_list = &rt->dst;
 	spin_unlock_bh(&icmp6_dst_lock);

 	fib6_force_start_gc(net);

 out:
-	return &rt->u.dst;
+	return &rt->dst;
 }

 int icmp6_dst_gc(void)
@@ -1091,11 +1084,11 @@ static int ipv6_get_mtu(struct net_device *dev)
 	int mtu = IPV6_MIN_MTU;
 	struct inet6_dev *idev;

-	idev = in6_dev_get(dev);
-	if (idev) {
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
+	if (idev)
 		mtu = idev->cnf.mtu6;
-		in6_dev_put(idev);
-	}
+	rcu_read_unlock();
 	return mtu;
 }

@@ -1104,12 +1097,15 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
 	int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
 	if (hoplimit < 0) {
 		struct net_device *dev = dst->dev;
-		struct inet6_dev *idev = in6_dev_get(dev);
-		if (idev) {
+		struct inet6_dev *idev;
+
+		rcu_read_lock();
+		idev = __in6_dev_get(dev);
+		if (idev)
 			hoplimit = idev->cnf.hop_limit;
-			in6_dev_put(idev);
-		} else
+		else
 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
+		rcu_read_unlock();
 	}
 	return hoplimit;
 }
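
The two hunks above (ipv6_get_mtu() and ip6_dst_hoplimit()) convert a take-reference/read/drop-reference pattern into an RCU read-side section: under rcu_read_lock(), __in6_dev_get() can be used without touching the inet6_dev refcount at all, so the in6_dev_get()/in6_dev_put() pair disappears. A self-contained sketch of the pattern; the empty stubs stand in for the real RCU primitives and are assumptions of this illustration:

#include <assert.h>

struct idev_s { int hop_limit; };

static struct idev_s the_idev = { .hop_limit = 64 };

/* Stubs standing in for rcu_read_lock()/rcu_read_unlock(); the real
 * primitives pin the looked-up object against freeing for the
 * duration of the critical section. */
static void rcu_lock_(void) { }
static void rcu_unlock_(void) { }
static struct idev_s *rcu_lookup_idev(int have) { return have ? &the_idev : 0; }

static int read_hop_limit(int have_idev, int fallback)
{
	struct idev_s *idev;
	int hoplimit;

	rcu_lock_();			/* no refcount taken ... */
	idev = rcu_lookup_idev(have_idev);
	hoplimit = idev ? idev->hop_limit : fallback;
	rcu_unlock_();			/* ... and none to drop */
	return hoplimit;
}

int main(void)
{
	assert(read_hop_limit(1, 255) == 64);
	assert(read_hop_limit(0, 255) == 255);
	return 0;
}
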
@@ -1160,7 +1156,7 @@ int ip6_route_add(struct fib6_config *cfg)
 		goto out;
 	}

-	rt->u.dst.obsolete = -1;
+	rt->dst.obsolete = -1;
 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
 				0;
@@ -1172,16 +1168,16 @@ int ip6_route_add(struct fib6_config *cfg)
 	addr_type = ipv6_addr_type(&cfg->fc_dst);

 	if (addr_type & IPV6_ADDR_MULTICAST)
-		rt->u.dst.input = ip6_mc_input;
+		rt->dst.input = ip6_mc_input;
 	else
-		rt->u.dst.input = ip6_forward;
+		rt->dst.input = ip6_forward;

-	rt->u.dst.output = ip6_output;
+	rt->dst.output = ip6_output;

 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
 	rt->rt6i_dst.plen = cfg->fc_dst_len;
 	if (rt->rt6i_dst.plen == 128)
-		rt->u.dst.flags = DST_HOST;
+		rt->dst.flags = DST_HOST;

 #ifdef CONFIG_IPV6_SUBTREES
 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1209,9 +1205,9 @@ int ip6_route_add(struct fib6_config *cfg)
 				goto out;
 			}
 		}
-		rt->u.dst.output = ip6_pkt_discard_out;
-		rt->u.dst.input = ip6_pkt_discard;
-		rt->u.dst.error = -ENETUNREACH;
+		rt->dst.output = ip6_pkt_discard_out;
+		rt->dst.input = ip6_pkt_discard;
+		rt->dst.error = -ENETUNREACH;
 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
 		goto install_route;
 	}
@@ -1245,7 +1241,7 @@ int ip6_route_add(struct fib6_config *cfg)
 			goto out;
 		if (dev) {
 			if (dev != grt->rt6i_dev) {
-				dst_release(&grt->u.dst);
+				dst_release(&grt->dst);
 				goto out;
 			}
 		} else {
@@ -1256,7 +1252,7 @@ int ip6_route_add(struct fib6_config *cfg)
 		}
 		if (!(grt->rt6i_flags&RTF_GATEWAY))
 			err = 0;
-		dst_release(&grt->u.dst);
+		dst_release(&grt->dst);

 		if (err)
 			goto out;
@@ -1295,18 +1291,18 @@ install_route:
 					goto out;
 				}

-				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
+				rt->dst.metrics[type - 1] = nla_get_u32(nla);
 			}
 		}
 	}

-	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
-		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	if (!dst_mtu(&rt->u.dst))
-		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
-	if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.dev = dev;
+	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
+		rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	if (!dst_mtu(&rt->dst))
+		rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
+	if (!dst_metric(&rt->dst, RTAX_ADVMSS))
+		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;

@@ -1320,7 +1316,7 @@ out:
 	if (idev)
 		in6_dev_put(idev);
 	if (rt)
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 	return err;
 }

@@ -1337,7 +1333,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	write_lock_bh(&table->tb6_lock);

 	err = fib6_del(rt, info);
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);

 	write_unlock_bh(&table->tb6_lock);

@@ -1370,7 +1366,7 @@ static int ip6_route_del(struct fib6_config *cfg)
 			 &cfg->fc_src, cfg->fc_src_len);

 	if (fn) {
-		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 			if (cfg->fc_ifindex &&
 			    (rt->rt6i_dev == NULL ||
 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
@@ -1380,7 +1376,7 @@ static int ip6_route_del(struct fib6_config *cfg)
 				continue;
 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
 				continue;
-			dst_hold(&rt->u.dst);
+			dst_hold(&rt->dst);
 			read_unlock_bh(&table->tb6_lock);

 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
@@ -1422,7 +1418,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	read_lock_bh(&table->tb6_lock);
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 restart:
-	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 		/*
 		 * Current route is on-link; redirect is always invalid.
 		 *
@@ -1446,7 +1442,7 @@ restart:
 	rt = net->ipv6.ip6_null_entry;
 	BACKTRACK(net, &fl->fl6_src);
 out:
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);

 	read_unlock_bh(&table->tb6_lock);

@@ -1514,10 +1510,10 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	 * Look, redirects are sent only in response to data packets,
 	 * so that this nexthop apparently is reachable. --ANK
 	 */
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);

 	/* Duplicate redirect: silently ignore. */
-	if (neigh == rt->u.dst.neighbour)
+	if (neigh == rt->dst.neighbour)
 		goto out;

 	nrt = ip6_rt_copy(rt);
@@ -1530,20 +1526,20 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,

 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
 	nrt->rt6i_dst.plen = 128;
-	nrt->u.dst.flags |= DST_HOST;
+	nrt->dst.flags |= DST_HOST;

 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
 	nrt->rt6i_nexthop = neigh_clone(neigh);
 	/* Reset pmtu, it may be better */
-	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
-	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
-						dst_mtu(&nrt->u.dst));
+	nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
+	nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
+						dst_mtu(&nrt->dst));

 	if (ip6_ins_rt(nrt))
 		goto out;

-	netevent.old = &rt->u.dst;
-	netevent.new = &nrt->u.dst;
+	netevent.old = &rt->dst;
+	netevent.new = &nrt->dst;
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

 	if (rt->rt6i_flags&RTF_CACHE) {
@@ -1552,8 +1548,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	}

 out:
-	dst_release(&rt->u.dst);
-	return;
+	dst_release(&rt->dst);
 }

 /*
@@ -1572,7 +1567,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	if (rt == NULL)
 		return;

-	if (pmtu >= dst_mtu(&rt->u.dst))
+	if (pmtu >= dst_mtu(&rt->dst))
 		goto out;

 	if (pmtu < IPV6_MIN_MTU) {
@@ -1590,7 +1585,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	   They are sent only in response to data packets,
 	   so that this nexthop apparently is reachable. --ANK
 	 */
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);

 	/* Host route. If it is static, it would be better
 	   not to override it, but add new one, so that
@@ -1598,10 +1593,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	   would return automatically.
 	 */
 	if (rt->rt6i_flags & RTF_CACHE) {
-		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		rt->dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
-			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-		dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
+			rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
 		goto out;
 	}
@@ -1617,9 +1612,9 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		nrt = rt6_alloc_clone(rt, daddr);

 	if (nrt) {
-		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		nrt->dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
-			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+			nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;

 		/* According to RFC 1981, detecting PMTU increase shouldn't be
 		 * happened within 5 mins, the recommended timer is 10 mins.
@@ -1627,13 +1622,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
 		 * and detecting PMTU increase will be automatically happened.
 		 */
-		dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
+		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

 		ip6_ins_rt(nrt);
 	}
 out:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 }

 /*
@@ -1646,18 +1641,18 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);

 	if (rt) {
-		rt->u.dst.input = ort->u.dst.input;
-		rt->u.dst.output = ort->u.dst.output;
+		rt->dst.input = ort->dst.input;
+		rt->dst.output = ort->dst.output;

-		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
-		rt->u.dst.error = ort->u.dst.error;
-		rt->u.dst.dev = ort->u.dst.dev;
-		if (rt->u.dst.dev)
-			dev_hold(rt->u.dst.dev);
+		memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		rt->dst.error = ort->dst.error;
+		rt->dst.dev = ort->dst.dev;
+		if (rt->dst.dev)
+			dev_hold(rt->dst.dev);
 		rt->rt6i_idev = ort->rt6i_idev;
 		if (rt->rt6i_idev)
 			in6_dev_hold(rt->rt6i_idev);
-		rt->u.dst.lastuse = jiffies;
+		rt->dst.lastuse = jiffies;
 		rt->rt6i_expires = 0;

 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
@@ -1691,14 +1686,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	if (!fn)
 		goto out;

-	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 		if (rt->rt6i_dev->ifindex != ifindex)
 			continue;
 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
 			continue;
 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
 			continue;
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 		break;
 	}
 out:
@@ -1746,14 +1741,14 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
 		return NULL;

 	write_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
+	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
 		if (dev == rt->rt6i_dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
 			break;
 	}
 	if (rt)
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
@@ -1792,9 +1787,9 @@ void rt6_purge_dflt_routers(struct net *net)

 restart:
 	read_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
-			dst_hold(&rt->u.dst);
+			dst_hold(&rt->dst);
 			read_unlock_bh(&table->tb6_lock);
 			ip6_del_rt(rt);
 			goto restart;
@@ -1932,15 +1927,15 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	dev_hold(net->loopback_dev);
 	in6_dev_hold(idev);

-	rt->u.dst.flags = DST_HOST;
-	rt->u.dst.input = ip6_input;
-	rt->u.dst.output = ip6_output;
+	rt->dst.flags = DST_HOST;
+	rt->dst.input = ip6_input;
+	rt->dst.output = ip6_output;
 	rt->rt6i_dev = net->loopback_dev;
 	rt->rt6i_idev = idev;
-	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	rt->u.dst.obsolete = -1;
+	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	rt->dst.obsolete = -1;

 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
 	if (anycast)
@@ -1949,7 +1944,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 		rt->rt6i_flags |= RTF_LOCAL;
 	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 	if (IS_ERR(neigh)) {
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);

 		/* We are casting this because that is the return
 		 * value type. But an errno encoded pointer is the
@@ -1964,7 +1959,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->rt6i_dst.plen = 128;
 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

-	atomic_set(&rt->u.dst.__refcnt, 1);
+	atomic_set(&rt->dst.__refcnt, 1);

 	return rt;
 }
@@ -2035,12 +2030,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	   PMTU discouvery.
 	 */
 	if (rt->rt6i_dev == arg->dev &&
-	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
-	    (dst_mtu(&rt->u.dst) >= arg->mtu ||
-	     (dst_mtu(&rt->u.dst) < arg->mtu &&
-	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
-		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
+	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
+	    (dst_mtu(&rt->dst) >= arg->mtu ||
+	     (dst_mtu(&rt->dst) < arg->mtu &&
+	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
+		rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
+		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
 	}
 	return 0;
 }
@@ -2254,20 +2249,20 @@ static int rt6_fill_node(struct net *net,
 #endif
 		NLA_PUT_U32(skb, RTA_IIF, iif);
 	} else if (dst) {
-		struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
+		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
 		struct in6_addr saddr_buf;
 		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
 				       dst, 0, &saddr_buf) == 0)
 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}

-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto nla_put_failure;

-	if (rt->u.dst.neighbour)
-		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+	if (rt->dst.neighbour)
+		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);

-	if (rt->u.dst.dev)
+	if (rt->dst.dev)
 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
@@ -2279,8 +2274,8 @@ static int rt6_fill_node(struct net *net,
 	else
 		expires = INT_MAX;

-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
-			       expires, rt->u.dst.error) < 0)
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
+			       expires, rt->dst.error) < 0)
 		goto nla_put_failure;

 	return nlmsg_end(skb, nlh);
@@ -2366,7 +2361,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
@@ -2418,12 +2413,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 	struct net *net = dev_net(dev);

 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
-		net->ipv6.ip6_null_entry->u.dst.dev = dev;
+		net->ipv6.ip6_null_entry->dst.dev = dev;
 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-		net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
+		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
-		net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
+		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
 #endif
 	}
@@ -2466,8 +2461,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 		seq_puts(m, "00000000000000000000000000000000");
 	}
 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
-		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
-		   rt->u.dst.__use, rt->rt6i_flags,
+		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
+		   rt->dst.__use, rt->rt6i_flags,
 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
 	return 0;
 }
@@ -2648,9 +2643,9 @@ static int __net_init ip6_route_net_init(struct net *net)
 					   GFP_KERNEL);
 	if (!net->ipv6.ip6_null_entry)
 		goto out_ip6_dst_ops;
-	net->ipv6.ip6_null_entry->u.dst.path =
+	net->ipv6.ip6_null_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_null_entry;
-	net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;

 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2658,18 +2653,18 @@ static int __net_init ip6_route_net_init(struct net *net)
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_prohibit_entry)
 		goto out_ip6_null_entry;
-	net->ipv6.ip6_prohibit_entry->u.dst.path =
+	net->ipv6.ip6_prohibit_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
-	net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;

 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_blk_hole_entry)
 		goto out_ip6_prohibit_entry;
-	net->ipv6.ip6_blk_hole_entry->u.dst.path =
+	net->ipv6.ip6_blk_hole_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
-	net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 #endif

 	net->ipv6.sysctl.flush_delay = 0;
@@ -2744,12 +2739,12 @@ int __init ip6_route_init(void)
 	/* Registering of the loopback is done before this portion of code,
 	 * the loopback reference in rt6_info will not be taken, do it
 	 * manually for init_net */
-	init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
-	init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #endif
 	ret = fib6_init();
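
Nearly every hunk in this file is the same mechanical rename: struct rt6_info previously reached its dst_entry through a union member (rt->u.dst), and the patch embeds the dst_entry directly as the first member (rt->dst). Because it is the first member, the address of the route and the address of its dst_entry coincide, which is what keeps the (struct rt6_info *) casts on dst pointers valid. A self-contained sketch with toy types:

#include <assert.h>
#include <stddef.h>

struct dst_entry_s { int obsolete; struct dst_entry_s *rt6_next; };

struct rt6_info_s {
	struct dst_entry_s dst;	/* was: union { struct dst_entry_s dst; } u; */
	unsigned int rt6i_flags;
};

int main(void)
{
	struct rt6_info_s rt = { .dst = { .obsolete = -1 } };
	struct dst_entry_s *d = &rt.dst;

	/* first-member rule: casting back recovers the enclosing route */
	assert((struct rt6_info_s *)d == &rt);
	assert(offsetof(struct rt6_info_s, dst) == 0);
	return 0;
}
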
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 5abae10cd884..4699cd3c3118 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -249,8 +249,6 @@ failed:
 	return NULL;
 }

-static DEFINE_SPINLOCK(ipip6_prl_lock);
-
 #define for_each_prl_rcu(start)	\
 	for (prl = rcu_dereference(start);	\
 	     prl;	\
@@ -340,7 +338,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
 	if (a->addr == htonl(INADDR_ANY))
 		return -EINVAL;

-	spin_lock(&ipip6_prl_lock);
+	ASSERT_RTNL();

 	for (p = t->prl; p; p = p->next) {
 		if (p->addr == a->addr) {
@@ -370,7 +368,6 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
 	t->prl_count++;
 	rcu_assign_pointer(t->prl, p);
 out:
-	spin_unlock(&ipip6_prl_lock);
 	return err;
 }

@@ -397,7 +394,7 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 	struct ip_tunnel_prl_entry *x, **p;
 	int err = 0;

-	spin_lock(&ipip6_prl_lock);
+	ASSERT_RTNL();

 	if (a && a->addr != htonl(INADDR_ANY)) {
 		for (p = &t->prl; *p; p = &(*p)->next) {
@@ -419,7 +416,6 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 		}
 	}
 out:
-	spin_unlock(&ipip6_prl_lock);
 	return err;
 }

@@ -566,11 +562,9 @@ static int ipip6_rcv(struct sk_buff *skb)
 			kfree_skb(skb);
 			return 0;
 		}
-		tunnel->dev->stats.rx_packets++;
-		tunnel->dev->stats.rx_bytes += skb->len;
-		skb->dev = tunnel->dev;
-		skb_dst_drop(skb);
-		nf_reset(skb);
+
+		skb_tunnel_rx(skb, tunnel->dev);
+
 		ipip6_ecn_decapsulate(iph, skb);
 		netif_rx(skb);
 		rcu_read_unlock();
@@ -718,7 +712,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		stats->tx_carrier_errors++;
 		goto tx_error_icmp;
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;

 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -727,7 +721,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	}

 	if (df) {
-		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

 		if (mtu < 68) {
 			stats->collisions++;
@@ -786,7 +780,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags = 0;
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	/*
 	 * Push down and install the IPIP header.
@@ -835,7 +829,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 				.proto = IPPROTO_IPV6 };
 	struct rtable *rt;
 	if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-		tdev = rt->u.dst.dev;
+		tdev = rt->dst.dev;
 		ip_rt_put(rt);
 	}
 	dev->flags |= IFF_POINTOPOINT;
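
ipip6_rcv() previously open-coded its receive epilogue; the skb_tunnel_rx() call now bundles it, and judging from the removed lines that covers the statistics update, retargeting skb->dev to the tunnel device, dropping the stale outer route, and the nf_reset() of netfilter state. A sketch of what such a helper consolidates, with simplified stand-in types rather than the kernel's:

struct netdev_s { unsigned long rx_packets, rx_bytes; };
struct skb_s { struct netdev_s *dev; unsigned int len; void *dst; };

static void tunnel_rx(struct skb_s *skb, struct netdev_s *tunnel_dev)
{
	tunnel_dev->rx_packets++;	/* was open-coded in each driver */
	tunnel_dev->rx_bytes += skb->len;
	skb->dev = tunnel_dev;		/* packet now "arrives" on the tunnel */
	skb->dst = 0;			/* drop the outer route */
}
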
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 34d1f0690d7e..09fd34f0dbf2 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -27,28 +27,17 @@ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
 #define COOKIEBITS 24	/* Upper bits store count */
 #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)

-/*
- * This table has to be sorted and terminated with (__u16)-1.
- * XXX generate a better table.
- * Unresolved Issues: HIPPI with a 64k MSS is not well supported.
- *
- * Taken directly from ipv4 implementation.
- * Should this list be modified for ipv6 use or is it close enough?
- * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart
- */
+/* Table must be sorted. */
 static __u16 const msstab[] = {
-	64 - 1,
-	256 - 1,
-	512 - 1,
-	536 - 1,
-	1024 - 1,
-	1440 - 1,
-	1460 - 1,
-	4312 - 1,
-	(__u16)-1
+	64,
+	512,
+	536,
+	1280 - 60,
+	1480 - 60,
+	1500 - 60,
+	4460 - 60,
+	9000 - 60,
 };
-/* The number doesn't include the -1 terminator */
-#define NUM_MSS (ARRAY_SIZE(msstab) - 1)

 /*
  * This (misnamed) value is the age of syncookie which is permitted.
@@ -134,9 +123,11 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
134 123
135 tcp_synq_overflow(sk); 124 tcp_synq_overflow(sk);
136 125
137 for (mssind = 0; mss > msstab[mssind + 1]; mssind++) 126 for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
138 ; 127 if (mss >= msstab[mssind])
139 *mssp = msstab[mssind] + 1; 128 break;
129
130 *mssp = msstab[mssind];
140 131
141 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); 132 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
142 133
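
The table now stores real MSS values instead of value-minus-one entries, and the (__u16)-1 sentinel is gone: the encoder scans from the largest entry downward and stops at the first value the peer's advertised MSS can satisfy, with index 0 as the floor. A standalone sketch of the selection:

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const unsigned short msstab_sketch[] = {
        64, 512, 536, 1280 - 60, 1480 - 60, 1500 - 60, 4460 - 60, 9000 - 60,
};

/* Pick the largest table entry not exceeding the peer's MSS. */
static unsigned int pick_mssind(unsigned short mss)
{
        unsigned int mssind;

        for (mssind = ARRAY_SIZE(msstab_sketch) - 1; mssind; mssind--)
                if (mss >= msstab_sketch[mssind])
                        break;
        return mssind;          /* index 0 (MSS 64) is the fallback floor */
}
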
@@ -154,7 +145,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
154 th->source, th->dest, seq, 145 th->source, th->dest, seq,
155 jiffies / (HZ * 60), COUNTER_TRIES); 146 jiffies / (HZ * 60), COUNTER_TRIES);
156 147
157 return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; 148 return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
158} 149}
159 150
160struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) 151struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
@@ -173,8 +164,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
173 int mss; 164 int mss;
174 struct dst_entry *dst; 165 struct dst_entry *dst;
175 __u8 rcv_wscale; 166 __u8 rcv_wscale;
167 bool ecn_ok;
176 168
177 if (!sysctl_tcp_syncookies || !th->ack) 169 if (!sysctl_tcp_syncookies || !th->ack || th->rst)
178 goto out; 170 goto out;
179 171
180 if (tcp_synq_no_recent_overflow(sk) || 172 if (tcp_synq_no_recent_overflow(sk) ||
@@ -189,8 +181,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
189 memset(&tcp_opt, 0, sizeof(tcp_opt)); 181 memset(&tcp_opt, 0, sizeof(tcp_opt));
190 tcp_parse_options(skb, &tcp_opt, &hash_location, 0); 182 tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
191 183
192 if (tcp_opt.saw_tstamp) 184 if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
193 cookie_check_timestamp(&tcp_opt); 185 goto out;
194 186
195 ret = NULL; 187 ret = NULL;
196 req = inet6_reqsk_alloc(&tcp6_request_sock_ops); 188 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
@@ -224,9 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
224 216
225 req->expires = 0UL; 217 req->expires = 0UL;
226 req->retrans = 0; 218 req->retrans = 0;
227 ireq->ecn_ok = 0; 219 ireq->ecn_ok = ecn_ok;
228 ireq->snd_wscale = tcp_opt.snd_wscale; 220 ireq->snd_wscale = tcp_opt.snd_wscale;
229 ireq->rcv_wscale = tcp_opt.rcv_wscale;
230 ireq->sack_ok = tcp_opt.sack_ok; 221 ireq->sack_ok = tcp_opt.sack_ok;
231 ireq->wscale_ok = tcp_opt.wscale_ok; 222 ireq->wscale_ok = tcp_opt.wscale_ok;
232 ireq->tstamp_ok = tcp_opt.saw_tstamp; 223 ireq->tstamp_ok = tcp_opt.saw_tstamp;
@@ -240,17 +231,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
240 * me if there is a preferred way. 231 * me if there is a preferred way.
241 */ 232 */
242 { 233 {
243 struct in6_addr *final_p = NULL, final; 234 struct in6_addr *final_p, final;
244 struct flowi fl; 235 struct flowi fl;
245 memset(&fl, 0, sizeof(fl)); 236 memset(&fl, 0, sizeof(fl));
246 fl.proto = IPPROTO_TCP; 237 fl.proto = IPPROTO_TCP;
247 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); 238 ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
248 if (np->opt && np->opt->srcrt) { 239 final_p = fl6_update_dst(&fl, np->opt, &final);
249 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
250 ipv6_addr_copy(&final, &fl.fl6_dst);
251 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
252 final_p = &final;
253 }
254 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); 240 ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
255 fl.oif = sk->sk_bound_dev_if; 241 fl.oif = sk->sk_bound_dev_if;
256 fl.mark = sk->sk_mark; 242 fl.mark = sk->sk_mark;
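
fl6_update_dst() is the new common helper for the source-routing dance that this file (and tcp_ipv6.c and udp.c below) used to open-code. Its behaviour can be reconstructed from the deleted lines: route toward the type-0 routing header's first hop while remembering the true destination. A sketch under that assumption (the real definition lives elsewhere in the tree):

static struct in6_addr *fl6_update_dst_sketch(struct flowi *fl,
                                              const struct ipv6_txoptions *opt,
                                              struct in6_addr *final)
{
        if (!opt || !opt->srcrt)
                return NULL;                    /* no type-0 routing header */

        ipv6_addr_copy(final, &fl->fl6_dst);    /* remember the real dst    */
        ipv6_addr_copy(&fl->fl6_dst,
                       ((struct rt0_hdr *)opt->srcrt)->addr);
        return final;                           /* non-NULL tells the caller */
}
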
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 075f540ec197..fe6d40418c0b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -75,6 +75,9 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
75 struct request_sock *req); 75 struct request_sock *req);
76 76
77static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 77static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
78static void __tcp_v6_send_check(struct sk_buff *skb,
79 struct in6_addr *saddr,
80 struct in6_addr *daddr);
78 81
79static const struct inet_connection_sock_af_ops ipv6_mapped; 82static const struct inet_connection_sock_af_ops ipv6_mapped;
80static const struct inet_connection_sock_af_ops ipv6_specific; 83static const struct inet_connection_sock_af_ops ipv6_specific;
@@ -126,7 +129,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
126 struct inet_connection_sock *icsk = inet_csk(sk); 129 struct inet_connection_sock *icsk = inet_csk(sk);
127 struct ipv6_pinfo *np = inet6_sk(sk); 130 struct ipv6_pinfo *np = inet6_sk(sk);
128 struct tcp_sock *tp = tcp_sk(sk); 131 struct tcp_sock *tp = tcp_sk(sk);
129 struct in6_addr *saddr = NULL, *final_p = NULL, final; 132 struct in6_addr *saddr = NULL, *final_p, final;
130 struct flowi fl; 133 struct flowi fl;
131 struct dst_entry *dst; 134 struct dst_entry *dst;
132 int addr_type; 135 int addr_type;
@@ -247,12 +250,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
247 fl.fl_ip_dport = usin->sin6_port; 250 fl.fl_ip_dport = usin->sin6_port;
248 fl.fl_ip_sport = inet->inet_sport; 251 fl.fl_ip_sport = inet->inet_sport;
249 252
250 if (np->opt && np->opt->srcrt) { 253 final_p = fl6_update_dst(&fl, np->opt, &final);
251 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
252 ipv6_addr_copy(&final, &fl.fl6_dst);
253 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
254 final_p = &final;
255 }
256 254
257 security_sk_classify_flow(sk, &fl); 255 security_sk_classify_flow(sk, &fl);
258 256
@@ -350,6 +348,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
350 if (sk->sk_state == TCP_CLOSE) 348 if (sk->sk_state == TCP_CLOSE)
351 goto out; 349 goto out;
352 350
351 if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
352 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
353 goto out;
354 }
355
353 tp = tcp_sk(sk); 356 tp = tcp_sk(sk);
354 seq = ntohl(th->seq); 357 seq = ntohl(th->seq);
355 if (sk->sk_state != TCP_LISTEN && 358 if (sk->sk_state != TCP_LISTEN &&
@@ -469,7 +472,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
469 struct ipv6_pinfo *np = inet6_sk(sk); 472 struct ipv6_pinfo *np = inet6_sk(sk);
470 struct sk_buff * skb; 473 struct sk_buff * skb;
471 struct ipv6_txoptions *opt = NULL; 474 struct ipv6_txoptions *opt = NULL;
472 struct in6_addr * final_p = NULL, final; 475 struct in6_addr * final_p, final;
473 struct flowi fl; 476 struct flowi fl;
474 struct dst_entry *dst; 477 struct dst_entry *dst;
475 int err = -1; 478 int err = -1;
@@ -486,12 +489,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
486 security_req_classify_flow(req, &fl); 489 security_req_classify_flow(req, &fl);
487 490
488 opt = np->opt; 491 opt = np->opt;
489 if (opt && opt->srcrt) { 492 final_p = fl6_update_dst(&fl, opt, &final);
490 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
491 ipv6_addr_copy(&final, &fl.fl6_dst);
492 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
493 final_p = &final;
494 }
495 493
496 err = ip6_dst_lookup(sk, &dst, &fl); 494 err = ip6_dst_lookup(sk, &dst, &fl);
497 if (err) 495 if (err)
@@ -503,14 +501,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
503 501
504 skb = tcp_make_synack(sk, dst, req, rvp); 502 skb = tcp_make_synack(sk, dst, req, rvp);
505 if (skb) { 503 if (skb) {
506 struct tcphdr *th = tcp_hdr(skb); 504 __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
507
508 th->check = tcp_v6_check(skb->len,
509 &treq->loc_addr, &treq->rmt_addr,
510 csum_partial(th, skb->len, skb->csum));
511 505
512 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); 506 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
513 err = ip6_xmit(sk, skb, &fl, opt, 0); 507 err = ip6_xmit(sk, skb, &fl, opt);
514 err = net_xmit_eval(err); 508 err = net_xmit_eval(err);
515 } 509 }
516 510
@@ -600,7 +594,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
600 kfree(newkey); 594 kfree(newkey);
601 return -ENOMEM; 595 return -ENOMEM;
602 } 596 }
603 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 597 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
604 } 598 }
605 if (tcp_alloc_md5sig_pool(sk) == NULL) { 599 if (tcp_alloc_md5sig_pool(sk) == NULL) {
606 kfree(newkey); 600 kfree(newkey);
@@ -737,7 +731,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
737 return -ENOMEM; 731 return -ENOMEM;
738 732
739 tp->md5sig_info = p; 733 tp->md5sig_info = p;
740 sk->sk_route_caps &= ~NETIF_F_GSO_MASK; 734 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
741 } 735 }
742 736
743 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 737 newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
@@ -918,22 +912,29 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
918 .twsk_destructor= tcp_twsk_destructor, 912 .twsk_destructor= tcp_twsk_destructor,
919}; 913};
920 914
921static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) 915static void __tcp_v6_send_check(struct sk_buff *skb,
916 struct in6_addr *saddr, struct in6_addr *daddr)
922{ 917{
923 struct ipv6_pinfo *np = inet6_sk(sk);
924 struct tcphdr *th = tcp_hdr(skb); 918 struct tcphdr *th = tcp_hdr(skb);
925 919
926 if (skb->ip_summed == CHECKSUM_PARTIAL) { 920 if (skb->ip_summed == CHECKSUM_PARTIAL) {
927 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); 921 th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
928 skb->csum_start = skb_transport_header(skb) - skb->head; 922 skb->csum_start = skb_transport_header(skb) - skb->head;
929 skb->csum_offset = offsetof(struct tcphdr, check); 923 skb->csum_offset = offsetof(struct tcphdr, check);
930 } else { 924 } else {
931 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 925 th->check = tcp_v6_check(skb->len, saddr, daddr,
932 csum_partial(th, th->doff<<2, 926 csum_partial(th, th->doff << 2,
933 skb->csum)); 927 skb->csum));
934 } 928 }
935} 929}
936 930
931static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
932{
933 struct ipv6_pinfo *np = inet6_sk(sk);
934
935 __tcp_v6_send_check(skb, &np->saddr, &np->daddr);
936}
937
937static int tcp_v6_gso_send_check(struct sk_buff *skb) 938static int tcp_v6_gso_send_check(struct sk_buff *skb)
938{ 939{
939 struct ipv6hdr *ipv6h; 940 struct ipv6hdr *ipv6h;
@@ -946,11 +947,8 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
946 th = tcp_hdr(skb); 947 th = tcp_hdr(skb);
947 948
948 th->check = 0; 949 th->check = 0;
949 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
950 IPPROTO_TCP, 0);
951 skb->csum_start = skb_transport_header(skb) - skb->head;
952 skb->csum_offset = offsetof(struct tcphdr, check);
953 skb->ip_summed = CHECKSUM_PARTIAL; 950 skb->ip_summed = CHECKSUM_PARTIAL;
951 __tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
954 return 0; 952 return 0;
955} 953}
956 954
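
Both branches of __tcp_v6_send_check() above follow the standard offload contract: with CHECKSUM_PARTIAL the software side stores only the complemented pseudo-header sum and records where the device must deposit the final checksum; otherwise it folds the full segment sum itself. The offload half, isolated as a sketch:

/* The CHECKSUM_PARTIAL setup used above, isolated for reference. */
static void tcp_csum_offload_sketch(struct sk_buff *skb, struct tcphdr *th,
                                    __sum16 pseudo_complement)
{
        th->check = pseudo_complement;  /* ~tcp_v6_check(len, saddr, daddr, 0) */
        skb->csum_start = skb_transport_header(skb) - skb->head;
        skb->csum_offset = offsetof(struct tcphdr, check);
}
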
@@ -1047,15 +1045,14 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
1047 } 1045 }
1048#endif 1046#endif
1049 1047
1050 buff->csum = csum_partial(t1, tot_len, 0);
1051
1052 memset(&fl, 0, sizeof(fl)); 1048 memset(&fl, 0, sizeof(fl));
1053 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); 1049 ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
1054 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); 1050 ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
1055 1051
1056 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, 1052 buff->ip_summed = CHECKSUM_PARTIAL;
1057 tot_len, IPPROTO_TCP, 1053 buff->csum = 0;
1058 buff->csum); 1054
1055 __tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst);
1059 1056
1060 fl.proto = IPPROTO_TCP; 1057 fl.proto = IPPROTO_TCP;
1061 fl.oif = inet6_iif(skb); 1058 fl.oif = inet6_iif(skb);
@@ -1070,7 +1067,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
1070 if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) { 1067 if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
1071 if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) { 1068 if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
1072 skb_dst_set(buff, dst); 1069 skb_dst_set(buff, dst);
1073 ip6_xmit(ctl_sk, buff, &fl, NULL, 0); 1070 ip6_xmit(ctl_sk, buff, &fl, NULL);
1074 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); 1071 TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
1075 if (rst) 1072 if (rst)
1076 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); 1073 TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -1160,7 +1157,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1160 } 1157 }
1161 1158
1162#ifdef CONFIG_SYN_COOKIES 1159#ifdef CONFIG_SYN_COOKIES
1163 if (!th->rst && !th->syn && th->ack) 1160 if (!th->syn)
1164 sk = cookie_v6_check(sk, skb); 1161 sk = cookie_v6_check(sk, skb);
1165#endif 1162#endif
1166 return sk; 1163 return sk;
@@ -1233,12 +1230,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1233 goto drop_and_free; 1230 goto drop_and_free;
1234 1231
1235 /* Secret recipe starts with IP addresses */ 1232 /* Secret recipe starts with IP addresses */
1236 d = &ipv6_hdr(skb)->daddr.s6_addr32[0]; 1233 d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
1237 *mess++ ^= *d++; 1234 *mess++ ^= *d++;
1238 *mess++ ^= *d++; 1235 *mess++ ^= *d++;
1239 *mess++ ^= *d++; 1236 *mess++ ^= *d++;
1240 *mess++ ^= *d++; 1237 *mess++ ^= *d++;
1241 d = &ipv6_hdr(skb)->saddr.s6_addr32[0]; 1238 d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
1242 *mess++ ^= *d++; 1239 *mess++ ^= *d++;
1243 *mess++ ^= *d++; 1240 *mess++ ^= *d++;
1244 *mess++ ^= *d++; 1241 *mess++ ^= *d++;
@@ -1272,13 +1269,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1272 treq = inet6_rsk(req); 1269 treq = inet6_rsk(req);
1273 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr); 1270 ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
1274 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr); 1271 ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
1275 if (!want_cookie) 1272 if (!want_cookie || tmp_opt.tstamp_ok)
1276 TCP_ECN_create_request(req, tcp_hdr(skb)); 1273 TCP_ECN_create_request(req, tcp_hdr(skb));
1277 1274
1278 if (want_cookie) { 1275 if (!isn) {
1279 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1280 req->cookie_ts = tmp_opt.tstamp_ok;
1281 } else if (!isn) {
1282 if (ipv6_opt_accepted(sk, skb) || 1276 if (ipv6_opt_accepted(sk, skb) ||
1283 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || 1277 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1284 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { 1278 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1291,8 +1285,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1291 if (!sk->sk_bound_dev_if && 1285 if (!sk->sk_bound_dev_if &&
1292 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) 1286 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1293 treq->iif = inet6_iif(skb); 1287 treq->iif = inet6_iif(skb);
1294 1288 if (!want_cookie) {
1295 isn = tcp_v6_init_sequence(skb); 1289 isn = tcp_v6_init_sequence(skb);
1290 } else {
1291 isn = cookie_v6_init_sequence(sk, skb, &req->mss);
1292 req->cookie_ts = tmp_opt.tstamp_ok;
1293 }
1296 } 1294 }
1297 tcp_rsk(req)->snt_isn = isn; 1295 tcp_rsk(req)->snt_isn = isn;
1298 1296
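
The reorder above means the drop checks run before any sequence number is chosen; only then does the code branch on want_cookie. Condensed from the hunk, the selection now reads:

/* Condensed flow of the hunk above: ISN selection after the reorder. */
if (!isn) {
        /* ... rxopt/ifindex checks elided ... */
        if (!want_cookie) {
                isn = tcp_v6_init_sequence(skb);        /* real ISN          */
        } else {
                isn = cookie_v6_init_sequence(sk, skb, &req->mss);
                req->cookie_ts = tmp_opt.tstamp_ok;     /* ts echoed in cookie */
        }
}
tcp_rsk(req)->snt_isn = isn;
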
@@ -1385,18 +1383,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1385 goto out_overflow; 1383 goto out_overflow;
1386 1384
1387 if (dst == NULL) { 1385 if (dst == NULL) {
1388 struct in6_addr *final_p = NULL, final; 1386 struct in6_addr *final_p, final;
1389 struct flowi fl; 1387 struct flowi fl;
1390 1388
1391 memset(&fl, 0, sizeof(fl)); 1389 memset(&fl, 0, sizeof(fl));
1392 fl.proto = IPPROTO_TCP; 1390 fl.proto = IPPROTO_TCP;
1393 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); 1391 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1394 if (opt && opt->srcrt) { 1392 final_p = fl6_update_dst(&fl, opt, &final);
1395 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1396 ipv6_addr_copy(&final, &fl.fl6_dst);
1397 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1398 final_p = &final;
1399 }
1400 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); 1393 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1401 fl.oif = sk->sk_bound_dev_if; 1394 fl.oif = sk->sk_bound_dev_if;
1402 fl.mark = sk->sk_mark; 1395 fl.mark = sk->sk_mark;
@@ -1676,6 +1669,7 @@ ipv6_pktoptions:
1676static int tcp_v6_rcv(struct sk_buff *skb) 1669static int tcp_v6_rcv(struct sk_buff *skb)
1677{ 1670{
1678 struct tcphdr *th; 1671 struct tcphdr *th;
1672 struct ipv6hdr *hdr;
1679 struct sock *sk; 1673 struct sock *sk;
1680 int ret; 1674 int ret;
1681 struct net *net = dev_net(skb->dev); 1675 struct net *net = dev_net(skb->dev);
@@ -1702,12 +1696,13 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1702 goto bad_packet; 1696 goto bad_packet;
1703 1697
1704 th = tcp_hdr(skb); 1698 th = tcp_hdr(skb);
1699 hdr = ipv6_hdr(skb);
1705 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1700 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1706 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1701 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1707 skb->len - th->doff*4); 1702 skb->len - th->doff*4);
1708 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1703 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1709 TCP_SKB_CB(skb)->when = 0; 1704 TCP_SKB_CB(skb)->when = 0;
1710 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); 1705 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr);
1711 TCP_SKB_CB(skb)->sacked = 0; 1706 TCP_SKB_CB(skb)->sacked = 0;
1712 1707
1713 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); 1708 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
@@ -1718,6 +1713,11 @@ process:
1718 if (sk->sk_state == TCP_TIME_WAIT) 1713 if (sk->sk_state == TCP_TIME_WAIT)
1719 goto do_time_wait; 1714 goto do_time_wait;
1720 1715
1716 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
1717 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
1718 goto discard_and_relse;
1719 }
1720
1721 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) 1721 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1722 goto discard_and_relse; 1722 goto discard_and_relse;
1723 1723
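
min_hopcount gives IPv6 TCP a generalized-TTL-style guard: a socket may insist that peers be no more than a set number of hops away, and the two call sites added above drop (and count) anything arriving with a smaller hop limit. The check itself is trivial; as a sketch:

/* The hop-limit floor enforced at both receive paths above. */
static bool hop_limit_ok_sketch(const struct ipv6hdr *hdr,
                                const struct ipv6_pinfo *np)
{
        return hdr->hop_limit >= np->min_hopcount;
}
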
@@ -2142,6 +2142,8 @@ struct proto tcpv6_prot = {
2142 .setsockopt = tcp_setsockopt, 2142 .setsockopt = tcp_setsockopt,
2143 .getsockopt = tcp_getsockopt, 2143 .getsockopt = tcp_getsockopt,
2144 .recvmsg = tcp_recvmsg, 2144 .recvmsg = tcp_recvmsg,
2145 .sendmsg = tcp_sendmsg,
2146 .sendpage = tcp_sendpage,
2145 .backlog_rcv = tcp_v6_do_rcv, 2147 .backlog_rcv = tcp_v6_do_rcv,
2146 .hash = tcp_v6_hash, 2148 .hash = tcp_v6_hash,
2147 .unhash = inet_unhash, 2149 .unhash = inet_unhash,
@@ -2160,6 +2162,7 @@ struct proto tcpv6_prot = {
2160 .twsk_prot = &tcp6_timewait_sock_ops, 2162 .twsk_prot = &tcp6_timewait_sock_ops,
2161 .rsk_prot = &tcp6_request_sock_ops, 2163 .rsk_prot = &tcp6_request_sock_ops,
2162 .h.hashinfo = &tcp_hashinfo, 2164 .h.hashinfo = &tcp_hashinfo,
2165 .no_autobind = true,
2163#ifdef CONFIG_COMPAT 2166#ifdef CONFIG_COMPAT
2164 .compat_setsockopt = compat_tcp_setsockopt, 2167 .compat_setsockopt = compat_tcp_setsockopt,
2165 .compat_getsockopt = compat_tcp_getsockopt, 2168 .compat_getsockopt = compat_tcp_getsockopt,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 90824852f598..1dd1affdead2 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -91,9 +91,9 @@ static unsigned int udp6_portaddr_hash(struct net *net,
91 if (ipv6_addr_any(addr6)) 91 if (ipv6_addr_any(addr6))
92 hash = jhash_1word(0, mix); 92 hash = jhash_1word(0, mix);
93 else if (ipv6_addr_v4mapped(addr6)) 93 else if (ipv6_addr_v4mapped(addr6))
94 hash = jhash_1word(addr6->s6_addr32[3], mix); 94 hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
95 else 95 else
96 hash = jhash2(addr6->s6_addr32, 4, mix); 96 hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
97 97
98 return hash ^ port; 98 return hash ^ port;
99} 99}
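
Two things are worth noting about the hash above: the __force casts merely silence sparse's endianness checking (the hashed bytes are unchanged), and a v4-mapped address hashes only its final 32-bit word so that it lands in the same bucket as the equivalent plain IPv4 socket. A sketch of that case split (the wildcard-address case is omitted):

/* Why v4-mapped hashes one word: ::ffff:192.0.2.1 and an IPv4 socket
 * bound to 192.0.2.1 must hash alike, so only s6_addr32[3] (the
 * embedded IPv4 address) feeds the hash.
 */
static u32 bucket_key_sketch(const struct in6_addr *a, u32 mix)
{
        if (ipv6_addr_v4mapped(a))
                return jhash_1word((__force u32)a->s6_addr32[3], mix);
        return jhash2((__force u32 *)a->s6_addr32, 4, mix);
}
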
@@ -328,6 +328,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
328 int err; 328 int err;
329 int is_udplite = IS_UDPLITE(sk); 329 int is_udplite = IS_UDPLITE(sk);
330 int is_udp4; 330 int is_udp4;
331 bool slow;
331 332
332 if (addr_len) 333 if (addr_len)
333 *addr_len=sizeof(struct sockaddr_in6); 334 *addr_len=sizeof(struct sockaddr_in6);
@@ -335,6 +336,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
335 if (flags & MSG_ERRQUEUE) 336 if (flags & MSG_ERRQUEUE)
336 return ipv6_recv_error(sk, msg, len); 337 return ipv6_recv_error(sk, msg, len);
337 338
339 if (np->rxpmtu && np->rxopt.bits.rxpmtu)
340 return ipv6_recv_rxpmtu(sk, msg, len);
341
338try_again: 342try_again:
339 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), 343 skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
340 &peeked, &err); 344 &peeked, &err);
@@ -421,7 +425,7 @@ out:
421 return err; 425 return err;
422 426
423csum_copy_err: 427csum_copy_err:
424 lock_sock(sk); 428 slow = lock_sock_fast(sk);
425 if (!skb_kill_datagram(sk, skb, flags)) { 429 if (!skb_kill_datagram(sk, skb, flags)) {
426 if (is_udp4) 430 if (is_udp4)
427 UDP_INC_STATS_USER(sock_net(sk), 431 UDP_INC_STATS_USER(sock_net(sk),
@@ -430,7 +434,7 @@ csum_copy_err:
430 UDP6_INC_STATS_USER(sock_net(sk), 434 UDP6_INC_STATS_USER(sock_net(sk),
431 UDP_MIB_INERRORS, is_udplite); 435 UDP_MIB_INERRORS, is_udplite);
432 } 436 }
433 release_sock(sk); 437 unlock_sock_fast(sk, slow);
434 438
435 if (flags & MSG_DONTWAIT) 439 if (flags & MSG_DONTWAIT)
436 return -EAGAIN; 440 return -EAGAIN;
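
lock_sock_fast() takes the socket spinlock directly when no user context owns the socket and falls back to the full lock_sock() slow path otherwise; its return value records which path was taken so the matching unlock can undo the right one. The pairing adopted above, as a sketch:

/* The fast-lock pairing used in the csum_copy_err path above. */
static void csum_err_path_sketch(struct sock *sk, struct sk_buff *skb, int flags)
{
        bool slow = lock_sock_fast(sk);         /* spinlock path if uncontended  */

        skb_kill_datagram(sk, skb, flags);      /* short critical section        */
        unlock_sock_fast(sk, slow);             /* undoes whichever path was taken */
}
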
@@ -511,7 +515,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
511 goto drop; 515 goto drop;
512 } 516 }
513 517
514 if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { 518 if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
515 /* Note that an ENOMEM error is charged twice */ 519 /* Note that an ENOMEM error is charged twice */
516 if (rc == -ENOMEM) 520 if (rc == -ENOMEM)
517 UDP6_INC_STATS_BH(sock_net(sk), 521 UDP6_INC_STATS_BH(sock_net(sk),
@@ -581,6 +585,10 @@ static void flush_stack(struct sock **stack, unsigned int count,
581 585
582 sk = stack[i]; 586 sk = stack[i];
583 if (skb1) { 587 if (skb1) {
588 if (sk_rcvqueues_full(sk, skb)) {
589 kfree_skb(skb1);
590 goto drop;
591 }
584 bh_lock_sock(sk); 592 bh_lock_sock(sk);
585 if (!sock_owned_by_user(sk)) 593 if (!sock_owned_by_user(sk))
586 udpv6_queue_rcv_skb(sk, skb1); 594 udpv6_queue_rcv_skb(sk, skb1);
@@ -692,7 +700,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
692 u32 ulen = 0; 700 u32 ulen = 0;
693 701
694 if (!pskb_may_pull(skb, sizeof(struct udphdr))) 702 if (!pskb_may_pull(skb, sizeof(struct udphdr)))
695 goto short_packet; 703 goto discard;
696 704
697 saddr = &ipv6_hdr(skb)->saddr; 705 saddr = &ipv6_hdr(skb)->saddr;
698 daddr = &ipv6_hdr(skb)->daddr; 706 daddr = &ipv6_hdr(skb)->daddr;
@@ -756,6 +764,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
756 764
757 /* deliver */ 765 /* deliver */
758 766
767 if (sk_rcvqueues_full(sk, skb)) {
768 sock_put(sk);
769 goto discard;
770 }
759 bh_lock_sock(sk); 771 bh_lock_sock(sk);
760 if (!sock_owned_by_user(sk)) 772 if (!sock_owned_by_user(sk))
761 udpv6_queue_rcv_skb(sk, skb); 773 udpv6_queue_rcv_skb(sk, skb);
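
Both UDPv6 delivery paths now shed load before taking the socket lock: sk_rcvqueues_full() compares the receive queue plus backlog against the socket's limit, and an over-limit socket drops the datagram immediately instead of queueing more work. A sketch of the guard, with the error handling of the two hunks above condensed:

/* Early-drop guard condensed from the two delivery paths above. */
static int deliver_guarded_sketch(struct sock *sk, struct sk_buff *skb)
{
        if (sk_rcvqueues_full(sk, skb)) {       /* rcvqueue + backlog over sk_rcvbuf */
                kfree_skb(skb);
                return -ENOMEM;
        }
        bh_lock_sock(sk);
        if (!sock_owned_by_user(sk))
                udpv6_queue_rcv_skb(sk, skb);
        else
                sk_add_backlog(sk, skb);        /* owner busy: defer to backlog */
        bh_unlock_sock(sk);
        return 0;
}
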
@@ -770,9 +782,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
770 return 0; 782 return 0;
771 783
772short_packet: 784short_packet:
773 LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", 785 LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
774 proto == IPPROTO_UDPLITE ? "-Lite" : "", 786 proto == IPPROTO_UDPLITE ? "-Lite" : "",
775 ulen, skb->len); 787 saddr,
788 ntohs(uh->source),
789 ulen,
790 skb->len,
791 daddr,
792 ntohs(uh->dest));
776 793
777discard: 794discard:
778 UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); 795 UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
@@ -910,7 +927,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
910 struct inet_sock *inet = inet_sk(sk); 927 struct inet_sock *inet = inet_sk(sk);
911 struct ipv6_pinfo *np = inet6_sk(sk); 928 struct ipv6_pinfo *np = inet6_sk(sk);
912 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; 929 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
913 struct in6_addr *daddr, *final_p = NULL, final; 930 struct in6_addr *daddr, *final_p, final;
914 struct ipv6_txoptions *opt = NULL; 931 struct ipv6_txoptions *opt = NULL;
915 struct ip6_flowlabel *flowlabel = NULL; 932 struct ip6_flowlabel *flowlabel = NULL;
916 struct flowi fl; 933 struct flowi fl;
@@ -919,6 +936,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
919 int ulen = len; 936 int ulen = len;
920 int hlimit = -1; 937 int hlimit = -1;
921 int tclass = -1; 938 int tclass = -1;
939 int dontfrag = -1;
922 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; 940 int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
923 int err; 941 int err;
924 int connected = 0; 942 int connected = 0;
@@ -1049,7 +1067,8 @@ do_udp_sendmsg:
1049 memset(opt, 0, sizeof(struct ipv6_txoptions)); 1067 memset(opt, 0, sizeof(struct ipv6_txoptions));
1050 opt->tot_len = sizeof(*opt); 1068 opt->tot_len = sizeof(*opt);
1051 1069
1052 err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); 1070 err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit,
1071 &tclass, &dontfrag);
1053 if (err < 0) { 1072 if (err < 0) {
1054 fl6_sock_release(flowlabel); 1073 fl6_sock_release(flowlabel);
1055 return err; 1074 return err;
@@ -1078,14 +1097,9 @@ do_udp_sendmsg:
1078 ipv6_addr_copy(&fl.fl6_src, &np->saddr); 1097 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1079 fl.fl_ip_sport = inet->inet_sport; 1098 fl.fl_ip_sport = inet->inet_sport;
1080 1099
1081 /* merge ip6_build_xmit from ip6_output */ 1100 final_p = fl6_update_dst(&fl, opt, &final);
1082 if (opt && opt->srcrt) { 1101 if (final_p)
1083 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1084 ipv6_addr_copy(&final, &fl.fl6_dst);
1085 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1086 final_p = &final;
1087 connected = 0; 1102 connected = 0;
1088 }
1089 1103
1090 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { 1104 if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
1091 fl.oif = np->mcast_oif; 1105 fl.oif = np->mcast_oif;
@@ -1120,6 +1134,9 @@ do_udp_sendmsg:
1120 if (tclass < 0) 1134 if (tclass < 0)
1121 tclass = np->tclass; 1135 tclass = np->tclass;
1122 1136
1137 if (dontfrag < 0)
1138 dontfrag = np->dontfrag;
1139
1123 if (msg->msg_flags&MSG_CONFIRM) 1140 if (msg->msg_flags&MSG_CONFIRM)
1124 goto do_confirm; 1141 goto do_confirm;
1125back_from_confirm: 1142back_from_confirm:
@@ -1143,7 +1160,7 @@ do_append_data:
1143 err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, 1160 err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
1144 sizeof(struct udphdr), hlimit, tclass, opt, &fl, 1161 sizeof(struct udphdr), hlimit, tclass, opt, &fl,
1145 (struct rt6_info*)dst, 1162 (struct rt6_info*)dst,
1146 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); 1163 corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
1147 if (err) 1164 if (err)
1148 udp_v6_flush_pending_frames(sk); 1165 udp_v6_flush_pending_frames(sk);
1149 else if (!corkreq) 1166 else if (!corkreq)
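
dontfrag follows the same tri-state convention as hlimit and tclass in this function: -1 means the cmsg handler did not set it, and the per-socket default applies at send time. Isolated as a sketch:

/* Tri-state plumbing used above: -1 means "not set via ancillary data". */
static int resolve_dontfrag_sketch(const struct ipv6_pinfo *np, int dontfrag)
{
        /* datagram_send_ctl() may have overwritten dontfrag from a cmsg */
        if (dontfrag < 0)
                dontfrag = np->dontfrag;        /* per-socket default */
        return dontfrag;
}
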
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 2bc98ede1235..f8c3cf842f53 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -42,7 +42,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
42 ipv6_hdr(skb)->payload_len = htons(skb->len); 42 ipv6_hdr(skb)->payload_len = htons(skb->len);
43 __skb_push(skb, skb->data - skb_network_header(skb)); 43 __skb_push(skb, skb->data - skb_network_header(skb));
44 44
45 NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, 45 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
46 ip6_rcv_finish); 46 ip6_rcv_finish);
47 return -1; 47 return -1;
48} 48}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 0c92112dcba3..6434bd5ce088 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -90,6 +90,6 @@ static int xfrm6_output_finish(struct sk_buff *skb)
90 90
91int xfrm6_output(struct sk_buff *skb) 91int xfrm6_output(struct sk_buff *skb)
92{ 92{
93 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb_dst(skb)->dev, 93 return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
94 xfrm6_output_finish); 94 skb_dst(skb)->dev, xfrm6_output_finish);
95} 95}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 00bf7c962b7e..6baeabbbca82 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -67,36 +67,6 @@ static int xfrm6_get_saddr(struct net *net,
67 return 0; 67 return 0;
68} 68}
69 69
70static struct dst_entry *
71__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
72{
73 struct dst_entry *dst;
74
75 /* Still not clear if we should set fl->fl6_{src,dst}... */
76 read_lock_bh(&policy->lock);
77 for (dst = policy->bundles; dst; dst = dst->next) {
78 struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
79 struct in6_addr fl_dst_prefix, fl_src_prefix;
80
81 ipv6_addr_prefix(&fl_dst_prefix,
82 &fl->fl6_dst,
83 xdst->u.rt6.rt6i_dst.plen);
84 ipv6_addr_prefix(&fl_src_prefix,
85 &fl->fl6_src,
86 xdst->u.rt6.rt6i_src.plen);
87 if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
88 ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
89 xfrm_bundle_ok(policy, xdst, fl, AF_INET6,
90 (xdst->u.rt6.rt6i_dst.plen != 128 ||
91 xdst->u.rt6.rt6i_src.plen != 128))) {
92 dst_clone(dst);
93 break;
94 }
95 }
96 read_unlock_bh(&policy->lock);
97 return dst;
98}
99
100static int xfrm6_get_tos(struct flowi *fl) 70static int xfrm6_get_tos(struct flowi *fl)
101{ 71{
102 return 0; 72 return 0;
@@ -154,6 +124,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
154 u8 nexthdr = nh[IP6CB(skb)->nhoff]; 124 u8 nexthdr = nh[IP6CB(skb)->nhoff];
155 125
156 memset(fl, 0, sizeof(struct flowi)); 126 memset(fl, 0, sizeof(struct flowi));
127 fl->mark = skb->mark;
128
157 ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr); 129 ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr);
158 ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr); 130 ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr);
159 131
@@ -291,7 +263,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
291 .dst_ops = &xfrm6_dst_ops, 263 .dst_ops = &xfrm6_dst_ops,
292 .dst_lookup = xfrm6_dst_lookup, 264 .dst_lookup = xfrm6_dst_lookup,
293 .get_saddr = xfrm6_get_saddr, 265 .get_saddr = xfrm6_get_saddr,
294 .find_bundle = __xfrm6_find_bundle,
295 .decode_session = _decode_session6, 266 .decode_session = _decode_session6,
296 .get_tos = xfrm6_get_tos, 267 .get_tos = xfrm6_get_tos,
297 .init_path = xfrm6_init_path, 268 .init_path = xfrm6_init_path,
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 2a4efcea3423..79986a674f6e 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -347,7 +347,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
347 self->tx_flow = flow; 347 self->tx_flow = flow;
348 IRDA_DEBUG(1, "%s(), IrTTP wants us to start again\n", 348 IRDA_DEBUG(1, "%s(), IrTTP wants us to start again\n",
349 __func__); 349 __func__);
350 wake_up_interruptible(sk->sk_sleep); 350 wake_up_interruptible(sk_sleep(sk));
351 break; 351 break;
352 default: 352 default:
353 IRDA_DEBUG(0, "%s(), Unknown flow command!\n", __func__); 353 IRDA_DEBUG(0, "%s(), Unknown flow command!\n", __func__);
@@ -900,7 +900,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
900 if (flags & O_NONBLOCK) 900 if (flags & O_NONBLOCK)
901 goto out; 901 goto out;
902 902
903 err = wait_event_interruptible(*(sk->sk_sleep), 903 err = wait_event_interruptible(*(sk_sleep(sk)),
904 skb_peek(&sk->sk_receive_queue)); 904 skb_peek(&sk->sk_receive_queue));
905 if (err) 905 if (err)
906 goto out; 906 goto out;
@@ -1066,7 +1066,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
1066 goto out; 1066 goto out;
1067 1067
1068 err = -ERESTARTSYS; 1068 err = -ERESTARTSYS;
1069 if (wait_event_interruptible(*(sk->sk_sleep), 1069 if (wait_event_interruptible(*(sk_sleep(sk)),
1070 (sk->sk_state != TCP_SYN_SENT))) 1070 (sk->sk_state != TCP_SYN_SENT)))
1071 goto out; 1071 goto out;
1072 1072
@@ -1318,7 +1318,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
1318 1318
1319 /* Check if IrTTP is wants us to slow down */ 1319 /* Check if IrTTP is wants us to slow down */
1320 1320
1321 if (wait_event_interruptible(*(sk->sk_sleep), 1321 if (wait_event_interruptible(*(sk_sleep(sk)),
1322 (self->tx_flow != FLOW_STOP || sk->sk_state != TCP_ESTABLISHED))) { 1322 (self->tx_flow != FLOW_STOP || sk->sk_state != TCP_ESTABLISHED))) {
1323 err = -ERESTARTSYS; 1323 err = -ERESTARTSYS;
1324 goto out; 1324 goto out;
@@ -1477,7 +1477,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
1477 if (copied >= target) 1477 if (copied >= target)
1478 break; 1478 break;
1479 1479
1480 prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1480 prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1481 1481
1482 /* 1482 /*
1483 * POSIX 1003.1g mandates this order. 1483 * POSIX 1003.1g mandates this order.
@@ -1497,7 +1497,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
1497 /* Wait process until data arrives */ 1497 /* Wait process until data arrives */
1498 schedule(); 1498 schedule();
1499 1499
1500 finish_wait(sk->sk_sleep, &wait); 1500 finish_wait(sk_sleep(sk), &wait);
1501 1501
1502 if (err) 1502 if (err)
1503 goto out; 1503 goto out;
@@ -1787,7 +1787,7 @@ static unsigned int irda_poll(struct file * file, struct socket *sock,
1787 IRDA_DEBUG(4, "%s()\n", __func__); 1787 IRDA_DEBUG(4, "%s()\n", __func__);
1788 1788
1789 lock_kernel(); 1789 lock_kernel();
1790 poll_wait(file, sk->sk_sleep, wait); 1790 poll_wait(file, sk_sleep(sk), wait);
1791 mask = 0; 1791 mask = 0;
1792 1792
1793 /* Exceptional events? */ 1793 /* Exceptional events? */
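
All the conversions in this file swap direct sk->sk_sleep dereferences for the sk_sleep() accessor. With the wait queue now embedded in struct socket_wq and reached through sk->sk_wq, the accessor is assumed to look roughly like this (the authoritative definition lives in include/net/sock.h):

/* Assumed shape of the accessor these conversions target. */
static inline wait_queue_head_t *sk_sleep(struct sock *sk)
{
        return &sk->sk_wq->wait;
}
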
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index e2e893b474e9..8b915f3ac3b9 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -475,7 +475,7 @@ static int ircomm_param_dce(void *instance, irda_param_t *param, int get)
475 /* Check if any of the settings have changed */ 475 /* Check if any of the settings have changed */
476 if (dce & 0x0f) { 476 if (dce & 0x0f) {
477 if (dce & IRCOMM_DELTA_CTS) { 477 if (dce & IRCOMM_DELTA_CTS) {
478 IRDA_DEBUG(2, "%s(), CTS \n", __func__ ); 478 IRDA_DEBUG(2, "%s(), CTS\n", __func__ );
479 } 479 }
480 } 480 }
481 481
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 79a1e5a23e10..fce364c6c71a 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -685,8 +685,6 @@ static void iriap_getvaluebyclass_indication(struct iriap_cb *self,
685 /* We have a match; send the value. */ 685 /* We have a match; send the value. */
686 iriap_getvaluebyclass_response(self, obj->id, IAS_SUCCESS, 686 iriap_getvaluebyclass_response(self, obj->id, IAS_SUCCESS,
687 attrib->value); 687 attrib->value);
688
689 return;
690} 688}
691 689
692/* 690/*
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index df18ab4b6c5e..e98e40d76f4f 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -678,7 +678,6 @@ irda_irnet_destroy(irnet_socket * self)
678 self->stsap_sel = 0; 678 self->stsap_sel = 0;
679 679
680 DEXIT(IRDA_SOCK_TRACE, "\n"); 680 DEXIT(IRDA_SOCK_TRACE, "\n");
681 return;
682} 681}
683 682
684 683
@@ -928,7 +927,6 @@ irnet_disconnect_server(irnet_socket * self,
928 irttp_listen(self->tsap); 927 irttp_listen(self->tsap);
929 928
930 DEXIT(IRDA_SERV_TRACE, "\n"); 929 DEXIT(IRDA_SERV_TRACE, "\n");
931 return;
932} 930}
933 931
934/*------------------------------------------------------------------*/ 932/*------------------------------------------------------------------*/
@@ -1013,7 +1011,6 @@ irnet_destroy_server(void)
1013 irda_irnet_destroy(&irnet_server.s); 1011 irda_irnet_destroy(&irnet_server.s);
1014 1012
1015 DEXIT(IRDA_SERV_TRACE, "\n"); 1013 DEXIT(IRDA_SERV_TRACE, "\n");
1016 return;
1017} 1014}
1018 1015
1019 1016
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 6a1a202710c5..dfe7b38dd4af 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -20,7 +20,7 @@
20/* Please put other headers in irnet.h - Thanks */ 20/* Please put other headers in irnet.h - Thanks */
21 21
22/* Generic PPP callbacks (to call us) */ 22/* Generic PPP callbacks (to call us) */
23static struct ppp_channel_ops irnet_ppp_ops = { 23static const struct ppp_channel_ops irnet_ppp_ops = {
24 .start_xmit = ppp_irnet_send, 24 .start_xmit = ppp_irnet_send,
25 .ioctl = ppp_irnet_ioctl 25 .ioctl = ppp_irnet_ioctl
26}; 26};
@@ -527,7 +527,7 @@ static int
527dev_irnet_close(struct inode * inode, 527dev_irnet_close(struct inode * inode,
528 struct file * file) 528 struct file * file)
529{ 529{
530 irnet_socket * ap = (struct irnet_socket *) file->private_data; 530 irnet_socket * ap = file->private_data;
531 531
532 DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n", 532 DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
533 file, ap); 533 file, ap);
@@ -564,7 +564,7 @@ dev_irnet_write(struct file * file,
564 size_t count, 564 size_t count,
565 loff_t * ppos) 565 loff_t * ppos)
566{ 566{
567 irnet_socket * ap = (struct irnet_socket *) file->private_data; 567 irnet_socket * ap = file->private_data;
568 568
569 DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n", 569 DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n",
570 file, ap, count); 570 file, ap, count);
@@ -588,7 +588,7 @@ dev_irnet_read(struct file * file,
588 size_t count, 588 size_t count,
589 loff_t * ppos) 589 loff_t * ppos)
590{ 590{
591 irnet_socket * ap = (struct irnet_socket *) file->private_data; 591 irnet_socket * ap = file->private_data;
592 592
593 DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n", 593 DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n",
594 file, ap, count); 594 file, ap, count);
@@ -609,7 +609,7 @@ static unsigned int
609dev_irnet_poll(struct file * file, 609dev_irnet_poll(struct file * file,
610 poll_table * wait) 610 poll_table * wait)
611{ 611{
612 irnet_socket * ap = (struct irnet_socket *) file->private_data; 612 irnet_socket * ap = file->private_data;
613 unsigned int mask; 613 unsigned int mask;
614 614
615 DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n", 615 DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
@@ -638,7 +638,7 @@ dev_irnet_ioctl(
638 unsigned int cmd, 638 unsigned int cmd,
639 unsigned long arg) 639 unsigned long arg)
640{ 640{
641 irnet_socket * ap = (struct irnet_socket *) file->private_data; 641 irnet_socket * ap = file->private_data;
642 int err; 642 int err;
643 int val; 643 int val;
644 void __user *argp = (void __user *)arg; 644 void __user *argp = (void __user *)arg;
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 47db1d8a0d92..285761e77d90 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -1853,23 +1853,23 @@ static int irttp_seq_show(struct seq_file *seq, void *v)
1853 self->remote_credit); 1853 self->remote_credit);
1854 seq_printf(seq, "send credit: %d\n", 1854 seq_printf(seq, "send credit: %d\n",
1855 self->send_credit); 1855 self->send_credit);
1856 seq_printf(seq, " tx packets: %ld, ", 1856 seq_printf(seq, " tx packets: %lu, ",
1857 self->stats.tx_packets); 1857 self->stats.tx_packets);
1858 seq_printf(seq, "rx packets: %ld, ", 1858 seq_printf(seq, "rx packets: %lu, ",
1859 self->stats.rx_packets); 1859 self->stats.rx_packets);
1860 seq_printf(seq, "tx_queue len: %d ", 1860 seq_printf(seq, "tx_queue len: %u ",
1861 skb_queue_len(&self->tx_queue)); 1861 skb_queue_len(&self->tx_queue));
1862 seq_printf(seq, "rx_queue len: %d\n", 1862 seq_printf(seq, "rx_queue len: %u\n",
1863 skb_queue_len(&self->rx_queue)); 1863 skb_queue_len(&self->rx_queue));
1864 seq_printf(seq, " tx_sdu_busy: %s, ", 1864 seq_printf(seq, " tx_sdu_busy: %s, ",
1865 self->tx_sdu_busy? "TRUE":"FALSE"); 1865 self->tx_sdu_busy? "TRUE":"FALSE");
1866 seq_printf(seq, "rx_sdu_busy: %s\n", 1866 seq_printf(seq, "rx_sdu_busy: %s\n",
1867 self->rx_sdu_busy? "TRUE":"FALSE"); 1867 self->rx_sdu_busy? "TRUE":"FALSE");
1868 seq_printf(seq, " max_seg_size: %d, ", 1868 seq_printf(seq, " max_seg_size: %u, ",
1869 self->max_seg_size); 1869 self->max_seg_size);
1870 seq_printf(seq, "tx_max_sdu_size: %d, ", 1870 seq_printf(seq, "tx_max_sdu_size: %u, ",
1871 self->tx_max_sdu_size); 1871 self->tx_max_sdu_size);
1872 seq_printf(seq, "rx_max_sdu_size: %d\n", 1872 seq_printf(seq, "rx_max_sdu_size: %u\n",
1873 self->rx_max_sdu_size); 1873 self->rx_max_sdu_size);
1874 1874
1875 seq_printf(seq, " Used by (%s)\n\n", 1875 seq_printf(seq, " Used by (%s)\n\n",
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c18286a2167b..9637e45744fa 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -59,7 +59,7 @@ do { \
59 DEFINE_WAIT(__wait); \ 59 DEFINE_WAIT(__wait); \
60 long __timeo = timeo; \ 60 long __timeo = timeo; \
61 ret = 0; \ 61 ret = 0; \
62 prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \ 62 prepare_to_wait(sk_sleep(sk), &__wait, TASK_INTERRUPTIBLE); \
63 while (!(condition)) { \ 63 while (!(condition)) { \
64 if (!__timeo) { \ 64 if (!__timeo) { \
65 ret = -EAGAIN; \ 65 ret = -EAGAIN; \
@@ -76,7 +76,7 @@ do { \
76 if (ret) \ 76 if (ret) \
77 break; \ 77 break; \
78 } \ 78 } \
79 finish_wait(sk->sk_sleep, &__wait); \ 79 finish_wait(sk_sleep(sk), &__wait); \
80} while (0) 80} while (0)
81 81
82#define iucv_sock_wait(sk, condition, timeo) \ 82#define iucv_sock_wait(sk, condition, timeo) \
@@ -136,7 +136,6 @@ static void afiucv_pm_complete(struct device *dev)
136#ifdef CONFIG_PM_DEBUG 136#ifdef CONFIG_PM_DEBUG
137 printk(KERN_WARNING "afiucv_pm_complete\n"); 137 printk(KERN_WARNING "afiucv_pm_complete\n");
138#endif 138#endif
139 return;
140} 139}
141 140
142/** 141/**
@@ -305,11 +304,14 @@ static inline int iucv_below_msglim(struct sock *sk)
305 */ 304 */
306static void iucv_sock_wake_msglim(struct sock *sk) 305static void iucv_sock_wake_msglim(struct sock *sk)
307{ 306{
308 read_lock(&sk->sk_callback_lock); 307 struct socket_wq *wq;
309 if (sk_has_sleeper(sk)) 308
310 wake_up_interruptible_all(sk->sk_sleep); 309 rcu_read_lock();
310 wq = rcu_dereference(sk->sk_wq);
311 if (wq_has_sleeper(wq))
312 wake_up_interruptible_all(&wq->wait);
311 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 313 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
312 read_unlock(&sk->sk_callback_lock); 314 rcu_read_unlock();
313} 315}
314 316
315/* Timers */ 317/* Timers */
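
The rewritten wakeup above trades the callback read-lock for RCU: sk_wq is dereferenced inside an RCU read-side section, and wq_has_sleeper() (which includes the necessary memory barrier) avoids waking an empty queue. The pattern, isolated:

/* RCU wakeup pattern adopted above. */
static void wake_space_writers_sketch(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();                        /* sk_wq is RCU-managed        */
        wq = rcu_dereference(sk->sk_wq);
        if (wq_has_sleeper(wq))                 /* barrier + waitqueue_active  */
                wake_up_interruptible_all(&wq->wait);
        sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
        rcu_read_unlock();
}
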
@@ -795,7 +797,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
795 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 797 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
796 798
797 /* Wait for an incoming connection */ 799 /* Wait for an incoming connection */
798 add_wait_queue_exclusive(sk->sk_sleep, &wait); 800 add_wait_queue_exclusive(sk_sleep(sk), &wait);
799 while (!(nsk = iucv_accept_dequeue(sk, newsock))) { 801 while (!(nsk = iucv_accept_dequeue(sk, newsock))) {
800 set_current_state(TASK_INTERRUPTIBLE); 802 set_current_state(TASK_INTERRUPTIBLE);
801 if (!timeo) { 803 if (!timeo) {
@@ -819,7 +821,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
819 } 821 }
820 822
821 set_current_state(TASK_RUNNING); 823 set_current_state(TASK_RUNNING);
822 remove_wait_queue(sk->sk_sleep, &wait); 824 remove_wait_queue(sk_sleep(sk), &wait);
823 825
824 if (err) 826 if (err)
825 goto done; 827 goto done;
@@ -1269,7 +1271,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
1269 struct sock *sk = sock->sk; 1271 struct sock *sk = sock->sk;
1270 unsigned int mask = 0; 1272 unsigned int mask = 0;
1271 1273
1272 sock_poll_wait(file, sk->sk_sleep, wait); 1274 sock_poll_wait(file, sk_sleep(sk), wait);
1273 1275
1274 if (sk->sk_state == IUCV_LISTEN) 1276 if (sk->sk_state == IUCV_LISTEN)
1275 return iucv_accept_poll(sk); 1277 return iucv_accept_poll(sk);
@@ -1617,7 +1619,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
1617save_message: 1619save_message:
1618 save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA); 1620 save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA);
1619 if (!save_msg) 1621 if (!save_msg)
1620 return; 1622 goto out_unlock;
1621 save_msg->path = path; 1623 save_msg->path = path;
1622 save_msg->msg = *msg; 1624 save_msg->msg = *msg;
1623 1625
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index fd8b28361a64..499c045d6910 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -632,13 +632,14 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
632 iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), 632 iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
633 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); 633 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
634 if (!iucv_irq_data[cpu]) 634 if (!iucv_irq_data[cpu])
635 return NOTIFY_BAD; 635 return notifier_from_errno(-ENOMEM);
636
636 iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), 637 iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
637 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); 638 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
638 if (!iucv_param[cpu]) { 639 if (!iucv_param[cpu]) {
639 kfree(iucv_irq_data[cpu]); 640 kfree(iucv_irq_data[cpu]);
640 iucv_irq_data[cpu] = NULL; 641 iucv_irq_data[cpu] = NULL;
641 return NOTIFY_BAD; 642 return notifier_from_errno(-ENOMEM);
642 } 643 }
643 iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), 644 iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param),
644 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); 645 GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
@@ -647,7 +648,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
647 iucv_param[cpu] = NULL; 648 iucv_param[cpu] = NULL;
648 kfree(iucv_irq_data[cpu]); 649 kfree(iucv_irq_data[cpu]);
649 iucv_irq_data[cpu] = NULL; 650 iucv_irq_data[cpu] = NULL;
650 return NOTIFY_BAD; 651 return notifier_from_errno(-ENOMEM);
651 } 652 }
652 break; 653 break;
653 case CPU_UP_CANCELED: 654 case CPU_UP_CANCELED:
@@ -677,7 +678,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
677 cpu_clear(cpu, cpumask); 678 cpu_clear(cpu, cpumask);
678 if (cpus_empty(cpumask)) 679 if (cpus_empty(cpumask))
679 /* Can't offline last IUCV enabled cpu. */ 680 /* Can't offline last IUCV enabled cpu. */
680 return NOTIFY_BAD; 681 return notifier_from_errno(-EINVAL);
681 smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1); 682 smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
682 if (cpus_empty(iucv_irq_cpumask)) 683 if (cpus_empty(iucv_irq_cpumask))
683 smp_call_function_single(first_cpu(iucv_buffer_cpumask), 684 smp_call_function_single(first_cpu(iucv_buffer_cpumask),
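
notifier_from_errno() encodes the failing errno into the notifier return value (alongside NOTIFY_STOP_MASK), so the CPU-hotplug core can report why a callback refused, which a bare NOTIFY_BAD could not. The round trip, assuming the stock include/linux/notifier.h helpers:

/* Round trip of the convention adopted above. */
static int notifier_errno_roundtrip_sketch(void)
{
        int ret = notifier_from_errno(-ENOMEM); /* NOTIFY_STOP_MASK | encoded errno */

        return notifier_to_errno(ret);          /* yields -ENOMEM again */
}
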
@@ -1462,7 +1463,7 @@ struct iucv_path_pending {
1462 u32 res3; 1463 u32 res3;
1463 u8 ippollfg; 1464 u8 ippollfg;
1464 u8 res4[3]; 1465 u8 res4[3];
1465} __attribute__ ((packed)); 1466} __packed;
1466 1467
1467static void iucv_path_pending(struct iucv_irq_data *data) 1468static void iucv_path_pending(struct iucv_irq_data *data)
1468{ 1469{
@@ -1523,7 +1524,7 @@ struct iucv_path_complete {
1523 u32 res3; 1524 u32 res3;
1524 u8 ippollfg; 1525 u8 ippollfg;
1525 u8 res4[3]; 1526 u8 res4[3];
1526} __attribute__ ((packed)); 1527} __packed;
1527 1528
1528static void iucv_path_complete(struct iucv_irq_data *data) 1529static void iucv_path_complete(struct iucv_irq_data *data)
1529{ 1530{
@@ -1553,7 +1554,7 @@ struct iucv_path_severed {
1553 u32 res4; 1554 u32 res4;
1554 u8 ippollfg; 1555 u8 ippollfg;
1555 u8 res5[3]; 1556 u8 res5[3];
1556} __attribute__ ((packed)); 1557} __packed;
1557 1558
1558static void iucv_path_severed(struct iucv_irq_data *data) 1559static void iucv_path_severed(struct iucv_irq_data *data)
1559{ 1560{
@@ -1589,7 +1590,7 @@ struct iucv_path_quiesced {
1589 u32 res4; 1590 u32 res4;
1590 u8 ippollfg; 1591 u8 ippollfg;
1591 u8 res5[3]; 1592 u8 res5[3];
1592} __attribute__ ((packed)); 1593} __packed;
1593 1594
1594static void iucv_path_quiesced(struct iucv_irq_data *data) 1595static void iucv_path_quiesced(struct iucv_irq_data *data)
1595{ 1596{
@@ -1617,7 +1618,7 @@ struct iucv_path_resumed {
1617 u32 res4; 1618 u32 res4;
1618 u8 ippollfg; 1619 u8 ippollfg;
1619 u8 res5[3]; 1620 u8 res5[3];
1620} __attribute__ ((packed)); 1621} __packed;
1621 1622
1622static void iucv_path_resumed(struct iucv_irq_data *data) 1623static void iucv_path_resumed(struct iucv_irq_data *data)
1623{ 1624{
@@ -1648,7 +1649,7 @@ struct iucv_message_complete {
1648 u32 ipbfln2f; 1649 u32 ipbfln2f;
1649 u8 ippollfg; 1650 u8 ippollfg;
1650 u8 res2[3]; 1651 u8 res2[3];
1651} __attribute__ ((packed)); 1652} __packed;
1652 1653
1653static void iucv_message_complete(struct iucv_irq_data *data) 1654static void iucv_message_complete(struct iucv_irq_data *data)
1654{ 1655{
@@ -1693,7 +1694,7 @@ struct iucv_message_pending {
1693 u32 ipbfln2f; 1694 u32 ipbfln2f;
1694 u8 ippollfg; 1695 u8 ippollfg;
1695 u8 res2[3]; 1696 u8 res2[3];
1696} __attribute__ ((packed)); 1697} __packed;
1697 1698
1698static void iucv_message_pending(struct iucv_irq_data *data) 1699static void iucv_message_pending(struct iucv_irq_data *data)
1699{ 1700{
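
__packed is the kernel's shorthand for __attribute__((packed)); these IUCV interrupt buffers mirror a hardware-defined layout, so the compiler must not insert padding between the mixed u8/u16/u32 members. The effect in miniature:

/* Without __packed, 'len' would be padded out to offset 4; with it,
 * the struct matches the on-wire/hardware layout byte for byte.
 */
struct wire_hdr_sketch {
        u8  type;       /* offset 0                            */
        u32 len;        /* offset 1, only because of __packed  */
} __packed;
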
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ba9a3fcc2fed..43040e97c474 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -99,7 +99,7 @@ static void pfkey_sock_destruct(struct sock *sk)
99 skb_queue_purge(&sk->sk_receive_queue); 99 skb_queue_purge(&sk->sk_receive_queue);
100 100
101 if (!sock_flag(sk, SOCK_DEAD)) { 101 if (!sock_flag(sk, SOCK_DEAD)) {
102 printk("Attempt to release alive pfkey socket: %p\n", sk); 102 pr_err("Attempt to release alive pfkey socket: %p\n", sk);
103 return; 103 return;
104 } 104 }
105 105
@@ -1402,7 +1402,7 @@ static inline int event2poltype(int event)
1402 case XFRM_MSG_POLEXPIRE: 1402 case XFRM_MSG_POLEXPIRE:
1403 // return SADB_X_SPDEXPIRE; 1403 // return SADB_X_SPDEXPIRE;
1404 default: 1404 default:
1405 printk("pfkey: Unknown policy event %d\n", event); 1405 pr_err("pfkey: Unknown policy event %d\n", event);
1406 break; 1406 break;
1407 } 1407 }
1408 1408
@@ -1421,7 +1421,7 @@ static inline int event2keytype(int event)
1421 case XFRM_MSG_EXPIRE: 1421 case XFRM_MSG_EXPIRE:
1422 return SADB_EXPIRE; 1422 return SADB_EXPIRE;
1423 default: 1423 default:
1424 printk("pfkey: Unknown SA event %d\n", event); 1424 pr_err("pfkey: Unknown SA event %d\n", event);
1425 break; 1425 break;
1426 } 1426 }
1427 1427
@@ -2969,7 +2969,7 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
2969 case XFRM_MSG_NEWAE: /* not yet supported */ 2969 case XFRM_MSG_NEWAE: /* not yet supported */
2970 break; 2970 break;
2971 default: 2971 default:
2972 printk("pfkey: Unknown SA event %d\n", c->event); 2972 pr_err("pfkey: Unknown SA event %d\n", c->event);
2973 break; 2973 break;
2974 } 2974 }
2975 2975
@@ -2993,7 +2993,7 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e
2993 break; 2993 break;
2994 return key_notify_policy_flush(c); 2994 return key_notify_policy_flush(c);
2995 default: 2995 default:
2996 printk("pfkey: Unknown policy event %d\n", c->event); 2996 pr_err("pfkey: Unknown policy event %d\n", c->event);
2997 break; 2997 break;
2998 } 2998 }
2999 2999
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
new file mode 100644
index 000000000000..4b1e71751e10
--- /dev/null
+++ b/net/l2tp/Kconfig
@@ -0,0 +1,107 @@
1#
2# Layer Two Tunneling Protocol (L2TP)
3#
4
5menuconfig L2TP
6 tristate "Layer Two Tunneling Protocol (L2TP)"
7 depends on INET
8 ---help---
9 Layer Two Tunneling Protocol
10
11 From RFC 2661 <http://www.ietf.org/rfc/rfc2661.txt>.
12
13 L2TP facilitates the tunneling of packets across an
14 intervening network in a way that is as transparent as
15 possible to both end-users and applications.
16
17 L2TP is often used to tunnel PPP traffic over IP
18 tunnels. One IP tunnel may carry thousands of individual PPP
19 connections. L2TP is also used as a VPN protocol, popular
20 with home workers connecting to their offices.
21
22 L2TPv3 allows other protocols as well as PPP to be carried
23 over L2TP tunnels. L2TPv3 is defined in RFC 3931
24 <http://www.ietf.org/rfc/rfc3931.txt>.
25
26 The kernel component handles only L2TP data packets: a
27 userland daemon handles the L2TP control protocol (tunnel
28 and session setup). One such daemon is OpenL2TP
29 (http://openl2tp.org/).
30
31 If you don't need L2TP, say N. To compile all L2TP code as
32 modules, choose M here.
33
34config L2TP_DEBUGFS
35 tristate "L2TP debugfs support"
36 depends on L2TP && DEBUG_FS
37 help
38 Support for an l2tp directory in the debugfs filesystem. It may
39 be used to dump the internal state of the l2tp drivers for
40 problem analysis.
41
42 If unsure, say 'Y'.
43
44 To compile this driver as a module, choose M here. The module
45 will be called l2tp_debugfs.
46
47config L2TP_V3
48 bool "L2TPv3 support (EXPERIMENTAL)"
49 depends on EXPERIMENTAL && L2TP
50 help
51 Layer Two Tunneling Protocol Version 3
52
53 From RFC 3931 <http://www.ietf.org/rfc/rfc3931.txt>.
54
55 The Layer Two Tunneling Protocol (L2TP) provides a dynamic
56 mechanism for tunneling Layer 2 (L2) "circuits" across a
57 packet-oriented data network (e.g., over IP). L2TP, as
58 originally defined in RFC 2661, is a standard method for
59 tunneling Point-to-Point Protocol (PPP) [RFC1661] sessions.
60 L2TP has since been adopted for tunneling a number of other
61 L2 protocols, including ATM, Frame Relay, HDLC and even raw
62 ethernet frames.
63
64 If you are connecting to L2TPv3 equipment, or you want to
65 tunnel raw ethernet frames using L2TP, say Y here. If
66 unsure, say N.
67
68config L2TP_IP
69 tristate "L2TP IP encapsulation for L2TPv3"
70 depends on L2TP_V3
71 help
72 Support for L2TP-over-IP socket family.
73
74 The L2TPv3 protocol defines two possible encapsulations for
75 L2TP frames, namely UDP and plain IP (without UDP). This
76 driver provides a new L2TPIP socket family with which
77 userspace L2TPv3 daemons may create L2TP/IP tunnel sockets
78 when UDP encapsulation is not required. When L2TP is carried
79 in IP packets, it uses IP protocol number 115, so this
80 protocol must be allowed through firewalls.
81
82 To compile this driver as a module, choose M here. The module
83 will be called l2tp_ip.
84
85config L2TP_ETH
86 tristate "L2TP ethernet pseudowire support for L2TPv3"
87 depends on L2TP_V3
88 help
89 Support for carrying raw ethernet frames over L2TPv3.
90
91 From RFC 4719 <http://www.ietf.org/rfc/rfc4719.txt>.
92
93 The Layer 2 Tunneling Protocol, Version 3 (L2TPv3) can be
94 used as a control protocol and for data encapsulation to set
95 up Pseudowires for transporting layer 2 Packet Data Units
96 across an IP network [RFC3931].
97
98 This driver provides an ethernet virtual interface for each
99 L2TP ethernet pseudowire instance. Standard Linux tools may
100 be used to assign an IP address to the local virtual
101 interface, or add the interface to a bridge.
102
103 If you are using L2TPv3, you will almost certainly want to
104 enable this option.
105
106 To compile this driver as a module, choose M here. The module
107 will be called l2tp_eth.
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
new file mode 100644
index 000000000000..110e7bc2de5e
--- /dev/null
+++ b/net/l2tp/Makefile
@@ -0,0 +1,12 @@
1#
2# Makefile for the L2TP.
3#
4
5obj-$(CONFIG_L2TP) += l2tp_core.o
6
7# Build l2tp as modules if L2TP is M
8obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
9obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
10obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o
11obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_ETH)) += l2tp_eth.o
12obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_DEBUGFS)) += l2tp_debugfs.o
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
new file mode 100644
index 000000000000..1712af1c7b3f
--- /dev/null
+++ b/net/l2tp/l2tp_core.c
@@ -0,0 +1,1666 @@
1/*
2 * L2TP core.
3 *
4 * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
5 *
6 * This file contains some code of the original L2TPv2 pppol2tp
7 * driver, which has the following copyright:
8 *
9 * Authors: Martijn van Oosterhout <kleptog@svana.org>
10 * James Chapman (jchapman@katalix.com)
11 * Contributors:
12 * Michal Ostrowski <mostrows@speakeasy.net>
13 * Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
14 * David S. Miller (davem@redhat.com)
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License version 2 as
18 * published by the Free Software Foundation.
19 */
20
21#include <linux/module.h>
22#include <linux/string.h>
23#include <linux/list.h>
24#include <linux/rculist.h>
25#include <linux/uaccess.h>
26
27#include <linux/kernel.h>
28#include <linux/spinlock.h>
29#include <linux/kthread.h>
30#include <linux/sched.h>
31#include <linux/slab.h>
32#include <linux/errno.h>
33#include <linux/jiffies.h>
34
35#include <linux/netdevice.h>
36#include <linux/net.h>
37#include <linux/inetdevice.h>
38#include <linux/skbuff.h>
39#include <linux/init.h>
40#include <linux/in.h>
41#include <linux/ip.h>
42#include <linux/udp.h>
43#include <linux/l2tp.h>
44#include <linux/hash.h>
45#include <linux/sort.h>
46#include <linux/file.h>
47#include <linux/nsproxy.h>
48#include <net/net_namespace.h>
49#include <net/netns/generic.h>
50#include <net/dst.h>
51#include <net/ip.h>
52#include <net/udp.h>
53#include <net/inet_common.h>
54#include <net/xfrm.h>
55#include <net/protocol.h>
56
57#include <asm/byteorder.h>
58#include <asm/atomic.h>
59
60#include "l2tp_core.h"
61
62#define L2TP_DRV_VERSION "V2.0"
63
64/* L2TP header constants */
65#define L2TP_HDRFLAG_T 0x8000
66#define L2TP_HDRFLAG_L 0x4000
67#define L2TP_HDRFLAG_S 0x0800
68#define L2TP_HDRFLAG_O 0x0200
69#define L2TP_HDRFLAG_P 0x0100
70
71#define L2TP_HDR_VER_MASK 0x000F
72#define L2TP_HDR_VER_2 0x0002
73#define L2TP_HDR_VER_3 0x0003
74
75/* L2TPv3 default L2-specific sublayer */
76#define L2TP_SLFLAG_S 0x40000000
77#define L2TP_SL_SEQ_MASK 0x00ffffff
78
79#define L2TP_HDR_SIZE_SEQ 10
80#define L2TP_HDR_SIZE_NOSEQ 6
81
82/* Default trace flags */
83#define L2TP_DEFAULT_DEBUG_FLAGS 0
84
85#define PRINTK(_mask, _type, _lvl, _fmt, args...) \
86 do { \
87 if ((_mask) & (_type)) \
88 printk(_lvl "L2TP: " _fmt, ##args); \
89 } while (0)
90
91/* Private data stored for received packets in the skb.
92 */
93struct l2tp_skb_cb {
94 u32 ns;
95 u16 has_seq;
96 u16 length;
97 unsigned long expires;
98};
99
100#define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
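
/* Note (illustrative, not part of the original patch): skb->cb[] is only
 * 48 bytes and must hold both the inet control block and our private
 * data. A build-time guard such as
 *
 *	BUILD_BUG_ON(sizeof(struct inet_skb_parm) +
 *		     sizeof(struct l2tp_skb_cb) >
 *		     sizeof(((struct sk_buff *)NULL)->cb));
 *
 * would verify that the layout fits.
 */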
101
102static atomic_t l2tp_tunnel_count;
103static atomic_t l2tp_session_count;
104
105/* per-net private data for this module */
106static unsigned int l2tp_net_id;
107struct l2tp_net {
108 struct list_head l2tp_tunnel_list;
109 spinlock_t l2tp_tunnel_list_lock;
110 struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
111 spinlock_t l2tp_session_hlist_lock;
112};
113
114static inline struct l2tp_net *l2tp_pernet(struct net *net)
115{
116 BUG_ON(!net);
117
118 return net_generic(net, l2tp_net_id);
119}
120
121/* Session hash global list for L2TPv3.
122 * The session_id SHOULD be random according to RFC3931, but several
123 * L2TP implementations use incrementing session_ids. So we do a real
124 * hash on the session_id, rather than a simple bitmask.
125 */
126static inline struct hlist_head *
127l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
128{
129 return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)];
 131}
132
133/* Lookup a session by id in the global session list
134 */
135static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
136{
137 struct l2tp_net *pn = l2tp_pernet(net);
138 struct hlist_head *session_list =
139 l2tp_session_id_hash_2(pn, session_id);
140 struct l2tp_session *session;
141 struct hlist_node *walk;
142
143 rcu_read_lock_bh();
144 hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) {
145 if (session->session_id == session_id) {
146 rcu_read_unlock_bh();
147 return session;
148 }
149 }
150 rcu_read_unlock_bh();
151
152 return NULL;
153}
154
155/* Session hash list.
156 * The session_id SHOULD be random according to RFC2661, but several
157 * L2TP implementations (Cisco and Microsoft) use incrementing
158 * session_ids. So we do a real hash on the session_id, rather than a
159 * simple bitmask.
160 */
161static inline struct hlist_head *
162l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
163{
164 return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
165}
166
167/* Lookup a session by id
168 */
169struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id)
170{
171 struct hlist_head *session_list;
172 struct l2tp_session *session;
173 struct hlist_node *walk;
174
175 /* In L2TPv3, session_ids are unique over all tunnels and we
176 * sometimes need to look them up before we know the
177 * tunnel.
178 */
179 if (tunnel == NULL)
180 return l2tp_session_find_2(net, session_id);
181
182 session_list = l2tp_session_id_hash(tunnel, session_id);
183 read_lock_bh(&tunnel->hlist_lock);
184 hlist_for_each_entry(session, walk, session_list, hlist) {
185 if (session->session_id == session_id) {
186 read_unlock_bh(&tunnel->hlist_lock);
187 return session;
188 }
189 }
190 read_unlock_bh(&tunnel->hlist_lock);
191
192 return NULL;
193}
194EXPORT_SYMBOL_GPL(l2tp_session_find);
195
196struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
197{
198 int hash;
199 struct hlist_node *walk;
200 struct l2tp_session *session;
201 int count = 0;
202
203 read_lock_bh(&tunnel->hlist_lock);
204 for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
205 hlist_for_each_entry(session, walk, &tunnel->session_hlist[hash], hlist) {
206 if (++count > nth) {
207 read_unlock_bh(&tunnel->hlist_lock);
208 return session;
209 }
210 }
211 }
212
213 read_unlock_bh(&tunnel->hlist_lock);
214
215 return NULL;
216}
217EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
218
219/* Lookup a session by interface name.
220 * This is very inefficient but is only used by management interfaces.
221 */
222struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
223{
224 struct l2tp_net *pn = l2tp_pernet(net);
225 int hash;
226 struct hlist_node *walk;
227 struct l2tp_session *session;
228
229 rcu_read_lock_bh();
230 for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
231 hlist_for_each_entry_rcu(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) {
232 if (!strcmp(session->ifname, ifname)) {
233 rcu_read_unlock_bh();
234 return session;
235 }
236 }
237 }
238
239 rcu_read_unlock_bh();
240
241 return NULL;
242}
243EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
244
245/* Lookup a tunnel by id
246 */
247struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
248{
249 struct l2tp_tunnel *tunnel;
250 struct l2tp_net *pn = l2tp_pernet(net);
251
252 rcu_read_lock_bh();
253 list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
254 if (tunnel->tunnel_id == tunnel_id) {
255 rcu_read_unlock_bh();
256 return tunnel;
257 }
258 }
259 rcu_read_unlock_bh();
260
261 return NULL;
262}
263EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
264
265struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
266{
267 struct l2tp_net *pn = l2tp_pernet(net);
268 struct l2tp_tunnel *tunnel;
269 int count = 0;
270
271 rcu_read_lock_bh();
272 list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
273 if (++count > nth) {
274 rcu_read_unlock_bh();
275 return tunnel;
276 }
277 }
278
279 rcu_read_unlock_bh();
280
281 return NULL;
282}
283EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth);
284
285/*****************************************************************************
286 * Receive data handling
287 *****************************************************************************/
288
289/* Queue a skb in order. We come here only if the skb has an L2TP sequence
290 * number.
291 */
292static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *skb)
293{
294 struct sk_buff *skbp;
295 struct sk_buff *tmp;
296 u32 ns = L2TP_SKB_CB(skb)->ns;
297
298 spin_lock_bh(&session->reorder_q.lock);
299 skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
300 if (L2TP_SKB_CB(skbp)->ns > ns) {
301 __skb_queue_before(&session->reorder_q, skbp, skb);
302 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
 303				 "%s: pkt %u, inserted before %u, reorder_q len=%d\n",
304 session->name, ns, L2TP_SKB_CB(skbp)->ns,
305 skb_queue_len(&session->reorder_q));
306 session->stats.rx_oos_packets++;
307 goto out;
308 }
309 }
310
311 __skb_queue_tail(&session->reorder_q, skb);
312
313out:
314 spin_unlock_bh(&session->reorder_q.lock);
315}
316
317/* Dequeue a single skb.
318 */
319static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *skb)
320{
321 struct l2tp_tunnel *tunnel = session->tunnel;
322 int length = L2TP_SKB_CB(skb)->length;
323
324 /* We're about to requeue the skb, so return resources
325 * to its current owner (a socket receive buffer).
326 */
327 skb_orphan(skb);
328
329 tunnel->stats.rx_packets++;
330 tunnel->stats.rx_bytes += length;
331 session->stats.rx_packets++;
332 session->stats.rx_bytes += length;
333
334 if (L2TP_SKB_CB(skb)->has_seq) {
335 /* Bump our Nr */
336 session->nr++;
337 if (tunnel->version == L2TP_HDR_VER_2)
338 session->nr &= 0xffff;
339 else
340 session->nr &= 0xffffff;
341
342 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
 343		       "%s: updated nr to %u\n", session->name, session->nr);
344 }
345
346 /* call private receive handler */
347 if (session->recv_skb != NULL)
348 (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
349 else
350 kfree_skb(skb);
351
352 if (session->deref)
353 (*session->deref)(session);
354}
355
356/* Dequeue skbs from the session's reorder_q, subject to packet order.
357 * Skbs that have been in the queue for too long are simply discarded.
358 */
359static void l2tp_recv_dequeue(struct l2tp_session *session)
360{
361 struct sk_buff *skb;
362 struct sk_buff *tmp;
363
364 /* If the pkt at the head of the queue has the nr that we
365 * expect to send up next, dequeue it and any other
366 * in-sequence packets behind it.
367 */
368 spin_lock_bh(&session->reorder_q.lock);
369 skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
370 if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) {
371 session->stats.rx_seq_discards++;
372 session->stats.rx_errors++;
373 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
374 "%s: oos pkt %u len %d discarded (too old), "
375 "waiting for %u, reorder_q_len=%d\n",
376 session->name, L2TP_SKB_CB(skb)->ns,
377 L2TP_SKB_CB(skb)->length, session->nr,
378 skb_queue_len(&session->reorder_q));
379 __skb_unlink(skb, &session->reorder_q);
380 kfree_skb(skb);
381 if (session->deref)
382 (*session->deref)(session);
383 continue;
384 }
385
386 if (L2TP_SKB_CB(skb)->has_seq) {
387 if (L2TP_SKB_CB(skb)->ns != session->nr) {
388 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
389 "%s: holding oos pkt %u len %d, "
390 "waiting for %u, reorder_q_len=%d\n",
391 session->name, L2TP_SKB_CB(skb)->ns,
392 L2TP_SKB_CB(skb)->length, session->nr,
393 skb_queue_len(&session->reorder_q));
394 goto out;
395 }
396 }
397 __skb_unlink(skb, &session->reorder_q);
398
399 /* Process the skb. We release the queue lock while we
400 * do so to let other contexts process the queue.
401 */
402 spin_unlock_bh(&session->reorder_q.lock);
403 l2tp_recv_dequeue_skb(session, skb);
404 spin_lock_bh(&session->reorder_q.lock);
405 }
406
407out:
408 spin_unlock_bh(&session->reorder_q.lock);
409}
410
411static inline int l2tp_verify_udp_checksum(struct sock *sk,
412 struct sk_buff *skb)
413{
414 struct udphdr *uh = udp_hdr(skb);
415 u16 ulen = ntohs(uh->len);
416 struct inet_sock *inet;
417 __wsum psum;
418
419 if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check)
420 return 0;
421
422 inet = inet_sk(sk);
423 psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen,
424 IPPROTO_UDP, 0);
425
426 if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
427 !csum_fold(csum_add(psum, skb->csum)))
428 return 0;
429
430 skb->csum = psum;
431
432 return __skb_checksum_complete(skb);
433}
434
435/* Do receive processing of L2TP data frames. We handle both L2TPv2
436 * and L2TPv3 data frames here.
437 *
438 * L2TPv2 Data Message Header
439 *
440 * 0 1 2 3
441 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
442 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
443 * |T|L|x|x|S|x|O|P|x|x|x|x| Ver | Length (opt) |
444 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
445 * | Tunnel ID | Session ID |
446 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
447 * | Ns (opt) | Nr (opt) |
448 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
449 * | Offset Size (opt) | Offset pad... (opt)
450 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
451 *
452 * Data frames are marked by T=0. All other fields are the same as
453 * those in L2TP control frames.
454 *
455 * L2TPv3 Data Message Header
456 *
457 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
458 * | L2TP Session Header |
459 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
460 * | L2-Specific Sublayer |
461 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
462 * | Tunnel Payload ...
463 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
464 *
465 * L2TPv3 Session Header Over IP
466 *
467 * 0 1 2 3
468 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
469 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
470 * | Session ID |
471 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
472 * | Cookie (optional, maximum 64 bits)...
473 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
474 * |
475 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
476 *
477 * L2TPv3 L2-Specific Sublayer Format
478 *
479 * 0 1 2 3
480 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
481 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
482 * |x|S|x|x|x|x|x|x| Sequence Number |
483 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
484 *
485 * Cookie value, sublayer format and offset (pad) are negotiated with
486 * the peer when the session is set up. Unlike L2TPv2, we do not need
487 * to parse the packet header to determine if optional fields are
488 * present.
489 *
490 * Caller must already have parsed the frame and determined that it is
491 * a data (not control) frame before coming here. Fields up to the
492 * session-id have already been parsed and ptr points to the data
493 * after the session-id.
494 */
495void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
496 unsigned char *ptr, unsigned char *optr, u16 hdrflags,
497 int length, int (*payload_hook)(struct sk_buff *skb))
498{
499 struct l2tp_tunnel *tunnel = session->tunnel;
500 int offset;
501 u32 ns, nr;
502
503 /* The ref count is increased since we now hold a pointer to
504 * the session. Take care to decrement the refcnt when exiting
505 * this function from now on...
506 */
507 l2tp_session_inc_refcount(session);
508 if (session->ref)
509 (*session->ref)(session);
510
511 /* Parse and check optional cookie */
512 if (session->peer_cookie_len > 0) {
513 if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
514 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
515 "%s: cookie mismatch (%u/%u). Discarding.\n",
516 tunnel->name, tunnel->tunnel_id, session->session_id);
517 session->stats.rx_cookie_discards++;
518 goto discard;
519 }
520 ptr += session->peer_cookie_len;
521 }
522
523 /* Handle the optional sequence numbers. Sequence numbers are
524 * in different places for L2TPv2 and L2TPv3.
525 *
526 * If we are the LAC, enable/disable sequence numbers under
527 * the control of the LNS. If no sequence numbers present but
528 * we were expecting them, discard frame.
529 */
530 ns = nr = 0;
531 L2TP_SKB_CB(skb)->has_seq = 0;
532 if (tunnel->version == L2TP_HDR_VER_2) {
533 if (hdrflags & L2TP_HDRFLAG_S) {
534 ns = ntohs(*(__be16 *) ptr);
535 ptr += 2;
536 nr = ntohs(*(__be16 *) ptr);
537 ptr += 2;
538
539 /* Store L2TP info in the skb */
540 L2TP_SKB_CB(skb)->ns = ns;
541 L2TP_SKB_CB(skb)->has_seq = 1;
542
543 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
544 "%s: recv data ns=%u, nr=%u, session nr=%u\n",
545 session->name, ns, nr, session->nr);
546 }
547 } else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
548 u32 l2h = ntohl(*(__be32 *) ptr);
549
 550		if (l2h & L2TP_SLFLAG_S) {
 551			ns = l2h & L2TP_SL_SEQ_MASK;
552
553 /* Store L2TP info in the skb */
554 L2TP_SKB_CB(skb)->ns = ns;
555 L2TP_SKB_CB(skb)->has_seq = 1;
556
557 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
558 "%s: recv data ns=%u, session nr=%u\n",
559 session->name, ns, session->nr);
560 }
561 }
562
563 /* Advance past L2-specific header, if present */
564 ptr += session->l2specific_len;
565
566 if (L2TP_SKB_CB(skb)->has_seq) {
567 /* Received a packet with sequence numbers. If we're the LNS,
 568	 * check if we are sending sequence numbers and if not,
569 * configure it so.
570 */
571 if ((!session->lns_mode) && (!session->send_seq)) {
572 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_INFO,
573 "%s: requested to enable seq numbers by LNS\n",
574 session->name);
575 session->send_seq = -1;
576 l2tp_session_set_header_len(session, tunnel->version);
577 }
578 } else {
579 /* No sequence numbers.
580 * If user has configured mandatory sequence numbers, discard.
581 */
582 if (session->recv_seq) {
583 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING,
584 "%s: recv data has no seq numbers when required. "
585 "Discarding\n", session->name);
586 session->stats.rx_seq_discards++;
587 goto discard;
588 }
589
590 /* If we're the LAC and we're sending sequence numbers, the
591 * LNS has requested that we no longer send sequence numbers.
592 * If we're the LNS and we're sending sequence numbers, the
593 * LAC is broken. Discard the frame.
594 */
595 if ((!session->lns_mode) && (session->send_seq)) {
596 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_INFO,
597 "%s: requested to disable seq numbers by LNS\n",
598 session->name);
599 session->send_seq = 0;
600 l2tp_session_set_header_len(session, tunnel->version);
601 } else if (session->send_seq) {
602 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING,
603 "%s: recv data has no seq numbers when required. "
604 "Discarding\n", session->name);
605 session->stats.rx_seq_discards++;
606 goto discard;
607 }
608 }
609
610 /* Session data offset is handled differently for L2TPv2 and
611 * L2TPv3. For L2TPv2, there is an optional 16-bit value in
612 * the header. For L2TPv3, the offset is negotiated using AVPs
613 * in the session setup control protocol.
614 */
615 if (tunnel->version == L2TP_HDR_VER_2) {
616 /* If offset bit set, skip it. */
617 if (hdrflags & L2TP_HDRFLAG_O) {
618 offset = ntohs(*(__be16 *)ptr);
619 ptr += 2 + offset;
620 }
621 } else
622 ptr += session->offset;
623
624 offset = ptr - optr;
625 if (!pskb_may_pull(skb, offset))
626 goto discard;
627
628 __skb_pull(skb, offset);
629
630 /* If caller wants to process the payload before we queue the
631 * packet, do so now.
632 */
633 if (payload_hook)
634 if ((*payload_hook)(skb))
635 goto discard;
636
637 /* Prepare skb for adding to the session's reorder_q. Hold
 638	 * packets for at most reorder_timeout, or for 1 second if
 639	 * reordering is not enabled.
640 */
641 L2TP_SKB_CB(skb)->length = length;
642 L2TP_SKB_CB(skb)->expires = jiffies +
643 (session->reorder_timeout ? session->reorder_timeout : HZ);
644
645 /* Add packet to the session's receive queue. Reordering is done here, if
 646	 * enabled. Saved L2TP protocol info is stored in skb->cb[].
647 */
648 if (L2TP_SKB_CB(skb)->has_seq) {
649 if (session->reorder_timeout != 0) {
650 /* Packet reordering enabled. Add skb to session's
651 * reorder queue, in order of ns.
652 */
653 l2tp_recv_queue_skb(session, skb);
654 } else {
655 /* Packet reordering disabled. Discard out-of-sequence
656 * packets
657 */
658 if (L2TP_SKB_CB(skb)->ns != session->nr) {
659 session->stats.rx_seq_discards++;
660 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
661 "%s: oos pkt %u len %d discarded, "
662 "waiting for %u, reorder_q_len=%d\n",
663 session->name, L2TP_SKB_CB(skb)->ns,
664 L2TP_SKB_CB(skb)->length, session->nr,
665 skb_queue_len(&session->reorder_q));
666 goto discard;
667 }
668 skb_queue_tail(&session->reorder_q, skb);
669 }
670 } else {
671 /* No sequence numbers. Add the skb to the tail of the
672 * reorder queue. This ensures that it will be
673 * delivered after all previous sequenced skbs.
674 */
675 skb_queue_tail(&session->reorder_q, skb);
676 }
677
678 /* Try to dequeue as many skbs from reorder_q as we can. */
679 l2tp_recv_dequeue(session);
680
681 l2tp_session_dec_refcount(session);
682
683 return;
684
685discard:
686 session->stats.rx_errors++;
687 kfree_skb(skb);
688
689 if (session->deref)
690 (*session->deref)(session);
691
692 l2tp_session_dec_refcount(session);
693}
694EXPORT_SYMBOL(l2tp_recv_common);
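
/* Worked example (illustrative): a minimal L2TPv2 data packet starts with
 * flag octets 0x00 0x02 -- T=0 (data), L=S=O=P=0, Ver=2 -- so no Length,
 * Ns/Nr or Offset fields are present. With the S bit set the flag octets
 * become 0x08 0x02 and Ns/Nr follow the Tunnel ID and Session ID.
 */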
695
696/* Internal UDP receive handler. Does the real work of receiving an L2TP
697 * data frame. The skb is not on a list when we get here.
698 * Returns 0 if the packet was a data packet and was successfully passed on.
699 * Returns 1 if the packet was not a good data packet and could not be
700 * forwarded. All such packets are passed up to userspace to deal with.
701 */
702int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
703 int (*payload_hook)(struct sk_buff *skb))
704{
705 struct l2tp_session *session = NULL;
706 unsigned char *ptr, *optr;
707 u16 hdrflags;
708 u32 tunnel_id, session_id;
709 int offset;
710 u16 version;
711 int length;
712
713 if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
714 goto discard_bad_csum;
715
716 /* UDP always verifies the packet length. */
717 __skb_pull(skb, sizeof(struct udphdr));
718
719 /* Short packet? */
720 if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
721 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
722 "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
723 goto error;
724 }
725
726 /* Point to L2TP header */
727 optr = ptr = skb->data;
728
729 /* Trace packet contents, if enabled */
730 if (tunnel->debug & L2TP_MSG_DATA) {
731 length = min(32u, skb->len);
732 if (!pskb_may_pull(skb, length))
733 goto error;
734
735 printk(KERN_DEBUG "%s: recv: ", tunnel->name);
736
737 offset = 0;
738 do {
739 printk(" %02X", ptr[offset]);
740 } while (++offset < length);
741
742 printk("\n");
743 }
744
745 /* Get L2TP header flags */
746 hdrflags = ntohs(*(__be16 *) ptr);
747
748 /* Check protocol version */
749 version = hdrflags & L2TP_HDR_VER_MASK;
750 if (version != tunnel->version) {
751 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
752 "%s: recv protocol version mismatch: got %d expected %d\n",
753 tunnel->name, version, tunnel->version);
754 goto error;
755 }
756
757 /* Get length of L2TP packet */
758 length = skb->len;
759
760 /* If type is control packet, it is handled by userspace. */
761 if (hdrflags & L2TP_HDRFLAG_T) {
762 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
763 "%s: recv control packet, len=%d\n", tunnel->name, length);
764 goto error;
765 }
766
767 /* Skip flags */
768 ptr += 2;
769
770 if (tunnel->version == L2TP_HDR_VER_2) {
771 /* If length is present, skip it */
772 if (hdrflags & L2TP_HDRFLAG_L)
773 ptr += 2;
774
775 /* Extract tunnel and session ID */
776 tunnel_id = ntohs(*(__be16 *) ptr);
777 ptr += 2;
778 session_id = ntohs(*(__be16 *) ptr);
779 ptr += 2;
780 } else {
781 ptr += 2; /* skip reserved bits */
782 tunnel_id = tunnel->tunnel_id;
783 session_id = ntohl(*(__be32 *) ptr);
784 ptr += 4;
785 }
786
787 /* Find the session context */
788 session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
789 if (!session || !session->recv_skb) {
790 /* Not found? Pass to userspace to deal with */
791 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
792 "%s: no session found (%u/%u). Passing up.\n",
793 tunnel->name, tunnel_id, session_id);
794 goto error;
795 }
796
797 l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook);
798
799 return 0;
800
801discard_bad_csum:
802 LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
803 UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0);
804 tunnel->stats.rx_errors++;
805 kfree_skb(skb);
806
807 return 0;
808
809error:
810 /* Put UDP header back */
811 __skb_push(skb, sizeof(struct udphdr));
812
813 return 1;
814}
815EXPORT_SYMBOL_GPL(l2tp_udp_recv_core);
816
817/* UDP encapsulation receive handler. See net/ipv4/udp.c.
818 * Return codes:
819 * 0 : success.
820 * <0: error
821 * >0: skb should be passed up to userspace as UDP.
822 */
823int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
824{
825 struct l2tp_tunnel *tunnel;
826
827 tunnel = l2tp_sock_to_tunnel(sk);
828 if (tunnel == NULL)
829 goto pass_up;
830
831 PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
832 "%s: received %d bytes\n", tunnel->name, skb->len);
833
834 if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook))
835 goto pass_up_put;
836
837 sock_put(sk);
838 return 0;
839
840pass_up_put:
841 sock_put(sk);
842pass_up:
843 return 1;
844}
845EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
846
847/************************************************************************
848 * Transmit handling
849 ***********************************************************************/
850
851/* Build an L2TP header for the session into the buffer provided.
852 */
853static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf)
854{
855 struct l2tp_tunnel *tunnel = session->tunnel;
856 __be16 *bufp = buf;
857 __be16 *optr = buf;
858 u16 flags = L2TP_HDR_VER_2;
859 u32 tunnel_id = tunnel->peer_tunnel_id;
860 u32 session_id = session->peer_session_id;
861
862 if (session->send_seq)
863 flags |= L2TP_HDRFLAG_S;
864
865 /* Setup L2TP header. */
866 *bufp++ = htons(flags);
867 *bufp++ = htons(tunnel_id);
868 *bufp++ = htons(session_id);
869 if (session->send_seq) {
870 *bufp++ = htons(session->ns);
871 *bufp++ = 0;
872 session->ns++;
873 session->ns &= 0xffff;
874 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
875 "%s: updated ns to %u\n", session->name, session->ns);
876 }
877
878 return bufp - optr;
879}
880
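/* Example (illustrative): with peer_tunnel_id 1, peer_session_id 2 and
 * send_seq enabled, the function above emits the 10-byte header
 * 08 02 00 01 00 02 <Ns> 00 00 (flags 0x0802 = S bit | Ver 2), i.e.
 * L2TP_HDR_SIZE_SEQ bytes.
 */
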
881static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
882{
883 struct l2tp_tunnel *tunnel = session->tunnel;
884 char *bufp = buf;
885 char *optr = bufp;
886
887 /* Setup L2TP header. The header differs slightly for UDP and
 888	 * IP encapsulations. For UDP, there are 4 bytes of flags.
889 */
890 if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
891 u16 flags = L2TP_HDR_VER_3;
892 *((__be16 *) bufp) = htons(flags);
893 bufp += 2;
894 *((__be16 *) bufp) = 0;
895 bufp += 2;
896 }
897
898 *((__be32 *) bufp) = htonl(session->peer_session_id);
899 bufp += 4;
900 if (session->cookie_len) {
901 memcpy(bufp, &session->cookie[0], session->cookie_len);
902 bufp += session->cookie_len;
903 }
904 if (session->l2specific_len) {
905 if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
906 u32 l2h = 0;
907 if (session->send_seq) {
 908				l2h = L2TP_SLFLAG_S | session->ns;
909 session->ns++;
910 session->ns &= 0xffffff;
911 PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
912 "%s: updated ns to %u\n", session->name, session->ns);
913 }
914
915 *((__be32 *) bufp) = htonl(l2h);
916 }
917 bufp += session->l2specific_len;
918 }
919 if (session->offset)
920 bufp += session->offset;
921
922 return bufp - optr;
923}
924
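/* Example (illustrative): for UDP encapsulation with peer_session_id 5,
 * no cookie and the default 4-byte L2-specific sublayer with sequencing
 * enabled, the function above emits the 12-byte header
 * 00 03 00 00 | 00 00 00 05 | 40 <Ns, 24 bits>.
 */
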
925int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len)
926{
927 struct l2tp_tunnel *tunnel = session->tunnel;
928 unsigned int len = skb->len;
929 int error;
930
931 /* Debug */
932 if (session->send_seq)
933 PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
934 "%s: send %Zd bytes, ns=%u\n", session->name,
935 data_len, session->ns - 1);
936 else
937 PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
938 "%s: send %Zd bytes\n", session->name, data_len);
939
940 if (session->debug & L2TP_MSG_DATA) {
941 int i;
942 int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
943 unsigned char *datap = skb->data + uhlen;
944
945 printk(KERN_DEBUG "%s: xmit:", session->name);
946 for (i = 0; i < (len - uhlen); i++) {
947 printk(" %02X", *datap++);
948 if (i == 31) {
949 printk(" ...");
950 break;
951 }
952 }
953 printk("\n");
954 }
955
956 /* Queue the packet to IP for output */
957 skb->local_df = 1;
958 error = ip_queue_xmit(skb);
959
960 /* Update stats */
961 if (error >= 0) {
962 tunnel->stats.tx_packets++;
963 tunnel->stats.tx_bytes += len;
964 session->stats.tx_packets++;
965 session->stats.tx_bytes += len;
966 } else {
967 tunnel->stats.tx_errors++;
968 session->stats.tx_errors++;
969 }
970
971 return 0;
972}
973EXPORT_SYMBOL_GPL(l2tp_xmit_core);
974
975/* Called automatically when an skb we own is freed; drops the socket
976 * reference taken in l2tp_skb_set_owner_w() below. */
977static void l2tp_sock_wfree(struct sk_buff *skb)
978{
979 sock_put(skb->sk);
980}
981
982/* For data skbs that we transmit, we associate with the tunnel socket
983 * but don't do accounting.
984 */
985static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
986{
987 sock_hold(sk);
988 skb->sk = sk;
989 skb->destructor = l2tp_sock_wfree;
990}
991
992/* If the caller requires the skb to have a PPP header, the header must be
993 * inserted in the skb data before calling this function.
994 */
995int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len)
996{
997 int data_len = skb->len;
998 struct l2tp_tunnel *tunnel = session->tunnel;
999 struct sock *sk = tunnel->sock;
1000 struct udphdr *uh;
1001 struct inet_sock *inet;
1002 __wsum csum;
1003 int old_headroom;
1004 int new_headroom;
1005 int headroom;
1006 int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
1007 int udp_len;
1008
1009 /* Check that there's enough headroom in the skb to insert IP,
1010 * UDP and L2TP headers. If not enough, expand it to
1011 * make room. Adjust truesize.
1012 */
1013 headroom = NET_SKB_PAD + sizeof(struct iphdr) +
1014 uhlen + hdr_len;
1015 old_headroom = skb_headroom(skb);
1016 if (skb_cow_head(skb, headroom))
1017 goto abort;
1018
1019 new_headroom = skb_headroom(skb);
1020 skb_orphan(skb);
1021 skb->truesize += new_headroom - old_headroom;
1022
1023 /* Setup L2TP header */
1024 session->build_header(session, __skb_push(skb, hdr_len));
1025
1026 /* Reset skb netfilter state */
1027 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1028 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
1029 IPSKB_REROUTED);
1030 nf_reset(skb);
1031
1032 /* Get routing info from the tunnel socket */
1033 skb_dst_drop(skb);
1034 skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
1035
1036 switch (tunnel->encap) {
1037 case L2TP_ENCAPTYPE_UDP:
1038 /* Setup UDP header */
1039 inet = inet_sk(sk);
1040 __skb_push(skb, sizeof(*uh));
1041 skb_reset_transport_header(skb);
1042 uh = udp_hdr(skb);
1043 uh->source = inet->inet_sport;
1044 uh->dest = inet->inet_dport;
1045 udp_len = uhlen + hdr_len + data_len;
1046 uh->len = htons(udp_len);
1047 uh->check = 0;
1048
1049 /* Calculate UDP checksum if configured to do so */
1050 if (sk->sk_no_check == UDP_CSUM_NOXMIT)
1051 skb->ip_summed = CHECKSUM_NONE;
1052 else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
1053 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
1054 skb->ip_summed = CHECKSUM_COMPLETE;
1055 csum = skb_checksum(skb, 0, udp_len, 0);
1056 uh->check = csum_tcpudp_magic(inet->inet_saddr,
1057 inet->inet_daddr,
1058 udp_len, IPPROTO_UDP, csum);
1059 if (uh->check == 0)
1060 uh->check = CSUM_MANGLED_0;
1061 } else {
1062 skb->ip_summed = CHECKSUM_PARTIAL;
1063 skb->csum_start = skb_transport_header(skb) - skb->head;
1064 skb->csum_offset = offsetof(struct udphdr, check);
1065 uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
1066 inet->inet_daddr,
1067 udp_len, IPPROTO_UDP, 0);
1068 }
1069 break;
1070
1071 case L2TP_ENCAPTYPE_IP:
1072 break;
1073 }
1074
1075 l2tp_skb_set_owner_w(skb, sk);
1076
1077 l2tp_xmit_core(session, skb, data_len);
1078
1079abort:
1080 return 0;
1081}
1082EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
1083
1084/*****************************************************************************
1085 * Tunnel and session create/destroy.
1086 *****************************************************************************/
1087
1088/* Tunnel socket destruct hook.
1089 * The tunnel context is deleted only when all session sockets have been
1090 * closed.
1091 */
1092void l2tp_tunnel_destruct(struct sock *sk)
1093{
1094 struct l2tp_tunnel *tunnel;
1095
1096 tunnel = sk->sk_user_data;
1097 if (tunnel == NULL)
1098 goto end;
1099
1100 PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
1101 "%s: closing...\n", tunnel->name);
1102
1103 /* Close all sessions */
1104 l2tp_tunnel_closeall(tunnel);
1105
1106 switch (tunnel->encap) {
1107 case L2TP_ENCAPTYPE_UDP:
1108 /* No longer an encapsulation socket. See net/ipv4/udp.c */
1109 (udp_sk(sk))->encap_type = 0;
1110 (udp_sk(sk))->encap_rcv = NULL;
1111 break;
1112 case L2TP_ENCAPTYPE_IP:
1113 break;
1114 }
1115
1116 /* Remove hooks into tunnel socket */
1117 tunnel->sock = NULL;
1118 sk->sk_destruct = tunnel->old_sk_destruct;
1119 sk->sk_user_data = NULL;
1120
1121 /* Call the original destructor */
1122 if (sk->sk_destruct)
1123 (*sk->sk_destruct)(sk);
1124
1125 /* We're finished with the socket */
1126 l2tp_tunnel_dec_refcount(tunnel);
1127
1128end:
1129 return;
1130}
1131EXPORT_SYMBOL(l2tp_tunnel_destruct);
1132
1133/* When the tunnel is closed, all the attached sessions need to go too.
1134 */
1135void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
1136{
1137 int hash;
1138 struct hlist_node *walk;
1139 struct hlist_node *tmp;
1140 struct l2tp_session *session;
1141
1142 BUG_ON(tunnel == NULL);
1143
1144 PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
1145 "%s: closing all sessions...\n", tunnel->name);
1146
1147 write_lock_bh(&tunnel->hlist_lock);
1148 for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
1149again:
1150 hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
1151 session = hlist_entry(walk, struct l2tp_session, hlist);
1152
1153 PRINTK(session->debug, L2TP_MSG_CONTROL, KERN_INFO,
1154 "%s: closing session\n", session->name);
1155
1156 hlist_del_init(&session->hlist);
1157
1158 /* Since we should hold the sock lock while
1159 * doing any unbinding, we need to release the
1160 * lock we're holding before taking that lock.
1161			 * Hold a reference to the session so it doesn't
1162 * disappear as we're jumping between locks.
1163 */
1164 if (session->ref != NULL)
1165 (*session->ref)(session);
1166
1167 write_unlock_bh(&tunnel->hlist_lock);
1168
1169 if (tunnel->version != L2TP_HDR_VER_2) {
1170 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1171
1172 spin_lock_bh(&pn->l2tp_session_hlist_lock);
1173 hlist_del_init_rcu(&session->global_hlist);
1174 spin_unlock_bh(&pn->l2tp_session_hlist_lock);
1175 synchronize_rcu();
1176 }
1177
1178 if (session->session_close != NULL)
1179 (*session->session_close)(session);
1180
1181 if (session->deref != NULL)
1182 (*session->deref)(session);
1183
1184 write_lock_bh(&tunnel->hlist_lock);
1185
1186 /* Now restart from the beginning of this hash
1187 * chain. We always remove a session from the
1188 * list so we are guaranteed to make forward
1189 * progress.
1190 */
1191 goto again;
1192 }
1193 }
1194 write_unlock_bh(&tunnel->hlist_lock);
1195}
1196EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
1197
1198/* Really kill the tunnel.
1199 * Come here only when all sessions have been cleared from the tunnel.
1200 */
1201void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
1202{
1203 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1204
1205 BUG_ON(atomic_read(&tunnel->ref_count) != 0);
1206 BUG_ON(tunnel->sock != NULL);
1207
1208 PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
1209 "%s: free...\n", tunnel->name);
1210
1211 /* Remove from tunnel list */
1212 spin_lock_bh(&pn->l2tp_tunnel_list_lock);
1213 list_del_rcu(&tunnel->list);
1214 spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
1215 synchronize_rcu();
1216
1217 atomic_dec(&l2tp_tunnel_count);
1218 kfree(tunnel);
1219}
1220EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
1221
1222/* Create a socket for the tunnel, if one isn't set up by
1223 * userspace. This is used for static tunnels where there is no
1224 * managing L2TP daemon.
1225 */
1226static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp)
1227{
1228 int err = -EINVAL;
1229 struct sockaddr_in udp_addr;
1230 struct sockaddr_l2tpip ip_addr;
1231 struct socket *sock = NULL;
1232
1233 switch (cfg->encap) {
1234 case L2TP_ENCAPTYPE_UDP:
1235 err = sock_create(AF_INET, SOCK_DGRAM, 0, sockp);
1236 if (err < 0)
1237 goto out;
1238
1239 sock = *sockp;
1240
1241 memset(&udp_addr, 0, sizeof(udp_addr));
1242 udp_addr.sin_family = AF_INET;
1243 udp_addr.sin_addr = cfg->local_ip;
1244 udp_addr.sin_port = htons(cfg->local_udp_port);
1245 err = kernel_bind(sock, (struct sockaddr *) &udp_addr, sizeof(udp_addr));
1246 if (err < 0)
1247 goto out;
1248
1249 udp_addr.sin_family = AF_INET;
1250 udp_addr.sin_addr = cfg->peer_ip;
1251 udp_addr.sin_port = htons(cfg->peer_udp_port);
1252 err = kernel_connect(sock, (struct sockaddr *) &udp_addr, sizeof(udp_addr), 0);
1253 if (err < 0)
1254 goto out;
1255
1256 if (!cfg->use_udp_checksums)
1257 sock->sk->sk_no_check = UDP_CSUM_NOXMIT;
1258
1259 break;
1260
1261 case L2TP_ENCAPTYPE_IP:
1262 err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_L2TP, sockp);
1263 if (err < 0)
1264 goto out;
1265
1266 sock = *sockp;
1267
1268 memset(&ip_addr, 0, sizeof(ip_addr));
1269 ip_addr.l2tp_family = AF_INET;
1270 ip_addr.l2tp_addr = cfg->local_ip;
1271 ip_addr.l2tp_conn_id = tunnel_id;
1272 err = kernel_bind(sock, (struct sockaddr *) &ip_addr, sizeof(ip_addr));
1273 if (err < 0)
1274 goto out;
1275
1276 ip_addr.l2tp_family = AF_INET;
1277 ip_addr.l2tp_addr = cfg->peer_ip;
1278 ip_addr.l2tp_conn_id = peer_tunnel_id;
1279 err = kernel_connect(sock, (struct sockaddr *) &ip_addr, sizeof(ip_addr), 0);
1280 if (err < 0)
1281 goto out;
1282
1283 break;
1284
1285 default:
1286 goto out;
1287 }
1288
1289out:
1290 if ((err < 0) && sock) {
1291 sock_release(sock);
1292 *sockp = NULL;
1293 }
1294
1295 return err;
1296}
1297
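/* Illustrative userspace analogue (not part of this patch): a daemon can
 * create the same kind of L2TP/IP socket itself and pass its fd to
 * l2tp_tunnel_create() via the netlink interface, roughly:
 *
 *	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_L2TP);
 *	struct sockaddr_l2tpip a = {
 *		.l2tp_family  = AF_INET,
 *		.l2tp_addr    = local_ip,	(hypothetical in_addr)
 *		.l2tp_conn_id = tunnel_id,
 *	};
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 */
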
1298int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
1299{
1300 struct l2tp_tunnel *tunnel = NULL;
1301 int err;
1302 struct socket *sock = NULL;
1303 struct sock *sk = NULL;
1304 struct l2tp_net *pn;
1305 enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
1306
1307 /* Get the tunnel socket from the fd, which was opened by
1308 * the userspace L2TP daemon. If not specified, create a
1309 * kernel socket.
1310 */
1311 if (fd < 0) {
1312 err = l2tp_tunnel_sock_create(tunnel_id, peer_tunnel_id, cfg, &sock);
1313 if (err < 0)
1314 goto err;
1315 } else {
1316 err = -EBADF;
1317 sock = sockfd_lookup(fd, &err);
1318 if (!sock) {
1319			printk(KERN_ERR "tunl %u: sockfd_lookup(fd=%d) returned %d\n",
1320 tunnel_id, fd, err);
1321 goto err;
1322 }
1323 }
1324
1325 sk = sock->sk;
1326
1327 if (cfg != NULL)
1328 encap = cfg->encap;
1329
1330 /* Quick sanity checks */
1331 switch (encap) {
1332 case L2TP_ENCAPTYPE_UDP:
1333 err = -EPROTONOSUPPORT;
1334 if (sk->sk_protocol != IPPROTO_UDP) {
1335			printk(KERN_ERR "tunl %u: fd %d wrong protocol, got %d, expected %d\n",
1336 tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
1337 goto err;
1338 }
1339 break;
1340 case L2TP_ENCAPTYPE_IP:
1341 err = -EPROTONOSUPPORT;
1342 if (sk->sk_protocol != IPPROTO_L2TP) {
1343			printk(KERN_ERR "tunl %u: fd %d wrong protocol, got %d, expected %d\n",
1344 tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
1345 goto err;
1346 }
1347 break;
1348 }
1349
1350 /* Check if this socket has already been prepped */
1351 tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
1352 if (tunnel != NULL) {
1353		/* Already in use as an L2TP tunnel socket */
1354 err = -EBUSY;
1355 goto err;
1356 }
1357
1358 tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
1359 if (tunnel == NULL) {
1360 err = -ENOMEM;
1361 goto err;
1362 }
1363
1364 tunnel->version = version;
1365 tunnel->tunnel_id = tunnel_id;
1366 tunnel->peer_tunnel_id = peer_tunnel_id;
1367 tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS;
1368
1369 tunnel->magic = L2TP_TUNNEL_MAGIC;
1370 sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
1371 rwlock_init(&tunnel->hlist_lock);
1372
1373 /* The net we belong to */
1374 tunnel->l2tp_net = net;
1375 pn = l2tp_pernet(net);
1376
1377 if (cfg != NULL)
1378 tunnel->debug = cfg->debug;
1379
1380	/* Record the encapsulation type for this tunnel */
1381 tunnel->encap = encap;
1382 if (encap == L2TP_ENCAPTYPE_UDP) {
1383 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
1384 udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
1385 udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
1386 }
1387
1388 sk->sk_user_data = tunnel;
1389
1390 /* Hook on the tunnel socket destructor so that we can cleanup
1391 * if the tunnel socket goes away.
1392 */
1393 tunnel->old_sk_destruct = sk->sk_destruct;
1394 sk->sk_destruct = &l2tp_tunnel_destruct;
1395 tunnel->sock = sk;
1396 sk->sk_allocation = GFP_ATOMIC;
1397
1398 /* Add tunnel to our list */
1399 INIT_LIST_HEAD(&tunnel->list);
1400 spin_lock_bh(&pn->l2tp_tunnel_list_lock);
1401 list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
1402 spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
1403 synchronize_rcu();
1404 atomic_inc(&l2tp_tunnel_count);
1405
1406 /* Bump the reference count. The tunnel context is deleted
1407 * only when this drops to zero.
1408 */
1409 l2tp_tunnel_inc_refcount(tunnel);
1410
1411 err = 0;
1412err:
1413 if (tunnelp)
1414 *tunnelp = tunnel;
1415
1416 /* If tunnel's socket was created by the kernel, it doesn't
1417 * have a file.
1418 */
1419 if (sock && sock->file)
1420 sockfd_put(sock);
1421
1422 return err;
1423}
1424EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
1425
1426/* This function is used by the netlink TUNNEL_DELETE command.
1427 */
1428int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
1429{
1430 int err = 0;
1431 struct socket *sock = tunnel->sock ? tunnel->sock->sk_socket : NULL;
1432
1433 /* Force the tunnel socket to close. This will eventually
1434 * cause the tunnel to be deleted via the normal socket close
1435 * mechanisms when userspace closes the tunnel socket.
1436 */
1437 if (sock != NULL) {
1438 err = inet_shutdown(sock, 2);
1439
1440 /* If the tunnel's socket was created by the kernel,
1441 * close the socket here since the socket was not
1442 * created by userspace.
1443 */
1444 if (sock->file == NULL)
1445 err = inet_release(sock);
1446 }
1447
1448 return err;
1449}
1450EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
1451
1452/* Really kill the session.
1453 */
1454void l2tp_session_free(struct l2tp_session *session)
1455{
1456 struct l2tp_tunnel *tunnel;
1457
1458 BUG_ON(atomic_read(&session->ref_count) != 0);
1459
1460 tunnel = session->tunnel;
1461 if (tunnel != NULL) {
1462 BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
1463
1464 /* Delete the session from the hash */
1465 write_lock_bh(&tunnel->hlist_lock);
1466 hlist_del_init(&session->hlist);
1467 write_unlock_bh(&tunnel->hlist_lock);
1468
1469 /* Unlink from the global hash if not L2TPv2 */
1470 if (tunnel->version != L2TP_HDR_VER_2) {
1471 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1472
1473 spin_lock_bh(&pn->l2tp_session_hlist_lock);
1474 hlist_del_init_rcu(&session->global_hlist);
1475 spin_unlock_bh(&pn->l2tp_session_hlist_lock);
1476 synchronize_rcu();
1477 }
1478
1479 if (session->session_id != 0)
1480 atomic_dec(&l2tp_session_count);
1481
1482 sock_put(tunnel->sock);
1483
1484 /* This will delete the tunnel context if this
1485 * is the last session on the tunnel.
1486 */
1487 session->tunnel = NULL;
1488 l2tp_tunnel_dec_refcount(tunnel);
1489 }
1490
1491 kfree(session);
1492
1493 return;
1494}
1495EXPORT_SYMBOL_GPL(l2tp_session_free);
1496
1497/* This function is used by the netlink SESSION_DELETE command and by
1498 * pseudowire modules.
1499 */
1500int l2tp_session_delete(struct l2tp_session *session)
1501{
1502 if (session->session_close != NULL)
1503 (*session->session_close)(session);
1504
1505 l2tp_session_dec_refcount(session);
1506
1507 return 0;
1508}
1509EXPORT_SYMBOL_GPL(l2tp_session_delete);
1510
1511
1512/* We come here whenever a session's send_seq, cookie_len or
1513 * l2specific_len parameters are set.
1514 */
1515void l2tp_session_set_header_len(struct l2tp_session *session, int version)
1516{
1517 if (version == L2TP_HDR_VER_2) {
1518 session->hdr_len = 6;
1519 if (session->send_seq)
1520 session->hdr_len += 4;
1521 } else {
1522 session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset;
1523 if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
1524 session->hdr_len += 4;
1525 }
1526
1527}
1528EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
1529
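/* Example (illustrative): an L2TPv2 session with send_seq set gets
 * hdr_len = 6 + 4 = 10 (L2TP_HDR_SIZE_SEQ); an L2TPv3/UDP session with a
 * 4-byte cookie, the default 4-byte L2-specific sublayer and no offset
 * gets hdr_len = 4 + 4 + 4 + 0 + 4 = 16.
 */
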
1530struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
1531{
1532 struct l2tp_session *session;
1533
1534 session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
1535 if (session != NULL) {
1536 session->magic = L2TP_SESSION_MAGIC;
1537 session->tunnel = tunnel;
1538
1539 session->session_id = session_id;
1540 session->peer_session_id = peer_session_id;
1541 session->nr = 1;
1542
1543 sprintf(&session->name[0], "sess %u/%u",
1544 tunnel->tunnel_id, session->session_id);
1545
1546 skb_queue_head_init(&session->reorder_q);
1547
1548 INIT_HLIST_NODE(&session->hlist);
1549 INIT_HLIST_NODE(&session->global_hlist);
1550
1551 /* Inherit debug options from tunnel */
1552 session->debug = tunnel->debug;
1553
1554 if (cfg) {
1555 session->pwtype = cfg->pw_type;
1556 session->debug = cfg->debug;
1557 session->mtu = cfg->mtu;
1558 session->mru = cfg->mru;
1559 session->send_seq = cfg->send_seq;
1560 session->recv_seq = cfg->recv_seq;
1561 session->lns_mode = cfg->lns_mode;
1562 session->reorder_timeout = cfg->reorder_timeout;
1563 session->offset = cfg->offset;
1564 session->l2specific_type = cfg->l2specific_type;
1565 session->l2specific_len = cfg->l2specific_len;
1566 session->cookie_len = cfg->cookie_len;
1567 memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len);
1568 session->peer_cookie_len = cfg->peer_cookie_len;
1569 memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
1570 }
1571
1572 if (tunnel->version == L2TP_HDR_VER_2)
1573 session->build_header = l2tp_build_l2tpv2_header;
1574 else
1575 session->build_header = l2tp_build_l2tpv3_header;
1576
1577 l2tp_session_set_header_len(session, tunnel->version);
1578
1579 /* Bump the reference count. The session context is deleted
1580 * only when this drops to zero.
1581 */
1582 l2tp_session_inc_refcount(session);
1583 l2tp_tunnel_inc_refcount(tunnel);
1584
1585 /* Ensure tunnel socket isn't deleted */
1586 sock_hold(tunnel->sock);
1587
1588 /* Add session to the tunnel's hash list */
1589 write_lock_bh(&tunnel->hlist_lock);
1590 hlist_add_head(&session->hlist,
1591 l2tp_session_id_hash(tunnel, session_id));
1592 write_unlock_bh(&tunnel->hlist_lock);
1593
1594 /* And to the global session list if L2TPv3 */
1595 if (tunnel->version != L2TP_HDR_VER_2) {
1596 struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
1597
1598 spin_lock_bh(&pn->l2tp_session_hlist_lock);
1599 hlist_add_head_rcu(&session->global_hlist,
1600 l2tp_session_id_hash_2(pn, session_id));
1601 spin_unlock_bh(&pn->l2tp_session_hlist_lock);
1602 synchronize_rcu();
1603 }
1604
1605 /* Ignore management session in session count value */
1606 if (session->session_id != 0)
1607 atomic_inc(&l2tp_session_count);
1608 }
1609
1610 return session;
1611}
1612EXPORT_SYMBOL_GPL(l2tp_session_create);
1613
1614/*****************************************************************************
1615 * Init and cleanup
1616 *****************************************************************************/
1617
1618static __net_init int l2tp_init_net(struct net *net)
1619{
1620 struct l2tp_net *pn = net_generic(net, l2tp_net_id);
1621 int hash;
1622
1623 INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
1624 spin_lock_init(&pn->l2tp_tunnel_list_lock);
1625
1626 for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
1627 INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
1628
1629 spin_lock_init(&pn->l2tp_session_hlist_lock);
1630
1631 return 0;
1632}
1633
1634static struct pernet_operations l2tp_net_ops = {
1635 .init = l2tp_init_net,
1636 .id = &l2tp_net_id,
1637 .size = sizeof(struct l2tp_net),
1638};
1639
1640static int __init l2tp_init(void)
1641{
1642 int rc = 0;
1643
1644 rc = register_pernet_device(&l2tp_net_ops);
1645 if (rc)
1646 goto out;
1647
1648 printk(KERN_INFO "L2TP core driver, %s\n", L2TP_DRV_VERSION);
1649
1650out:
1651 return rc;
1652}
1653
1654static void __exit l2tp_exit(void)
1655{
1656 unregister_pernet_device(&l2tp_net_ops);
1657}
1658
1659module_init(l2tp_init);
1660module_exit(l2tp_exit);
1661
1662MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
1663MODULE_DESCRIPTION("L2TP core");
1664MODULE_LICENSE("GPL");
1665MODULE_VERSION(L2TP_DRV_VERSION);
1666
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
new file mode 100644
index 000000000000..f0f318edd3f1
--- /dev/null
+++ b/net/l2tp/l2tp_core.h
@@ -0,0 +1,304 @@
1/*
2 * L2TP internal definitions.
3 *
4 * Copyright (c) 2008,2009 Katalix Systems Ltd
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#ifndef _L2TP_CORE_H_
12#define _L2TP_CORE_H_
13
14/* Just some random numbers */
15#define L2TP_TUNNEL_MAGIC 0x42114DDA
16#define L2TP_SESSION_MAGIC 0x0C04EB7D
17
18/* Per tunnel, session hash table size */
19#define L2TP_HASH_BITS 4
20#define L2TP_HASH_SIZE (1 << L2TP_HASH_BITS)
21
22/* System-wide, session hash table size */
23#define L2TP_HASH_BITS_2 8
24#define L2TP_HASH_SIZE_2 (1 << L2TP_HASH_BITS_2)
25
26/* Debug message categories for the DEBUG socket option */
27enum {
28 L2TP_MSG_DEBUG = (1 << 0), /* verbose debug (if
29 * compiled in) */
30 L2TP_MSG_CONTROL = (1 << 1), /* userspace - kernel
31 * interface */
32 L2TP_MSG_SEQ = (1 << 2), /* sequence numbers */
33 L2TP_MSG_DATA = (1 << 3), /* data packets */
34};
35
36struct sk_buff;
37
38struct l2tp_stats {
39 u64 tx_packets;
40 u64 tx_bytes;
41 u64 tx_errors;
42 u64 rx_packets;
43 u64 rx_bytes;
44 u64 rx_seq_discards;
45 u64 rx_oos_packets;
46 u64 rx_errors;
47 u64 rx_cookie_discards;
48};
49
50struct l2tp_tunnel;
51
52/* Describes a session. Contains the information needed to identify
53 * incoming packets and to build outgoing ones.
54 */
55struct l2tp_session_cfg {
56 enum l2tp_pwtype pw_type;
57 unsigned data_seq:2; /* data sequencing level
58 * 0 => none, 1 => IP only,
59 * 2 => all
60 */
61 unsigned recv_seq:1; /* expect receive packets with
62 * sequence numbers? */
63 unsigned send_seq:1; /* send packets with sequence
64 * numbers? */
65 unsigned lns_mode:1; /* behave as LNS? LAC enables
66 * sequence numbers under
67 * control of LNS. */
68 int debug; /* bitmask of debug message
69 * categories */
70 u16 vlan_id; /* VLAN pseudowire only */
71 u16 offset; /* offset to payload */
72 u16 l2specific_len; /* Layer 2 specific length */
73 u16 l2specific_type; /* Layer 2 specific type */
74 u8 cookie[8]; /* optional cookie */
75 int cookie_len; /* 0, 4 or 8 bytes */
76 u8 peer_cookie[8]; /* peer's cookie */
77 int peer_cookie_len; /* 0, 4 or 8 bytes */
78 int reorder_timeout; /* configured reorder timeout
79 * (in jiffies) */
80 int mtu;
81 int mru;
82 char *ifname;
83};
84
85struct l2tp_session {
86 int magic; /* should be
87 * L2TP_SESSION_MAGIC */
88
89 struct l2tp_tunnel *tunnel; /* back pointer to tunnel
90 * context */
91 u32 session_id;
92 u32 peer_session_id;
93 u8 cookie[8];
94 int cookie_len;
95 u8 peer_cookie[8];
96 int peer_cookie_len;
97 u16 offset; /* offset from end of L2TP header
98 to beginning of data */
99 u16 l2specific_len;
100 u16 l2specific_type;
101 u16 hdr_len;
102 u32 nr; /* session NR state (receive) */
103	u32			ns;		/* session Ns state (send) */
104 struct sk_buff_head reorder_q; /* receive reorder queue */
105 struct hlist_node hlist; /* Hash list node */
106 atomic_t ref_count;
107
108 char name[32]; /* for logging */
109 char ifname[IFNAMSIZ];
110 unsigned data_seq:2; /* data sequencing level
111 * 0 => none, 1 => IP only,
112 * 2 => all
113 */
114 unsigned recv_seq:1; /* expect receive packets with
115 * sequence numbers? */
116 unsigned send_seq:1; /* send packets with sequence
117 * numbers? */
118 unsigned lns_mode:1; /* behave as LNS? LAC enables
119 * sequence numbers under
120 * control of LNS. */
121 int debug; /* bitmask of debug message
122 * categories */
123 int reorder_timeout; /* configured reorder timeout
124 * (in jiffies) */
125 int mtu;
126 int mru;
127 enum l2tp_pwtype pwtype;
128 struct l2tp_stats stats;
129 struct hlist_node global_hlist; /* Global hash list node */
130
131 int (*build_header)(struct l2tp_session *session, void *buf);
132 void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
133 void (*session_close)(struct l2tp_session *session);
134 void (*ref)(struct l2tp_session *session);
135 void (*deref)(struct l2tp_session *session);
136#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
137 void (*show)(struct seq_file *m, void *priv);
138#endif
139 uint8_t priv[0]; /* private data */
140};
141
142/* Describes the tunnel. It contains the info needed to track all the
143 * associated sessions so that incoming packets can be demultiplexed.
144 */
145struct l2tp_tunnel_cfg {
146 int debug; /* bitmask of debug message
147 * categories */
148 enum l2tp_encap_type encap;
149
150 /* Used only for kernel-created sockets */
151 struct in_addr local_ip;
152 struct in_addr peer_ip;
153 u16 local_udp_port;
154 u16 peer_udp_port;
155 unsigned int use_udp_checksums:1;
156};
157
158struct l2tp_tunnel {
159 int magic; /* Should be L2TP_TUNNEL_MAGIC */
160 rwlock_t hlist_lock; /* protect session_hlist */
161 struct hlist_head session_hlist[L2TP_HASH_SIZE];
162 /* hashed list of sessions,
163 * hashed by id */
164 u32 tunnel_id;
165 u32 peer_tunnel_id;
166 int version; /* 2=>L2TPv2, 3=>L2TPv3 */
167
168 char name[20]; /* for logging */
169 int debug; /* bitmask of debug message
170 * categories */
171 enum l2tp_encap_type encap;
172 struct l2tp_stats stats;
173
174 struct list_head list; /* Keep a list of all tunnels */
175 struct net *l2tp_net; /* the net we belong to */
176
177 atomic_t ref_count;
178#ifdef CONFIG_DEBUG_FS
179 void (*show)(struct seq_file *m, void *arg);
180#endif
181 int (*recv_payload_hook)(struct sk_buff *skb);
182 void (*old_sk_destruct)(struct sock *);
183 struct sock *sock; /* Parent socket */
184 int fd;
185
186 uint8_t priv[0]; /* private data */
187};
188
189struct l2tp_nl_cmd_ops {
190 int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
191 int (*session_delete)(struct l2tp_session *session);
192};
193
194static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel)
195{
196 return &tunnel->priv[0];
197}
198
199static inline void *l2tp_session_priv(struct l2tp_session *session)
200{
201 return &session->priv[0];
202}
203
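/* Usage sketch (hypothetical pseudowire module, not part of this patch):
 *
 *	struct pw_priv { int foo; };
 *	struct l2tp_session *s;
 *
 *	s = l2tp_session_create(sizeof(struct pw_priv), tunnel,
 *				session_id, peer_session_id, &cfg);
 *	if (s != NULL)
 *		((struct pw_priv *)l2tp_session_priv(s))->foo = 1;
 */
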
204static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk)
205{
206 struct l2tp_tunnel *tunnel;
207
208 if (sk == NULL)
209 return NULL;
210
211 sock_hold(sk);
212 tunnel = (struct l2tp_tunnel *)(sk->sk_user_data);
213 if (tunnel == NULL) {
214 sock_put(sk);
215 goto out;
216 }
217
218 BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
219
220out:
221 return tunnel;
222}
223
224extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id);
225extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
226extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
227extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
228extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
229
230extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp);
231extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
232extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
233extern int l2tp_session_delete(struct l2tp_session *session);
234extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
235extern void l2tp_session_free(struct l2tp_session *session);
236extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
237extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
238extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
239
240extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
241extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
242extern void l2tp_tunnel_destruct(struct sock *sk);
243extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
244extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
245
246extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
247extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
248
249/* Tunnel reference counts. Incremented per session that is added to
250 * the tunnel.
251 */
252static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
253{
254 atomic_inc(&tunnel->ref_count);
255}
256
257static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
258{
259 if (atomic_dec_and_test(&tunnel->ref_count))
260 l2tp_tunnel_free(tunnel);
261}
262#ifdef L2TP_REFCNT_DEBUG
263#define l2tp_tunnel_inc_refcount(_t) do { \
264 printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
265 l2tp_tunnel_inc_refcount_1(_t); \
266 } while (0)
267#define l2tp_tunnel_dec_refcount(_t) do { \
268 printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&_t->ref_count)); \
269 l2tp_tunnel_dec_refcount_1(_t); \
270 } while (0)
271#else
272#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
273#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
274#endif
275
276/* Session reference counts. Incremented when code obtains a reference
277 * to a session.
278 */
279static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session)
280{
281 atomic_inc(&session->ref_count);
282}
283
284static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session)
285{
286 if (atomic_dec_and_test(&session->ref_count))
287 l2tp_session_free(session);
288}
289
290#ifdef L2TP_REFCNT_DEBUG
291#define l2tp_session_inc_refcount(_s) do { \
292 printk(KERN_DEBUG "l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_s)->name, atomic_read(&_s->ref_count)); \
293 l2tp_session_inc_refcount_1(_s); \
294 } while (0)
295#define l2tp_session_dec_refcount(_s) do { \
296 printk(KERN_DEBUG "l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_s)->name, atomic_read(&_s->ref_count)); \
297 l2tp_session_dec_refcount_1(_s); \
298 } while (0)
299#else
300#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s)
301#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s)
302#endif
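
Both refcount families follow the same discipline: every inc must be paired with a dec, and the final dec frees the object via l2tp_tunnel_free() or l2tp_session_free(). A minimal sketch of a caller pinning a session across a window where it might otherwise be deleted:

static void my_use_session(struct l2tp_session *session)
{
	l2tp_session_inc_refcount(session);	/* pin */

	/* ... session cannot be freed in this window ... */

	l2tp_session_dec_refcount(session);	/* unpin; may free */
}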
303
304#endif /* _L2TP_CORE_H_ */
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
new file mode 100644
index 000000000000..104ec3b283d4
--- /dev/null
+++ b/net/l2tp/l2tp_debugfs.c
@@ -0,0 +1,341 @@
1/*
2 * L2TP subsystem debugfs
3 *
4 * Copyright (c) 2010 Katalix Systems Ltd
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/socket.h>
15#include <linux/hash.h>
16#include <linux/l2tp.h>
17#include <linux/in.h>
18#include <linux/etherdevice.h>
19#include <linux/spinlock.h>
20#include <linux/debugfs.h>
21#include <net/sock.h>
22#include <net/ip.h>
23#include <net/icmp.h>
24#include <net/udp.h>
25#include <net/inet_common.h>
26#include <net/inet_hashtables.h>
27#include <net/tcp_states.h>
28#include <net/protocol.h>
29#include <net/xfrm.h>
30#include <net/net_namespace.h>
31#include <net/netns/generic.h>
32
33#include "l2tp_core.h"
34
35static struct dentry *rootdir;
36static struct dentry *tunnels;
37
38struct l2tp_dfs_seq_data {
39 struct net *net;
40 int tunnel_idx; /* current tunnel */
41 int session_idx; /* index of session within current tunnel */
42 struct l2tp_tunnel *tunnel;
43 struct l2tp_session *session; /* NULL means get next tunnel */
44};
45
46static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
47{
48 pd->tunnel = l2tp_tunnel_find_nth(pd->net, pd->tunnel_idx);
49 pd->tunnel_idx++;
50}
51
52static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
53{
54 pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
55 pd->session_idx++;
56
57 if (pd->session == NULL) {
58 pd->session_idx = 0;
59 l2tp_dfs_next_tunnel(pd);
60 }
61
62}
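
These two cursors flatten the tunnel/session two-level structure into a single sequence: next_session() exhausts the current tunnel's sessions and then falls through to next_tunnel(). Written as one standalone loop (a sketch; the seq_file hooks below perform the same walk one element per call):

static void my_walk_everything(struct l2tp_dfs_seq_data *pd)
{
	l2tp_dfs_next_tunnel(pd);
	while (pd->tunnel != NULL) {
		/* visit pd->tunnel here */
		l2tp_dfs_next_session(pd);
		while (pd->session != NULL) {
			/* visit pd->session here */
			l2tp_dfs_next_session(pd);
		}
		/* the failed session lookup already advanced pd->tunnel */
	}
}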
63
64static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs)
65{
66 struct l2tp_dfs_seq_data *pd = SEQ_START_TOKEN;
67 loff_t pos = *offs;
68
69 if (!pos)
70 goto out;
71
72 BUG_ON(m->private == NULL);
73 pd = m->private;
74
75 if (pd->tunnel == NULL)
76 l2tp_dfs_next_tunnel(pd);
77 else
78 l2tp_dfs_next_session(pd);
79
80 /* NULL tunnel and session indicates end of list */
81 if ((pd->tunnel == NULL) && (pd->session == NULL))
82 pd = NULL;
83
84out:
85 return pd;
86}
87
88
89static void *l2tp_dfs_seq_next(struct seq_file *m, void *v, loff_t *pos)
90{
91 (*pos)++;
92 return NULL;
93}
94
95static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
96{
97 /* nothing to do */
98}
99
100static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
101{
102 struct l2tp_tunnel *tunnel = v;
103 int session_count = 0;
104 int hash;
105 struct hlist_node *walk;
106 struct hlist_node *tmp;
107
108 read_lock_bh(&tunnel->hlist_lock);
109 for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
110 hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
111 struct l2tp_session *session;
112
113 session = hlist_entry(walk, struct l2tp_session, hlist);
114 if (session->session_id == 0)
115 continue;
116
117 session_count++;
118 }
119 }
120 read_unlock_bh(&tunnel->hlist_lock);
121
122 seq_printf(m, "\nTUNNEL %u peer %u", tunnel->tunnel_id, tunnel->peer_tunnel_id);
123 if (tunnel->sock) {
124 struct inet_sock *inet = inet_sk(tunnel->sock);
125 seq_printf(m, " from %pI4 to %pI4\n",
126 &inet->inet_saddr, &inet->inet_daddr);
127 if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
128 seq_printf(m, " source port %hu, dest port %hu\n",
129 ntohs(inet->inet_sport), ntohs(inet->inet_dport));
130 }
131 seq_printf(m, " L2TPv%d, %s\n", tunnel->version,
132 tunnel->encap == L2TP_ENCAPTYPE_UDP ? "UDP" :
133 tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" :
134 "");
135 seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count,
136 tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0,
137 atomic_read(&tunnel->ref_count));
138
139	seq_printf(m, " %08x tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
140 tunnel->debug,
141 (unsigned long long)tunnel->stats.tx_packets,
142 (unsigned long long)tunnel->stats.tx_bytes,
143 (unsigned long long)tunnel->stats.tx_errors,
144 (unsigned long long)tunnel->stats.rx_packets,
145 (unsigned long long)tunnel->stats.rx_bytes,
146 (unsigned long long)tunnel->stats.rx_errors);
147
148 if (tunnel->show != NULL)
149 tunnel->show(m, tunnel);
150}
151
152static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
153{
154 struct l2tp_session *session = v;
155
156 seq_printf(m, " SESSION %u, peer %u, %s\n", session->session_id,
157 session->peer_session_id,
158 session->pwtype == L2TP_PWTYPE_ETH ? "ETH" :
159 session->pwtype == L2TP_PWTYPE_PPP ? "PPP" :
160 "");
161 if (session->send_seq || session->recv_seq)
162 seq_printf(m, " nr %hu, ns %hu\n", session->nr, session->ns);
163 seq_printf(m, " refcnt %d\n", atomic_read(&session->ref_count));
164 seq_printf(m, " config %d/%d/%c/%c/%s/%s %08x %u\n",
165 session->mtu, session->mru,
166 session->recv_seq ? 'R' : '-',
167 session->send_seq ? 'S' : '-',
168 session->data_seq == 1 ? "IPSEQ" :
169 session->data_seq == 2 ? "DATASEQ" : "-",
170 session->lns_mode ? "LNS" : "LAC",
171 session->debug,
172 jiffies_to_msecs(session->reorder_timeout));
173 seq_printf(m, " offset %hu l2specific %hu/%hu\n",
174 session->offset, session->l2specific_type, session->l2specific_len);
175 if (session->cookie_len) {
176 seq_printf(m, " cookie %02x%02x%02x%02x",
177 session->cookie[0], session->cookie[1],
178 session->cookie[2], session->cookie[3]);
179 if (session->cookie_len == 8)
180 seq_printf(m, "%02x%02x%02x%02x",
181 session->cookie[4], session->cookie[5],
182 session->cookie[6], session->cookie[7]);
183 seq_printf(m, "\n");
184 }
185 if (session->peer_cookie_len) {
186 seq_printf(m, " peer cookie %02x%02x%02x%02x",
187 session->peer_cookie[0], session->peer_cookie[1],
188 session->peer_cookie[2], session->peer_cookie[3]);
189 if (session->peer_cookie_len == 8)
190 seq_printf(m, "%02x%02x%02x%02x",
191 session->peer_cookie[4], session->peer_cookie[5],
192 session->peer_cookie[6], session->peer_cookie[7]);
193 seq_printf(m, "\n");
194 }
195
196 seq_printf(m, " %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
197 session->nr, session->ns,
198 (unsigned long long)session->stats.tx_packets,
199 (unsigned long long)session->stats.tx_bytes,
200 (unsigned long long)session->stats.tx_errors,
201 (unsigned long long)session->stats.rx_packets,
202 (unsigned long long)session->stats.rx_bytes,
203 (unsigned long long)session->stats.rx_errors);
204
205 if (session->show != NULL)
206 session->show(m, session);
207}
208
209static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
210{
211 struct l2tp_dfs_seq_data *pd = v;
212
213 /* display header on line 1 */
214 if (v == SEQ_START_TOKEN) {
215 seq_puts(m, "TUNNEL ID, peer ID from IP to IP\n");
216 seq_puts(m, " L2TPv2/L2TPv3, UDP/IP\n");
217		seq_puts(m, "  sessions session-count, refcnt sk->refcnt/refcnt\n");
218 seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
219 seq_puts(m, " SESSION ID, peer ID, PWTYPE\n");
220 seq_puts(m, " refcnt cnt\n");
221 seq_puts(m, " offset OFFSET l2specific TYPE/LEN\n");
222 seq_puts(m, " [ cookie ]\n");
223 seq_puts(m, " [ peer cookie ]\n");
224 seq_puts(m, " config mtu/mru/rcvseq/sendseq/dataseq/lns debug reorderto\n");
225 seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
226 goto out;
227 }
228
229 /* Show the tunnel or session context */
230 if (pd->session == NULL)
231 l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
232 else
233 l2tp_dfs_seq_session_show(m, pd->session);
234
235out:
236 return 0;
237}
238
239static const struct seq_operations l2tp_dfs_seq_ops = {
240 .start = l2tp_dfs_seq_start,
241 .next = l2tp_dfs_seq_next,
242 .stop = l2tp_dfs_seq_stop,
243 .show = l2tp_dfs_seq_show,
244};
245
246static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
247{
248 struct l2tp_dfs_seq_data *pd;
249 struct seq_file *seq;
250 int rc = -ENOMEM;
251
252	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
253 if (pd == NULL)
254 goto out;
255
256 /* Derive the network namespace from the pid opening the
257 * file.
258 */
259 pd->net = get_net_ns_by_pid(current->pid);
260 if (IS_ERR(pd->net)) {
261		rc = PTR_ERR(pd->net);
262 goto err_free_pd;
263 }
264
265 rc = seq_open(file, &l2tp_dfs_seq_ops);
266 if (rc)
267 goto err_free_net;
268
269 seq = file->private_data;
270 seq->private = pd;
271
272out:
273 return rc;
274
275err_free_net:
276 put_net(pd->net);
277err_free_pd:
278 kfree(pd);
279 goto out;
280}
281
282static int l2tp_dfs_seq_release(struct inode *inode, struct file *file)
283{
284 struct l2tp_dfs_seq_data *pd;
285 struct seq_file *seq;
286
287 seq = file->private_data;
288 pd = seq->private;
289 if (pd->net)
290 put_net(pd->net);
291 kfree(pd);
292 seq_release(inode, file);
293
294 return 0;
295}
296
297static const struct file_operations l2tp_dfs_fops = {
298 .owner = THIS_MODULE,
299 .open = l2tp_dfs_seq_open,
300 .read = seq_read,
301 .llseek = seq_lseek,
302 .release = l2tp_dfs_seq_release,
303};
304
305static int __init l2tp_debugfs_init(void)
306{
307 int rc = 0;
308
309 rootdir = debugfs_create_dir("l2tp", NULL);
310 if (IS_ERR(rootdir)) {
311 rc = PTR_ERR(rootdir);
312 rootdir = NULL;
313 goto out;
314 }
315
316 tunnels = debugfs_create_file("tunnels", 0600, rootdir, NULL, &l2tp_dfs_fops);
317 if (tunnels == NULL)
318 rc = -EIO;
319
320 printk(KERN_INFO "L2TP debugfs support\n");
321
322out:
323 if (rc)
324 printk(KERN_WARNING "l2tp debugfs: unable to init\n");
325
326 return rc;
327}
328
329static void __exit l2tp_debugfs_exit(void)
330{
331 debugfs_remove(tunnels);
332 debugfs_remove(rootdir);
333}
334
335module_init(l2tp_debugfs_init);
336module_exit(l2tp_debugfs_exit);
337
338MODULE_LICENSE("GPL");
339MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
340MODULE_DESCRIPTION("L2TP debugfs driver");
341MODULE_VERSION("1.0");
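
The file registered above is consumed by simply reading it; a minimal userspace sketch, assuming debugfs is mounted at its conventional /sys/kernel/debug location:

#include <stdio.h>

int main(void)
{
	/* Path assumes the usual debugfs mount point */
	FILE *f = fopen("/sys/kernel/debug/l2tp/tunnels", "r");
	char line[256];

	if (f == NULL)
		return 1;
	while (fgets(line, sizeof(line), f) != NULL)
		fputs(line, stdout);
	fclose(f);
	return 0;
}
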
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
new file mode 100644
index 000000000000..58c6c4cda73b
--- /dev/null
+++ b/net/l2tp/l2tp_eth.c
@@ -0,0 +1,334 @@
1/*
2 * L2TPv3 ethernet pseudowire driver
3 *
4 * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/socket.h>
15#include <linux/hash.h>
16#include <linux/l2tp.h>
17#include <linux/in.h>
18#include <linux/etherdevice.h>
19#include <linux/spinlock.h>
20#include <net/sock.h>
21#include <net/ip.h>
22#include <net/icmp.h>
23#include <net/udp.h>
24#include <net/inet_common.h>
25#include <net/inet_hashtables.h>
26#include <net/tcp_states.h>
27#include <net/protocol.h>
28#include <net/xfrm.h>
29#include <net/net_namespace.h>
30#include <net/netns/generic.h>
31
32#include "l2tp_core.h"
33
34/* Default device name. May be overridden by name specified by user */
35#define L2TP_ETH_DEV_NAME "l2tpeth%d"
36
37/* via netdev_priv() */
38struct l2tp_eth {
39 struct net_device *dev;
40 struct sock *tunnel_sock;
41 struct l2tp_session *session;
42 struct list_head list;
43};
44
45/* via l2tp_session_priv() */
46struct l2tp_eth_sess {
47 struct net_device *dev;
48};
49
50/* per-net private data for this module */
51static unsigned int l2tp_eth_net_id;
52struct l2tp_eth_net {
53 struct list_head l2tp_eth_dev_list;
54 spinlock_t l2tp_eth_lock;
55};
56
57static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
58{
59 return net_generic(net, l2tp_eth_net_id);
60}
61
62static int l2tp_eth_dev_init(struct net_device *dev)
63{
64 struct l2tp_eth *priv = netdev_priv(dev);
65
66 priv->dev = dev;
67 random_ether_addr(dev->dev_addr);
68 memset(&dev->broadcast[0], 0xff, 6);
69
70 return 0;
71}
72
73static void l2tp_eth_dev_uninit(struct net_device *dev)
74{
75 struct l2tp_eth *priv = netdev_priv(dev);
76 struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev));
77
78 spin_lock(&pn->l2tp_eth_lock);
79 list_del_init(&priv->list);
80 spin_unlock(&pn->l2tp_eth_lock);
81 dev_put(dev);
82}
83
84static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
85{
86 struct l2tp_eth *priv = netdev_priv(dev);
87 struct l2tp_session *session = priv->session;
88
89 l2tp_xmit_skb(session, skb, session->hdr_len);
90
91 dev->stats.tx_bytes += skb->len;
92 dev->stats.tx_packets++;
93
94 return 0;
95}
96
97static struct net_device_ops l2tp_eth_netdev_ops = {
98 .ndo_init = l2tp_eth_dev_init,
99 .ndo_uninit = l2tp_eth_dev_uninit,
100 .ndo_start_xmit = l2tp_eth_dev_xmit,
101};
102
103static void l2tp_eth_dev_setup(struct net_device *dev)
104{
105 ether_setup(dev);
106
107 dev->netdev_ops = &l2tp_eth_netdev_ops;
108 dev->destructor = free_netdev;
109}
110
111static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
112{
113 struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
114 struct net_device *dev = spriv->dev;
115
116 if (session->debug & L2TP_MSG_DATA) {
117 unsigned int length;
118 int offset;
119 u8 *ptr = skb->data;
120
121 length = min(32u, skb->len);
122 if (!pskb_may_pull(skb, length))
123 goto error;
124
125 printk(KERN_DEBUG "%s: eth recv: ", session->name);
126
127 offset = 0;
128 do {
129 printk(" %02X", ptr[offset]);
130 } while (++offset < length);
131
132 printk("\n");
133 }
134
135 if (data_len < ETH_HLEN)
136 goto error;
137
138 secpath_reset(skb);
139
140 /* checksums verified by L2TP */
141 skb->ip_summed = CHECKSUM_NONE;
142
143 skb_dst_drop(skb);
144 nf_reset(skb);
145
146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
147 dev->last_rx = jiffies;
148 dev->stats.rx_packets++;
149 dev->stats.rx_bytes += data_len;
150 } else
151 dev->stats.rx_errors++;
152
153 return;
154
155error:
156 dev->stats.rx_errors++;
157 kfree_skb(skb);
158}
159
160static void l2tp_eth_delete(struct l2tp_session *session)
161{
162 struct l2tp_eth_sess *spriv;
163 struct net_device *dev;
164
165 if (session) {
166 spriv = l2tp_session_priv(session);
167 dev = spriv->dev;
168 if (dev) {
169 unregister_netdev(dev);
170 spriv->dev = NULL;
171 }
172 }
173}
174
175#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
176static void l2tp_eth_show(struct seq_file *m, void *arg)
177{
178 struct l2tp_session *session = arg;
179 struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
180 struct net_device *dev = spriv->dev;
181
182 seq_printf(m, " interface %s\n", dev->name);
183}
184#endif
185
186static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
187{
188 struct net_device *dev;
189 char name[IFNAMSIZ];
190 struct l2tp_tunnel *tunnel;
191 struct l2tp_session *session;
192 struct l2tp_eth *priv;
193 struct l2tp_eth_sess *spriv;
194 int rc;
195 struct l2tp_eth_net *pn;
196
197 tunnel = l2tp_tunnel_find(net, tunnel_id);
198 if (!tunnel) {
199 rc = -ENODEV;
200 goto out;
201 }
202
203 session = l2tp_session_find(net, tunnel, session_id);
204 if (session) {
205 rc = -EEXIST;
206 goto out;
207 }
208
209 if (cfg->ifname) {
210 dev = dev_get_by_name(net, cfg->ifname);
211 if (dev) {
212 dev_put(dev);
213 rc = -EEXIST;
214 goto out;
215 }
216 strlcpy(name, cfg->ifname, IFNAMSIZ);
217 } else
218 strcpy(name, L2TP_ETH_DEV_NAME);
219
220 session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
221 peer_session_id, cfg);
222 if (!session) {
223 rc = -ENOMEM;
224 goto out;
225 }
226
227 dev = alloc_netdev(sizeof(*priv), name, l2tp_eth_dev_setup);
228 if (!dev) {
229 rc = -ENOMEM;
230 goto out_del_session;
231 }
232
233 dev_net_set(dev, net);
234 if (session->mtu == 0)
235 session->mtu = dev->mtu - session->hdr_len;
236 dev->mtu = session->mtu;
237 dev->needed_headroom += session->hdr_len;
238
239 priv = netdev_priv(dev);
240 priv->dev = dev;
241 priv->session = session;
242 INIT_LIST_HEAD(&priv->list);
243
244 priv->tunnel_sock = tunnel->sock;
245 session->recv_skb = l2tp_eth_dev_recv;
246 session->session_close = l2tp_eth_delete;
247#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
248 session->show = l2tp_eth_show;
249#endif
250
251 spriv = l2tp_session_priv(session);
252 spriv->dev = dev;
253
254 rc = register_netdev(dev);
255 if (rc < 0)
256 goto out_del_dev;
257
258 /* Must be done after register_netdev() */
259 strlcpy(session->ifname, dev->name, IFNAMSIZ);
260
261 dev_hold(dev);
262 pn = l2tp_eth_pernet(dev_net(dev));
263 spin_lock(&pn->l2tp_eth_lock);
264 list_add(&priv->list, &pn->l2tp_eth_dev_list);
265 spin_unlock(&pn->l2tp_eth_lock);
266
267 return 0;
268
269out_del_dev:
270 free_netdev(dev);
271out_del_session:
272 l2tp_session_delete(session);
273out:
274 return rc;
275}
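
When no MTU was configured, the code above derives session->mtu from the carrier device before copying the result back into dev->mtu. A worked example with assumed numbers (the real hdr_len depends on the encapsulation and L2-specific sublayer negotiated):

static unsigned int my_l2tpeth_mtu(unsigned int carrier_mtu,
				   unsigned int hdr_len)
{
	/* e.g. carrier_mtu = 1500, hdr_len = 38 -> l2tpeth MTU 1462 */
	return carrier_mtu - hdr_len;
}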
276
277static __net_init int l2tp_eth_init_net(struct net *net)
278{
279 struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id);
280
281 INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
282 spin_lock_init(&pn->l2tp_eth_lock);
283
284 return 0;
285}
286
287static __net_initdata struct pernet_operations l2tp_eth_net_ops = {
288 .init = l2tp_eth_init_net,
289 .id = &l2tp_eth_net_id,
290 .size = sizeof(struct l2tp_eth_net),
291};
292
293
294static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
295 .session_create = l2tp_eth_create,
296 .session_delete = l2tp_session_delete,
297};
298
299
300static int __init l2tp_eth_init(void)
301{
302 int err = 0;
303
304 err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
305 if (err)
306 goto out;
307
308 err = register_pernet_device(&l2tp_eth_net_ops);
309 if (err)
310 goto out_unreg;
311
312 printk(KERN_INFO "L2TP ethernet pseudowire support (L2TPv3)\n");
313
314 return 0;
315
316out_unreg:
317 l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
318out:
319 return err;
320}
321
322static void __exit l2tp_eth_exit(void)
323{
324 unregister_pernet_device(&l2tp_eth_net_ops);
325 l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
326}
327
328module_init(l2tp_eth_init);
329module_exit(l2tp_eth_exit);
330
331MODULE_LICENSE("GPL");
332MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
333MODULE_DESCRIPTION("L2TP ethernet pseudowire driver");
334MODULE_VERSION("1.0");
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
new file mode 100644
index 000000000000..226a0ae3bcfd
--- /dev/null
+++ b/net/l2tp/l2tp_ip.c
@@ -0,0 +1,679 @@
1/*
2 * L2TPv3 IP encapsulation support
3 *
4 * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 */
11
12#include <linux/icmp.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/random.h>
16#include <linux/socket.h>
17#include <linux/l2tp.h>
18#include <linux/in.h>
19#include <net/sock.h>
20#include <net/ip.h>
21#include <net/icmp.h>
22#include <net/udp.h>
23#include <net/inet_common.h>
24#include <net/inet_hashtables.h>
25#include <net/tcp_states.h>
26#include <net/protocol.h>
27#include <net/xfrm.h>
28
29#include "l2tp_core.h"
30
31struct l2tp_ip_sock {
32 /* inet_sock has to be the first member of l2tp_ip_sock */
33 struct inet_sock inet;
34
35 __u32 conn_id;
36 __u32 peer_conn_id;
37
38 __u64 tx_packets;
39 __u64 tx_bytes;
40 __u64 tx_errors;
41 __u64 rx_packets;
42 __u64 rx_bytes;
43 __u64 rx_errors;
44};
45
46static DEFINE_RWLOCK(l2tp_ip_lock);
47static struct hlist_head l2tp_ip_table;
48static struct hlist_head l2tp_ip_bind_table;
49
50static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
51{
52 return (struct l2tp_ip_sock *)sk;
53}
54
55static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
56{
57 struct hlist_node *node;
58 struct sock *sk;
59
60 sk_for_each_bound(sk, node, &l2tp_ip_bind_table) {
61 struct inet_sock *inet = inet_sk(sk);
62 struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
63
64 if (l2tp == NULL)
65 continue;
66
67 if ((l2tp->conn_id == tunnel_id) &&
68#ifdef CONFIG_NET_NS
69 (sk->sk_net == net) &&
70#endif
71 !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
72 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
73 goto found;
74 }
75
76 sk = NULL;
77found:
78 return sk;
79}
80
81static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
82{
83 struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id);
84 if (sk)
85 sock_hold(sk);
86
87 return sk;
88}
89
90/* When processing receive frames, there are two cases to
91 * consider. Data frames consist of a non-zero session-id and an
92 * optional cookie. Control frames consist of a regular L2TP header
93 * preceded by 32-bits of zeros.
94 *
95 * L2TPv3 Session Header Over IP
96 *
97 * 0 1 2 3
98 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
99 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
100 * | Session ID |
101 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
102 * | Cookie (optional, maximum 64 bits)...
103 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
104 * |
105 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
106 *
107 * L2TPv3 Control Message Header Over IP
108 *
109 * 0 1 2 3
110 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
111 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
112 * | (32 bits of zeros) |
113 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
114 * |T|L|x|x|S|x|x|x|x|x|x|x| Ver | Length |
115 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
116 * | Control Connection ID |
117 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
118 * | Ns | Nr |
119 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
120 *
121 * All control frames are passed to userspace.
122 */
123static int l2tp_ip_recv(struct sk_buff *skb)
124{
125 struct sock *sk;
126 u32 session_id;
127 u32 tunnel_id;
128 unsigned char *ptr, *optr;
129 struct l2tp_session *session;
130 struct l2tp_tunnel *tunnel = NULL;
131 int length;
132 int offset;
133
134 /* Point to L2TP header */
135 optr = ptr = skb->data;
136
137 if (!pskb_may_pull(skb, 4))
138 goto discard;
139
140 session_id = ntohl(*((__be32 *) ptr));
141 ptr += 4;
142
143 /* RFC3931: L2TP/IP packets have the first 4 bytes containing
144	 * the session_id. If it is 0, the packet is an L2TP control
145 * frame and the session_id value can be discarded.
146 */
147 if (session_id == 0) {
148 __skb_pull(skb, 4);
149 goto pass_up;
150 }
151
152 /* Ok, this is a data packet. Lookup the session. */
153 session = l2tp_session_find(&init_net, NULL, session_id);
154 if (session == NULL)
155 goto discard;
156
157 tunnel = session->tunnel;
158 if (tunnel == NULL)
159 goto discard;
160
161 /* Trace packet contents, if enabled */
162 if (tunnel->debug & L2TP_MSG_DATA) {
163 length = min(32u, skb->len);
164 if (!pskb_may_pull(skb, length))
165 goto discard;
166
167 printk(KERN_DEBUG "%s: ip recv: ", tunnel->name);
168
169 offset = 0;
170 do {
171 printk(" %02X", ptr[offset]);
172 } while (++offset < length);
173
174 printk("\n");
175 }
176
177 l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
178
179 return 0;
180
181pass_up:
182 /* Get the tunnel_id from the L2TP header */
183 if (!pskb_may_pull(skb, 12))
184 goto discard;
185
186 if ((skb->data[0] & 0xc0) != 0xc0)
187 goto discard;
188
189 tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
190 tunnel = l2tp_tunnel_find(&init_net, tunnel_id);
191 if (tunnel != NULL)
192 sk = tunnel->sock;
193 else {
194 struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
195
196 read_lock_bh(&l2tp_ip_lock);
197 sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id);
198 read_unlock_bh(&l2tp_ip_lock);
199 }
200
201 if (sk == NULL)
202 goto discard;
203
204 sock_hold(sk);
205
206 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
207 goto discard_put;
208
209 nf_reset(skb);
210
211 return sk_receive_skb(sk, skb, 1);
212
213discard_put:
214 sock_put(sk);
215
216discard:
217 kfree_skb(skb);
218 return 0;
219}
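
The demultiplexing rule implemented above is small enough to restate standalone. A self-contained sketch of the RFC 3931 classification (first 32 bits zero means control, anything else is the data session ID):

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Returns 1 for a control message, 0 for data (session ID written out),
 * -1 for a runt packet. */
static int l2tpv3_ip_classify(const uint8_t *pkt, size_t len,
			      uint32_t *session_id)
{
	uint32_t word;

	if (len < 4)
		return -1;
	memcpy(&word, pkt, 4);
	*session_id = ntohl(word);
	return (*session_id == 0) ? 1 : 0;
}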
220
221static int l2tp_ip_open(struct sock *sk)
222{
223 /* Prevent autobind. We don't have ports. */
224 inet_sk(sk)->inet_num = IPPROTO_L2TP;
225
226 write_lock_bh(&l2tp_ip_lock);
227 sk_add_node(sk, &l2tp_ip_table);
228 write_unlock_bh(&l2tp_ip_lock);
229
230 return 0;
231}
232
233static void l2tp_ip_close(struct sock *sk, long timeout)
234{
235 write_lock_bh(&l2tp_ip_lock);
236 hlist_del_init(&sk->sk_bind_node);
237 hlist_del_init(&sk->sk_node);
238 write_unlock_bh(&l2tp_ip_lock);
239 sk_common_release(sk);
240}
241
242static void l2tp_ip_destroy_sock(struct sock *sk)
243{
244 struct sk_buff *skb;
245
246 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
247 kfree_skb(skb);
248
249 sk_refcnt_debug_dec(sk);
250}
251
252static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
253{
254 struct inet_sock *inet = inet_sk(sk);
255 struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr;
256 int ret = -EINVAL;
257 int chk_addr_ret;
258
259 ret = -EADDRINUSE;
260 read_lock_bh(&l2tp_ip_lock);
261 if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id))
262 goto out_in_use;
263
264 read_unlock_bh(&l2tp_ip_lock);
265
266 lock_sock(sk);
267 if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
268 goto out;
269
270 chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr);
271 ret = -EADDRNOTAVAIL;
272 if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
273 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
274 goto out;
275
276 inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr;
277 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
278 inet->inet_saddr = 0; /* Use device */
279 sk_dst_reset(sk);
280
281 l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
282
283 write_lock_bh(&l2tp_ip_lock);
284 sk_add_bind_node(sk, &l2tp_ip_bind_table);
285 sk_del_node_init(sk);
286 write_unlock_bh(&l2tp_ip_lock);
287 ret = 0;
288out:
289 release_sock(sk);
290
291 return ret;
292
293out_in_use:
294 read_unlock_bh(&l2tp_ip_lock);
295
296 return ret;
297}
298
299static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
300{
301 int rc;
302 struct inet_sock *inet = inet_sk(sk);
303 struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
304 struct rtable *rt;
305 __be32 saddr;
306 int oif;
307
308 rc = -EINVAL;
309 if (addr_len < sizeof(*lsa))
310 goto out;
311
312 rc = -EAFNOSUPPORT;
313 if (lsa->l2tp_family != AF_INET)
314 goto out;
315
316 sk_dst_reset(sk);
317
318 oif = sk->sk_bound_dev_if;
319 saddr = inet->inet_saddr;
320
321 rc = -EINVAL;
322 if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
323 goto out;
324
325 rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr,
326 RT_CONN_FLAGS(sk), oif,
327 IPPROTO_L2TP,
328 0, 0, sk, 1);
329 if (rc) {
330 if (rc == -ENETUNREACH)
331 IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
332 goto out;
333 }
334
335 rc = -ENETUNREACH;
336 if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
337 ip_rt_put(rt);
338 goto out;
339 }
340
341 l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
342
343 if (!inet->inet_saddr)
344 inet->inet_saddr = rt->rt_src;
345 if (!inet->inet_rcv_saddr)
346 inet->inet_rcv_saddr = rt->rt_src;
347 inet->inet_daddr = rt->rt_dst;
348 sk->sk_state = TCP_ESTABLISHED;
349 inet->inet_id = jiffies;
350
351 sk_dst_set(sk, &rt->dst);
352
353 write_lock_bh(&l2tp_ip_lock);
354 hlist_del_init(&sk->sk_bind_node);
355 sk_add_bind_node(sk, &l2tp_ip_bind_table);
356 write_unlock_bh(&l2tp_ip_lock);
357
358 rc = 0;
359out:
360 return rc;
361}
362
363static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
364 int *uaddr_len, int peer)
365{
366 struct sock *sk = sock->sk;
367 struct inet_sock *inet = inet_sk(sk);
368 struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
369 struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
370
371 memset(lsa, 0, sizeof(*lsa));
372 lsa->l2tp_family = AF_INET;
373 if (peer) {
374 if (!inet->inet_dport)
375 return -ENOTCONN;
376 lsa->l2tp_conn_id = lsk->peer_conn_id;
377 lsa->l2tp_addr.s_addr = inet->inet_daddr;
378 } else {
379 __be32 addr = inet->inet_rcv_saddr;
380 if (!addr)
381 addr = inet->inet_saddr;
382 lsa->l2tp_conn_id = lsk->conn_id;
383 lsa->l2tp_addr.s_addr = addr;
384 }
385 *uaddr_len = sizeof(*lsa);
386 return 0;
387}
388
389static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
390{
391 int rc;
392
393 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
394 goto drop;
395
396 nf_reset(skb);
397
398 /* Charge it to the socket, dropping if the queue is full. */
399 rc = sock_queue_rcv_skb(sk, skb);
400 if (rc < 0)
401 goto drop;
402
403 return 0;
404
405drop:
406 IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS);
407 kfree_skb(skb);
408 return -1;
409}
410
411/* Userspace will call sendmsg() on the tunnel socket to send L2TP
412 * control frames.
413 */
414static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len)
415{
416 struct sk_buff *skb;
417 int rc;
418 struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk);
419 struct inet_sock *inet = inet_sk(sk);
420 struct ip_options *opt = inet->opt;
421 struct rtable *rt = NULL;
422 int connected = 0;
423 __be32 daddr;
424
425 if (sock_flag(sk, SOCK_DEAD))
426 return -ENOTCONN;
427
428 /* Get and verify the address. */
429 if (msg->msg_name) {
430 struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name;
431 if (msg->msg_namelen < sizeof(*lip))
432 return -EINVAL;
433
434 if (lip->l2tp_family != AF_INET) {
435 if (lip->l2tp_family != AF_UNSPEC)
436 return -EAFNOSUPPORT;
437 }
438
439 daddr = lip->l2tp_addr.s_addr;
440 } else {
441 if (sk->sk_state != TCP_ESTABLISHED)
442 return -EDESTADDRREQ;
443
444 daddr = inet->inet_daddr;
445 connected = 1;
446 }
447
448 /* Allocate a socket buffer */
449 rc = -ENOMEM;
450 skb = sock_wmalloc(sk, 2 + NET_SKB_PAD + sizeof(struct iphdr) +
451 4 + len, 0, GFP_KERNEL);
452 if (!skb)
453 goto error;
454
455 /* Reserve space for headers, putting IP header on 4-byte boundary. */
456 skb_reserve(skb, 2 + NET_SKB_PAD);
457 skb_reset_network_header(skb);
458 skb_reserve(skb, sizeof(struct iphdr));
459 skb_reset_transport_header(skb);
460
461 /* Insert 0 session_id */
462 *((__be32 *) skb_put(skb, 4)) = 0;
463
464 /* Copy user data into skb */
465 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
466 if (rc < 0) {
467 kfree_skb(skb);
468 goto error;
469 }
470
471 if (connected)
472 rt = (struct rtable *) __sk_dst_check(sk, 0);
473
474 if (rt == NULL) {
475 /* Use correct destination address if we have options. */
476 if (opt && opt->srr)
477 daddr = opt->faddr;
478
479 {
480 struct flowi fl = { .oif = sk->sk_bound_dev_if,
481 .nl_u = { .ip4_u = {
482 .daddr = daddr,
483 .saddr = inet->inet_saddr,
484 .tos = RT_CONN_FLAGS(sk) } },
485 .proto = sk->sk_protocol,
486 .flags = inet_sk_flowi_flags(sk),
487 .uli_u = { .ports = {
488 .sport = inet->inet_sport,
489 .dport = inet->inet_dport } } };
490
491 /* If this fails, retransmit mechanism of transport layer will
492 * keep trying until route appears or the connection times
493 * itself out.
494 */
495 security_sk_classify_flow(sk, &fl);
496 if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
497 goto no_route;
498 }
499 sk_setup_caps(sk, &rt->dst);
500 }
501 skb_dst_set(skb, dst_clone(&rt->dst));
502
503 /* Queue the packet to IP for output */
504 rc = ip_queue_xmit(skb);
505
506error:
507 /* Update stats */
508 if (rc >= 0) {
509 lsa->tx_packets++;
510 lsa->tx_bytes += len;
511 rc = len;
512 } else {
513 lsa->tx_errors++;
514 }
515
516 return rc;
517
518no_route:
519 IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
520 kfree_skb(skb);
521 return -EHOSTUNREACH;
522}
523
524static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
525 size_t len, int noblock, int flags, int *addr_len)
526{
527 struct inet_sock *inet = inet_sk(sk);
528 struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
529 size_t copied = 0;
530 int err = -EOPNOTSUPP;
531 struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
532 struct sk_buff *skb;
533
534 if (flags & MSG_OOB)
535 goto out;
536
537 if (addr_len)
538 *addr_len = sizeof(*sin);
539
540 skb = skb_recv_datagram(sk, flags, noblock, &err);
541 if (!skb)
542 goto out;
543
544 copied = skb->len;
545 if (len < copied) {
546 msg->msg_flags |= MSG_TRUNC;
547 copied = len;
548 }
549
550 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
551 if (err)
552 goto done;
553
554 sock_recv_timestamp(msg, sk, skb);
555
556 /* Copy the address. */
557 if (sin) {
558 sin->sin_family = AF_INET;
559 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
560 sin->sin_port = 0;
561 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
562 }
563 if (inet->cmsg_flags)
564 ip_cmsg_recv(msg, skb);
565 if (flags & MSG_TRUNC)
566 copied = skb->len;
567done:
568 skb_free_datagram(sk, skb);
569out:
570 if (err) {
571 lsk->rx_errors++;
572 return err;
573 }
574
575 lsk->rx_packets++;
576 lsk->rx_bytes += copied;
577
578 return copied;
579}
580
581struct proto l2tp_ip_prot = {
582 .name = "L2TP/IP",
583 .owner = THIS_MODULE,
584 .init = l2tp_ip_open,
585 .close = l2tp_ip_close,
586 .bind = l2tp_ip_bind,
587 .connect = l2tp_ip_connect,
588 .disconnect = udp_disconnect,
589 .ioctl = udp_ioctl,
590 .destroy = l2tp_ip_destroy_sock,
591 .setsockopt = ip_setsockopt,
592 .getsockopt = ip_getsockopt,
593 .sendmsg = l2tp_ip_sendmsg,
594 .recvmsg = l2tp_ip_recvmsg,
595 .backlog_rcv = l2tp_ip_backlog_recv,
596 .hash = inet_hash,
597 .unhash = inet_unhash,
598 .obj_size = sizeof(struct l2tp_ip_sock),
599#ifdef CONFIG_COMPAT
600 .compat_setsockopt = compat_ip_setsockopt,
601 .compat_getsockopt = compat_ip_getsockopt,
602#endif
603};
604
605static const struct proto_ops l2tp_ip_ops = {
606 .family = PF_INET,
607 .owner = THIS_MODULE,
608 .release = inet_release,
609 .bind = inet_bind,
610 .connect = inet_dgram_connect,
611 .socketpair = sock_no_socketpair,
612 .accept = sock_no_accept,
613 .getname = l2tp_ip_getname,
614 .poll = datagram_poll,
615 .ioctl = inet_ioctl,
616 .listen = sock_no_listen,
617 .shutdown = inet_shutdown,
618 .setsockopt = sock_common_setsockopt,
619 .getsockopt = sock_common_getsockopt,
620 .sendmsg = inet_sendmsg,
621 .recvmsg = sock_common_recvmsg,
622 .mmap = sock_no_mmap,
623 .sendpage = sock_no_sendpage,
624#ifdef CONFIG_COMPAT
625 .compat_setsockopt = compat_sock_common_setsockopt,
626 .compat_getsockopt = compat_sock_common_getsockopt,
627#endif
628};
629
630static struct inet_protosw l2tp_ip_protosw = {
631 .type = SOCK_DGRAM,
632 .protocol = IPPROTO_L2TP,
633 .prot = &l2tp_ip_prot,
634 .ops = &l2tp_ip_ops,
635 .no_check = 0,
636};
637
638static struct net_protocol l2tp_ip_protocol __read_mostly = {
639 .handler = l2tp_ip_recv,
640};
641
642static int __init l2tp_ip_init(void)
643{
644 int err;
645
646 printk(KERN_INFO "L2TP IP encapsulation support (L2TPv3)\n");
647
648 err = proto_register(&l2tp_ip_prot, 1);
649 if (err != 0)
650 goto out;
651
652 err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
653 if (err)
654 goto out1;
655
656 inet_register_protosw(&l2tp_ip_protosw);
657 return 0;
658
659out1:
660 proto_unregister(&l2tp_ip_prot);
661out:
662 return err;
663}
664
665static void __exit l2tp_ip_exit(void)
666{
667 inet_unregister_protosw(&l2tp_ip_protosw);
668 inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
669 proto_unregister(&l2tp_ip_prot);
670}
671
672module_init(l2tp_ip_init);
673module_exit(l2tp_ip_exit);
674
675MODULE_LICENSE("GPL");
676MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
677MODULE_DESCRIPTION("L2TP over IP");
678MODULE_VERSION("1.0");
679MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP);
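
From userspace the protocol is reached with an ordinary datagram socket; a hedged sketch (struct sockaddr_l2tpip and IPPROTO_L2TP come from linux/l2tp.h). Control frames written with send() get the 32-bit zero session ID prepended by l2tp_ip_sendmsg() above:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/l2tp.h>

static int my_open_l2tp_ip(uint32_t conn_id, const char *local_ip)
{
	struct sockaddr_l2tpip sa;
	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_L2TP);

	if (fd < 0)
		return -1;

	memset(&sa, 0, sizeof(sa));
	sa.l2tp_family = AF_INET;
	sa.l2tp_conn_id = conn_id;	/* our tunnel (connection) ID */
	sa.l2tp_addr.s_addr = inet_addr(local_ip);

	/* Lands in l2tp_ip_bind() */
	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return -1;

	return fd;
}
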
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
new file mode 100644
index 000000000000..4c1e540732d7
--- /dev/null
+++ b/net/l2tp/l2tp_netlink.c
@@ -0,0 +1,840 @@
1/*
2 * L2TP netlink layer, for management
3 *
4 * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
5 *
6 * Partly based on the IrDA netlink implementation
7 * (see net/irda/irnetlink.c) which is:
8 * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org>
9 * which is in turn partly based on the wireless netlink code:
10 * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 */
16
17#include <net/sock.h>
18#include <net/genetlink.h>
19#include <net/udp.h>
20#include <linux/in.h>
21#include <linux/udp.h>
22#include <linux/socket.h>
23#include <linux/module.h>
24#include <linux/list.h>
25#include <net/net_namespace.h>
26
27#include <linux/l2tp.h>
28
29#include "l2tp_core.h"
30
31
32static struct genl_family l2tp_nl_family = {
33 .id = GENL_ID_GENERATE,
34 .name = L2TP_GENL_NAME,
35 .version = L2TP_GENL_VERSION,
36 .hdrsize = 0,
37 .maxattr = L2TP_ATTR_MAX,
38};
39
40/* Accessed under genl lock */
41static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
42
43static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
44{
45 u32 tunnel_id;
46 u32 session_id;
47 char *ifname;
48 struct l2tp_tunnel *tunnel;
49 struct l2tp_session *session = NULL;
50 struct net *net = genl_info_net(info);
51
52 if (info->attrs[L2TP_ATTR_IFNAME]) {
53 ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
54 session = l2tp_session_find_by_ifname(net, ifname);
55 } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
56 (info->attrs[L2TP_ATTR_CONN_ID])) {
57 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
58 session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
59 tunnel = l2tp_tunnel_find(net, tunnel_id);
60 if (tunnel)
61 session = l2tp_session_find(net, tunnel, session_id);
62 }
63
64 return session;
65}
66
67static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
68{
69 struct sk_buff *msg;
70 void *hdr;
71 int ret = -ENOBUFS;
72
73 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
74 if (!msg) {
75 ret = -ENOMEM;
76 goto out;
77 }
78
79 hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
80 &l2tp_nl_family, 0, L2TP_CMD_NOOP);
81	if (!hdr) {
82		ret = -EMSGSIZE;
83		goto err_out;
84	}
85
86 genlmsg_end(msg, hdr);
87
88 return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
89
90err_out:
91 nlmsg_free(msg);
92
93out:
94 return ret;
95}
96
97static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info)
98{
99 u32 tunnel_id;
100 u32 peer_tunnel_id;
101 int proto_version;
102 int fd;
103 int ret = 0;
104 struct l2tp_tunnel_cfg cfg = { 0, };
105 struct l2tp_tunnel *tunnel;
106 struct net *net = genl_info_net(info);
107
108 if (!info->attrs[L2TP_ATTR_CONN_ID]) {
109 ret = -EINVAL;
110 goto out;
111 }
112 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
113
114 if (!info->attrs[L2TP_ATTR_PEER_CONN_ID]) {
115 ret = -EINVAL;
116 goto out;
117 }
118 peer_tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_CONN_ID]);
119
120 if (!info->attrs[L2TP_ATTR_PROTO_VERSION]) {
121 ret = -EINVAL;
122 goto out;
123 }
124 proto_version = nla_get_u8(info->attrs[L2TP_ATTR_PROTO_VERSION]);
125
126 if (!info->attrs[L2TP_ATTR_ENCAP_TYPE]) {
127 ret = -EINVAL;
128 goto out;
129 }
130 cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]);
131
132 fd = -1;
133 if (info->attrs[L2TP_ATTR_FD]) {
134 fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);
135 } else {
136 if (info->attrs[L2TP_ATTR_IP_SADDR])
137 cfg.local_ip.s_addr = nla_get_be32(info->attrs[L2TP_ATTR_IP_SADDR]);
138 if (info->attrs[L2TP_ATTR_IP_DADDR])
139 cfg.peer_ip.s_addr = nla_get_be32(info->attrs[L2TP_ATTR_IP_DADDR]);
140 if (info->attrs[L2TP_ATTR_UDP_SPORT])
141 cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]);
142 if (info->attrs[L2TP_ATTR_UDP_DPORT])
143 cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
144 if (info->attrs[L2TP_ATTR_UDP_CSUM])
145 cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]);
146 }
147
148 if (info->attrs[L2TP_ATTR_DEBUG])
149 cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
150
151 tunnel = l2tp_tunnel_find(net, tunnel_id);
152 if (tunnel != NULL) {
153 ret = -EEXIST;
154 goto out;
155 }
156
157 ret = -EINVAL;
158 switch (cfg.encap) {
159 case L2TP_ENCAPTYPE_UDP:
160 case L2TP_ENCAPTYPE_IP:
161 ret = l2tp_tunnel_create(net, fd, proto_version, tunnel_id,
162 peer_tunnel_id, &cfg, &tunnel);
163 break;
164 }
165
166out:
167 return ret;
168}
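
A hedged userspace counterpart, assuming libnl-3's genl API; the command and attribute names are the ones handled above. The sketch creates a managed tunnel over an already-connected UDP socket fd:

#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/l2tp.h>

static int my_tunnel_create(int fd, uint32_t tid, uint32_t peer_tid)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg;
	int family, err;

	if (sk == NULL)
		return -1;
	if (genl_connect(sk) < 0)
		goto fail;

	family = genl_ctrl_resolve(sk, L2TP_GENL_NAME);
	if (family < 0)
		goto fail;

	msg = nlmsg_alloc();
	if (msg == NULL)
		goto fail;

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    L2TP_CMD_TUNNEL_CREATE, L2TP_GENL_VERSION);
	nla_put_u32(msg, L2TP_ATTR_CONN_ID, tid);
	nla_put_u32(msg, L2TP_ATTR_PEER_CONN_ID, peer_tid);
	nla_put_u8(msg, L2TP_ATTR_PROTO_VERSION, 3);
	nla_put_u16(msg, L2TP_ATTR_ENCAP_TYPE, L2TP_ENCAPTYPE_UDP);
	nla_put_u32(msg, L2TP_ATTR_FD, fd);	/* reuse the UDP socket */

	err = nl_send_auto(sk, msg);
	nlmsg_free(msg);
	nl_socket_free(sk);
	return (err < 0) ? -1 : 0;

fail:
	nl_socket_free(sk);
	return -1;
}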
169
170static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info)
171{
172 struct l2tp_tunnel *tunnel;
173 u32 tunnel_id;
174 int ret = 0;
175 struct net *net = genl_info_net(info);
176
177 if (!info->attrs[L2TP_ATTR_CONN_ID]) {
178 ret = -EINVAL;
179 goto out;
180 }
181 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
182
183 tunnel = l2tp_tunnel_find(net, tunnel_id);
184 if (tunnel == NULL) {
185 ret = -ENODEV;
186 goto out;
187 }
188
189 (void) l2tp_tunnel_delete(tunnel);
190
191out:
192 return ret;
193}
194
195static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info)
196{
197 struct l2tp_tunnel *tunnel;
198 u32 tunnel_id;
199 int ret = 0;
200 struct net *net = genl_info_net(info);
201
202 if (!info->attrs[L2TP_ATTR_CONN_ID]) {
203 ret = -EINVAL;
204 goto out;
205 }
206 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
207
208 tunnel = l2tp_tunnel_find(net, tunnel_id);
209 if (tunnel == NULL) {
210 ret = -ENODEV;
211 goto out;
212 }
213
214 if (info->attrs[L2TP_ATTR_DEBUG])
215 tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
216
217out:
218 return ret;
219}
220
221static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
222 struct l2tp_tunnel *tunnel)
223{
224 void *hdr;
225 struct nlattr *nest;
226 struct sock *sk = NULL;
227 struct inet_sock *inet;
228
229 hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags,
230 L2TP_CMD_TUNNEL_GET);
231	if (!hdr)
232		return -EMSGSIZE;
233
234 NLA_PUT_U8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version);
235 NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id);
236 NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id);
237 NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, tunnel->debug);
238 NLA_PUT_U16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap);
239
240 nest = nla_nest_start(skb, L2TP_ATTR_STATS);
241 if (nest == NULL)
242 goto nla_put_failure;
243
244 NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, tunnel->stats.tx_packets);
245 NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, tunnel->stats.tx_bytes);
246 NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, tunnel->stats.tx_errors);
247 NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, tunnel->stats.rx_packets);
248 NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, tunnel->stats.rx_bytes);
249 NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, tunnel->stats.rx_seq_discards);
250 NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, tunnel->stats.rx_oos_packets);
251 NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, tunnel->stats.rx_errors);
252 nla_nest_end(skb, nest);
253
254 sk = tunnel->sock;
255 if (!sk)
256 goto out;
257
258 inet = inet_sk(sk);
259
260 switch (tunnel->encap) {
261 case L2TP_ENCAPTYPE_UDP:
262 NLA_PUT_U16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport));
263 NLA_PUT_U16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport));
264 NLA_PUT_U8(skb, L2TP_ATTR_UDP_CSUM, (sk->sk_no_check != UDP_CSUM_NOXMIT));
265 /* NOBREAK */
266 case L2TP_ENCAPTYPE_IP:
267 NLA_PUT_BE32(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr);
268 NLA_PUT_BE32(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr);
269 break;
270 }
271
272out:
273 return genlmsg_end(skb, hdr);
274
275nla_put_failure:
276 genlmsg_cancel(skb, hdr);
277 return -1;
278}
279
280static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
281{
282 struct l2tp_tunnel *tunnel;
283 struct sk_buff *msg;
284 u32 tunnel_id;
285 int ret = -ENOBUFS;
286 struct net *net = genl_info_net(info);
287
288 if (!info->attrs[L2TP_ATTR_CONN_ID]) {
289 ret = -EINVAL;
290 goto out;
291 }
292
293 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
294
295 tunnel = l2tp_tunnel_find(net, tunnel_id);
296 if (tunnel == NULL) {
297 ret = -ENODEV;
298 goto out;
299 }
300
301 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
302 if (!msg) {
303 ret = -ENOMEM;
304 goto out;
305 }
306
307 ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq,
308 NLM_F_ACK, tunnel);
309 if (ret < 0)
310 goto err_out;
311
312 return genlmsg_unicast(net, msg, info->snd_pid);
313
314err_out:
315 nlmsg_free(msg);
316
317out:
318 return ret;
319}
320
321static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback *cb)
322{
323 int ti = cb->args[0];
324 struct l2tp_tunnel *tunnel;
325 struct net *net = sock_net(skb->sk);
326
327 for (;;) {
328 tunnel = l2tp_tunnel_find_nth(net, ti);
329 if (tunnel == NULL)
330 goto out;
331
332 if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid,
333 cb->nlh->nlmsg_seq, NLM_F_MULTI,
334 tunnel) <= 0)
335 goto out;
336
337 ti++;
338 }
339
340out:
341 cb->args[0] = ti;
342
343 return skb->len;
344}
345
346static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *info)
347{
348 u32 tunnel_id = 0;
349 u32 session_id;
350 u32 peer_session_id;
351 int ret = 0;
352 struct l2tp_tunnel *tunnel;
353 struct l2tp_session *session;
354 struct l2tp_session_cfg cfg = { 0, };
355 struct net *net = genl_info_net(info);
356
357 if (!info->attrs[L2TP_ATTR_CONN_ID]) {
358 ret = -EINVAL;
359 goto out;
360 }
361 tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
362 tunnel = l2tp_tunnel_find(net, tunnel_id);
363 if (!tunnel) {
364 ret = -ENODEV;
365 goto out;
366 }
367
368 if (!info->attrs[L2TP_ATTR_SESSION_ID]) {
369 ret = -EINVAL;
370 goto out;
371 }
372 session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
373 session = l2tp_session_find(net, tunnel, session_id);
374 if (session) {
375 ret = -EEXIST;
376 goto out;
377 }
378
379 if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
380 ret = -EINVAL;
381 goto out;
382 }
383 peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]);
384
385 if (!info->attrs[L2TP_ATTR_PW_TYPE]) {
386 ret = -EINVAL;
387 goto out;
388 }
389 cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]);
390 if (cfg.pw_type >= __L2TP_PWTYPE_MAX) {
391 ret = -EINVAL;
392 goto out;
393 }
394
395 if (tunnel->version > 2) {
396 if (info->attrs[L2TP_ATTR_OFFSET])
397 cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]);
398
399 if (info->attrs[L2TP_ATTR_DATA_SEQ])
400 cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
401
402 cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT;
403 if (info->attrs[L2TP_ATTR_L2SPEC_TYPE])
404 cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]);
405
406 cfg.l2specific_len = 4;
407 if (info->attrs[L2TP_ATTR_L2SPEC_LEN])
408 cfg.l2specific_len = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_LEN]);
409
410 if (info->attrs[L2TP_ATTR_COOKIE]) {
411 u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
412 if (len > 8) {
413 ret = -EINVAL;
414 goto out;
415 }
416 cfg.cookie_len = len;
417 memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len);
418 }
419 if (info->attrs[L2TP_ATTR_PEER_COOKIE]) {
420 u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
421 if (len > 8) {
422 ret = -EINVAL;
423 goto out;
424 }
425 cfg.peer_cookie_len = len;
426 memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len);
427 }
428 if (info->attrs[L2TP_ATTR_IFNAME])
429 cfg.ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
430
431 if (info->attrs[L2TP_ATTR_VLAN_ID])
432 cfg.vlan_id = nla_get_u16(info->attrs[L2TP_ATTR_VLAN_ID]);
433 }
434
435 if (info->attrs[L2TP_ATTR_DEBUG])
436 cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
437
438 if (info->attrs[L2TP_ATTR_RECV_SEQ])
439 cfg.recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
440
441 if (info->attrs[L2TP_ATTR_SEND_SEQ])
442 cfg.send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
443
444 if (info->attrs[L2TP_ATTR_LNS_MODE])
445 cfg.lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
446
447 if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
448 cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
449
450 if (info->attrs[L2TP_ATTR_MTU])
451 cfg.mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
452
453 if (info->attrs[L2TP_ATTR_MRU])
454 cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
455
456 if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
457 (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
458 ret = -EPROTONOSUPPORT;
459 goto out;
460 }
461
462 /* Check that pseudowire-specific params are present */
463 switch (cfg.pw_type) {
464 case L2TP_PWTYPE_NONE:
465 break;
466 case L2TP_PWTYPE_ETH_VLAN:
467 if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
468 ret = -EINVAL;
469 goto out;
470 }
471 break;
472 case L2TP_PWTYPE_ETH:
473 break;
474 case L2TP_PWTYPE_PPP:
475 case L2TP_PWTYPE_PPP_AC:
476 break;
477 case L2TP_PWTYPE_IP:
478 default:
479 ret = -EPROTONOSUPPORT;
480 break;
481 }
482
483 ret = -EPROTONOSUPPORT;
484 if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create)
485 ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id,
486 session_id, peer_session_id, &cfg);
487
488out:
489 return ret;
490}
491
492static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *info)
493{
494 int ret = 0;
495 struct l2tp_session *session;
496 u16 pw_type;
497
498 session = l2tp_nl_session_find(info);
499 if (session == NULL) {
500 ret = -ENODEV;
501 goto out;
502 }
503
504 pw_type = session->pwtype;
505 if (pw_type < __L2TP_PWTYPE_MAX)
506 if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
507 ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
508
509out:
510 return ret;
511}
512
513static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *info)
514{
515 int ret = 0;
516 struct l2tp_session *session;
517
518 session = l2tp_nl_session_find(info);
519 if (session == NULL) {
520 ret = -ENODEV;
521 goto out;
522 }
523
524 if (info->attrs[L2TP_ATTR_DEBUG])
525 session->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
526
527 if (info->attrs[L2TP_ATTR_DATA_SEQ])
528 session->data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
529
530 if (info->attrs[L2TP_ATTR_RECV_SEQ])
531 session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
532
533 if (info->attrs[L2TP_ATTR_SEND_SEQ])
534 session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
535
536 if (info->attrs[L2TP_ATTR_LNS_MODE])
537 session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
538
539 if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
540 session->reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
541
542 if (info->attrs[L2TP_ATTR_MTU])
543 session->mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
544
545 if (info->attrs[L2TP_ATTR_MRU])
546 session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
547
548out:
549 return ret;
550}
551
552static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
553 struct l2tp_session *session)
554{
555 void *hdr;
556 struct nlattr *nest;
557 struct l2tp_tunnel *tunnel = session->tunnel;
558 struct sock *sk = NULL;
559
560 sk = tunnel->sock;
561
562 hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
563	if (!hdr)
564		return -EMSGSIZE;
565
566 NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id);
567 NLA_PUT_U32(skb, L2TP_ATTR_SESSION_ID, session->session_id);
568 NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id);
569 NLA_PUT_U32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id);
570 NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, session->debug);
571 NLA_PUT_U16(skb, L2TP_ATTR_PW_TYPE, session->pwtype);
572 NLA_PUT_U16(skb, L2TP_ATTR_MTU, session->mtu);
573 if (session->mru)
574 NLA_PUT_U16(skb, L2TP_ATTR_MRU, session->mru);
575
576 if (session->ifname && session->ifname[0])
577 NLA_PUT_STRING(skb, L2TP_ATTR_IFNAME, session->ifname);
578 if (session->cookie_len)
579 NLA_PUT(skb, L2TP_ATTR_COOKIE, session->cookie_len, &session->cookie[0]);
580 if (session->peer_cookie_len)
581 NLA_PUT(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, &session->peer_cookie[0]);
582 NLA_PUT_U8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq);
583 NLA_PUT_U8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq);
584 NLA_PUT_U8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode);
585#ifdef CONFIG_XFRM
586 if ((sk) && (sk->sk_policy[0] || sk->sk_policy[1]))
587 NLA_PUT_U8(skb, L2TP_ATTR_USING_IPSEC, 1);
588#endif
589 if (session->reorder_timeout)
590 NLA_PUT_MSECS(skb, L2TP_ATTR_RECV_TIMEOUT, session->reorder_timeout);
591
592 nest = nla_nest_start(skb, L2TP_ATTR_STATS);
593 if (nest == NULL)
594 goto nla_put_failure;
595 NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, session->stats.tx_packets);
596 NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, session->stats.tx_bytes);
597 NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, session->stats.tx_errors);
598 NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, session->stats.rx_packets);
599 NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, session->stats.rx_bytes);
600 NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, session->stats.rx_seq_discards);
601 NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, session->stats.rx_oos_packets);
602 NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, session->stats.rx_errors);
603 nla_nest_end(skb, nest);
604
605 return genlmsg_end(skb, hdr);
606
607 nla_put_failure:
608 genlmsg_cancel(skb, hdr);
609 return -1;
610}
611
612static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
613{
614 struct l2tp_session *session;
615 struct sk_buff *msg;
616 int ret;
617
618 session = l2tp_nl_session_find(info);
619 if (session == NULL) {
620 ret = -ENODEV;
621 goto out;
622 }
623
624 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
625 if (!msg) {
626 ret = -ENOMEM;
627 goto out;
628 }
629
630 ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq,
631 0, session);
632 if (ret < 0)
633 goto err_out;
634
635 return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
636
637err_out:
638 nlmsg_free(msg);
639
640out:
641 return ret;
642}
643
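/* For illustration only: a hedged userspace sketch of invoking
 * L2TP_CMD_SESSION_GET with libnl-3. The nl_*()/genl_*() calls are
 * libnl API, not part of this driver; "l2tp" is assumed to be the
 * generic netlink family name registered by this module, and error
 * handling is omitted.
 *
 *	struct nl_sock *nlsk = nl_socket_alloc();
 *	struct nl_msg *msg = nlmsg_alloc();
 *	int family;
 *
 *	genl_connect(nlsk);
 *	family = genl_ctrl_resolve(nlsk, "l2tp");
 *	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
 *		    L2TP_CMD_SESSION_GET, 1);
 *	nla_put_u32(msg, L2TP_ATTR_CONN_ID, tunnel_id);
 *	nla_put_u32(msg, L2TP_ATTR_SESSION_ID, session_id);
 *	nl_send_auto(nlsk, msg);
 */
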
644static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback *cb)
645{
646 struct net *net = sock_net(skb->sk);
647 struct l2tp_session *session;
648 struct l2tp_tunnel *tunnel = NULL;
649 int ti = cb->args[0];
650 int si = cb->args[1];
651
652 for (;;) {
653 if (tunnel == NULL) {
654 tunnel = l2tp_tunnel_find_nth(net, ti);
655 if (tunnel == NULL)
656 goto out;
657 }
658
659 session = l2tp_session_find_nth(tunnel, si);
660 if (session == NULL) {
661 ti++;
662 tunnel = NULL;
663 si = 0;
664 continue;
665 }
666
667 if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid,
668 cb->nlh->nlmsg_seq, NLM_F_MULTI,
669 session) <= 0)
670 break;
671
672 si++;
673 }
674
675out:
676 cb->args[0] = ti;
677 cb->args[1] = si;
678
679 return skb->len;
680}
681
682static struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = {
683 [L2TP_ATTR_NONE] = { .type = NLA_UNSPEC, },
684 [L2TP_ATTR_PW_TYPE] = { .type = NLA_U16, },
685 [L2TP_ATTR_ENCAP_TYPE] = { .type = NLA_U16, },
686 [L2TP_ATTR_OFFSET] = { .type = NLA_U16, },
687 [L2TP_ATTR_DATA_SEQ] = { .type = NLA_U8, },
688 [L2TP_ATTR_L2SPEC_TYPE] = { .type = NLA_U8, },
689 [L2TP_ATTR_L2SPEC_LEN] = { .type = NLA_U8, },
690 [L2TP_ATTR_PROTO_VERSION] = { .type = NLA_U8, },
691 [L2TP_ATTR_CONN_ID] = { .type = NLA_U32, },
692 [L2TP_ATTR_PEER_CONN_ID] = { .type = NLA_U32, },
693 [L2TP_ATTR_SESSION_ID] = { .type = NLA_U32, },
694 [L2TP_ATTR_PEER_SESSION_ID] = { .type = NLA_U32, },
695 [L2TP_ATTR_UDP_CSUM] = { .type = NLA_U8, },
696 [L2TP_ATTR_VLAN_ID] = { .type = NLA_U16, },
697 [L2TP_ATTR_DEBUG] = { .type = NLA_U32, },
698 [L2TP_ATTR_RECV_SEQ] = { .type = NLA_U8, },
699 [L2TP_ATTR_SEND_SEQ] = { .type = NLA_U8, },
700 [L2TP_ATTR_LNS_MODE] = { .type = NLA_U8, },
701 [L2TP_ATTR_USING_IPSEC] = { .type = NLA_U8, },
702 [L2TP_ATTR_RECV_TIMEOUT] = { .type = NLA_MSECS, },
703 [L2TP_ATTR_FD] = { .type = NLA_U32, },
704 [L2TP_ATTR_IP_SADDR] = { .type = NLA_U32, },
705 [L2TP_ATTR_IP_DADDR] = { .type = NLA_U32, },
706 [L2TP_ATTR_UDP_SPORT] = { .type = NLA_U16, },
707 [L2TP_ATTR_UDP_DPORT] = { .type = NLA_U16, },
708 [L2TP_ATTR_MTU] = { .type = NLA_U16, },
709 [L2TP_ATTR_MRU] = { .type = NLA_U16, },
710 [L2TP_ATTR_STATS] = { .type = NLA_NESTED, },
711 [L2TP_ATTR_IFNAME] = {
712 .type = NLA_NUL_STRING,
713 .len = IFNAMSIZ - 1,
714 },
715 [L2TP_ATTR_COOKIE] = {
716 .type = NLA_BINARY,
717 .len = 8,
718 },
719 [L2TP_ATTR_PEER_COOKIE] = {
720 .type = NLA_BINARY,
721 .len = 8,
722 },
723};
724
725static struct genl_ops l2tp_nl_ops[] = {
726 {
727 .cmd = L2TP_CMD_NOOP,
728 .doit = l2tp_nl_cmd_noop,
729 .policy = l2tp_nl_policy,
730 /* can be retrieved by unprivileged users */
731 },
732 {
733 .cmd = L2TP_CMD_TUNNEL_CREATE,
734 .doit = l2tp_nl_cmd_tunnel_create,
735 .policy = l2tp_nl_policy,
736 .flags = GENL_ADMIN_PERM,
737 },
738 {
739 .cmd = L2TP_CMD_TUNNEL_DELETE,
740 .doit = l2tp_nl_cmd_tunnel_delete,
741 .policy = l2tp_nl_policy,
742 .flags = GENL_ADMIN_PERM,
743 },
744 {
745 .cmd = L2TP_CMD_TUNNEL_MODIFY,
746 .doit = l2tp_nl_cmd_tunnel_modify,
747 .policy = l2tp_nl_policy,
748 .flags = GENL_ADMIN_PERM,
749 },
750 {
751 .cmd = L2TP_CMD_TUNNEL_GET,
752 .doit = l2tp_nl_cmd_tunnel_get,
753 .dumpit = l2tp_nl_cmd_tunnel_dump,
754 .policy = l2tp_nl_policy,
755 .flags = GENL_ADMIN_PERM,
756 },
757 {
758 .cmd = L2TP_CMD_SESSION_CREATE,
759 .doit = l2tp_nl_cmd_session_create,
760 .policy = l2tp_nl_policy,
761 .flags = GENL_ADMIN_PERM,
762 },
763 {
764 .cmd = L2TP_CMD_SESSION_DELETE,
765 .doit = l2tp_nl_cmd_session_delete,
766 .policy = l2tp_nl_policy,
767 .flags = GENL_ADMIN_PERM,
768 },
769 {
770 .cmd = L2TP_CMD_SESSION_MODIFY,
771 .doit = l2tp_nl_cmd_session_modify,
772 .policy = l2tp_nl_policy,
773 .flags = GENL_ADMIN_PERM,
774 },
775 {
776 .cmd = L2TP_CMD_SESSION_GET,
777 .doit = l2tp_nl_cmd_session_get,
778 .dumpit = l2tp_nl_cmd_session_dump,
779 .policy = l2tp_nl_policy,
780 .flags = GENL_ADMIN_PERM,
781 },
782};
783
784int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops)
785{
786 int ret;
787
788 ret = -EINVAL;
789 if (pw_type >= __L2TP_PWTYPE_MAX)
790 goto err;
791
792 genl_lock();
793 ret = -EBUSY;
794 if (l2tp_nl_cmd_ops[pw_type])
795 goto out;
796
797	l2tp_nl_cmd_ops[pw_type] = ops;
798	ret = 0;
799out:
800 genl_unlock();
801err:
802	return ret;
803}
804EXPORT_SYMBOL_GPL(l2tp_nl_register_ops);
805
806void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type)
807{
808 if (pw_type < __L2TP_PWTYPE_MAX) {
809 genl_lock();
810 l2tp_nl_cmd_ops[pw_type] = NULL;
811 genl_unlock();
812 }
813}
814EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
815
816static int l2tp_nl_init(void)
817{
818 int err;
819
820 printk(KERN_INFO "L2TP netlink interface\n");
821 err = genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops,
822 ARRAY_SIZE(l2tp_nl_ops));
823
824 return err;
825}
826
827static void l2tp_nl_cleanup(void)
828{
829 genl_unregister_family(&l2tp_nl_family);
830}
831
832module_init(l2tp_nl_init);
833module_exit(l2tp_nl_cleanup);
834
835MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
836MODULE_DESCRIPTION("L2TP netlink");
837MODULE_LICENSE("GPL");
838MODULE_VERSION("1.0");
839MODULE_ALIAS("net-pf-" __stringify(PF_NETLINK) "-proto-" \
840 __stringify(NETLINK_GENERIC) "-type-" "l2tp");
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
new file mode 100644
index 000000000000..ff954b3e94b6
--- /dev/null
+++ b/net/l2tp/l2tp_ppp.c
@@ -0,0 +1,1840 @@
1/*****************************************************************************
2 * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
3 *
4 * PPPoX --- Generic PPP encapsulation socket family
5 * PPPoL2TP --- PPP over L2TP (RFC 2661)
6 *
7 * Version: 2.0.0
8 *
9 * Authors: James Chapman (jchapman@katalix.com)
10 *
11 * Based on original work by Martijn van Oosterhout <kleptog@svana.org>
12 *
13 * License:
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version
17 * 2 of the License, or (at your option) any later version.
18 *
19 */
20
21/* This driver handles only L2TP data frames; control frames are handled by a
22 * userspace application.
23 *
24 * To send data in an L2TP session, userspace opens a PPPoL2TP socket and
25 * attaches it to a bound UDP socket with local tunnel_id / session_id and
26 * peer tunnel_id / session_id set. Data can then be sent or received using
27 * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket
28 * can be read or modified using ioctl() or [gs]etsockopt() calls.
29 *
30 * When a PPPoL2TP socket is connected with local and peer session_id values
31 * zero, the socket is treated as a special tunnel management socket.
32 *
33 * Here's example userspace code to create a socket for sending/receiving data
34 * over an L2TP session:
35 *
36 * struct sockaddr_pppol2tp sax;
37 * int fd;
38 * int session_fd;
39 *
40 * fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
41 *
42 * sax.sa_family = AF_PPPOX;
43 * sax.sa_protocol = PX_PROTO_OL2TP;
44 * sax.pppol2tp.fd = tunnel_fd; // bound UDP socket
45 * sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
46 * sax.pppol2tp.addr.sin_port = addr->sin_port;
47 * sax.pppol2tp.addr.sin_family = AF_INET;
48 * sax.pppol2tp.s_tunnel = tunnel_id;
49 * sax.pppol2tp.s_session = session_id;
50 * sax.pppol2tp.d_tunnel = peer_tunnel_id;
51 * sax.pppol2tp.d_session = peer_session_id;
52 *
53 * session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax));
54 *
55 * A pppd plugin that allows PPP traffic to be carried over L2TP using
56 * this driver is available from the OpenL2TP project at
57 * http://openl2tp.sourceforge.net.
58 */
59
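/* A hedged sketch of the tunnel-management special case mentioned
 * above: connecting with both session IDs set to zero yields a socket
 * that carries no data and only accepts ioctl()/[gs]etsockopt()
 * control calls for the tunnel. tunnel_fd, tunnel_id and
 * peer_tunnel_id are assumed to come from the userspace control
 * plane; error handling is omitted.
 *
 *	struct sockaddr_pppol2tp sax;
 *	int tfd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
 *
 *	memset(&sax, 0, sizeof(sax));
 *	sax.sa_family = AF_PPPOX;
 *	sax.sa_protocol = PX_PROTO_OL2TP;
 *	sax.pppol2tp.fd = tunnel_fd;
 *	sax.pppol2tp.s_tunnel = tunnel_id;
 *	sax.pppol2tp.d_tunnel = peer_tunnel_id;
 *	sax.pppol2tp.s_session = 0;
 *	sax.pppol2tp.d_session = 0;
 *	connect(tfd, (struct sockaddr *)&sax, sizeof(sax));
 */
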
60#include <linux/module.h>
61#include <linux/string.h>
62#include <linux/list.h>
63#include <linux/uaccess.h>
64
65#include <linux/kernel.h>
66#include <linux/spinlock.h>
67#include <linux/kthread.h>
68#include <linux/sched.h>
69#include <linux/slab.h>
70#include <linux/errno.h>
71#include <linux/jiffies.h>
72
73#include <linux/netdevice.h>
74#include <linux/net.h>
75#include <linux/inetdevice.h>
76#include <linux/skbuff.h>
77#include <linux/init.h>
78#include <linux/ip.h>
79#include <linux/udp.h>
80#include <linux/if_pppox.h>
81#include <linux/if_pppol2tp.h>
82#include <net/sock.h>
83#include <linux/ppp_channel.h>
84#include <linux/ppp_defs.h>
85#include <linux/if_ppp.h>
86#include <linux/file.h>
87#include <linux/hash.h>
88#include <linux/sort.h>
89#include <linux/proc_fs.h>
90#include <linux/l2tp.h>
91#include <linux/nsproxy.h>
92#include <net/net_namespace.h>
93#include <net/netns/generic.h>
94#include <net/dst.h>
95#include <net/ip.h>
96#include <net/udp.h>
97#include <net/xfrm.h>
98
99#include <asm/byteorder.h>
100#include <asm/atomic.h>
101
102#include "l2tp_core.h"
103
104#define PPPOL2TP_DRV_VERSION "V2.0"
105
106/* Space for UDP, L2TP and PPP headers */
107#define PPPOL2TP_HEADER_OVERHEAD 40
108
109#define PRINTK(_mask, _type, _lvl, _fmt, args...) \
110 do { \
111 if ((_mask) & (_type)) \
112 printk(_lvl "PPPOL2TP: " _fmt, ##args); \
113 } while (0)
114
115/* Number of bytes to build transmit L2TP headers.
116 * Unfortunately the size is different depending on whether sequence numbers
117 * are enabled.
118 */
119#define PPPOL2TP_L2TP_HDR_SIZE_SEQ 10
120#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ 6
121
122/* Private data of each session. This data lives at the end of struct
123 * l2tp_session, referenced via session->priv[].
124 */
125struct pppol2tp_session {
126 int owner; /* pid that opened the socket */
127
128 struct sock *sock; /* Pointer to the session
129 * PPPoX socket */
130 struct sock *tunnel_sock; /* Pointer to the tunnel UDP
131 * socket */
132 int flags; /* accessed by PPPIOCGFLAGS.
133 * Unused. */
134};
135
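/* How this private area is reached (see pppol2tp_connect() and
 * pppol2tp_session_create() below): the core allocates the session
 * with room for our data appended, and l2tp_session_priv() returns a
 * pointer to that trailing area:
 *
 *	session = l2tp_session_create(sizeof(struct pppol2tp_session),
 *				      tunnel, session_id,
 *				      peer_session_id, &cfg);
 *	ps = l2tp_session_priv(session);
 *	ps->tunnel_sock = tunnel->sock;
 */
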
136static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
137
138static const struct ppp_channel_ops pppol2tp_chan_ops = {
139 .start_xmit = pppol2tp_xmit,
140};
141
142static const struct proto_ops pppol2tp_ops;
143
144/* Helpers to obtain tunnel/session contexts from sockets.
145 */
146static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
147{
148 struct l2tp_session *session;
149
150 if (sk == NULL)
151 return NULL;
152
153 sock_hold(sk);
154 session = (struct l2tp_session *)(sk->sk_user_data);
155 if (session == NULL) {
156 sock_put(sk);
157 goto out;
158 }
159
160 BUG_ON(session->magic != L2TP_SESSION_MAGIC);
161
162out:
163 return session;
164}
165
166/*****************************************************************************
167 * Receive data handling
168 *****************************************************************************/
169
170static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
171{
172 /* Skip PPP header, if present. In testing, Microsoft L2TP clients
173 * don't send the PPP header (PPP header compression enabled), but
174 * other clients can include the header. So we cope with both cases
175 * here. The PPP header is always FF03 when using L2TP.
176 *
177 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
178 * the field may be unaligned.
179 */
180 if (!pskb_may_pull(skb, 2))
181 return 1;
182
183 if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
184 skb_pull(skb, 2);
185
186 return 0;
187}
188
189/* Receive message. This is the recvmsg for the PPPoL2TP socket.
190 */
191static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
192 struct msghdr *msg, size_t len,
193 int flags)
194{
195 int err;
196 struct sk_buff *skb;
197 struct sock *sk = sock->sk;
198
199 err = -EIO;
200 if (sk->sk_state & PPPOX_BOUND)
201 goto end;
202
203 msg->msg_namelen = 0;
204
205 err = 0;
206 skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
207 flags & MSG_DONTWAIT, &err);
208 if (!skb)
209 goto end;
210
211 if (len > skb->len)
212 len = skb->len;
213 else if (len < skb->len)
214 msg->msg_flags |= MSG_TRUNC;
215
216 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
217 if (likely(err == 0))
218 err = len;
219
220 kfree_skb(skb);
221end:
222 return err;
223}
224
225static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
226{
227 struct pppol2tp_session *ps = l2tp_session_priv(session);
228 struct sock *sk = NULL;
229
230 /* If the socket is bound, send it in to PPP's input queue. Otherwise
231 * queue it on the session socket.
232 */
233 sk = ps->sock;
234 if (sk == NULL)
235 goto no_sock;
236
237 if (sk->sk_state & PPPOX_BOUND) {
238 struct pppox_sock *po;
239 PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
240 "%s: recv %d byte data frame, passing to ppp\n",
241 session->name, data_len);
242
243 /* We need to forget all info related to the L2TP packet
244 * gathered in the skb as we are going to reuse the same
245 * skb for the inner packet.
246 * Namely we need to:
247 * - reset xfrm (IPSec) information as it applies to
248 * the outer L2TP packet and not to the inner one
249 * - release the dst to force a route lookup on the inner
250 * IP packet since skb->dst currently points to the dst
251 * of the UDP tunnel
252 * - reset netfilter information as it doesn't apply
253 * to the inner packet either
254 */
255 secpath_reset(skb);
256 skb_dst_drop(skb);
257 nf_reset(skb);
258
259 po = pppox_sk(sk);
260 ppp_input(&po->chan, skb);
261 } else {
262 PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
263 "%s: socket not bound\n", session->name);
264
265 /* Not bound. Nothing we can do, so discard. */
266 session->stats.rx_errors++;
267 kfree_skb(skb);
268 }
269
270 return;
271
272no_sock:
273 PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
274 "%s: no socket\n", session->name);
275 kfree_skb(skb);
276}
277
278static void pppol2tp_session_sock_hold(struct l2tp_session *session)
279{
280 struct pppol2tp_session *ps = l2tp_session_priv(session);
281
282 if (ps->sock)
283 sock_hold(ps->sock);
284}
285
286static void pppol2tp_session_sock_put(struct l2tp_session *session)
287{
288 struct pppol2tp_session *ps = l2tp_session_priv(session);
289
290 if (ps->sock)
291 sock_put(ps->sock);
292}
293
294/************************************************************************
295 * Transmit handling
296 ***********************************************************************/
297
298/* This is the sendmsg for the PPPoL2TP session socket. We come here
299 * when a user application does a sendmsg() on the session socket. L2TP and
300 * PPP headers must be inserted into the user's data.
301 */
302static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
303 size_t total_len)
304{
305 static const unsigned char ppph[2] = { 0xff, 0x03 };
306 struct sock *sk = sock->sk;
307 struct sk_buff *skb;
308 int error;
309 struct l2tp_session *session;
310 struct l2tp_tunnel *tunnel;
311 struct pppol2tp_session *ps;
312 int uhlen;
313
314 error = -ENOTCONN;
315 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
316 goto error;
317
318 /* Get session and tunnel contexts */
319 error = -EBADF;
320 session = pppol2tp_sock_to_session(sk);
321 if (session == NULL)
322 goto error;
323
324 ps = l2tp_session_priv(session);
325 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
326 if (tunnel == NULL)
327 goto error_put_sess;
328
329 uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
330
331 /* Allocate a socket buffer */
332 error = -ENOMEM;
333 skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
334 uhlen + session->hdr_len +
335 sizeof(ppph) + total_len,
336 0, GFP_KERNEL);
337 if (!skb)
338 goto error_put_sess_tun;
339
340 /* Reserve space for headers. */
341 skb_reserve(skb, NET_SKB_PAD);
342 skb_reset_network_header(skb);
343 skb_reserve(skb, sizeof(struct iphdr));
344 skb_reset_transport_header(skb);
345 skb_reserve(skb, uhlen);
346
347 /* Add PPP header */
348 skb->data[0] = ppph[0];
349 skb->data[1] = ppph[1];
350 skb_put(skb, 2);
351
352 /* Copy user data into skb */
353 error = memcpy_fromiovec(skb->data, m->msg_iov, total_len);
354 if (error < 0) {
355 kfree_skb(skb);
356 goto error_put_sess_tun;
357 }
358 skb_put(skb, total_len);
359
360	l2tp_xmit_skb(session, skb, session->hdr_len);
361
362	sock_put(ps->tunnel_sock);
363	sock_put(sk);
364	return total_len;
365
366error_put_sess_tun:
367 sock_put(ps->tunnel_sock);
368error_put_sess:
369 sock_put(sk);
370error:
371 return error;
372}
373
374/* Transmit function called by generic PPP driver. Sends PPP frame
375 * over PPPoL2TP socket.
376 *
377 * This is almost the same as pppol2tp_sendmsg(), but rather than
378 * being called with a msghdr from userspace, it is called with a skb
379 * from the kernel.
380 *
381 * The supplied skb from ppp doesn't have enough headroom for the
382 * insertion of L2TP, UDP and IP headers so we need to allocate more
383 * headroom in the skb. This will create a cloned skb. But we must be
384 * careful in the error case because the caller will expect to free
385 * the skb it supplied, not our cloned skb. So we take care to always
386 * leave the original skb unfreed if we return an error.
387 */
388static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
389{
390 static const u8 ppph[2] = { 0xff, 0x03 };
391 struct sock *sk = (struct sock *) chan->private;
392 struct sock *sk_tun;
393 struct l2tp_session *session;
394 struct l2tp_tunnel *tunnel;
395 struct pppol2tp_session *ps;
396 int old_headroom;
397 int new_headroom;
398
399 if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
400 goto abort;
401
402 /* Get session and tunnel contexts from the socket */
403 session = pppol2tp_sock_to_session(sk);
404 if (session == NULL)
405 goto abort;
406
407 ps = l2tp_session_priv(session);
408 sk_tun = ps->tunnel_sock;
409 if (sk_tun == NULL)
410 goto abort_put_sess;
411 tunnel = l2tp_sock_to_tunnel(sk_tun);
412 if (tunnel == NULL)
413 goto abort_put_sess;
414
415 old_headroom = skb_headroom(skb);
416 if (skb_cow_head(skb, sizeof(ppph)))
417 goto abort_put_sess_tun;
418
419 new_headroom = skb_headroom(skb);
420 skb->truesize += new_headroom - old_headroom;
421
422 /* Setup PPP header */
423 __skb_push(skb, sizeof(ppph));
424 skb->data[0] = ppph[0];
425 skb->data[1] = ppph[1];
426
427 l2tp_xmit_skb(session, skb, session->hdr_len);
428
429 sock_put(sk_tun);
430 sock_put(sk);
431 return 1;
432
433abort_put_sess_tun:
434 sock_put(sk_tun);
435abort_put_sess:
436 sock_put(sk);
437abort:
438 /* Free the original skb */
439 kfree_skb(skb);
440 return 1;
441}
442
443/*****************************************************************************
444 * Session (and tunnel control) socket create/destroy.
445 *****************************************************************************/
446
447/* Called by l2tp_core when a session socket is being closed.
448 */
449static void pppol2tp_session_close(struct l2tp_session *session)
450{
451 struct pppol2tp_session *ps = l2tp_session_priv(session);
452 struct sock *sk = ps->sock;
453 struct sk_buff *skb;
454
455 BUG_ON(session->magic != L2TP_SESSION_MAGIC);
456
457 if (session->session_id == 0)
458 goto out;
459
460 if (sk != NULL) {
461 lock_sock(sk);
462
463 if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
464 pppox_unbind_sock(sk);
465 sk->sk_state = PPPOX_DEAD;
466 sk->sk_state_change(sk);
467 }
468
469 /* Purge any queued data */
470 skb_queue_purge(&sk->sk_receive_queue);
471 skb_queue_purge(&sk->sk_write_queue);
472 while ((skb = skb_dequeue(&session->reorder_q))) {
473 kfree_skb(skb);
474 sock_put(sk);
475 }
476
477 release_sock(sk);
478 }
479
480out:
481 return;
482}
483
484/* Really kill the session socket. (Called from sock_put() if
485 * refcnt == 0.)
486 */
487static void pppol2tp_session_destruct(struct sock *sk)
488{
489 struct l2tp_session *session;
490
491 if (sk->sk_user_data != NULL) {
492 session = sk->sk_user_data;
493 if (session == NULL)
494 goto out;
495
496 sk->sk_user_data = NULL;
497 BUG_ON(session->magic != L2TP_SESSION_MAGIC);
498 l2tp_session_dec_refcount(session);
499 }
500
501out:
502 return;
503}
504
505/* Called when the PPPoX socket (session) is closed.
506 */
507static int pppol2tp_release(struct socket *sock)
508{
509 struct sock *sk = sock->sk;
510 struct l2tp_session *session;
511 int error;
512
513 if (!sk)
514 return 0;
515
516 error = -EBADF;
517 lock_sock(sk);
518 if (sock_flag(sk, SOCK_DEAD) != 0)
519 goto error;
520
521 pppox_unbind_sock(sk);
522
523 /* Signal the death of the socket. */
524 sk->sk_state = PPPOX_DEAD;
525 sock_orphan(sk);
526 sock->sk = NULL;
527
528 session = pppol2tp_sock_to_session(sk);
529
530 /* Purge any queued data */
531 skb_queue_purge(&sk->sk_receive_queue);
532 skb_queue_purge(&sk->sk_write_queue);
533 if (session != NULL) {
534 struct sk_buff *skb;
535 while ((skb = skb_dequeue(&session->reorder_q))) {
536 kfree_skb(skb);
537 sock_put(sk);
538 }
539 sock_put(sk);
540 }
541
542 release_sock(sk);
543
544 /* This will delete the session context via
545 * pppol2tp_session_destruct() if the socket's refcnt drops to
546 * zero.
547 */
548 sock_put(sk);
549
550 return 0;
551
552error:
553 release_sock(sk);
554 return error;
555}
556
557static struct proto pppol2tp_sk_proto = {
558 .name = "PPPOL2TP",
559 .owner = THIS_MODULE,
560 .obj_size = sizeof(struct pppox_sock),
561};
562
563static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb)
564{
565 int rc;
566
567 rc = l2tp_udp_encap_recv(sk, skb);
568 if (rc)
569 kfree_skb(skb);
570
571 return NET_RX_SUCCESS;
572}
573
574/* socket() handler. Initialize a new struct sock.
575 */
576static int pppol2tp_create(struct net *net, struct socket *sock)
577{
578 int error = -ENOMEM;
579 struct sock *sk;
580
581 sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto);
582 if (!sk)
583 goto out;
584
585 sock_init_data(sock, sk);
586
587 sock->state = SS_UNCONNECTED;
588 sock->ops = &pppol2tp_ops;
589
590 sk->sk_backlog_rcv = pppol2tp_backlog_recv;
591 sk->sk_protocol = PX_PROTO_OL2TP;
592 sk->sk_family = PF_PPPOX;
593 sk->sk_state = PPPOX_NONE;
594 sk->sk_type = SOCK_STREAM;
595 sk->sk_destruct = pppol2tp_session_destruct;
596
597 error = 0;
598
599out:
600 return error;
601}
602
603#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
604static void pppol2tp_show(struct seq_file *m, void *arg)
605{
606 struct l2tp_session *session = arg;
607 struct pppol2tp_session *ps = l2tp_session_priv(session);
608
609 if (ps) {
610 struct pppox_sock *po = pppox_sk(ps->sock);
611 if (po)
612 seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
613 }
614}
615#endif
616
617/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
618 */
619static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
620 int sockaddr_len, int flags)
621{
622 struct sock *sk = sock->sk;
623 struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
624 struct sockaddr_pppol2tpv3 *sp3 = (struct sockaddr_pppol2tpv3 *) uservaddr;
625 struct pppox_sock *po = pppox_sk(sk);
626 struct l2tp_session *session = NULL;
627 struct l2tp_tunnel *tunnel;
628 struct pppol2tp_session *ps;
629 struct dst_entry *dst;
630 struct l2tp_session_cfg cfg = { 0, };
631 int error = 0;
632 u32 tunnel_id, peer_tunnel_id;
633 u32 session_id, peer_session_id;
634 int ver = 2;
635 int fd;
636
637 lock_sock(sk);
638
639 error = -EINVAL;
640 if (sp->sa_protocol != PX_PROTO_OL2TP)
641 goto end;
642
643 /* Check for already bound sockets */
644 error = -EBUSY;
645 if (sk->sk_state & PPPOX_CONNECTED)
646 goto end;
647
648	/* We don't support rebinding anyway */
649 error = -EALREADY;
650 if (sk->sk_user_data)
651 goto end; /* socket is already attached */
652
653 /* Get params from socket address. Handle L2TPv2 and L2TPv3 */
654 if (sockaddr_len == sizeof(struct sockaddr_pppol2tp)) {
655 fd = sp->pppol2tp.fd;
656 tunnel_id = sp->pppol2tp.s_tunnel;
657 peer_tunnel_id = sp->pppol2tp.d_tunnel;
658 session_id = sp->pppol2tp.s_session;
659 peer_session_id = sp->pppol2tp.d_session;
660 } else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3)) {
661 ver = 3;
662 fd = sp3->pppol2tp.fd;
663 tunnel_id = sp3->pppol2tp.s_tunnel;
664 peer_tunnel_id = sp3->pppol2tp.d_tunnel;
665 session_id = sp3->pppol2tp.s_session;
666 peer_session_id = sp3->pppol2tp.d_session;
667 } else {
668 error = -EINVAL;
669 goto end; /* bad socket address */
670 }
671
672 /* Don't bind if tunnel_id is 0 */
673 error = -EINVAL;
674 if (tunnel_id == 0)
675 goto end;
676
677 tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
678
679	/* Special case: create tunnel context if session_id and
680	 * peer_session_id are 0. Otherwise look up the tunnel using the supplied
681 * tunnel id.
682 */
683 if ((session_id == 0) && (peer_session_id == 0)) {
684 if (tunnel == NULL) {
685 struct l2tp_tunnel_cfg tcfg = {
686 .encap = L2TP_ENCAPTYPE_UDP,
687 .debug = 0,
688 };
689 error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
690 if (error < 0)
691 goto end;
692 }
693 } else {
694 /* Error if we can't find the tunnel */
695 error = -ENOENT;
696 if (tunnel == NULL)
697 goto end;
698
699 /* Error if socket is not prepped */
700 if (tunnel->sock == NULL)
701 goto end;
702 }
703
704 if (tunnel->recv_payload_hook == NULL)
705 tunnel->recv_payload_hook = pppol2tp_recv_payload_hook;
706
707 if (tunnel->peer_tunnel_id == 0) {
708 if (ver == 2)
709 tunnel->peer_tunnel_id = sp->pppol2tp.d_tunnel;
710 else
711 tunnel->peer_tunnel_id = sp3->pppol2tp.d_tunnel;
712 }
713
714 /* Create session if it doesn't already exist. We handle the
715 * case where a session was previously created by the netlink
716	 * interface by checking that the session doesn't already have
717	 * a socket and that its tunnel socket is the one we expect. If any
718 * of those checks fail, return EEXIST to the caller.
719 */
720 session = l2tp_session_find(sock_net(sk), tunnel, session_id);
721 if (session == NULL) {
722 /* Default MTU must allow space for UDP/L2TP/PPP
723 * headers.
724 */
725 cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
726
727 /* Allocate and initialize a new session context. */
728 session = l2tp_session_create(sizeof(struct pppol2tp_session),
729 tunnel, session_id,
730 peer_session_id, &cfg);
731 if (session == NULL) {
732 error = -ENOMEM;
733 goto end;
734 }
735 } else {
736 ps = l2tp_session_priv(session);
737 error = -EEXIST;
738 if (ps->sock != NULL)
739 goto end;
740
741 /* consistency checks */
742 if (ps->tunnel_sock != tunnel->sock)
743 goto end;
744 }
745
746 /* Associate session with its PPPoL2TP socket */
747 ps = l2tp_session_priv(session);
748 ps->owner = current->pid;
749 ps->sock = sk;
750 ps->tunnel_sock = tunnel->sock;
751
752 session->recv_skb = pppol2tp_recv;
753 session->session_close = pppol2tp_session_close;
754#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
755 session->show = pppol2tp_show;
756#endif
757
758 /* We need to know each time a skb is dropped from the reorder
759 * queue.
760 */
761 session->ref = pppol2tp_session_sock_hold;
762 session->deref = pppol2tp_session_sock_put;
763
764 /* If PMTU discovery was enabled, use the MTU that was discovered */
765 dst = sk_dst_get(sk);
766 if (dst != NULL) {
767 u32 pmtu = dst_mtu(__sk_dst_get(sk));
768 if (pmtu != 0)
769 session->mtu = session->mru = pmtu -
770 PPPOL2TP_HEADER_OVERHEAD;
771 dst_release(dst);
772 }
773
774 /* Special case: if source & dest session_id == 0x0000, this
775 * socket is being created to manage the tunnel. Just set up
776 * the internal context for use by ioctl() and sockopt()
777 * handlers.
778 */
779 if ((session->session_id == 0) &&
780 (session->peer_session_id == 0)) {
781 error = 0;
782 goto out_no_ppp;
783 }
784
785 /* The only header we need to worry about is the L2TP
786 * header. This size is different depending on whether
787 * sequence numbers are enabled for the data channel.
788 */
789 po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
790
791 po->chan.private = sk;
792 po->chan.ops = &pppol2tp_chan_ops;
793 po->chan.mtu = session->mtu;
794
795 error = ppp_register_net_channel(sock_net(sk), &po->chan);
796 if (error)
797 goto end;
798
799out_no_ppp:
800 /* This is how we get the session context from the socket. */
801 sk->sk_user_data = session;
802 sk->sk_state = PPPOX_CONNECTED;
803 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
804 "%s: created\n", session->name);
805
806end:
807 release_sock(sk);
808
809 return error;
810}
811
812#ifdef CONFIG_L2TP_V3
813
814/* Called when creating sessions via the netlink interface.
815 */
816static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
817{
818 int error;
819 struct l2tp_tunnel *tunnel;
820 struct l2tp_session *session;
821 struct pppol2tp_session *ps;
822
823 tunnel = l2tp_tunnel_find(net, tunnel_id);
824
825 /* Error if we can't find the tunnel */
826 error = -ENOENT;
827 if (tunnel == NULL)
828 goto out;
829
830 /* Error if tunnel socket is not prepped */
831 if (tunnel->sock == NULL)
832 goto out;
833
834 /* Check that this session doesn't already exist */
835 error = -EEXIST;
836 session = l2tp_session_find(net, tunnel, session_id);
837 if (session != NULL)
838 goto out;
839
840 /* Default MTU values. */
841 if (cfg->mtu == 0)
842 cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
843 if (cfg->mru == 0)
844 cfg->mru = cfg->mtu;
845
846 /* Allocate and initialize a new session context. */
847 error = -ENOMEM;
848 session = l2tp_session_create(sizeof(struct pppol2tp_session),
849 tunnel, session_id,
850 peer_session_id, cfg);
851 if (session == NULL)
852 goto out;
853
854 ps = l2tp_session_priv(session);
855 ps->tunnel_sock = tunnel->sock;
856
857 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
858 "%s: created\n", session->name);
859
860 error = 0;
861
862out:
863 return error;
864}
865
866/* Called when deleting sessions via the netlink interface.
867 */
868static int pppol2tp_session_delete(struct l2tp_session *session)
869{
870 struct pppol2tp_session *ps = l2tp_session_priv(session);
871
872 if (ps->sock == NULL)
873 l2tp_session_dec_refcount(session);
874
875 return 0;
876}
877
878#endif /* CONFIG_L2TP_V3 */
879
880/* getname() support.
881 */
882static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
883 int *usockaddr_len, int peer)
884{
885 int len = 0;
886 int error = 0;
887 struct l2tp_session *session;
888 struct l2tp_tunnel *tunnel;
889 struct sock *sk = sock->sk;
890 struct inet_sock *inet;
891 struct pppol2tp_session *pls;
892
893 error = -ENOTCONN;
894 if (sk == NULL)
895 goto end;
896 if (sk->sk_state != PPPOX_CONNECTED)
897 goto end;
898
899 error = -EBADF;
900 session = pppol2tp_sock_to_session(sk);
901 if (session == NULL)
902 goto end;
903
904 pls = l2tp_session_priv(session);
905 tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock);
906 if (tunnel == NULL) {
907 error = -EBADF;
908 goto end_put_sess;
909 }
910
911 inet = inet_sk(sk);
912 if (tunnel->version == 2) {
913 struct sockaddr_pppol2tp sp;
914 len = sizeof(sp);
915 memset(&sp, 0, len);
916 sp.sa_family = AF_PPPOX;
917 sp.sa_protocol = PX_PROTO_OL2TP;
918 sp.pppol2tp.fd = tunnel->fd;
919 sp.pppol2tp.pid = pls->owner;
920 sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
921 sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
922 sp.pppol2tp.s_session = session->session_id;
923 sp.pppol2tp.d_session = session->peer_session_id;
924 sp.pppol2tp.addr.sin_family = AF_INET;
925 sp.pppol2tp.addr.sin_port = inet->inet_dport;
926 sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
927 memcpy(uaddr, &sp, len);
928 } else if (tunnel->version == 3) {
929 struct sockaddr_pppol2tpv3 sp;
930 len = sizeof(sp);
931 memset(&sp, 0, len);
932 sp.sa_family = AF_PPPOX;
933 sp.sa_protocol = PX_PROTO_OL2TP;
934 sp.pppol2tp.fd = tunnel->fd;
935 sp.pppol2tp.pid = pls->owner;
936 sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
937 sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
938 sp.pppol2tp.s_session = session->session_id;
939 sp.pppol2tp.d_session = session->peer_session_id;
940 sp.pppol2tp.addr.sin_family = AF_INET;
941 sp.pppol2tp.addr.sin_port = inet->inet_dport;
942 sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
943 memcpy(uaddr, &sp, len);
944 }
945
946 *usockaddr_len = len;
947
948 sock_put(pls->tunnel_sock);
949end_put_sess:
950 sock_put(sk);
951 error = 0;
952
953end:
954 return error;
955}
956
957/****************************************************************************
958 * ioctl() handlers.
959 *
960 * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
961 * sockets. However, in order to control kernel tunnel features, we allow
962 * userspace to create a special "tunnel" PPPoX socket which is used for
963 * control only. Tunnel PPPoX sockets have session_id == 0 and simply allow
964 * the user application to issue L2TP setsockopt(), getsockopt() and ioctl()
965 * calls.
966 ****************************************************************************/
967
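/* A hedged userspace sketch of the stats ioctl handled below.
 * PPPIOCGL2TPSTATS is from <linux/if_ppp.h>; on a tunnel management
 * socket, stats.session_id == 0 selects the tunnel's own counters
 * while a nonzero value selects that session's (see
 * pppol2tp_tunnel_ioctl() below). Error handling is omitted.
 *
 *	struct pppol2tp_ioc_stats stats;
 *
 *	memset(&stats, 0, sizeof(stats));
 *	if (ioctl(fd, PPPIOCGL2TPSTATS, &stats) == 0)
 *		printf("tx %llu rx %llu\n",
 *		       (unsigned long long) stats.tx_packets,
 *		       (unsigned long long) stats.rx_packets);
 */
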
968static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest,
969 struct l2tp_stats *stats)
970{
971 dest->tx_packets = stats->tx_packets;
972 dest->tx_bytes = stats->tx_bytes;
973 dest->tx_errors = stats->tx_errors;
974 dest->rx_packets = stats->rx_packets;
975 dest->rx_bytes = stats->rx_bytes;
976 dest->rx_seq_discards = stats->rx_seq_discards;
977 dest->rx_oos_packets = stats->rx_oos_packets;
978 dest->rx_errors = stats->rx_errors;
979}
980
981/* Session ioctl helper.
982 */
983static int pppol2tp_session_ioctl(struct l2tp_session *session,
984 unsigned int cmd, unsigned long arg)
985{
986 struct ifreq ifr;
987 int err = 0;
988 struct sock *sk;
989 int val = (int) arg;
990 struct pppol2tp_session *ps = l2tp_session_priv(session);
991 struct l2tp_tunnel *tunnel = session->tunnel;
992 struct pppol2tp_ioc_stats stats;
993
994 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
995 "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
996 session->name, cmd, arg);
997
998 sk = ps->sock;
999 sock_hold(sk);
1000
1001 switch (cmd) {
1002 case SIOCGIFMTU:
1003 err = -ENXIO;
1004 if (!(sk->sk_state & PPPOX_CONNECTED))
1005 break;
1006
1007 err = -EFAULT;
1008 if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
1009 break;
1010 ifr.ifr_mtu = session->mtu;
1011 if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
1012 break;
1013
1014 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1015 "%s: get mtu=%d\n", session->name, session->mtu);
1016 err = 0;
1017 break;
1018
1019 case SIOCSIFMTU:
1020 err = -ENXIO;
1021 if (!(sk->sk_state & PPPOX_CONNECTED))
1022 break;
1023
1024 err = -EFAULT;
1025 if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
1026 break;
1027
1028 session->mtu = ifr.ifr_mtu;
1029
1030 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1031 "%s: set mtu=%d\n", session->name, session->mtu);
1032 err = 0;
1033 break;
1034
1035 case PPPIOCGMRU:
1036 err = -ENXIO;
1037 if (!(sk->sk_state & PPPOX_CONNECTED))
1038 break;
1039
1040 err = -EFAULT;
1041 if (put_user(session->mru, (int __user *) arg))
1042 break;
1043
1044 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1045 "%s: get mru=%d\n", session->name, session->mru);
1046 err = 0;
1047 break;
1048
1049 case PPPIOCSMRU:
1050 err = -ENXIO;
1051 if (!(sk->sk_state & PPPOX_CONNECTED))
1052 break;
1053
1054 err = -EFAULT;
1055 if (get_user(val, (int __user *) arg))
1056 break;
1057
1058 session->mru = val;
1059 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1060 "%s: set mru=%d\n", session->name, session->mru);
1061 err = 0;
1062 break;
1063
1064 case PPPIOCGFLAGS:
1065 err = -EFAULT;
1066 if (put_user(ps->flags, (int __user *) arg))
1067 break;
1068
1069 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1070 "%s: get flags=%d\n", session->name, ps->flags);
1071 err = 0;
1072 break;
1073
1074 case PPPIOCSFLAGS:
1075 err = -EFAULT;
1076 if (get_user(val, (int __user *) arg))
1077 break;
1078 ps->flags = val;
1079 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1080 "%s: set flags=%d\n", session->name, ps->flags);
1081 err = 0;
1082 break;
1083
1084 case PPPIOCGL2TPSTATS:
1085 err = -ENXIO;
1086 if (!(sk->sk_state & PPPOX_CONNECTED))
1087 break;
1088
1089 memset(&stats, 0, sizeof(stats));
1090 stats.tunnel_id = tunnel->tunnel_id;
1091 stats.session_id = session->session_id;
1092 pppol2tp_copy_stats(&stats, &session->stats);
1093 if (copy_to_user((void __user *) arg, &stats,
1094 sizeof(stats)))
1095 break;
1096 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1097 "%s: get L2TP stats\n", session->name);
1098 err = 0;
1099 break;
1100
1101 default:
1102 err = -ENOSYS;
1103 break;
1104 }
1105
1106 sock_put(sk);
1107
1108 return err;
1109}
1110
1111/* Tunnel ioctl helper.
1112 *
1113 * Note the special handling for PPPIOCGL2TPSTATS below. If the ioctl data
1114 * specifies a session_id, the session ioctl handler is called. This allows an
1115 * application to retrieve session stats via a tunnel socket.
1116 */
1117static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
1118 unsigned int cmd, unsigned long arg)
1119{
1120 int err = 0;
1121 struct sock *sk;
1122 struct pppol2tp_ioc_stats stats;
1123
1124 PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
1125 "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n",
1126 tunnel->name, cmd, arg);
1127
1128 sk = tunnel->sock;
1129 sock_hold(sk);
1130
1131 switch (cmd) {
1132 case PPPIOCGL2TPSTATS:
1133 err = -ENXIO;
1134 if (!(sk->sk_state & PPPOX_CONNECTED))
1135 break;
1136
1137 if (copy_from_user(&stats, (void __user *) arg,
1138 sizeof(stats))) {
1139 err = -EFAULT;
1140 break;
1141 }
1142 if (stats.session_id != 0) {
1143 /* resend to session ioctl handler */
1144 struct l2tp_session *session =
1145 l2tp_session_find(sock_net(sk), tunnel, stats.session_id);
1146 if (session != NULL)
1147 err = pppol2tp_session_ioctl(session, cmd, arg);
1148 else
1149 err = -EBADR;
1150 break;
1151 }
1152#ifdef CONFIG_XFRM
1153 stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0;
1154#endif
1155 pppol2tp_copy_stats(&stats, &tunnel->stats);
1156 if (copy_to_user((void __user *) arg, &stats, sizeof(stats))) {
1157 err = -EFAULT;
1158 break;
1159 }
1160 PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1161 "%s: get L2TP stats\n", tunnel->name);
1162 err = 0;
1163 break;
1164
1165 default:
1166 err = -ENOSYS;
1167 break;
1168 }
1169
1170 sock_put(sk);
1171
1172 return err;
1173}
1174
1175/* Main ioctl() handler.
1176 * Dispatch to tunnel or session helpers depending on the socket.
1177 */
1178static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
1179 unsigned long arg)
1180{
1181 struct sock *sk = sock->sk;
1182 struct l2tp_session *session;
1183 struct l2tp_tunnel *tunnel;
1184 struct pppol2tp_session *ps;
1185 int err;
1186
1187 if (!sk)
1188 return 0;
1189
1190 err = -EBADF;
1191 if (sock_flag(sk, SOCK_DEAD) != 0)
1192 goto end;
1193
1194 err = -ENOTCONN;
1195 if ((sk->sk_user_data == NULL) ||
1196 (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND))))
1197 goto end;
1198
1199 /* Get session context from the socket */
1200 err = -EBADF;
1201 session = pppol2tp_sock_to_session(sk);
1202 if (session == NULL)
1203 goto end;
1204
1205	/* Special case: if the session's session_id is zero, treat the ioctl as a
1206 * tunnel ioctl
1207 */
1208 ps = l2tp_session_priv(session);
1209 if ((session->session_id == 0) &&
1210 (session->peer_session_id == 0)) {
1211 err = -EBADF;
1212 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
1213 if (tunnel == NULL)
1214 goto end_put_sess;
1215
1216 err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
1217 sock_put(ps->tunnel_sock);
1218 goto end_put_sess;
1219 }
1220
1221 err = pppol2tp_session_ioctl(session, cmd, arg);
1222
1223end_put_sess:
1224 sock_put(sk);
1225end:
1226 return err;
1227}
1228
1229/*****************************************************************************
1230 * setsockopt() / getsockopt() support.
1231 *
1232 * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
1233 * sockets. In order to control kernel tunnel features, we allow userspace to
1234 * create a special "tunnel" PPPoX socket which is used for control only.
1235 * Tunnel PPPoX sockets have session_id == 0 and simply allow the user
1236 * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls.
1237 *****************************************************************************/
1238
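/* A hedged userspace sketch of the options handled below.
 * SOL_PPPOL2TP is from <linux/socket.h> and the PPPOL2TP_SO_* values
 * from <linux/if_pppol2tp.h>; fd is assumed to be a connected
 * PPPoL2TP session socket. Error handling is omitted.
 *
 *	int on = 1;
 *	int timeout_ms = 100;
 *
 *	setsockopt(fd, SOL_PPPOL2TP, PPPOL2TP_SO_SENDSEQ,
 *		   &on, sizeof(on));
 *	setsockopt(fd, SOL_PPPOL2TP, PPPOL2TP_SO_REORDERTO,
 *		   &timeout_ms, sizeof(timeout_ms));
 */
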
1239/* Tunnel setsockopt() helper.
1240 */
1241static int pppol2tp_tunnel_setsockopt(struct sock *sk,
1242 struct l2tp_tunnel *tunnel,
1243 int optname, int val)
1244{
1245 int err = 0;
1246
1247 switch (optname) {
1248 case PPPOL2TP_SO_DEBUG:
1249 tunnel->debug = val;
1250 PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1251 "%s: set debug=%x\n", tunnel->name, tunnel->debug);
1252 break;
1253
1254 default:
1255 err = -ENOPROTOOPT;
1256 break;
1257 }
1258
1259 return err;
1260}
1261
1262/* Session setsockopt helper.
1263 */
1264static int pppol2tp_session_setsockopt(struct sock *sk,
1265 struct l2tp_session *session,
1266 int optname, int val)
1267{
1268 int err = 0;
1269 struct pppol2tp_session *ps = l2tp_session_priv(session);
1270
1271 switch (optname) {
1272 case PPPOL2TP_SO_RECVSEQ:
1273 if ((val != 0) && (val != 1)) {
1274 err = -EINVAL;
1275 break;
1276 }
1277 session->recv_seq = val ? -1 : 0;
1278 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1279 "%s: set recv_seq=%d\n", session->name, session->recv_seq);
1280 break;
1281
1282 case PPPOL2TP_SO_SENDSEQ:
1283 if ((val != 0) && (val != 1)) {
1284 err = -EINVAL;
1285 break;
1286 }
1287 session->send_seq = val ? -1 : 0;
1288 {
1289 struct sock *ssk = ps->sock;
1290 struct pppox_sock *po = pppox_sk(ssk);
1291 po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
1292 PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
1293 }
1294 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1295 "%s: set send_seq=%d\n", session->name, session->send_seq);
1296 break;
1297
1298 case PPPOL2TP_SO_LNSMODE:
1299 if ((val != 0) && (val != 1)) {
1300 err = -EINVAL;
1301 break;
1302 }
1303 session->lns_mode = val ? -1 : 0;
1304 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1305 "%s: set lns_mode=%d\n", session->name, session->lns_mode);
1306 break;
1307
1308 case PPPOL2TP_SO_DEBUG:
1309 session->debug = val;
1310 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1311 "%s: set debug=%x\n", session->name, session->debug);
1312 break;
1313
1314 case PPPOL2TP_SO_REORDERTO:
1315 session->reorder_timeout = msecs_to_jiffies(val);
1316 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1317 "%s: set reorder_timeout=%d\n", session->name, session->reorder_timeout);
1318 break;
1319
1320 default:
1321 err = -ENOPROTOOPT;
1322 break;
1323 }
1324
1325 return err;
1326}
1327
1328/* Main setsockopt() entry point.
1329 * Does API checks, then calls either the tunnel or session setsockopt
1330 * handler, according to whether the PPPoL2TP socket is for a regular
1331 * session or the special tunnel type.
1332 */
1333static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
1334 char __user *optval, unsigned int optlen)
1335{
1336 struct sock *sk = sock->sk;
1337 struct l2tp_session *session;
1338 struct l2tp_tunnel *tunnel;
1339 struct pppol2tp_session *ps;
1340 int val;
1341 int err;
1342
1343 if (level != SOL_PPPOL2TP)
1344 return udp_prot.setsockopt(sk, level, optname, optval, optlen);
1345
1346 if (optlen < sizeof(int))
1347 return -EINVAL;
1348
1349 if (get_user(val, (int __user *)optval))
1350 return -EFAULT;
1351
1352 err = -ENOTCONN;
1353 if (sk->sk_user_data == NULL)
1354 goto end;
1355
1356 /* Get session context from the socket */
1357 err = -EBADF;
1358 session = pppol2tp_sock_to_session(sk);
1359 if (session == NULL)
1360 goto end;
1361
1362 /* Special case: if session_id == 0x0000, treat as operation on tunnel
1363 */
1364 ps = l2tp_session_priv(session);
1365 if ((session->session_id == 0) &&
1366 (session->peer_session_id == 0)) {
1367 err = -EBADF;
1368 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
1369 if (tunnel == NULL)
1370 goto end_put_sess;
1371
1372 err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
1373 sock_put(ps->tunnel_sock);
1374 } else
1375 err = pppol2tp_session_setsockopt(sk, session, optname, val);
1378
1379end_put_sess:
1380 sock_put(sk);
1381end:
1382 return err;
1383}
1384
1385/* Tunnel getsockopt helper. Called with sock locked.
1386 */
1387static int pppol2tp_tunnel_getsockopt(struct sock *sk,
1388 struct l2tp_tunnel *tunnel,
1389 int optname, int *val)
1390{
1391 int err = 0;
1392
1393 switch (optname) {
1394 case PPPOL2TP_SO_DEBUG:
1395 *val = tunnel->debug;
1396 PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1397 "%s: get debug=%x\n", tunnel->name, tunnel->debug);
1398 break;
1399
1400 default:
1401 err = -ENOPROTOOPT;
1402 break;
1403 }
1404
1405 return err;
1406}
1407
1408/* Session getsockopt helper. Called with sock locked.
1409 */
1410static int pppol2tp_session_getsockopt(struct sock *sk,
1411 struct l2tp_session *session,
1412 int optname, int *val)
1413{
1414 int err = 0;
1415
1416 switch (optname) {
1417 case PPPOL2TP_SO_RECVSEQ:
1418 *val = session->recv_seq;
1419 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1420 "%s: get recv_seq=%d\n", session->name, *val);
1421 break;
1422
1423 case PPPOL2TP_SO_SENDSEQ:
1424 *val = session->send_seq;
1425 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1426 "%s: get send_seq=%d\n", session->name, *val);
1427 break;
1428
1429 case PPPOL2TP_SO_LNSMODE:
1430 *val = session->lns_mode;
1431 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1432 "%s: get lns_mode=%d\n", session->name, *val);
1433 break;
1434
1435 case PPPOL2TP_SO_DEBUG:
1436 *val = session->debug;
1437 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1438 "%s: get debug=%d\n", session->name, *val);
1439 break;
1440
1441 case PPPOL2TP_SO_REORDERTO:
1442 *val = (int) jiffies_to_msecs(session->reorder_timeout);
1443 PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
1444 "%s: get reorder_timeout=%d\n", session->name, *val);
1445 break;
1446
1447 default:
1448 err = -ENOPROTOOPT;
1449 }
1450
1451 return err;
1452}
1453
1454/* Main getsockopt() entry point.
1455 * Does API checks, then calls either the tunnel or session getsockopt
1456 * handler, according to whether the PPPoX socket is for a regular session
1457 * or the special tunnel type.
1458 */
1459static int pppol2tp_getsockopt(struct socket *sock, int level,
1460 int optname, char __user *optval, int __user *optlen)
1461{
1462 struct sock *sk = sock->sk;
1463 struct l2tp_session *session;
1464 struct l2tp_tunnel *tunnel;
1465 int val, len;
1466 int err;
1467 struct pppol2tp_session *ps;
1468
1469 if (level != SOL_PPPOL2TP)
1470 return udp_prot.getsockopt(sk, level, optname, optval, optlen);
1471
1472 if (get_user(len, (int __user *) optlen))
1473 return -EFAULT;
1474
1475 len = min_t(unsigned int, len, sizeof(int));
1476
1477 if (len < 0)
1478 return -EINVAL;
1479
1480 err = -ENOTCONN;
1481 if (sk->sk_user_data == NULL)
1482 goto end;
1483
1484 /* Get the session context */
1485 err = -EBADF;
1486 session = pppol2tp_sock_to_session(sk);
1487 if (session == NULL)
1488 goto end;
1489
1490 /* Special case: if session_id == 0x0000, treat as operation on tunnel */
1491 ps = l2tp_session_priv(session);
1492 if ((session->session_id == 0) &&
1493 (session->peer_session_id == 0)) {
1494 err = -EBADF;
1495 tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
1496 if (tunnel == NULL)
1497 goto end_put_sess;
1498
1499 err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
1500 sock_put(ps->tunnel_sock);
1501 } else
1502 err = pppol2tp_session_getsockopt(sk, session, optname, &val);
1503	if (err)
		goto end_put_sess;

1504	err = -EFAULT;
1505 if (put_user(len, (int __user *) optlen))
1506 goto end_put_sess;
1507
1508 if (copy_to_user((void __user *) optval, &val, len))
1509 goto end_put_sess;
1510
1511 err = 0;
1512
1513end_put_sess:
1514 sock_put(sk);
1515end:
1516 return err;
1517}
1518
1519/*****************************************************************************
1520 * /proc filesystem for debug
1521 * Since the original pppol2tp driver provided /proc/net/pppol2tp for
1522 * L2TPv2, we dump only L2TPv2 tunnels and sessions here.
1523 *****************************************************************************/
1524
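/* Layout of /proc/net/pppol2tp, as built by pppol2tp_seq_show() below
 * (header lines only; one block per tunnel, one row group per session):
 *
 *	PPPoL2TP driver info, V2.0
 *	TUNNEL name, user-data-ok session-count
 *	 debug tx-pkts/bytes/errs rx-pkts/bytes/errs
 *	 SESSION name, addr/port src-tid/sid dest-tid/sid state user-data-ok
 *	 mtu/mru/rcvseq/sendseq/lns debug reorderto
 *	 nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs
 */
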
1525static unsigned int pppol2tp_net_id;
1526
1527#ifdef CONFIG_PROC_FS
1528
1529struct pppol2tp_seq_data {
1530 struct seq_net_private p;
1531 int tunnel_idx; /* current tunnel */
1532 int session_idx; /* index of session within current tunnel */
1533 struct l2tp_tunnel *tunnel;
1534 struct l2tp_session *session; /* NULL means get next tunnel */
1535};
1536
1537static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
1538{
1539 for (;;) {
1540 pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx);
1541 pd->tunnel_idx++;
1542
1543 if (pd->tunnel == NULL)
1544 break;
1545
1546 /* Ignore L2TPv3 tunnels */
1547 if (pd->tunnel->version < 3)
1548 break;
1549 }
1550}
1551
1552static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
1553{
1554 pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
1555 pd->session_idx++;
1556
1557 if (pd->session == NULL) {
1558 pd->session_idx = 0;
1559 pppol2tp_next_tunnel(net, pd);
1560 }
1561}
1562
1563static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
1564{
1565 struct pppol2tp_seq_data *pd = SEQ_START_TOKEN;
1566 loff_t pos = *offs;
1567 struct net *net;
1568
1569 if (!pos)
1570 goto out;
1571
1572 BUG_ON(m->private == NULL);
1573 pd = m->private;
1574 net = seq_file_net(m);
1575
1576 if (pd->tunnel == NULL)
1577 pppol2tp_next_tunnel(net, pd);
1578 else
1579 pppol2tp_next_session(net, pd);
1580
1581 /* NULL tunnel and session indicates end of list */
1582 if ((pd->tunnel == NULL) && (pd->session == NULL))
1583 pd = NULL;
1584
1585out:
1586 return pd;
1587}
1588
1589static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos)
1590{
1591 (*pos)++;
1592 return NULL;
1593}
1594
1595static void pppol2tp_seq_stop(struct seq_file *p, void *v)
1596{
1597 /* nothing to do */
1598}
1599
1600static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
1601{
1602 struct l2tp_tunnel *tunnel = v;
1603
1604 seq_printf(m, "\nTUNNEL '%s', %c %d\n",
1605 tunnel->name,
1606 (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N',
1607 atomic_read(&tunnel->ref_count) - 1);
1608 seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
1609 tunnel->debug,
1610 (unsigned long long)tunnel->stats.tx_packets,
1611 (unsigned long long)tunnel->stats.tx_bytes,
1612 (unsigned long long)tunnel->stats.tx_errors,
1613 (unsigned long long)tunnel->stats.rx_packets,
1614 (unsigned long long)tunnel->stats.rx_bytes,
1615 (unsigned long long)tunnel->stats.rx_errors);
1616}
1617
1618static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
1619{
1620 struct l2tp_session *session = v;
1621 struct l2tp_tunnel *tunnel = session->tunnel;
1622 struct pppol2tp_session *ps = l2tp_session_priv(session);
1623 struct pppox_sock *po = pppox_sk(ps->sock);
1624 u32 ip = 0;
1625 u16 port = 0;
1626
1627 if (tunnel->sock) {
1628 struct inet_sock *inet = inet_sk(tunnel->sock);
1629 ip = ntohl(inet->inet_saddr);
1630 port = ntohs(inet->inet_sport);
1631 }
1632
1633 seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> "
1634 "%04X/%04X %d %c\n",
1635 session->name, ip, port,
1636 tunnel->tunnel_id,
1637 session->session_id,
1638 tunnel->peer_tunnel_id,
1639 session->peer_session_id,
1640 ps->sock->sk_state,
1641 (session == ps->sock->sk_user_data) ?
1642 'Y' : 'N');
1643 seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n",
1644 session->mtu, session->mru,
1645 session->recv_seq ? 'R' : '-',
1646 session->send_seq ? 'S' : '-',
1647 session->lns_mode ? "LNS" : "LAC",
1648 session->debug,
1649 jiffies_to_msecs(session->reorder_timeout));
1650 seq_printf(m, " %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
1651 session->nr, session->ns,
1652 (unsigned long long)session->stats.tx_packets,
1653 (unsigned long long)session->stats.tx_bytes,
1654 (unsigned long long)session->stats.tx_errors,
1655 (unsigned long long)session->stats.rx_packets,
1656 (unsigned long long)session->stats.rx_bytes,
1657 (unsigned long long)session->stats.rx_errors);
1658
1659 if (po)
1660 seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan));
1661}
1662
1663static int pppol2tp_seq_show(struct seq_file *m, void *v)
1664{
1665 struct pppol2tp_seq_data *pd = v;
1666
1667 /* display header on line 1 */
1668 if (v == SEQ_START_TOKEN) {
1669 seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
1670 seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
1671 seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
1672 seq_puts(m, " SESSION name, addr/port src-tid/sid "
1673 "dest-tid/sid state user-data-ok\n");
1674 seq_puts(m, " mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
1675 seq_puts(m, " nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
1676 goto out;
1677 }
1678
1679 /* Show the tunnel or session context.
1680 */
1681 if (pd->session == NULL)
1682 pppol2tp_seq_tunnel_show(m, pd->tunnel);
1683 else
1684 pppol2tp_seq_session_show(m, pd->session);
1685
1686out:
1687 return 0;
1688}
1689
1690static const struct seq_operations pppol2tp_seq_ops = {
1691 .start = pppol2tp_seq_start,
1692 .next = pppol2tp_seq_next,
1693 .stop = pppol2tp_seq_stop,
1694 .show = pppol2tp_seq_show,
1695};
1696
1697/* Called when our /proc file is opened. We allocate data for use when
1698 * iterating our tunnel / session contexts and store it in the private
1699 * data of the seq_file.
1700 */
1701static int pppol2tp_proc_open(struct inode *inode, struct file *file)
1702{
1703 return seq_open_net(inode, file, &pppol2tp_seq_ops,
1704 sizeof(struct pppol2tp_seq_data));
1705}
1706
1707static const struct file_operations pppol2tp_proc_fops = {
1708 .owner = THIS_MODULE,
1709 .open = pppol2tp_proc_open,
1710 .read = seq_read,
1711 .llseek = seq_lseek,
1712 .release = seq_release_net,
1713};
1714
1715#endif /* CONFIG_PROC_FS */
1716
1717/*****************************************************************************
1718 * Network namespace
1719 *****************************************************************************/
1720
1721static __net_init int pppol2tp_init_net(struct net *net)
1722{
1723 struct proc_dir_entry *pde;
1724 int err = 0;
1725
1726 pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops);
1727 if (!pde) {
1728 err = -ENOMEM;
1729 goto out;
1730 }
1731
1732out:
1733 return err;
1734}
1735
1736static __net_exit void pppol2tp_exit_net(struct net *net)
1737{
1738 proc_net_remove(net, "pppol2tp");
1739}
1740
1741static struct pernet_operations pppol2tp_net_ops = {
1742 .init = pppol2tp_init_net,
1743 .exit = pppol2tp_exit_net,
1744 .id = &pppol2tp_net_id,
1745};
1746
1747/*****************************************************************************
1748 * Init and cleanup
1749 *****************************************************************************/
1750
1751static const struct proto_ops pppol2tp_ops = {
1752 .family = AF_PPPOX,
1753 .owner = THIS_MODULE,
1754 .release = pppol2tp_release,
1755 .bind = sock_no_bind,
1756 .connect = pppol2tp_connect,
1757 .socketpair = sock_no_socketpair,
1758 .accept = sock_no_accept,
1759 .getname = pppol2tp_getname,
1760 .poll = datagram_poll,
1761 .listen = sock_no_listen,
1762 .shutdown = sock_no_shutdown,
1763 .setsockopt = pppol2tp_setsockopt,
1764 .getsockopt = pppol2tp_getsockopt,
1765 .sendmsg = pppol2tp_sendmsg,
1766 .recvmsg = pppol2tp_recvmsg,
1767 .mmap = sock_no_mmap,
1768 .ioctl = pppox_ioctl,
1769};
1770
1771static struct pppox_proto pppol2tp_proto = {
1772 .create = pppol2tp_create,
1773 .ioctl = pppol2tp_ioctl
1774};
1775
1776#ifdef CONFIG_L2TP_V3
1777
1778static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = {
1779 .session_create = pppol2tp_session_create,
1780 .session_delete = pppol2tp_session_delete,
1781};
1782
1783#endif /* CONFIG_L2TP_V3 */
1784
1785static int __init pppol2tp_init(void)
1786{
1787 int err;
1788
1789 err = register_pernet_device(&pppol2tp_net_ops);
1790 if (err)
1791 goto out;
1792
1793 err = proto_register(&pppol2tp_sk_proto, 0);
1794 if (err)
1795 goto out_unregister_pppol2tp_pernet;
1796
1797 err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto);
1798 if (err)
1799 goto out_unregister_pppol2tp_proto;
1800
1801#ifdef CONFIG_L2TP_V3
1802 err = l2tp_nl_register_ops(L2TP_PWTYPE_PPP, &pppol2tp_nl_cmd_ops);
1803 if (err)
1804 goto out_unregister_pppox;
1805#endif
1806
1807 printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
1808 PPPOL2TP_DRV_VERSION);
1809
1810out:
1811 return err;
1812
1813#ifdef CONFIG_L2TP_V3
1814out_unregister_pppox:
1815 unregister_pppox_proto(PX_PROTO_OL2TP);
1816#endif
1817out_unregister_pppol2tp_proto:
1818 proto_unregister(&pppol2tp_sk_proto);
1819out_unregister_pppol2tp_pernet:
1820 unregister_pernet_device(&pppol2tp_net_ops);
1821 goto out;
1822}
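/*
 * [Editor's note -- not part of the file.] The error labels undo the
 * registrations in reverse order and sit below the common return, so
 * the success path falls straight through; the trailing "goto out"
 * then reuses the single "return err" above.
 */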
1823
1824static void __exit pppol2tp_exit(void)
1825{
1826#ifdef CONFIG_L2TP_V3
1827 l2tp_nl_unregister_ops(L2TP_PWTYPE_PPP);
1828#endif
1829 unregister_pppox_proto(PX_PROTO_OL2TP);
1830 proto_unregister(&pppol2tp_sk_proto);
1831 unregister_pernet_device(&pppol2tp_net_ops);
1832}
1833
1834module_init(pppol2tp_init);
1835module_exit(pppol2tp_exit);
1836
1837MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
1838MODULE_DESCRIPTION("PPP over L2TP over UDP");
1839MODULE_LICENSE("GPL");
1840MODULE_VERSION(PPPOL2TP_DRV_VERSION);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 2db6a9f75913..023ba820236f 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -536,7 +536,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
536 int rc = 0; 536 int rc = 0;
537 537
538 while (1) { 538 while (1) {
539 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 539 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
540 if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE)) 540 if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE))
541 break; 541 break;
542 rc = -ERESTARTSYS; 542 rc = -ERESTARTSYS;
@@ -547,7 +547,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
547 break; 547 break;
548 rc = 0; 548 rc = 0;
549 } 549 }
550 finish_wait(sk->sk_sleep, &wait); 550 finish_wait(sk_sleep(sk), &wait);
551 return rc; 551 return rc;
552} 552}
553 553
@@ -556,13 +556,13 @@ static int llc_ui_wait_for_conn(struct sock *sk, long timeout)
556 DEFINE_WAIT(wait); 556 DEFINE_WAIT(wait);
557 557
558 while (1) { 558 while (1) {
559 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 559 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
560 if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT)) 560 if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT))
561 break; 561 break;
562 if (signal_pending(current) || !timeout) 562 if (signal_pending(current) || !timeout)
563 break; 563 break;
564 } 564 }
565 finish_wait(sk->sk_sleep, &wait); 565 finish_wait(sk_sleep(sk), &wait);
566 return timeout; 566 return timeout;
567} 567}
568 568
@@ -573,7 +573,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
573 int rc; 573 int rc;
574 574
575 while (1) { 575 while (1) {
576 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 576 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
577 rc = 0; 577 rc = 0;
578 if (sk_wait_event(sk, &timeout, 578 if (sk_wait_event(sk, &timeout,
579 (sk->sk_shutdown & RCV_SHUTDOWN) || 579 (sk->sk_shutdown & RCV_SHUTDOWN) ||
@@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
588 if (!timeout) 588 if (!timeout)
589 break; 589 break;
590 } 590 }
591 finish_wait(sk->sk_sleep, &wait); 591 finish_wait(sk_sleep(sk), &wait);
592 return rc; 592 return rc;
593} 593}
594 594
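[Editor's note: sk_sleep() is the accessor these hunks convert to; at
this point in the tree it is believed to be simply

	static inline wait_queue_head_t *sk_sleep(struct sock *sk)
	{
		return sk->sk_sleep;
	}

so the conversion is behaviour-neutral and only hides the field access
behind a helper.]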
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 78167e81dfeb..2bb0ddff8c0f 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -144,12 +144,6 @@ static struct packet_type llc_tr_packet_type __read_mostly = {
144 144
145static int __init llc_init(void) 145static int __init llc_init(void)
146{ 146{
147 struct net_device *dev;
148
149 dev = first_net_device(&init_net);
150 if (dev != NULL)
151 dev = next_net_device(dev);
152
153 dev_add_pack(&llc_packet_type); 147 dev_add_pack(&llc_packet_type);
154 dev_add_pack(&llc_tr_packet_type); 148 dev_add_pack(&llc_tr_packet_type);
155 return 0; 149 return 0;
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index a432f0ec051c..94e7fca75b85 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -31,7 +31,7 @@ static int llc_mac_header_len(unsigned short devtype)
31 case ARPHRD_ETHER: 31 case ARPHRD_ETHER:
32 case ARPHRD_LOOPBACK: 32 case ARPHRD_LOOPBACK:
33 return sizeof(struct ethhdr); 33 return sizeof(struct ethhdr);
34#ifdef CONFIG_TR 34#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE)
35 case ARPHRD_IEEE802_TR: 35 case ARPHRD_IEEE802_TR:
36 return sizeof(struct trh_hdr); 36 return sizeof(struct trh_hdr);
37#endif 37#endif
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index a952b7f8c648..4d6f8653ec88 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -15,8 +15,12 @@ comment "CFG80211 needs to be enabled for MAC80211"
15 15
16if MAC80211 != n 16if MAC80211 != n
17 17
18config MAC80211_HAS_RC
19 def_bool n
20
18config MAC80211_RC_PID 21config MAC80211_RC_PID
19 bool "PID controller based rate control algorithm" if EMBEDDED 22 bool "PID controller based rate control algorithm" if EMBEDDED
23 select MAC80211_HAS_RC
20 ---help--- 24 ---help---
21 This option enables a TX rate control algorithm for 25 This option enables a TX rate control algorithm for
22 mac80211 that uses a PID controller to select the TX 26 mac80211 that uses a PID controller to select the TX
@@ -24,12 +28,21 @@ config MAC80211_RC_PID
24 28
25config MAC80211_RC_MINSTREL 29config MAC80211_RC_MINSTREL
26 bool "Minstrel" if EMBEDDED 30 bool "Minstrel" if EMBEDDED
31 select MAC80211_HAS_RC
27 default y 32 default y
28 ---help--- 33 ---help---
29 This option enables the 'minstrel' TX rate control algorithm 34 This option enables the 'minstrel' TX rate control algorithm
30 35
36config MAC80211_RC_MINSTREL_HT
37 bool "Minstrel 802.11n support" if EMBEDDED
38 depends on MAC80211_RC_MINSTREL
39 default y
40 ---help---
41 This option enables the 'minstrel_ht' TX rate control algorithm
42
31choice 43choice
32 prompt "Default rate control algorithm" 44 prompt "Default rate control algorithm"
45 depends on MAC80211_HAS_RC
33 default MAC80211_RC_DEFAULT_MINSTREL 46 default MAC80211_RC_DEFAULT_MINSTREL
34 ---help--- 47 ---help---
35 This option selects the default rate control algorithm 48 This option selects the default rate control algorithm
@@ -56,12 +69,16 @@ endchoice
56 69
57config MAC80211_RC_DEFAULT 70config MAC80211_RC_DEFAULT
58 string 71 string
72 default "minstrel_ht" if MAC80211_RC_DEFAULT_MINSTREL && MAC80211_RC_MINSTREL_HT
59 default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL 73 default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL
60 default "pid" if MAC80211_RC_DEFAULT_PID 74 default "pid" if MAC80211_RC_DEFAULT_PID
61 default "" 75 default ""
62 76
63endif 77endif
64 78
79comment "Some wireless drivers require a rate control algorithm"
80 depends on MAC80211_HAS_RC=n
81
65config MAC80211_MESH 82config MAC80211_MESH
66 bool "Enable mac80211 mesh networking (pre-802.11s) support" 83 bool "Enable mac80211 mesh networking (pre-802.11s) support"
67 depends on MAC80211 && EXPERIMENTAL 84 depends on MAC80211 && EXPERIMENTAL
@@ -212,8 +229,8 @@ config MAC80211_DRIVER_API_TRACER
212 depends on EVENT_TRACING 229 depends on EVENT_TRACING
213 help 230 help
214 Say Y here to make mac80211 register with the ftrace 231 Say Y here to make mac80211 register with the ftrace
215 framework for the driver API -- you can see which 232 framework for the driver API -- you can then see which
216 driver methods it is calling then by looking at the 233 driver methods it is calling and which API functions
217 trace. 234 drivers are calling by looking at the trace.
218 235
219 If unsure, say N. 236 If unsure, say Y.
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 04420291e7ad..fdb54e61d637 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -23,7 +23,8 @@ mac80211-y := \
23 key.o \ 23 key.o \
24 util.o \ 24 util.o \
25 wme.o \ 25 wme.o \
26 event.o 26 event.o \
27 chan.o
27 28
28mac80211-$(CONFIG_MAC80211_LEDS) += led.o 29mac80211-$(CONFIG_MAC80211_LEDS) += led.o
29mac80211-$(CONFIG_MAC80211_DEBUGFS) += \ 30mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
@@ -50,7 +51,11 @@ rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o
50rc80211_minstrel-y := rc80211_minstrel.o 51rc80211_minstrel-y := rc80211_minstrel.o
51rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o 52rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o
52 53
54rc80211_minstrel_ht-y := rc80211_minstrel_ht.o
55rc80211_minstrel_ht-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_ht_debugfs.o
56
53mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) 57mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y)
54mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) 58mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
59mac80211-$(CONFIG_MAC80211_RC_MINSTREL_HT) += $(rc80211_minstrel_ht-y)
55 60
56ccflags-y += -D__CHECK_ENDIAN__ 61ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index f9516a27e233..965b272499fd 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -6,36 +6,69 @@
6 * Copyright 2005-2006, Devicescape Software, Inc. 6 * Copyright 2005-2006, Devicescape Software, Inc.
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2007-2008, Intel Corporation 9 * Copyright 2007-2010, Intel Corporation
10 * 10 *
11 * This program is free software; you can redistribute it and/or modify 11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation. 13 * published by the Free Software Foundation.
14 */ 14 */
15 15
16/**
17 * DOC: RX A-MPDU aggregation
18 *
19 * Aggregation on the RX side requires only implementing the
20 * @ampdu_action callback that is invoked to start/stop any
21 * block-ack sessions for RX aggregation.
22 *
23 * When RX aggregation is started by the peer, the driver is
 24 * notified via the @ampdu_action function with the
 25 * %IEEE80211_AMPDU_RX_START action, and may reject the request,
 26 * in which case a negative response is sent to the peer; if it
 27 * accepts, a positive response is sent.
28 *
 29 * While the session is active, the device/driver is required
30 * to de-aggregate frames and pass them up one by one to mac80211,
31 * which will handle the reorder buffer.
32 *
33 * When the aggregation session is stopped again by the peer or
34 * ourselves, the driver's @ampdu_action function will be called
35 * with the action %IEEE80211_AMPDU_RX_STOP. In this case, the
36 * call must not fail.
37 */
38
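/*
 * [Editor's sketch -- not part of the patch.] A minimal driver-side
 * @ampdu_action for the RX side described above; the signature is the
 * one this kernel uses, while the example_hw_* helpers are
 * hypothetical:
 *
 *	static int example_ampdu_action(struct ieee80211_hw *hw,
 *					struct ieee80211_vif *vif,
 *					enum ieee80211_ampdu_mlme_action action,
 *					struct ieee80211_sta *sta,
 *					u16 tid, u16 *ssn)
 *	{
 *		switch (action) {
 *		case IEEE80211_AMPDU_RX_START:
 *			// nonzero -> mac80211 sends a negative response
 *			return example_hw_rx_agg_start(hw, sta, tid);
 *		case IEEE80211_AMPDU_RX_STOP:
 *			example_hw_rx_agg_stop(hw, sta, tid);
 *			return 0;	// must not fail
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}
 */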
16#include <linux/ieee80211.h> 39#include <linux/ieee80211.h>
17#include <linux/slab.h> 40#include <linux/slab.h>
18#include <net/mac80211.h> 41#include <net/mac80211.h>
19#include "ieee80211_i.h" 42#include "ieee80211_i.h"
20#include "driver-ops.h" 43#include "driver-ops.h"
21 44
22void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 45static void ieee80211_free_tid_rx(struct rcu_head *h)
23 u16 initiator, u16 reason)
24{ 46{
25 struct ieee80211_local *local = sta->local; 47 struct tid_ampdu_rx *tid_rx =
48 container_of(h, struct tid_ampdu_rx, rcu_head);
26 int i; 49 int i;
27 50
28 /* check if TID is in operational state */ 51 for (i = 0; i < tid_rx->buf_size; i++)
29 spin_lock_bh(&sta->lock); 52 dev_kfree_skb(tid_rx->reorder_buf[i]);
30 if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) { 53 kfree(tid_rx->reorder_buf);
31 spin_unlock_bh(&sta->lock); 54 kfree(tid_rx->reorder_time);
55 kfree(tid_rx);
56}
57
58void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
59 u16 initiator, u16 reason)
60{
61 struct ieee80211_local *local = sta->local;
62 struct tid_ampdu_rx *tid_rx;
63
64 lockdep_assert_held(&sta->ampdu_mlme.mtx);
65
66 tid_rx = sta->ampdu_mlme.tid_rx[tid];
67
68 if (!tid_rx)
32 return; 69 return;
33 }
34 70
35 sta->ampdu_mlme.tid_state_rx[tid] = 71 rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL);
36 HT_AGG_STATE_REQ_STOP_BA_MSK |
37 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
38 spin_unlock_bh(&sta->lock);
39 72
40#ifdef CONFIG_MAC80211_HT_DEBUG 73#ifdef CONFIG_MAC80211_HT_DEBUG
41 printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", 74 printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n",
@@ -47,61 +80,27 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
47 printk(KERN_DEBUG "HW problem - can not stop rx " 80 printk(KERN_DEBUG "HW problem - can not stop rx "
48 "aggregation for tid %d\n", tid); 81 "aggregation for tid %d\n", tid);
49 82
50 /* shutdown timer has not expired */
51 if (initiator != WLAN_BACK_TIMER)
52 del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
53
54 /* check if this is a self generated aggregation halt */ 83 /* check if this is a self generated aggregation halt */
55 if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER) 84 if (initiator == WLAN_BACK_RECIPIENT)
56 ieee80211_send_delba(sta->sdata, sta->sta.addr, 85 ieee80211_send_delba(sta->sdata, sta->sta.addr,
57 tid, 0, reason); 86 tid, 0, reason);
58 87
59 /* free the reordering buffer */ 88 del_timer_sync(&tid_rx->session_timer);
60 for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) {
61 if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) {
62 /* release the reordered frames */
63 dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]);
64 sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--;
65 sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL;
66 }
67 }
68 89
69 spin_lock_bh(&sta->lock); 90 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx);
70 /* free resources */
71 kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf);
72 kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_time);
73
74 if (!sta->ampdu_mlme.tid_rx[tid]->shutdown) {
75 kfree(sta->ampdu_mlme.tid_rx[tid]);
76 sta->ampdu_mlme.tid_rx[tid] = NULL;
77 }
78
79 sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE;
80 spin_unlock_bh(&sta->lock);
81} 91}
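/*
 * [Editor's note -- not part of the patch.] This is the standard RCU
 * removal sequence: unpublish the pointer with
 * rcu_assign_pointer(..., NULL), then free through call_rcu() so any
 * reader that fetched the old pointer before the assignment can still
 * finish using the structure.
 */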
82 92
83void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, 93void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
84 u16 initiator, u16 reason) 94 u16 initiator, u16 reason)
85{ 95{
86 struct sta_info *sta; 96 mutex_lock(&sta->ampdu_mlme.mtx);
87 97 ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason);
88 rcu_read_lock(); 98 mutex_unlock(&sta->ampdu_mlme.mtx);
89
90 sta = sta_info_get(sdata, ra);
91 if (!sta) {
92 rcu_read_unlock();
93 return;
94 }
95
96 __ieee80211_stop_rx_ba_session(sta, tid, initiator, reason);
97
98 rcu_read_unlock();
99} 99}
100 100
101/* 101/*
102 * After accepting the AddBA Request we activated a timer, 102 * After accepting the AddBA Request we activated a timer,
103 * resetting it after each frame that arrives from the originator. 103 * resetting it after each frame that arrives from the originator.
104 * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
105 */ 104 */
106static void sta_rx_agg_session_timer_expired(unsigned long data) 105static void sta_rx_agg_session_timer_expired(unsigned long data)
107{ 106{
@@ -117,9 +116,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
117#ifdef CONFIG_MAC80211_HT_DEBUG 116#ifdef CONFIG_MAC80211_HT_DEBUG
118 printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); 117 printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
119#endif 118#endif
120 ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, 119 set_bit(*ptid, sta->ampdu_mlme.tid_rx_timer_expired);
121 (u16)*ptid, WLAN_BACK_TIMER, 120 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
122 WLAN_REASON_QSTA_TIMEOUT);
123} 121}
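/*
 * [Editor's note -- not part of the patch.] The timer fires in softirq
 * context, where the new ampdu_mlme.mtx mutex may not be taken, so the
 * handler now only sets a bit and queues the work item; the actual
 * teardown runs from the work function in process context.
 */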
124 122
125static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, 123static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
@@ -194,7 +192,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
194 192
195 status = WLAN_STATUS_REQUEST_DECLINED; 193 status = WLAN_STATUS_REQUEST_DECLINED;
196 194
197 if (test_sta_flags(sta, WLAN_STA_SUSPEND)) { 195 if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) {
198#ifdef CONFIG_MAC80211_HT_DEBUG 196#ifdef CONFIG_MAC80211_HT_DEBUG
199 printk(KERN_DEBUG "Suspend in progress. " 197 printk(KERN_DEBUG "Suspend in progress. "
200 "Denying ADDBA request\n"); 198 "Denying ADDBA request\n");
@@ -230,9 +228,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
230 228
231 229
232 /* examine state machine */ 230 /* examine state machine */
233 spin_lock_bh(&sta->lock); 231 mutex_lock(&sta->ampdu_mlme.mtx);
234 232
235 if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) { 233 if (sta->ampdu_mlme.tid_rx[tid]) {
236#ifdef CONFIG_MAC80211_HT_DEBUG 234#ifdef CONFIG_MAC80211_HT_DEBUG
237 if (net_ratelimit()) 235 if (net_ratelimit())
238 printk(KERN_DEBUG "unexpected AddBA Req from " 236 printk(KERN_DEBUG "unexpected AddBA Req from "
@@ -243,9 +241,8 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
243 } 241 }
244 242
245 /* prepare A-MPDU MLME for Rx aggregation */ 243 /* prepare A-MPDU MLME for Rx aggregation */
246 sta->ampdu_mlme.tid_rx[tid] = 244 tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC);
247 kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); 245 if (!tid_agg_rx) {
248 if (!sta->ampdu_mlme.tid_rx[tid]) {
249#ifdef CONFIG_MAC80211_HT_DEBUG 246#ifdef CONFIG_MAC80211_HT_DEBUG
250 if (net_ratelimit()) 247 if (net_ratelimit())
251 printk(KERN_ERR "allocate rx mlme to tid %d failed\n", 248 printk(KERN_ERR "allocate rx mlme to tid %d failed\n",
@@ -253,14 +250,11 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
253#endif 250#endif
254 goto end; 251 goto end;
255 } 252 }
256 /* rx timer */
257 sta->ampdu_mlme.tid_rx[tid]->session_timer.function =
258 sta_rx_agg_session_timer_expired;
259 sta->ampdu_mlme.tid_rx[tid]->session_timer.data =
260 (unsigned long)&sta->timer_to_tid[tid];
261 init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
262 253
263 tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; 254 /* rx timer */
255 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired;
256 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
257 init_timer(&tid_agg_rx->session_timer);
264 258
265 /* prepare reordering buffer */ 259 /* prepare reordering buffer */
266 tid_agg_rx->reorder_buf = 260 tid_agg_rx->reorder_buf =
@@ -275,8 +269,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
275#endif 269#endif
276 kfree(tid_agg_rx->reorder_buf); 270 kfree(tid_agg_rx->reorder_buf);
277 kfree(tid_agg_rx->reorder_time); 271 kfree(tid_agg_rx->reorder_time);
278 kfree(sta->ampdu_mlme.tid_rx[tid]); 272 kfree(tid_agg_rx);
279 sta->ampdu_mlme.tid_rx[tid] = NULL;
280 goto end; 273 goto end;
281 } 274 }
282 275
@@ -288,13 +281,12 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
288 281
289 if (ret) { 282 if (ret) {
290 kfree(tid_agg_rx->reorder_buf); 283 kfree(tid_agg_rx->reorder_buf);
284 kfree(tid_agg_rx->reorder_time);
291 kfree(tid_agg_rx); 285 kfree(tid_agg_rx);
292 sta->ampdu_mlme.tid_rx[tid] = NULL;
293 goto end; 286 goto end;
294 } 287 }
295 288
296 /* change state and send addba resp */ 289 /* update data */
297 sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL;
298 tid_agg_rx->dialog_token = dialog_token; 290 tid_agg_rx->dialog_token = dialog_token;
299 tid_agg_rx->ssn = start_seq_num; 291 tid_agg_rx->ssn = start_seq_num;
300 tid_agg_rx->head_seq_num = start_seq_num; 292 tid_agg_rx->head_seq_num = start_seq_num;
@@ -302,8 +294,15 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
302 tid_agg_rx->timeout = timeout; 294 tid_agg_rx->timeout = timeout;
303 tid_agg_rx->stored_mpdu_num = 0; 295 tid_agg_rx->stored_mpdu_num = 0;
304 status = WLAN_STATUS_SUCCESS; 296 status = WLAN_STATUS_SUCCESS;
297
298 /* activate it for RX */
299 rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], tid_agg_rx);
300
301 if (timeout)
302 mod_timer(&tid_agg_rx->session_timer, TU_TO_EXP_TIME(timeout));
303
305end: 304end:
306 spin_unlock_bh(&sta->lock); 305 mutex_unlock(&sta->ampdu_mlme.mtx);
307 306
308end_no_lock: 307end_no_lock:
309 ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, 308 ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 87782a4bb541..c893f236acea 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -6,7 +6,7 @@
6 * Copyright 2005-2006, Devicescape Software, Inc. 6 * Copyright 2005-2006, Devicescape Software, Inc.
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2007-2009, Intel Corporation 9 * Copyright 2007-2010, Intel Corporation
10 * 10 *
11 * This program is free software; you can redistribute it and/or modify 11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 12 * it under the terms of the GNU General Public License version 2 as
@@ -21,28 +21,39 @@
21#include "wme.h" 21#include "wme.h"
22 22
23/** 23/**
24 * DOC: TX aggregation 24 * DOC: TX A-MPDU aggregation
25 * 25 *
26 * Aggregation on the TX side requires setting the hardware flag 26 * Aggregation on the TX side requires setting the hardware flag
27 * %IEEE80211_HW_AMPDU_AGGREGATION as well as, if present, the @ampdu_queues 27 * %IEEE80211_HW_AMPDU_AGGREGATION. The driver will then be handed
28 * hardware parameter to the number of hardware AMPDU queues. If there are no 28 * packets with a flag indicating A-MPDU aggregation. The driver
29 * hardware queues then the driver will (currently) have to do all frame 29 * or device is responsible for actually aggregating the frames,
30 * buffering. 30 * as well as deciding how many and which to aggregate.
31 * 31 *
32 * When TX aggregation is started by some subsystem (usually the rate control 32 * When TX aggregation is started by some subsystem (usually the rate
33 * algorithm would be appropriate) by calling the 33 * control algorithm would be appropriate) by calling the
34 * ieee80211_start_tx_ba_session() function, the driver will be notified via 34 * ieee80211_start_tx_ba_session() function, the driver will be
35 * its @ampdu_action function, with the %IEEE80211_AMPDU_TX_START action. 35 * notified via its @ampdu_action function, with the
36 * %IEEE80211_AMPDU_TX_START action.
36 * 37 *
37 * In response to that, the driver is later required to call the 38 * In response to that, the driver is later required to call the
38 * ieee80211_start_tx_ba_cb() (or ieee80211_start_tx_ba_cb_irqsafe()) 39 * ieee80211_start_tx_ba_cb_irqsafe() function, which will really
39 * function, which will start the aggregation session. 40 * start the aggregation session after the peer has also responded.
41 * If the peer responds negatively, the session will be stopped
42 * again right away. Note that it is possible for the aggregation
43 * session to be stopped before the driver has indicated that it
44 * is done setting it up, in which case it must not indicate the
45 * setup completion.
40 * 46 *
41 * Similarly, when the aggregation session is stopped by 47 * Also note that, since we also need to wait for a response from
42 * ieee80211_stop_tx_ba_session(), the driver's @ampdu_action function will 48 * the peer, the driver is notified of the completion of the
43 * be called with the action %IEEE80211_AMPDU_TX_STOP. In this case, the 49 * handshake by the %IEEE80211_AMPDU_TX_OPERATIONAL action to the
44 * call must not fail, and the driver must later call ieee80211_stop_tx_ba_cb() 50 * @ampdu_action callback.
45 * (or ieee80211_stop_tx_ba_cb_irqsafe()). 51 *
52 * Similarly, when the aggregation session is stopped by the peer
53 * or something calling ieee80211_stop_tx_ba_session(), the driver's
54 * @ampdu_action function will be called with the action
55 * %IEEE80211_AMPDU_TX_STOP. In this case, the call must not fail,
56 * and the driver must later call ieee80211_stop_tx_ba_cb_irqsafe().
46 */ 57 */
47 58
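/*
 * [Editor's sketch -- not part of the patch.] The TX half of a minimal
 * @ampdu_action handler matching the handshake described above; the
 * callback names are taken from this kernel, error handling is elided:
 *
 *	case IEEE80211_AMPDU_TX_START:
 *		// *ssn was filled with the starting sequence number
 *		ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
 *		return 0;
 *	case IEEE80211_AMPDU_TX_OPERATIONAL:
 *		// peer accepted; frames may now be aggregated
 *		return 0;
 *	case IEEE80211_AMPDU_TX_STOP:
 *		ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
 *		return 0;	// must not fail
 */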
48static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, 59static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
@@ -125,25 +136,53 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1
125 ieee80211_tx_skb(sdata, skb); 136 ieee80211_tx_skb(sdata, skb);
126} 137}
127 138
139static void kfree_tid_tx(struct rcu_head *rcu_head)
140{
141 struct tid_ampdu_tx *tid_tx =
142 container_of(rcu_head, struct tid_ampdu_tx, rcu_head);
143
144 kfree(tid_tx);
145}
146
128int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 147int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
129 enum ieee80211_back_parties initiator) 148 enum ieee80211_back_parties initiator)
130{ 149{
131 struct ieee80211_local *local = sta->local; 150 struct ieee80211_local *local = sta->local;
151 struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid];
132 int ret; 152 int ret;
133 u8 *state; 153
154 lockdep_assert_held(&sta->ampdu_mlme.mtx);
155
156 if (!tid_tx)
157 return -ENOENT;
158
159 spin_lock_bh(&sta->lock);
160
161 if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) {
162 /* not even started yet! */
163 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL);
164 spin_unlock_bh(&sta->lock);
165 call_rcu(&tid_tx->rcu_head, kfree_tid_tx);
166 return 0;
167 }
168
169 spin_unlock_bh(&sta->lock);
134 170
135#ifdef CONFIG_MAC80211_HT_DEBUG 171#ifdef CONFIG_MAC80211_HT_DEBUG
136 printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", 172 printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n",
137 sta->sta.addr, tid); 173 sta->sta.addr, tid);
138#endif /* CONFIG_MAC80211_HT_DEBUG */ 174#endif /* CONFIG_MAC80211_HT_DEBUG */
139 175
140 state = &sta->ampdu_mlme.tid_state_tx[tid]; 176 set_bit(HT_AGG_STATE_STOPPING, &tid_tx->state);
141 177
142 if (*state == HT_AGG_STATE_OPERATIONAL) 178 /*
143 sta->ampdu_mlme.addba_req_num[tid] = 0; 179 * After this packets are no longer handed right through
180 * to the driver but are put onto tid_tx->pending instead,
181 * with locking to ensure proper access.
182 */
183 clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state);
144 184
145 *state = HT_AGG_STATE_REQ_STOP_BA_MSK | 185 tid_tx->stop_initiator = initiator;
146 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
147 186
148 ret = drv_ampdu_action(local, sta->sdata, 187 ret = drv_ampdu_action(local, sta->sdata,
149 IEEE80211_AMPDU_TX_STOP, 188 IEEE80211_AMPDU_TX_STOP,
@@ -174,19 +213,17 @@ static void sta_addba_resp_timer_expired(unsigned long data)
174 u16 tid = *(u8 *)data; 213 u16 tid = *(u8 *)data;
175 struct sta_info *sta = container_of((void *)data, 214 struct sta_info *sta = container_of((void *)data,
176 struct sta_info, timer_to_tid[tid]); 215 struct sta_info, timer_to_tid[tid]);
177 u8 *state; 216 struct tid_ampdu_tx *tid_tx;
178
179 state = &sta->ampdu_mlme.tid_state_tx[tid];
180 217
181 /* check if the TID waits for addBA response */ 218 /* check if the TID waits for addBA response */
182 spin_lock_bh(&sta->lock); 219 rcu_read_lock();
183 if ((*state & (HT_ADDBA_REQUESTED_MSK | HT_ADDBA_RECEIVED_MSK | 220 tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[tid]);
184 HT_AGG_STATE_REQ_STOP_BA_MSK)) != 221 if (!tid_tx ||
185 HT_ADDBA_REQUESTED_MSK) { 222 test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) {
186 spin_unlock_bh(&sta->lock); 223 rcu_read_unlock();
187#ifdef CONFIG_MAC80211_HT_DEBUG 224#ifdef CONFIG_MAC80211_HT_DEBUG
188 printk(KERN_DEBUG "timer expired on tid %d but we are not " 225 printk(KERN_DEBUG "timer expired on tid %d but we are not "
189 "(or no longer) expecting addBA response there", 226 "(or no longer) expecting addBA response there\n",
190 tid); 227 tid);
191#endif 228#endif
192 return; 229 return;
@@ -196,8 +233,8 @@ static void sta_addba_resp_timer_expired(unsigned long data)
196 printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); 233 printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid);
197#endif 234#endif
198 235
199 ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); 236 ieee80211_stop_tx_ba_session(&sta->sta, tid);
200 spin_unlock_bh(&sta->lock); 237 rcu_read_unlock();
201} 238}
202 239
203static inline int ieee80211_ac_from_tid(int tid) 240static inline int ieee80211_ac_from_tid(int tid)
@@ -205,14 +242,114 @@ static inline int ieee80211_ac_from_tid(int tid)
205 return ieee802_1d_to_ac[tid & 7]; 242 return ieee802_1d_to_ac[tid & 7];
206} 243}
207 244
245/*
246 * When multiple aggregation sessions on multiple stations
247 * are being created/destroyed simultaneously, we need to
248 * refcount the global queue stop caused by that in order
249 * to not get into a situation where one of the aggregation
250 * setup or teardown re-enables queues before the other is
251 * ready to handle that.
252 *
253 * These two functions take care of this issue by keeping
254 * a global "agg_queue_stop" refcount.
255 */
256static void __acquires(agg_queue)
257ieee80211_stop_queue_agg(struct ieee80211_local *local, int tid)
258{
259 int queue = ieee80211_ac_from_tid(tid);
260
261 if (atomic_inc_return(&local->agg_queue_stop[queue]) == 1)
262 ieee80211_stop_queue_by_reason(
263 &local->hw, queue,
264 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
265 __acquire(agg_queue);
266}
267
268static void __releases(agg_queue)
269ieee80211_wake_queue_agg(struct ieee80211_local *local, int tid)
270{
271 int queue = ieee80211_ac_from_tid(tid);
272
273 if (atomic_dec_return(&local->agg_queue_stop[queue]) == 0)
274 ieee80211_wake_queue_by_reason(
275 &local->hw, queue,
276 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
277 __release(agg_queue);
278}
279
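/*
 * [Editor's note -- not part of the patch.] Without the refcount, two
 * concurrent sessions could interleave as
 *
 *	A: stop(q)   B: stop(q)   B: wake(q)   <-- queue runs again
 *	A: wake(q)                                 while A still needs
 *	                                           it stopped
 *
 * With agg_queue_stop, only the 0->1 transition stops the queue and
 * only the 1->0 transition wakes it.
 */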
280void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
281{
282 struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid];
283 struct ieee80211_local *local = sta->local;
284 struct ieee80211_sub_if_data *sdata = sta->sdata;
285 u16 start_seq_num;
286 int ret;
287
288 lockdep_assert_held(&sta->ampdu_mlme.mtx);
289
290 /*
291 * While we're asking the driver about the aggregation,
292 * stop the AC queue so that we don't have to worry
293 * about frames that came in while we were doing that,
 294 * which would require us to put them onto the AC pending
 295 * queue afterwards, which just makes the code more complex.
296 */
297 ieee80211_stop_queue_agg(local, tid);
298
299 clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
300
301 /*
302 * make sure no packets are being processed to get
303 * valid starting sequence number
304 */
305 synchronize_net();
306
307 start_seq_num = sta->tid_seq[tid] >> 4;
308
309 ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
310 &sta->sta, tid, &start_seq_num);
311 if (ret) {
312#ifdef CONFIG_MAC80211_HT_DEBUG
313 printk(KERN_DEBUG "BA request denied - HW unavailable for"
314 " tid %d\n", tid);
315#endif
316 spin_lock_bh(&sta->lock);
317 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL);
318 spin_unlock_bh(&sta->lock);
319
320 ieee80211_wake_queue_agg(local, tid);
321 call_rcu(&tid_tx->rcu_head, kfree_tid_tx);
322 return;
323 }
324
325 /* we can take packets again now */
326 ieee80211_wake_queue_agg(local, tid);
327
328 /* activate the timer for the recipient's addBA response */
329 mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL);
330#ifdef CONFIG_MAC80211_HT_DEBUG
331 printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
332#endif
333
334 spin_lock_bh(&sta->lock);
335 sta->ampdu_mlme.addba_req_num[tid]++;
336 spin_unlock_bh(&sta->lock);
337
338 /* send AddBA request */
339 ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
340 tid_tx->dialog_token, start_seq_num,
341 0x40, 5000);
342}
343
208int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) 344int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
209{ 345{
210 struct sta_info *sta = container_of(pubsta, struct sta_info, sta); 346 struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
211 struct ieee80211_sub_if_data *sdata = sta->sdata; 347 struct ieee80211_sub_if_data *sdata = sta->sdata;
212 struct ieee80211_local *local = sdata->local; 348 struct ieee80211_local *local = sdata->local;
213 u8 *state; 349 struct tid_ampdu_tx *tid_tx;
214 int ret = 0; 350 int ret = 0;
215 u16 start_seq_num; 351
352 trace_api_start_tx_ba_session(pubsta, tid);
216 353
217 if (WARN_ON(!local->ops->ampdu_action)) 354 if (WARN_ON(!local->ops->ampdu_action))
218 return -EINVAL; 355 return -EINVAL;
@@ -237,24 +374,15 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
237 sdata->vif.type != NL80211_IFTYPE_AP) 374 sdata->vif.type != NL80211_IFTYPE_AP)
238 return -EINVAL; 375 return -EINVAL;
239 376
240 if (test_sta_flags(sta, WLAN_STA_DISASSOC)) { 377 if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) {
241#ifdef CONFIG_MAC80211_HT_DEBUG
242 printk(KERN_DEBUG "Disassociation is in progress. "
243 "Denying BA session request\n");
244#endif
245 return -EINVAL;
246 }
247
248 if (test_sta_flags(sta, WLAN_STA_SUSPEND)) {
249#ifdef CONFIG_MAC80211_HT_DEBUG 378#ifdef CONFIG_MAC80211_HT_DEBUG
250 printk(KERN_DEBUG "Suspend in progress. " 379 printk(KERN_DEBUG "BA sessions blocked. "
251 "Denying BA session request\n"); 380 "Denying BA session request\n");
252#endif 381#endif
253 return -EINVAL; 382 return -EINVAL;
254 } 383 }
255 384
256 spin_lock_bh(&sta->lock); 385 spin_lock_bh(&sta->lock);
257 spin_lock(&local->ampdu_lock);
258 386
259 /* we have tried too many times, receiver does not want A-MPDU */ 387 /* we have tried too many times, receiver does not want A-MPDU */
260 if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { 388 if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
@@ -262,9 +390,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
262 goto err_unlock_sta; 390 goto err_unlock_sta;
263 } 391 }
264 392
265 state = &sta->ampdu_mlme.tid_state_tx[tid]; 393 tid_tx = sta->ampdu_mlme.tid_tx[tid];
266 /* check if the TID is not in aggregation flow already */ 394 /* check if the TID is not in aggregation flow already */
267 if (*state != HT_AGG_STATE_IDLE) { 395 if (tid_tx) {
268#ifdef CONFIG_MAC80211_HT_DEBUG 396#ifdef CONFIG_MAC80211_HT_DEBUG
269 printk(KERN_DEBUG "BA request denied - session is not " 397 printk(KERN_DEBUG "BA request denied - session is not "
270 "idle on tid %u\n", tid); 398 "idle on tid %u\n", tid);
@@ -273,94 +401,37 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
273 goto err_unlock_sta; 401 goto err_unlock_sta;
274 } 402 }
275 403
276 /*
277 * While we're asking the driver about the aggregation,
278 * stop the AC queue so that we don't have to worry
279 * about frames that came in while we were doing that,
280 * which would require us to put them to the AC pending
281 * afterwards which just makes the code more complex.
282 */
283 ieee80211_stop_queue_by_reason(
284 &local->hw, ieee80211_ac_from_tid(tid),
285 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
286
287 /* prepare A-MPDU MLME for Tx aggregation */ 404 /* prepare A-MPDU MLME for Tx aggregation */
288 sta->ampdu_mlme.tid_tx[tid] = 405 tid_tx = kzalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
289 kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); 406 if (!tid_tx) {
290 if (!sta->ampdu_mlme.tid_tx[tid]) {
291#ifdef CONFIG_MAC80211_HT_DEBUG 407#ifdef CONFIG_MAC80211_HT_DEBUG
292 if (net_ratelimit()) 408 if (net_ratelimit())
293 printk(KERN_ERR "allocate tx mlme to tid %d failed\n", 409 printk(KERN_ERR "allocate tx mlme to tid %d failed\n",
294 tid); 410 tid);
295#endif 411#endif
296 ret = -ENOMEM; 412 ret = -ENOMEM;
297 goto err_wake_queue; 413 goto err_unlock_sta;
298 } 414 }
299 415
300 skb_queue_head_init(&sta->ampdu_mlme.tid_tx[tid]->pending); 416 skb_queue_head_init(&tid_tx->pending);
417 __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
301 418
302 /* Tx timer */ 419 /* Tx timer */
303 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = 420 tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired;
304 sta_addba_resp_timer_expired; 421 tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid];
305 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data = 422 init_timer(&tid_tx->addba_resp_timer);
306 (unsigned long)&sta->timer_to_tid[tid];
307 init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
308
309 /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the
310 * call back right away, it must see that the flow has begun */
311 *state |= HT_ADDBA_REQUESTED_MSK;
312
313 start_seq_num = sta->tid_seq[tid] >> 4;
314
315 ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
316 pubsta, tid, &start_seq_num);
317 423
318 if (ret) { 424 /* assign a dialog token */
319#ifdef CONFIG_MAC80211_HT_DEBUG 425 sta->ampdu_mlme.dialog_token_allocator++;
320 printk(KERN_DEBUG "BA request denied - HW unavailable for" 426 tid_tx->dialog_token = sta->ampdu_mlme.dialog_token_allocator;
321 " tid %d\n", tid);
322#endif /* CONFIG_MAC80211_HT_DEBUG */
323 *state = HT_AGG_STATE_IDLE;
324 goto err_free;
325 }
326 427
327 /* Driver vetoed or OKed, but we can take packets again now */ 428 /* finally, assign it to the array */
328 ieee80211_wake_queue_by_reason( 429 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], tid_tx);
329 &local->hw, ieee80211_ac_from_tid(tid),
330 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
331 430
332 spin_unlock(&local->ampdu_lock); 431 ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
333 spin_unlock_bh(&sta->lock);
334 432
335 /* send an addBA request */ 433 /* this flow continues off the work */
336 sta->ampdu_mlme.dialog_token_allocator++;
337 sta->ampdu_mlme.tid_tx[tid]->dialog_token =
338 sta->ampdu_mlme.dialog_token_allocator;
339 sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num;
340
341 ieee80211_send_addba_request(sdata, pubsta->addr, tid,
342 sta->ampdu_mlme.tid_tx[tid]->dialog_token,
343 sta->ampdu_mlme.tid_tx[tid]->ssn,
344 0x40, 5000);
345 sta->ampdu_mlme.addba_req_num[tid]++;
346 /* activate the timer for the recipient's addBA response */
347 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires =
348 jiffies + ADDBA_RESP_INTERVAL;
349 add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
350#ifdef CONFIG_MAC80211_HT_DEBUG
351 printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
352#endif
353 return 0;
354
355 err_free:
356 kfree(sta->ampdu_mlme.tid_tx[tid]);
357 sta->ampdu_mlme.tid_tx[tid] = NULL;
358 err_wake_queue:
359 ieee80211_wake_queue_by_reason(
360 &local->hw, ieee80211_ac_from_tid(tid),
361 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
362 err_unlock_sta: 434 err_unlock_sta:
363 spin_unlock(&local->ampdu_lock);
364 spin_unlock_bh(&sta->lock); 435 spin_unlock_bh(&sta->lock);
365 return ret; 436 return ret;
366} 437}
@@ -368,69 +439,65 @@ EXPORT_SYMBOL(ieee80211_start_tx_ba_session);
368 439
369/* 440/*
370 * splice packets from the STA's pending to the local pending, 441 * splice packets from the STA's pending to the local pending,
371 * requires a call to ieee80211_agg_splice_finish and holding 442 * requires a call to ieee80211_agg_splice_finish later
372 * local->ampdu_lock across both calls.
373 */ 443 */
374static void ieee80211_agg_splice_packets(struct ieee80211_local *local, 444static void __acquires(agg_queue)
375 struct sta_info *sta, u16 tid) 445ieee80211_agg_splice_packets(struct ieee80211_local *local,
446 struct tid_ampdu_tx *tid_tx, u16 tid)
376{ 447{
448 int queue = ieee80211_ac_from_tid(tid);
377 unsigned long flags; 449 unsigned long flags;
378 u16 queue = ieee80211_ac_from_tid(tid);
379
380 ieee80211_stop_queue_by_reason(
381 &local->hw, queue,
382 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
383 450
384 if (!(sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK)) 451 ieee80211_stop_queue_agg(local, tid);
385 return;
386 452
387 if (WARN(!sta->ampdu_mlme.tid_tx[tid], 453 if (WARN(!tid_tx, "TID %d gone but expected when splicing aggregates"
388 "TID %d gone but expected when splicing aggregates from" 454 " from the pending queue\n", tid))
389 "the pending queue\n", tid))
390 return; 455 return;
391 456
392 if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) { 457 if (!skb_queue_empty(&tid_tx->pending)) {
393 spin_lock_irqsave(&local->queue_stop_reason_lock, flags); 458 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
394 /* copy over remaining packets */ 459 /* copy over remaining packets */
395 skb_queue_splice_tail_init( 460 skb_queue_splice_tail_init(&tid_tx->pending,
396 &sta->ampdu_mlme.tid_tx[tid]->pending, 461 &local->pending[queue]);
397 &local->pending[queue]);
398 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 462 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
399 } 463 }
400} 464}
401 465
402static void ieee80211_agg_splice_finish(struct ieee80211_local *local, 466static void __releases(agg_queue)
403 struct sta_info *sta, u16 tid) 467ieee80211_agg_splice_finish(struct ieee80211_local *local, u16 tid)
404{ 468{
405 u16 queue = ieee80211_ac_from_tid(tid); 469 ieee80211_wake_queue_agg(local, tid);
406
407 ieee80211_wake_queue_by_reason(
408 &local->hw, queue,
409 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
410} 470}
411 471
412/* caller must hold sta->lock */
413static void ieee80211_agg_tx_operational(struct ieee80211_local *local, 472static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
414 struct sta_info *sta, u16 tid) 473 struct sta_info *sta, u16 tid)
415{ 474{
475 lockdep_assert_held(&sta->ampdu_mlme.mtx);
476
416#ifdef CONFIG_MAC80211_HT_DEBUG 477#ifdef CONFIG_MAC80211_HT_DEBUG
417 printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); 478 printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid);
418#endif 479#endif
419 480
420 spin_lock(&local->ampdu_lock);
421 ieee80211_agg_splice_packets(local, sta, tid);
422 /*
423 * NB: we rely on sta->lock being taken in the TX
424 * processing here when adding to the pending queue,
425 * otherwise we could only change the state of the
426 * session to OPERATIONAL _here_.
427 */
428 ieee80211_agg_splice_finish(local, sta, tid);
429 spin_unlock(&local->ampdu_lock);
430
431 drv_ampdu_action(local, sta->sdata, 481 drv_ampdu_action(local, sta->sdata,
432 IEEE80211_AMPDU_TX_OPERATIONAL, 482 IEEE80211_AMPDU_TX_OPERATIONAL,
433 &sta->sta, tid, NULL); 483 &sta->sta, tid, NULL);
484
485 /*
 486 * Synchronize with the TX path: while we splice, the TX
 487 * path should block so it won't put more packets onto pending.
488 */
489 spin_lock_bh(&sta->lock);
490
491 ieee80211_agg_splice_packets(local, sta->ampdu_mlme.tid_tx[tid], tid);
492 /*
493 * Now mark as operational. This will be visible
494 * in the TX path, and lets it go lock-free in
495 * the common case.
496 */
497 set_bit(HT_AGG_STATE_OPERATIONAL, &sta->ampdu_mlme.tid_tx[tid]->state);
498 ieee80211_agg_splice_finish(local, tid);
499
500 spin_unlock_bh(&sta->lock);
434} 501}
435 502
436void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) 503void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
@@ -438,7 +505,9 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
438 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); 505 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
439 struct ieee80211_local *local = sdata->local; 506 struct ieee80211_local *local = sdata->local;
440 struct sta_info *sta; 507 struct sta_info *sta;
441 u8 *state; 508 struct tid_ampdu_tx *tid_tx;
509
510 trace_api_start_tx_ba_cb(sdata, ra, tid);
442 511
443 if (tid >= STA_TID_NUM) { 512 if (tid >= STA_TID_NUM) {
444#ifdef CONFIG_MAC80211_HT_DEBUG 513#ifdef CONFIG_MAC80211_HT_DEBUG
@@ -448,42 +517,36 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
448 return; 517 return;
449 } 518 }
450 519
451 rcu_read_lock(); 520 mutex_lock(&local->sta_mtx);
452 sta = sta_info_get(sdata, ra); 521 sta = sta_info_get(sdata, ra);
453 if (!sta) { 522 if (!sta) {
454 rcu_read_unlock(); 523 mutex_unlock(&local->sta_mtx);
455#ifdef CONFIG_MAC80211_HT_DEBUG 524#ifdef CONFIG_MAC80211_HT_DEBUG
456 printk(KERN_DEBUG "Could not find station: %pM\n", ra); 525 printk(KERN_DEBUG "Could not find station: %pM\n", ra);
457#endif 526#endif
458 return; 527 return;
459 } 528 }
460 529
461 state = &sta->ampdu_mlme.tid_state_tx[tid]; 530 mutex_lock(&sta->ampdu_mlme.mtx);
462 spin_lock_bh(&sta->lock); 531 tid_tx = sta->ampdu_mlme.tid_tx[tid];
463 532
464 if (WARN_ON(!(*state & HT_ADDBA_REQUESTED_MSK))) { 533 if (WARN_ON(!tid_tx)) {
465#ifdef CONFIG_MAC80211_HT_DEBUG 534#ifdef CONFIG_MAC80211_HT_DEBUG
466 printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", 535 printk(KERN_DEBUG "addBA was not requested!\n");
467 *state);
468#endif 536#endif
469 spin_unlock_bh(&sta->lock); 537 goto unlock;
470 rcu_read_unlock();
471 return;
472 } 538 }
473 539
474 if (WARN_ON(*state & HT_ADDBA_DRV_READY_MSK)) 540 if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
475 goto out; 541 goto unlock;
476
477 *state |= HT_ADDBA_DRV_READY_MSK;
478 542
479 if (*state == HT_AGG_STATE_OPERATIONAL) 543 if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state))
480 ieee80211_agg_tx_operational(local, sta, tid); 544 ieee80211_agg_tx_operational(local, sta, tid);
481 545
482 out: 546 unlock:
483 spin_unlock_bh(&sta->lock); 547 mutex_unlock(&sta->ampdu_mlme.mtx);
484 rcu_read_unlock(); 548 mutex_unlock(&local->sta_mtx);
485} 549}
486EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
487 550
488void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, 551void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
489 const u8 *ra, u16 tid) 552 const u8 *ra, u16 tid)
@@ -504,42 +567,36 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
504 ra_tid = (struct ieee80211_ra_tid *) &skb->cb; 567 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
505 memcpy(&ra_tid->ra, ra, ETH_ALEN); 568 memcpy(&ra_tid->ra, ra, ETH_ALEN);
506 ra_tid->tid = tid; 569 ra_tid->tid = tid;
507 ra_tid->vif = vif;
508 570
509 skb->pkt_type = IEEE80211_ADDBA_MSG; 571 skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_START;
510 skb_queue_tail(&local->skb_queue, skb); 572 skb_queue_tail(&sdata->skb_queue, skb);
511 tasklet_schedule(&local->tasklet); 573 ieee80211_queue_work(&local->hw, &sdata->work);
512} 574}
513EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); 575EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
514 576
515int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 577int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
516 enum ieee80211_back_parties initiator) 578 enum ieee80211_back_parties initiator)
517{ 579{
518 u8 *state;
519 int ret; 580 int ret;
520 581
521 /* check if the TID is in aggregation */ 582 mutex_lock(&sta->ampdu_mlme.mtx);
522 state = &sta->ampdu_mlme.tid_state_tx[tid];
523 spin_lock_bh(&sta->lock);
524
525 if (*state != HT_AGG_STATE_OPERATIONAL) {
526 ret = -ENOENT;
527 goto unlock;
528 }
529 583
530 ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator); 584 ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator);
531 585
532 unlock: 586 mutex_unlock(&sta->ampdu_mlme.mtx);
533 spin_unlock_bh(&sta->lock); 587
534 return ret; 588 return ret;
535} 589}
536 590
537int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, 591int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
538 enum ieee80211_back_parties initiator)
539{ 592{
540 struct sta_info *sta = container_of(pubsta, struct sta_info, sta); 593 struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
541 struct ieee80211_sub_if_data *sdata = sta->sdata; 594 struct ieee80211_sub_if_data *sdata = sta->sdata;
542 struct ieee80211_local *local = sdata->local; 595 struct ieee80211_local *local = sdata->local;
596 struct tid_ampdu_tx *tid_tx;
597 int ret = 0;
598
599 trace_api_stop_tx_ba_session(pubsta, tid);
543 600
544 if (!local->ops->ampdu_action) 601 if (!local->ops->ampdu_action)
545 return -EINVAL; 602 return -EINVAL;
@@ -547,7 +604,26 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
547 if (tid >= STA_TID_NUM) 604 if (tid >= STA_TID_NUM)
548 return -EINVAL; 605 return -EINVAL;
549 606
550 return __ieee80211_stop_tx_ba_session(sta, tid, initiator); 607 spin_lock_bh(&sta->lock);
608 tid_tx = sta->ampdu_mlme.tid_tx[tid];
609
610 if (!tid_tx) {
611 ret = -ENOENT;
612 goto unlock;
613 }
614
615 if (test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
616 /* already in progress stopping it */
617 ret = 0;
618 goto unlock;
619 }
620
621 set_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state);
622 ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
623
624 unlock:
625 spin_unlock_bh(&sta->lock);
626 return ret;
551} 627}
552EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); 628EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
553 629
@@ -556,7 +632,9 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
556 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); 632 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
557 struct ieee80211_local *local = sdata->local; 633 struct ieee80211_local *local = sdata->local;
558 struct sta_info *sta; 634 struct sta_info *sta;
559 u8 *state; 635 struct tid_ampdu_tx *tid_tx;
636
637 trace_api_stop_tx_ba_cb(sdata, ra, tid);
560 638
561 if (tid >= STA_TID_NUM) { 639 if (tid >= STA_TID_NUM) {
562#ifdef CONFIG_MAC80211_HT_DEBUG 640#ifdef CONFIG_MAC80211_HT_DEBUG
@@ -571,51 +649,56 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
571 ra, tid); 649 ra, tid);
572#endif /* CONFIG_MAC80211_HT_DEBUG */ 650#endif /* CONFIG_MAC80211_HT_DEBUG */
573 651
574 rcu_read_lock(); 652 mutex_lock(&local->sta_mtx);
653
575 sta = sta_info_get(sdata, ra); 654 sta = sta_info_get(sdata, ra);
576 if (!sta) { 655 if (!sta) {
577#ifdef CONFIG_MAC80211_HT_DEBUG 656#ifdef CONFIG_MAC80211_HT_DEBUG
578 printk(KERN_DEBUG "Could not find station: %pM\n", ra); 657 printk(KERN_DEBUG "Could not find station: %pM\n", ra);
579#endif 658#endif
580 rcu_read_unlock(); 659 goto unlock;
581 return;
582 } 660 }
583 state = &sta->ampdu_mlme.tid_state_tx[tid];
584 661
585 /* NOTE: no need to use sta->lock in this state check, as 662 mutex_lock(&sta->ampdu_mlme.mtx);
586 * ieee80211_stop_tx_ba_session will let only one stop call to 663 spin_lock_bh(&sta->lock);
587 * pass through per sta/tid 664 tid_tx = sta->ampdu_mlme.tid_tx[tid];
588 */ 665
589 if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { 666 if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
590#ifdef CONFIG_MAC80211_HT_DEBUG 667#ifdef CONFIG_MAC80211_HT_DEBUG
591 printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); 668 printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
592#endif 669#endif
593 rcu_read_unlock(); 670 goto unlock_sta;
594 return;
595 } 671 }
596 672
597 if (*state & HT_AGG_STATE_INITIATOR_MSK) 673 if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR)
598 ieee80211_send_delba(sta->sdata, ra, tid, 674 ieee80211_send_delba(sta->sdata, ra, tid,
599 WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); 675 WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
600 676
601 spin_lock_bh(&sta->lock); 677 /*
602 spin_lock(&local->ampdu_lock); 678 * When we get here, the TX path will not be lockless any more wrt.
679 * aggregation, since the OPERATIONAL bit has long been cleared.
680 * Thus it will block on getting the lock, if it occurs. So if we
681 * stop the queue now, we will not get any more packets, and any
682 * that might be being processed will wait for us here, thereby
683 * guaranteeing that no packets go to the tid_tx pending queue any
684 * more.
685 */
603 686
604 ieee80211_agg_splice_packets(local, sta, tid); 687 ieee80211_agg_splice_packets(local, tid_tx, tid);
605 688
606 *state = HT_AGG_STATE_IDLE; 689 /* future packets must not find the tid_tx struct any more */
607 /* from now on packets are no longer put onto sta->pending */ 690 rcu_assign_pointer(sta->ampdu_mlme.tid_tx[tid], NULL);
608 kfree(sta->ampdu_mlme.tid_tx[tid]);
609 sta->ampdu_mlme.tid_tx[tid] = NULL;
610 691
611 ieee80211_agg_splice_finish(local, sta, tid); 692 ieee80211_agg_splice_finish(local, tid);
612 693
613 spin_unlock(&local->ampdu_lock); 694 call_rcu(&tid_tx->rcu_head, kfree_tid_tx);
614 spin_unlock_bh(&sta->lock);
615 695
616 rcu_read_unlock(); 696 unlock_sta:
697 spin_unlock_bh(&sta->lock);
698 mutex_unlock(&sta->ampdu_mlme.mtx);
699 unlock:
700 mutex_unlock(&local->sta_mtx);
617} 701}
618EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb);
619 702
620void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, 703void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
621 const u8 *ra, u16 tid) 704 const u8 *ra, u16 tid)
@@ -636,11 +719,10 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
636 ra_tid = (struct ieee80211_ra_tid *) &skb->cb; 719 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
637 memcpy(&ra_tid->ra, ra, ETH_ALEN); 720 memcpy(&ra_tid->ra, ra, ETH_ALEN);
638 ra_tid->tid = tid; 721 ra_tid->tid = tid;
639 ra_tid->vif = vif;
640 722
641 skb->pkt_type = IEEE80211_DELBA_MSG; 723 skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_STOP;
642 skb_queue_tail(&local->skb_queue, skb); 724 skb_queue_tail(&sdata->skb_queue, skb);
643 tasklet_schedule(&local->tasklet); 725 ieee80211_queue_work(&local->hw, &sdata->work);
644} 726}
645EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); 727EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
646 728
@@ -650,40 +732,40 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
650 struct ieee80211_mgmt *mgmt, 732 struct ieee80211_mgmt *mgmt,
651 size_t len) 733 size_t len)
652{ 734{
735 struct tid_ampdu_tx *tid_tx;
653 u16 capab, tid; 736 u16 capab, tid;
654 u8 *state;
655 737
656 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); 738 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
657 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; 739 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
658 740
659 state = &sta->ampdu_mlme.tid_state_tx[tid]; 741 mutex_lock(&sta->ampdu_mlme.mtx);
660
661 spin_lock_bh(&sta->lock);
662 742
663 if (!(*state & HT_ADDBA_REQUESTED_MSK)) 743 tid_tx = sta->ampdu_mlme.tid_tx[tid];
744 if (!tid_tx)
664 goto out; 745 goto out;
665 746
666 if (mgmt->u.action.u.addba_resp.dialog_token != 747 if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) {
667 sta->ampdu_mlme.tid_tx[tid]->dialog_token) {
668#ifdef CONFIG_MAC80211_HT_DEBUG 748#ifdef CONFIG_MAC80211_HT_DEBUG
669 printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); 749 printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid);
670#endif /* CONFIG_MAC80211_HT_DEBUG */ 750#endif
671 goto out; 751 goto out;
672 } 752 }
673 753
674 del_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); 754 del_timer(&tid_tx->addba_resp_timer);
675 755
676#ifdef CONFIG_MAC80211_HT_DEBUG 756#ifdef CONFIG_MAC80211_HT_DEBUG
677 printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid); 757 printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid);
678#endif /* CONFIG_MAC80211_HT_DEBUG */ 758#endif
679 759
680 if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) 760 if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
681 == WLAN_STATUS_SUCCESS) { 761 == WLAN_STATUS_SUCCESS) {
682 u8 curstate = *state; 762 if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED,
683 763 &tid_tx->state)) {
684 *state |= HT_ADDBA_RECEIVED_MSK; 764 /* ignore duplicate response */
765 goto out;
766 }
685 767
686 if (*state != curstate && *state == HT_AGG_STATE_OPERATIONAL) 768 if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))
687 ieee80211_agg_tx_operational(local, sta, tid); 769 ieee80211_agg_tx_operational(local, sta, tid);
688 770
689 sta->ampdu_mlme.addba_req_num[tid] = 0; 771 sta->ampdu_mlme.addba_req_num[tid] = 0;
@@ -692,5 +774,5 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
692 } 774 }
693 775
694 out: 776 out:
695 spin_unlock_bh(&sta->lock); 777 mutex_unlock(&sta->ampdu_mlme.mtx);
696} 778}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index edc872e22c9b..29ac8e1a509e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -97,9 +97,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
 					     params->mesh_id_len,
 					     params->mesh_id);
 
-	if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !flags)
-		return 0;
-
 	if (type == NL80211_IFTYPE_AP_VLAN &&
 	    params && params->use_4addr == 0)
 		rcu_assign_pointer(sdata->u.vlan.sta, NULL);
@@ -107,7 +104,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
 		 params && params->use_4addr >= 0)
 		sdata->u.mgd.use_4addr = params->use_4addr;
 
-	sdata->u.mntr_flags = *flags;
+	if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags)
+		sdata->u.mntr_flags = *flags;
+
 	return 0;
 }
 
@@ -121,6 +120,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_key *key;
 	int err;
 
+	if (!netif_running(dev))
+		return -ENETDOWN;
+
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
 	switch (params->cipher) {
@@ -141,17 +143,22 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 		return -EINVAL;
 	}
 
+	/* reject WEP and TKIP keys if WEP failed to initialize */
+	if ((alg == ALG_WEP || alg == ALG_TKIP) &&
+	    IS_ERR(sdata->local->wep_tx_tfm))
+		return -EINVAL;
+
 	key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key,
 				  params->seq_len, params->seq);
 	if (!key)
 		return -ENOMEM;
 
-	rcu_read_lock();
+	mutex_lock(&sdata->local->sta_mtx);
 
 	if (mac_addr) {
 		sta = sta_info_get_bss(sdata, mac_addr);
 		if (!sta) {
-			ieee80211_key_free(key);
+			ieee80211_key_free(sdata->local, key);
 			err = -ENOENT;
 			goto out_unlock;
 		}
@@ -161,7 +168,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
 
 	err = 0;
  out_unlock:
-	rcu_read_unlock();
+	mutex_unlock(&sdata->local->sta_mtx);
 
 	return err;
 }
@@ -175,7 +182,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
 
 	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	rcu_read_lock();
+	mutex_lock(&sdata->local->sta_mtx);
 
 	if (mac_addr) {
 		ret = -ENOENT;
@@ -185,7 +192,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
 			goto out_unlock;
 
 		if (sta->key) {
-			ieee80211_key_free(sta->key);
+			ieee80211_key_free(sdata->local, sta->key);
 			WARN_ON(sta->key);
 			ret = 0;
 		}
@@ -198,12 +205,12 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
 		goto out_unlock;
 	}
 
-	ieee80211_key_free(sdata->keys[key_idx]);
+	ieee80211_key_free(sdata->local, sdata->keys[key_idx]);
 	WARN_ON(sdata->keys[key_idx]);
 
 	ret = 0;
  out_unlock:
-	rcu_read_unlock();
+	mutex_unlock(&sdata->local->sta_mtx);
 
 	return ret;
 }
@@ -306,15 +313,10 @@ static int ieee80211_config_default_key(struct wiphy *wiphy,
 					struct net_device *dev,
 					u8 key_idx)
 {
-	struct ieee80211_sub_if_data *sdata;
-
-	rcu_read_lock();
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	ieee80211_set_default_key(sdata, key_idx);
 
-	rcu_read_unlock();
-
 	return 0;
 }
 
@@ -322,15 +324,10 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy,
 					     struct net_device *dev,
 					     u8 key_idx)
 {
-	struct ieee80211_sub_if_data *sdata;
-
-	rcu_read_lock();
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
-	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	ieee80211_set_default_mgmt_key(sdata, key_idx);
 
-	rcu_read_unlock();
-
 	return 0;
 }
 
@@ -411,6 +408,14 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
 	return ret;
 }
 
+static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev,
+				 int idx, struct survey_info *survey)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	return drv_get_survey(local, idx, survey);
+}
+
 static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
 				 u8 *mac, struct station_info *sinfo)
 {
@@ -590,7 +595,7 @@ struct iapp_layer2_update {
 	u8 ssap;		/* 0 */
 	u8 control;
 	u8 xid_info[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void ieee80211_send_layer2_update(struct sta_info *sta)
 {
@@ -622,7 +627,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta)
 	skb->dev = sta->sdata->dev;
 	skb->protocol = eth_type_trans(skb, sta->sdata->dev);
 	memset(skb->cb, 0, sizeof(skb->cb));
-	netif_rx(skb);
+	netif_rx_ni(skb);
 }
 
 static void sta_apply_parameters(struct ieee80211_local *local,
@@ -1104,6 +1109,13 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 		changed |= BSS_CHANGED_BASIC_RATES;
 	}
 
+	if (params->ap_isolate >= 0) {
+		if (params->ap_isolate)
+			sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS;
+		else
+			sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS;
+	}
+
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	return 0;
@@ -1141,15 +1153,39 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
 }
 
 static int ieee80211_set_channel(struct wiphy *wiphy,
+				 struct net_device *netdev,
 				 struct ieee80211_channel *chan,
 				 enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata = NULL;
+
+	if (netdev)
+		sdata = IEEE80211_DEV_TO_SUB_IF(netdev);
+
+	switch (ieee80211_get_channel_mode(local, NULL)) {
+	case CHAN_MODE_HOPPING:
+		return -EBUSY;
+	case CHAN_MODE_FIXED:
+		if (local->oper_channel != chan)
+			return -EBUSY;
+		if (!sdata && local->_oper_channel_type == channel_type)
+			return 0;
+		break;
+	case CHAN_MODE_UNDEFINED:
+		break;
+	}
 
 	local->oper_channel = chan;
-	local->oper_channel_type = channel_type;
 
-	return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+	if (!ieee80211_set_channel_type(local, sdata, channel_type))
+		return -EBUSY;
+
+	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+	if (sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR)
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT);
+
+	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -1193,6 +1229,20 @@ static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev,
 			   struct cfg80211_assoc_request *req)
 {
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	switch (ieee80211_get_channel_mode(local, sdata)) {
+	case CHAN_MODE_HOPPING:
+		return -EBUSY;
+	case CHAN_MODE_FIXED:
+		if (local->oper_channel == req->bss->channel)
+			break;
+		return -EBUSY;
+	case CHAN_MODE_UNDEFINED:
+		break;
+	}
+
 	return ieee80211_mgd_assoc(IEEE80211_DEV_TO_SUB_IF(dev), req);
 }
 
@@ -1215,8 +1265,22 @@ static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 			       struct cfg80211_ibss_params *params)
 {
+	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
+	switch (ieee80211_get_channel_mode(local, sdata)) {
+	case CHAN_MODE_HOPPING:
+		return -EBUSY;
+	case CHAN_MODE_FIXED:
+		if (!params->channel_fixed)
+			return -EBUSY;
+		if (local->oper_channel == params->channel)
+			break;
+		return -EBUSY;
+	case CHAN_MODE_UNDEFINED:
+		break;
+	}
+
 	return ieee80211_ibss_join(sdata, params);
 }
 
@@ -1258,28 +1322,28 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed)
 }
 
 static int ieee80211_set_tx_power(struct wiphy *wiphy,
-				  enum tx_power_setting type, int dbm)
+				  enum nl80211_tx_power_setting type, int mbm)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_channel *chan = local->hw.conf.channel;
 	u32 changes = 0;
 
 	switch (type) {
-	case TX_POWER_AUTOMATIC:
+	case NL80211_TX_POWER_AUTOMATIC:
 		local->user_power_level = -1;
 		break;
-	case TX_POWER_LIMITED:
-		if (dbm < 0)
-			return -EINVAL;
-		local->user_power_level = dbm;
+	case NL80211_TX_POWER_LIMITED:
+		if (mbm < 0 || (mbm % 100))
+			return -EOPNOTSUPP;
+		local->user_power_level = MBM_TO_DBM(mbm);
 		break;
-	case TX_POWER_FIXED:
-		if (dbm < 0)
-			return -EINVAL;
+	case NL80211_TX_POWER_FIXED:
+		if (mbm < 0 || (mbm % 100))
+			return -EOPNOTSUPP;
 		/* TODO: move to cfg80211 when it knows the channel */
-		if (dbm > chan->max_power)
+		if (MBM_TO_DBM(mbm) > chan->max_power)
 			return -EINVAL;
-		local->user_power_level = dbm;
+		local->user_power_level = MBM_TO_DBM(mbm);
 		break;
 	}
 
@@ -1345,7 +1409,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
 	 * association, there's no need to send an action frame.
 	 */
 	if (!sdata->u.mgd.associated ||
-	    sdata->local->oper_channel_type == NL80211_CHAN_NO_HT) {
+	    sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) {
 		mutex_lock(&sdata->local->iflist_mtx);
 		ieee80211_recalc_smps(sdata->local, sdata);
 		mutex_unlock(&sdata->local->iflist_mtx);
@@ -1375,7 +1439,6 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
-	struct ieee80211_conf *conf = &local->hw.conf;
 
 	if (sdata->vif.type != NL80211_IFTYPE_STATION)
 		return -EOPNOTSUPP;
@@ -1384,11 +1447,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 		return -EOPNOTSUPP;
 
 	if (enabled == sdata->u.mgd.powersave &&
-	    timeout == conf->dynamic_ps_timeout)
+	    timeout == local->dynamic_ps_forced_timeout)
 		return 0;
 
 	sdata->u.mgd.powersave = enabled;
-	conf->dynamic_ps_timeout = timeout;
+	local->dynamic_ps_forced_timeout = timeout;
 
 	/* no change, but if automatic follow powersave */
 	mutex_lock(&sdata->u.mgd.mtx);
@@ -1403,6 +1466,35 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 	return 0;
 }
 
+static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
+					 struct net_device *dev,
+					 s32 rssi_thold, u32 rssi_hyst)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_vif *vif = &sdata->vif;
+	struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+
+	if (rssi_thold == bss_conf->cqm_rssi_thold &&
+	    rssi_hyst == bss_conf->cqm_rssi_hyst)
+		return 0;
+
+	bss_conf->cqm_rssi_thold = rssi_thold;
+	bss_conf->cqm_rssi_hyst = rssi_hyst;
+
+	if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) {
+		if (sdata->vif.type != NL80211_IFTYPE_STATION)
+			return -EOPNOTSUPP;
+		return 0;
+	}
+
+	/* tell the driver upon association, unless already associated */
+	if (sdata->u.mgd.associated)
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM);
+
+	return 0;
+}
+
 static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
 				      struct net_device *dev,
 				      const u8 *addr,
@@ -1452,10 +1544,58 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
 static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
 			    struct ieee80211_channel *chan,
 			    enum nl80211_channel_type channel_type,
+			    bool channel_type_valid,
 			    const u8 *buf, size_t len, u64 *cookie)
 {
-	return ieee80211_mgd_action(IEEE80211_DEV_TO_SUB_IF(dev), chan,
-				    channel_type, buf, len, cookie);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = sdata->local;
+	struct sk_buff *skb;
+	struct sta_info *sta;
+	const struct ieee80211_mgmt *mgmt = (void *)buf;
+	u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX |
+		    IEEE80211_TX_CTL_REQ_TX_STATUS;
+
+	/* Check that we are on the requested channel for transmission */
+	if (chan != local->tmp_channel &&
+	    chan != local->oper_channel)
+		return -EBUSY;
+	if (channel_type_valid &&
+	    (channel_type != local->tmp_channel_type &&
+	     channel_type != local->_oper_channel_type))
+		return -EBUSY;
+
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_ADHOC:
+		if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC)
+			break;
+		rcu_read_lock();
+		sta = sta_info_get(sdata, mgmt->da);
+		rcu_read_unlock();
+		if (!sta)
+			return -ENOLINK;
+		break;
+	case NL80211_IFTYPE_STATION:
+		if (!(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED))
+			flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	skb = dev_alloc_skb(local->hw.extra_tx_headroom + len);
+	if (!skb)
+		return -ENOMEM;
+	skb_reserve(skb, local->hw.extra_tx_headroom);
+
+	memcpy(skb_put(skb, len), buf, len);
+
+	IEEE80211_SKB_CB(skb)->flags = flags;
+
+	skb->dev = sdata->dev;
+	ieee80211_tx_skb(sdata, skb);
+
+	*cookie = (unsigned long) skb;
+	return 0;
 }
 
 struct cfg80211_ops mac80211_config_ops = {
@@ -1475,6 +1615,7 @@ struct cfg80211_ops mac80211_config_ops = {
 	.change_station = ieee80211_change_station,
 	.get_station = ieee80211_get_station,
 	.dump_station = ieee80211_dump_station,
+	.dump_survey = ieee80211_dump_survey,
 #ifdef CONFIG_MAC80211_MESH
 	.add_mpath = ieee80211_add_mpath,
 	.del_mpath = ieee80211_del_mpath,
@@ -1507,4 +1648,5 @@ struct cfg80211_ops mac80211_config_ops = {
 	.remain_on_channel = ieee80211_remain_on_channel,
 	.cancel_remain_on_channel = ieee80211_cancel_remain_on_channel,
 	.action = ieee80211_action,
+	.set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
 };
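
One API detail in this file is easy to misread: set_tx_power now receives mBm (hundredths of a dBm) instead of dBm, and rejects sub-dBm steps rather than silently rounding. A small standalone check mirroring that validation (MBM_TO_DBM is, in effect, a divide by 100; the -95 return is a stand-in for -EOPNOTSUPP, for illustration only):

#include <stdio.h>

#define MBM_TO_DBM(mbm) ((mbm) / 100)

/* Mirrors the NL80211_TX_POWER_LIMITED/FIXED checks: negative values and
 * fractional dBm are refused before the level is stored in dBm. */
static int user_power_level(int mbm)
{
	if (mbm < 0 || (mbm % 100))
		return -95;			/* stand-in for -EOPNOTSUPP */
	return MBM_TO_DBM(mbm);
}

int main(void)
{
	printf("%d\n", user_power_level(2000));	/* 20 dBm, accepted */
	printf("%d\n", user_power_level(2050));	/* 20.5 dBm, rejected */
	return 0;
}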
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
new file mode 100644
index 000000000000..32be11e4c4d9
--- /dev/null
+++ b/net/mac80211/chan.c
@@ -0,0 +1,127 @@
+/*
+ * mac80211 - channel management
+ */
+
+#include <linux/nl80211.h>
+#include "ieee80211_i.h"
+
+static enum ieee80211_chan_mode
+__ieee80211_get_channel_mode(struct ieee80211_local *local,
+			     struct ieee80211_sub_if_data *ignore)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	WARN_ON(!mutex_is_locked(&local->iflist_mtx));
+
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		if (sdata == ignore)
+			continue;
+
+		if (!ieee80211_sdata_running(sdata))
+			continue;
+
+		if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
+			continue;
+
+		if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+		    !sdata->u.mgd.associated)
+			continue;
+
+		if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
+			if (!sdata->u.ibss.ssid_len)
+				continue;
+			if (!sdata->u.ibss.fixed_channel)
+				return CHAN_MODE_HOPPING;
+		}
+
+		if (sdata->vif.type == NL80211_IFTYPE_AP &&
+		    !sdata->u.ap.beacon)
+			continue;
+
+		return CHAN_MODE_FIXED;
+	}
+
+	return CHAN_MODE_UNDEFINED;
+}
+
+enum ieee80211_chan_mode
+ieee80211_get_channel_mode(struct ieee80211_local *local,
+			   struct ieee80211_sub_if_data *ignore)
+{
+	enum ieee80211_chan_mode mode;
+
+	mutex_lock(&local->iflist_mtx);
+	mode = __ieee80211_get_channel_mode(local, ignore);
+	mutex_unlock(&local->iflist_mtx);
+
+	return mode;
+}
+
+bool ieee80211_set_channel_type(struct ieee80211_local *local,
+				struct ieee80211_sub_if_data *sdata,
+				enum nl80211_channel_type chantype)
+{
+	struct ieee80211_sub_if_data *tmp;
+	enum nl80211_channel_type superchan = NL80211_CHAN_NO_HT;
+	bool result;
+
+	mutex_lock(&local->iflist_mtx);
+
+	list_for_each_entry(tmp, &local->interfaces, list) {
+		if (tmp == sdata)
+			continue;
+
+		if (!ieee80211_sdata_running(tmp))
+			continue;
+
+		switch (tmp->vif.bss_conf.channel_type) {
+		case NL80211_CHAN_NO_HT:
+		case NL80211_CHAN_HT20:
+			superchan = tmp->vif.bss_conf.channel_type;
+			break;
+		case NL80211_CHAN_HT40PLUS:
+			WARN_ON(superchan == NL80211_CHAN_HT40MINUS);
+			superchan = NL80211_CHAN_HT40PLUS;
+			break;
+		case NL80211_CHAN_HT40MINUS:
+			WARN_ON(superchan == NL80211_CHAN_HT40PLUS);
+			superchan = NL80211_CHAN_HT40MINUS;
+			break;
+		}
+	}
+
+	switch (superchan) {
+	case NL80211_CHAN_NO_HT:
+	case NL80211_CHAN_HT20:
+		/*
+		 * allow any change that doesn't go to no-HT
+		 * (if it already is no-HT no change is needed)
+		 */
+		if (chantype == NL80211_CHAN_NO_HT)
+			break;
+		superchan = chantype;
+		break;
+	case NL80211_CHAN_HT40PLUS:
+	case NL80211_CHAN_HT40MINUS:
+		/* allow smaller bandwidth and same */
+		if (chantype == NL80211_CHAN_NO_HT)
+			break;
+		if (chantype == NL80211_CHAN_HT20)
+			break;
+		if (superchan == chantype)
+			break;
+		result = false;
+		goto out;
+	}
+
+	local->_oper_channel_type = superchan;
+
+	if (sdata)
+		sdata->vif.bss_conf.channel_type = chantype;
+
+	result = true;
+ out:
+	mutex_unlock(&local->iflist_mtx);
+
+	return result;
+}
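
The merging rule in ieee80211_set_channel_type is worth restating: any interface may run no-HT or HT20 under a wider "superchannel", and the superchannel may widen, but HT40+ and HT40- are mutually exclusive because they place the secondary channel on opposite sides of the control channel. A compact userspace restatement of that compatibility check (illustrative, not the kernel function itself):

#include <stdbool.h>
#include <stdio.h>

enum chantype { NO_HT, HT20, HT40PLUS, HT40MINUS };

/* true if 'want' can coexist with the current superchannel 'super' */
static bool chantype_compat(enum chantype super, enum chantype want)
{
	if (want == NO_HT || want == HT20)
		return true;			/* narrower always fits */
	return super == NO_HT || super == HT20	/* may widen */
	       || super == want;		/* same HT40 side */
}

int main(void)
{
	printf("%d\n", chantype_compat(HT40PLUS, HT20));	/* 1 */
	printf("%d\n", chantype_compat(HT40PLUS, HT40MINUS));	/* 0 */
	return 0;
}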
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 637929b65ccc..a694c593ff6a 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -307,9 +307,6 @@ static const struct file_operations queues_ops = {
 
 /* statistics stuff */
 
-#define DEBUGFS_STATS_FILE(name, buflen, fmt, value...)	\
-	DEBUGFS_READONLY_FILE(stats_ ##name, buflen, fmt, ##value)
-
 static ssize_t format_devstat_counter(struct ieee80211_local *local,
 				char __user *userbuf,
 				size_t count, loff_t *ppos,
@@ -351,75 +348,16 @@ static const struct file_operations stats_ ##name## _ops = {	\
 	.open = mac80211_open_file_generic,			\
 };
 
-#define DEBUGFS_STATS_ADD(name)					\
+#define DEBUGFS_STATS_ADD(name, field)				\
+	debugfs_create_u32(#name, 0400, statsd, (u32 *) &field);
+#define DEBUGFS_DEVSTATS_ADD(name)				\
 	debugfs_create_file(#name, 0400, statsd, local, &stats_ ##name## _ops);
 
-DEBUGFS_STATS_FILE(transmitted_fragment_count, 20, "%u",
-		   local->dot11TransmittedFragmentCount);
-DEBUGFS_STATS_FILE(multicast_transmitted_frame_count, 20, "%u",
-		   local->dot11MulticastTransmittedFrameCount);
-DEBUGFS_STATS_FILE(failed_count, 20, "%u",
-		   local->dot11FailedCount);
-DEBUGFS_STATS_FILE(retry_count, 20, "%u",
-		   local->dot11RetryCount);
-DEBUGFS_STATS_FILE(multiple_retry_count, 20, "%u",
-		   local->dot11MultipleRetryCount);
-DEBUGFS_STATS_FILE(frame_duplicate_count, 20, "%u",
-		   local->dot11FrameDuplicateCount);
-DEBUGFS_STATS_FILE(received_fragment_count, 20, "%u",
-		   local->dot11ReceivedFragmentCount);
-DEBUGFS_STATS_FILE(multicast_received_frame_count, 20, "%u",
-		   local->dot11MulticastReceivedFrameCount);
-DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u",
-		   local->dot11TransmittedFrameCount);
-#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
-DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u",
-		   local->tx_handlers_drop);
-DEBUGFS_STATS_FILE(tx_handlers_queued, 20, "%u",
-		   local->tx_handlers_queued);
-DEBUGFS_STATS_FILE(tx_handlers_drop_unencrypted, 20, "%u",
-		   local->tx_handlers_drop_unencrypted);
-DEBUGFS_STATS_FILE(tx_handlers_drop_fragment, 20, "%u",
-		   local->tx_handlers_drop_fragment);
-DEBUGFS_STATS_FILE(tx_handlers_drop_wep, 20, "%u",
-		   local->tx_handlers_drop_wep);
-DEBUGFS_STATS_FILE(tx_handlers_drop_not_assoc, 20, "%u",
-		   local->tx_handlers_drop_not_assoc);
-DEBUGFS_STATS_FILE(tx_handlers_drop_unauth_port, 20, "%u",
-		   local->tx_handlers_drop_unauth_port);
-DEBUGFS_STATS_FILE(rx_handlers_drop, 20, "%u",
-		   local->rx_handlers_drop);
-DEBUGFS_STATS_FILE(rx_handlers_queued, 20, "%u",
-		   local->rx_handlers_queued);
-DEBUGFS_STATS_FILE(rx_handlers_drop_nullfunc, 20, "%u",
-		   local->rx_handlers_drop_nullfunc);
-DEBUGFS_STATS_FILE(rx_handlers_drop_defrag, 20, "%u",
-		   local->rx_handlers_drop_defrag);
-DEBUGFS_STATS_FILE(rx_handlers_drop_short, 20, "%u",
-		   local->rx_handlers_drop_short);
-DEBUGFS_STATS_FILE(rx_handlers_drop_passive_scan, 20, "%u",
-		   local->rx_handlers_drop_passive_scan);
-DEBUGFS_STATS_FILE(tx_expand_skb_head, 20, "%u",
-		   local->tx_expand_skb_head);
-DEBUGFS_STATS_FILE(tx_expand_skb_head_cloned, 20, "%u",
-		   local->tx_expand_skb_head_cloned);
-DEBUGFS_STATS_FILE(rx_expand_skb_head, 20, "%u",
-		   local->rx_expand_skb_head);
-DEBUGFS_STATS_FILE(rx_expand_skb_head2, 20, "%u",
-		   local->rx_expand_skb_head2);
-DEBUGFS_STATS_FILE(rx_handlers_fragments, 20, "%u",
-		   local->rx_handlers_fragments);
-DEBUGFS_STATS_FILE(tx_status_drop, 20, "%u",
-		   local->tx_status_drop);
-
-#endif
-
 DEBUGFS_DEVSTATS_FILE(dot11ACKFailureCount);
 DEBUGFS_DEVSTATS_FILE(dot11RTSFailureCount);
 DEBUGFS_DEVSTATS_FILE(dot11FCSErrorCount);
 DEBUGFS_DEVSTATS_FILE(dot11RTSSuccessCount);
 
-
 void debugfs_hw_add(struct ieee80211_local *local)
 {
 	struct dentry *phyd = local->hw.wiphy->debugfsdir;
@@ -448,38 +386,60 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	if (!statsd)
 		return;
 
-	DEBUGFS_STATS_ADD(transmitted_fragment_count);
-	DEBUGFS_STATS_ADD(multicast_transmitted_frame_count);
-	DEBUGFS_STATS_ADD(failed_count);
-	DEBUGFS_STATS_ADD(retry_count);
-	DEBUGFS_STATS_ADD(multiple_retry_count);
-	DEBUGFS_STATS_ADD(frame_duplicate_count);
-	DEBUGFS_STATS_ADD(received_fragment_count);
-	DEBUGFS_STATS_ADD(multicast_received_frame_count);
-	DEBUGFS_STATS_ADD(transmitted_frame_count);
+	DEBUGFS_STATS_ADD(transmitted_fragment_count,
+		local->dot11TransmittedFragmentCount);
+	DEBUGFS_STATS_ADD(multicast_transmitted_frame_count,
+		local->dot11MulticastTransmittedFrameCount);
+	DEBUGFS_STATS_ADD(failed_count, local->dot11FailedCount);
+	DEBUGFS_STATS_ADD(retry_count, local->dot11RetryCount);
+	DEBUGFS_STATS_ADD(multiple_retry_count,
+		local->dot11MultipleRetryCount);
+	DEBUGFS_STATS_ADD(frame_duplicate_count,
+		local->dot11FrameDuplicateCount);
+	DEBUGFS_STATS_ADD(received_fragment_count,
+		local->dot11ReceivedFragmentCount);
+	DEBUGFS_STATS_ADD(multicast_received_frame_count,
+		local->dot11MulticastReceivedFrameCount);
+	DEBUGFS_STATS_ADD(transmitted_frame_count,
+		local->dot11TransmittedFrameCount);
 #ifdef CONFIG_MAC80211_DEBUG_COUNTERS
-	DEBUGFS_STATS_ADD(tx_handlers_drop);
-	DEBUGFS_STATS_ADD(tx_handlers_queued);
-	DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted);
-	DEBUGFS_STATS_ADD(tx_handlers_drop_fragment);
-	DEBUGFS_STATS_ADD(tx_handlers_drop_wep);
-	DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc);
-	DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port);
-	DEBUGFS_STATS_ADD(rx_handlers_drop);
-	DEBUGFS_STATS_ADD(rx_handlers_queued);
-	DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc);
-	DEBUGFS_STATS_ADD(rx_handlers_drop_defrag);
-	DEBUGFS_STATS_ADD(rx_handlers_drop_short);
-	DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan);
-	DEBUGFS_STATS_ADD(tx_expand_skb_head);
-	DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned);
-	DEBUGFS_STATS_ADD(rx_expand_skb_head);
-	DEBUGFS_STATS_ADD(rx_expand_skb_head2);
-	DEBUGFS_STATS_ADD(rx_handlers_fragments);
-	DEBUGFS_STATS_ADD(tx_status_drop);
+	DEBUGFS_STATS_ADD(tx_handlers_drop, local->tx_handlers_drop);
+	DEBUGFS_STATS_ADD(tx_handlers_queued, local->tx_handlers_queued);
+	DEBUGFS_STATS_ADD(tx_handlers_drop_unencrypted,
+		local->tx_handlers_drop_unencrypted);
+	DEBUGFS_STATS_ADD(tx_handlers_drop_fragment,
+		local->tx_handlers_drop_fragment);
+	DEBUGFS_STATS_ADD(tx_handlers_drop_wep,
+		local->tx_handlers_drop_wep);
+	DEBUGFS_STATS_ADD(tx_handlers_drop_not_assoc,
+		local->tx_handlers_drop_not_assoc);
+	DEBUGFS_STATS_ADD(tx_handlers_drop_unauth_port,
+		local->tx_handlers_drop_unauth_port);
+	DEBUGFS_STATS_ADD(rx_handlers_drop, local->rx_handlers_drop);
+	DEBUGFS_STATS_ADD(rx_handlers_queued, local->rx_handlers_queued);
+	DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc,
+		local->rx_handlers_drop_nullfunc);
+	DEBUGFS_STATS_ADD(rx_handlers_drop_defrag,
+		local->rx_handlers_drop_defrag);
+	DEBUGFS_STATS_ADD(rx_handlers_drop_short,
+		local->rx_handlers_drop_short);
+	DEBUGFS_STATS_ADD(rx_handlers_drop_passive_scan,
+		local->rx_handlers_drop_passive_scan);
+	DEBUGFS_STATS_ADD(tx_expand_skb_head,
+		local->tx_expand_skb_head);
+	DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned,
+		local->tx_expand_skb_head_cloned);
+	DEBUGFS_STATS_ADD(rx_expand_skb_head,
+		local->rx_expand_skb_head);
+	DEBUGFS_STATS_ADD(rx_expand_skb_head2,
+		local->rx_expand_skb_head2);
+	DEBUGFS_STATS_ADD(rx_handlers_fragments,
+		local->rx_handlers_fragments);
+	DEBUGFS_STATS_ADD(tx_status_drop,
+		local->tx_status_drop);
 #endif
-	DEBUGFS_STATS_ADD(dot11ACKFailureCount);
-	DEBUGFS_STATS_ADD(dot11RTSFailureCount);
-	DEBUGFS_STATS_ADD(dot11FCSErrorCount);
-	DEBUGFS_STATS_ADD(dot11RTSSuccessCount);
+	DEBUGFS_DEVSTATS_ADD(dot11ACKFailureCount);
+	DEBUGFS_DEVSTATS_ADD(dot11RTSFailureCount);
+	DEBUGFS_DEVSTATS_ADD(dot11FCSErrorCount);
+	DEBUGFS_DEVSTATS_ADD(dot11RTSSuccessCount);
 }
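
After this change the software statistics are plain u32 files created with debugfs_create_u32(), so they read like any other debugfs attribute; only the device-side dot11 counters keep custom file ops. A throwaway reader, assuming debugfs is mounted at /sys/kernel/debug and a phy0 device exists (both parts of the path are illustrative):

#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/ieee80211/phy0/statistics/failed_count";
	char buf[32];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("dot11FailedCount: %s", buf);
	fclose(f);
	return 0;
}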
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 68e6a2050f9a..09cc9be34796 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h
@@ -7,7 +7,6 @@ extern int mac80211_open_file_generic(struct inode *inode, struct file *file);
 #else
 static inline void debugfs_hw_add(struct ieee80211_local *local)
 {
-	return;
 }
 #endif
 
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 97c9e46e859e..fa5e76e658ef 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -143,7 +143,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
 		len = p - buf;
 		break;
 	case ALG_CCMP:
-		for (i = 0; i < NUM_RX_DATA_QUEUES; i++) {
+		for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) {
 			rpn = key->u.ccmp.rx_pn[i];
 			p += scnprintf(p, sizeof(buf)+buf-p,
 				       "%02x%02x%02x%02x%02x%02x\n",
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 83d4289d954b..20b2998fa0ed 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -100,6 +100,14 @@ static ssize_t ieee80211_if_fmt_##name(				\
 	return scnprintf(buf, buflen, "%pM\n", sdata->field);		\
 }
 
+#define IEEE80211_IF_FMT_DEC_DIV_16(name, field)			\
+static ssize_t ieee80211_if_fmt_##name(				\
+	const struct ieee80211_sub_if_data *sdata,			\
+	char *buf, int buflen)						\
+{									\
+	return scnprintf(buf, buflen, "%d\n", sdata->field / 16);	\
+}
+
 #define __IEEE80211_IF_FILE(name, _write)				\
 static ssize_t ieee80211_if_read_##name(struct file *file,		\
 					char __user *userbuf,		\
@@ -140,6 +148,8 @@ IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ],
 /* STA attributes */
 IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
 IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
+IEEE80211_IF_FILE(last_beacon, u.mgd.last_beacon_signal, DEC);
+IEEE80211_IF_FILE(ave_beacon, u.mgd.ave_beacon_signal, DEC_DIV_16);
 
 static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
 			      enum ieee80211_smps_mode smps_mode)
@@ -276,6 +286,8 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 
 	DEBUGFS_ADD(bssid);
 	DEBUGFS_ADD(aid);
+	DEBUGFS_ADD(last_beacon);
+	DEBUGFS_ADD(ave_beacon);
 	DEBUGFS_ADD_MODE(smps, 0600);
 }
 
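
ave_beacon_signal is kept in 1/16 dB fixed point, which is why the new DEC_DIV_16 formatter divides by 16 on read. A toy model of such a running average (the 1/8 sample weight below is an assumption for illustration, not the kernel's constant):

#include <stdio.h>

static int ave;				/* average signal, 1/16 dB units */

static void beacon_sample(int sig_dbm)
{
	if (!ave)
		ave = sig_dbm * 16;	/* first beacon seeds the average */
	else
		ave += sig_dbm * 2 - ave / 8;	/* EWMA, 1/8 sample weight */
}

int main(void)
{
	beacon_sample(-50);
	beacon_sample(-60);
	beacon_sample(-60);
	printf("%d\n", ave / 16);	/* what the debugfs file would print */
	return 0;
}

Keeping the average scaled by 16 preserves fractional-dB precision across updates while the exported file still shows whole dB.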
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index d92800bb2d2f..76839d4dfaac 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -30,7 +30,6 @@ static ssize_t sta_ ##name## _read(struct file *file,			\
 }
 #define STA_READ_D(name, field) STA_READ(name, 20, field, "%d\n")
 #define STA_READ_U(name, field) STA_READ(name, 20, field, "%u\n")
-#define STA_READ_LU(name, field) STA_READ(name, 20, field, "%lu\n")
 #define STA_READ_S(name, field) STA_READ(name, 20, field, "%s\n")
 
 #define STA_OPS(name)						\
@@ -39,26 +38,20 @@ static const struct file_operations sta_ ##name## _ops = {	\
 	.open = mac80211_open_file_generic,			\
 }
 
+#define STA_OPS_RW(name)					\
+static const struct file_operations sta_ ##name## _ops = {	\
+	.read = sta_##name##_read,				\
+	.write = sta_##name##_write,				\
+	.open = mac80211_open_file_generic,			\
+}
+
 #define STA_FILE(name, field, format)				\
 		STA_READ_##format(name, field)			\
 		STA_OPS(name)
 
 STA_FILE(aid, sta.aid, D);
 STA_FILE(dev, sdata->name, S);
-STA_FILE(rx_packets, rx_packets, LU);
-STA_FILE(tx_packets, tx_packets, LU);
-STA_FILE(rx_bytes, rx_bytes, LU);
-STA_FILE(tx_bytes, tx_bytes, LU);
-STA_FILE(rx_duplicates, num_duplicates, LU);
-STA_FILE(rx_fragments, rx_fragments, LU);
-STA_FILE(rx_dropped, rx_dropped, LU);
-STA_FILE(tx_fragments, tx_fragments, LU);
-STA_FILE(tx_filtered, tx_filtered_count, LU);
-STA_FILE(tx_retry_failed, tx_retry_failed, LU);
-STA_FILE(tx_retry_count, tx_retry_count, LU);
 STA_FILE(last_signal, last_signal, D);
-STA_FILE(last_noise, last_noise, D);
-STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU);
 
 static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
 			      size_t count, loff_t *ppos)
@@ -120,7 +113,7 @@ STA_OPS(last_seq_ctrl);
 static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 					size_t count, loff_t *ppos)
 {
-	char buf[64 + STA_TID_NUM * 40], *p = buf;
+	char buf[71 + STA_TID_NUM * 40], *p = buf;
 	int i;
 	struct sta_info *sta = file->private_data;
 
@@ -128,28 +121,25 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 	p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n",
 			sta->ampdu_mlme.dialog_token_allocator + 1);
 	p += scnprintf(p, sizeof(buf) + buf - p,
-		       "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n");
+		       "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
 	for (i = 0; i < STA_TID_NUM; i++) {
 		p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
-				sta->ampdu_mlme.tid_state_rx[i]);
+				!!sta->ampdu_mlme.tid_rx[i]);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
-				sta->ampdu_mlme.tid_state_rx[i] ?
+				sta->ampdu_mlme.tid_rx[i] ?
 				sta->ampdu_mlme.tid_rx[i]->dialog_token : 0);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x",
-				sta->ampdu_mlme.tid_state_rx[i] ?
+				sta->ampdu_mlme.tid_rx[i] ?
 				sta->ampdu_mlme.tid_rx[i]->ssn : 0);
 
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
-				sta->ampdu_mlme.tid_state_tx[i]);
+				!!sta->ampdu_mlme.tid_tx[i]);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
-				sta->ampdu_mlme.tid_state_tx[i] ?
+				sta->ampdu_mlme.tid_tx[i] ?
 				sta->ampdu_mlme.tid_tx[i]->dialog_token : 0);
-		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x",
-				sta->ampdu_mlme.tid_state_tx[i] ?
-				sta->ampdu_mlme.tid_tx[i]->ssn : 0);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%03d",
-				sta->ampdu_mlme.tid_state_tx[i] ?
+				sta->ampdu_mlme.tid_tx[i] ?
 				skb_queue_len(&sta->ampdu_mlme.tid_tx[i]->pending) : 0);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\n");
 	}
@@ -157,7 +147,62 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 
 	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
 }
-STA_OPS(agg_status);
+
+static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf,
+				    size_t count, loff_t *ppos)
+{
+	char _buf[12], *buf = _buf;
+	struct sta_info *sta = file->private_data;
+	bool start, tx;
+	unsigned long tid;
+	int ret;
+
+	if (count > sizeof(_buf))
+		return -EINVAL;
+
+	if (copy_from_user(buf, userbuf, count))
+		return -EFAULT;
+
+	buf[sizeof(_buf) - 1] = '\0';
+
+	if (strncmp(buf, "tx ", 3) == 0) {
+		buf += 3;
+		tx = true;
+	} else if (strncmp(buf, "rx ", 3) == 0) {
+		buf += 3;
+		tx = false;
+	} else
+		return -EINVAL;
+
+	if (strncmp(buf, "start ", 6) == 0) {
+		buf += 6;
+		start = true;
+		if (!tx)
+			return -EINVAL;
+	} else if (strncmp(buf, "stop ", 5) == 0) {
+		buf += 5;
+		start = false;
+	} else
+		return -EINVAL;
+
+	tid = simple_strtoul(buf, NULL, 0);
+
+	if (tid >= STA_TID_NUM)
+		return -EINVAL;
+
+	if (tx) {
+		if (start)
+			ret = ieee80211_start_tx_ba_session(&sta->sta, tid);
+		else
+			ret = ieee80211_stop_tx_ba_session(&sta->sta, tid);
+	} else {
+		__ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3);
+		ret = 0;
+	}
+
+	return ret ?: count;
+}
+STA_OPS_RW(agg_status);
 
 static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
 				size_t count, loff_t *ppos)
@@ -177,7 +222,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
 	if (htc->ht_supported) {
 		p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.4x\n", htc->cap);
 
-		PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDCP");
+		PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDPC");
 		PRINT_HT_CAP((htc->cap & BIT(1)), "HT20/HT40");
 		PRINT_HT_CAP(!(htc->cap & BIT(1)), "HT20");
 
@@ -245,6 +290,13 @@ STA_OPS(ht_capa);
 	debugfs_create_file(#name, 0400, \
 		sta->debugfs.dir, sta, &sta_ ##name## _ops);
 
+#define DEBUGFS_ADD_COUNTER(name, field)				\
+	if (sizeof(sta->field) == sizeof(u32))				\
+		debugfs_create_u32(#name, 0400, sta->debugfs.dir,	\
+			(u32 *) &sta->field);				\
+	else								\
+		debugfs_create_u64(#name, 0400, sta->debugfs.dir,	\
+			(u64 *) &sta->field);
 
 void ieee80211_sta_debugfs_add(struct sta_info *sta)
 {
@@ -277,21 +329,21 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
 	DEBUGFS_ADD(last_seq_ctrl);
 	DEBUGFS_ADD(agg_status);
 	DEBUGFS_ADD(dev);
-	DEBUGFS_ADD(rx_packets);
-	DEBUGFS_ADD(tx_packets);
-	DEBUGFS_ADD(rx_bytes);
-	DEBUGFS_ADD(tx_bytes);
-	DEBUGFS_ADD(rx_duplicates);
-	DEBUGFS_ADD(rx_fragments);
-	DEBUGFS_ADD(rx_dropped);
-	DEBUGFS_ADD(tx_fragments);
-	DEBUGFS_ADD(tx_filtered);
-	DEBUGFS_ADD(tx_retry_failed);
-	DEBUGFS_ADD(tx_retry_count);
 	DEBUGFS_ADD(last_signal);
-	DEBUGFS_ADD(last_noise);
-	DEBUGFS_ADD(wep_weak_iv_count);
 	DEBUGFS_ADD(ht_capa);
+
+	DEBUGFS_ADD_COUNTER(rx_packets, rx_packets);
+	DEBUGFS_ADD_COUNTER(tx_packets, tx_packets);
+	DEBUGFS_ADD_COUNTER(rx_bytes, rx_bytes);
+	DEBUGFS_ADD_COUNTER(tx_bytes, tx_bytes);
+	DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates);
+	DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments);
+	DEBUGFS_ADD_COUNTER(rx_dropped, rx_dropped);
+	DEBUGFS_ADD_COUNTER(tx_fragments, tx_fragments);
+	DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count);
+	DEBUGFS_ADD_COUNTER(tx_retry_failed, tx_retry_failed);
+	DEBUGFS_ADD_COUNTER(tx_retry_count, tx_retry_count);
+	DEBUGFS_ADD_COUNTER(wep_weak_iv_count, wep_weak_iv_count);
 }
 
 void ieee80211_sta_debugfs_remove(struct sta_info *sta)
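
The new write handler accepts the commands "tx start <tid>", "tx stop <tid>" and "rx stop <tid>" ("rx start" is rejected, since an RX session can only be torn down from this side, not initiated). Driving it from userspace, with an illustrative station path:

#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/kernel/debug/ieee80211/phy0/netdev:wlan0/"
		"stations/00:11:22:33:44:55/agg_status";	/* illustrative */
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputs("tx start 0", f);	/* request a TX BA session on TID 0 */
	fclose(f);
	return 0;
}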
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index c3d844093a2f..14123dce544b 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -16,10 +16,11 @@ static inline int drv_start(struct ieee80211_local *local)
16 16
17 might_sleep(); 17 might_sleep();
18 18
19 trace_drv_start(local);
19 local->started = true; 20 local->started = true;
20 smp_mb(); 21 smp_mb();
21 ret = local->ops->start(&local->hw); 22 ret = local->ops->start(&local->hw);
22 trace_drv_start(local, ret); 23 trace_drv_return_int(local, ret);
23 return ret; 24 return ret;
24} 25}
25 26
@@ -27,8 +28,9 @@ static inline void drv_stop(struct ieee80211_local *local)
27{ 28{
28 might_sleep(); 29 might_sleep();
29 30
30 local->ops->stop(&local->hw);
31 trace_drv_stop(local); 31 trace_drv_stop(local);
32 local->ops->stop(&local->hw);
33 trace_drv_return_void(local);
32 34
33 /* sync away all work on the tasklet before clearing started */ 35 /* sync away all work on the tasklet before clearing started */
34 tasklet_disable(&local->tasklet); 36 tasklet_disable(&local->tasklet);
@@ -46,8 +48,9 @@ static inline int drv_add_interface(struct ieee80211_local *local,
46 48
47 might_sleep(); 49 might_sleep();
48 50
51 trace_drv_add_interface(local, vif_to_sdata(vif));
49 ret = local->ops->add_interface(&local->hw, vif); 52 ret = local->ops->add_interface(&local->hw, vif);
50 trace_drv_add_interface(local, vif_to_sdata(vif), ret); 53 trace_drv_return_int(local, ret);
51 return ret; 54 return ret;
52} 55}
53 56
@@ -56,8 +59,9 @@ static inline void drv_remove_interface(struct ieee80211_local *local,
56{ 59{
57 might_sleep(); 60 might_sleep();
58 61
59 local->ops->remove_interface(&local->hw, vif);
60 trace_drv_remove_interface(local, vif_to_sdata(vif)); 62 trace_drv_remove_interface(local, vif_to_sdata(vif));
63 local->ops->remove_interface(&local->hw, vif);
64 trace_drv_return_void(local);
61} 65}
62 66
63static inline int drv_config(struct ieee80211_local *local, u32 changed) 67static inline int drv_config(struct ieee80211_local *local, u32 changed)
@@ -66,8 +70,9 @@ static inline int drv_config(struct ieee80211_local *local, u32 changed)
66 70
67 might_sleep(); 71 might_sleep();
68 72
73 trace_drv_config(local, changed);
69 ret = local->ops->config(&local->hw, changed); 74 ret = local->ops->config(&local->hw, changed);
70 trace_drv_config(local, changed, ret); 75 trace_drv_return_int(local, ret);
71 return ret; 76 return ret;
72} 77}
73 78
@@ -78,22 +83,23 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
78{ 83{
79 might_sleep(); 84 might_sleep();
80 85
86 trace_drv_bss_info_changed(local, sdata, info, changed);
81 if (local->ops->bss_info_changed) 87 if (local->ops->bss_info_changed)
82 local->ops->bss_info_changed(&local->hw, &sdata->vif, info, changed); 88 local->ops->bss_info_changed(&local->hw, &sdata->vif, info, changed);
83 trace_drv_bss_info_changed(local, sdata, info, changed); 89 trace_drv_return_void(local);
84} 90}
85 91
86static inline u64 drv_prepare_multicast(struct ieee80211_local *local, 92static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
87 int mc_count, 93 struct netdev_hw_addr_list *mc_list)
88 struct dev_addr_list *mc_list)
89{ 94{
90 u64 ret = 0; 95 u64 ret = 0;
91 96
97 trace_drv_prepare_multicast(local, mc_list->count);
98
92 if (local->ops->prepare_multicast) 99 if (local->ops->prepare_multicast)
93 ret = local->ops->prepare_multicast(&local->hw, mc_count, 100 ret = local->ops->prepare_multicast(&local->hw, mc_list);
94 mc_list);
95 101
96 trace_drv_prepare_multicast(local, mc_count, ret); 102 trace_drv_return_u64(local, ret);
97 103
98 return ret; 104 return ret;
99} 105}
@@ -105,19 +111,21 @@ static inline void drv_configure_filter(struct ieee80211_local *local,
105{ 111{
106 might_sleep(); 112 might_sleep();
107 113
108 local->ops->configure_filter(&local->hw, changed_flags, total_flags,
109 multicast);
110 trace_drv_configure_filter(local, changed_flags, total_flags, 114 trace_drv_configure_filter(local, changed_flags, total_flags,
111 multicast); 115 multicast);
116 local->ops->configure_filter(&local->hw, changed_flags, total_flags,
117 multicast);
118 trace_drv_return_void(local);
112} 119}
113 120
114static inline int drv_set_tim(struct ieee80211_local *local, 121static inline int drv_set_tim(struct ieee80211_local *local,
115 struct ieee80211_sta *sta, bool set) 122 struct ieee80211_sta *sta, bool set)
116{ 123{
117 int ret = 0; 124 int ret = 0;
125 trace_drv_set_tim(local, sta, set);
118 if (local->ops->set_tim) 126 if (local->ops->set_tim)
119 ret = local->ops->set_tim(&local->hw, sta, set); 127 ret = local->ops->set_tim(&local->hw, sta, set);
120 trace_drv_set_tim(local, sta, set, ret); 128 trace_drv_return_int(local, ret);
121 return ret; 129 return ret;
122} 130}
123 131
@@ -131,8 +139,9 @@ static inline int drv_set_key(struct ieee80211_local *local,
131 139
132 might_sleep(); 140 might_sleep();
133 141
142 trace_drv_set_key(local, cmd, sdata, sta, key);
134 ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); 143 ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key);
135 trace_drv_set_key(local, cmd, sdata, sta, key, ret); 144 trace_drv_return_int(local, ret);
136 return ret; 145 return ret;
137} 146}
138 147
@@ -147,21 +156,24 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local,
147 if (sta) 156 if (sta)
148 ista = &sta->sta; 157 ista = &sta->sta;
149 158
159 trace_drv_update_tkip_key(local, sdata, conf, ista, iv32);
150 if (local->ops->update_tkip_key) 160 if (local->ops->update_tkip_key)
151 local->ops->update_tkip_key(&local->hw, &sdata->vif, conf, 161 local->ops->update_tkip_key(&local->hw, &sdata->vif, conf,
152 ista, iv32, phase1key); 162 ista, iv32, phase1key);
153 trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); 163 trace_drv_return_void(local);
154} 164}
155 165
156static inline int drv_hw_scan(struct ieee80211_local *local, 166static inline int drv_hw_scan(struct ieee80211_local *local,
167 struct ieee80211_sub_if_data *sdata,
157 struct cfg80211_scan_request *req) 168 struct cfg80211_scan_request *req)
158{ 169{
159 int ret; 170 int ret;
160 171
161 might_sleep(); 172 might_sleep();
162 173
163 ret = local->ops->hw_scan(&local->hw, req); 174 trace_drv_hw_scan(local, sdata, req);
164 trace_drv_hw_scan(local, req, ret); 175 ret = local->ops->hw_scan(&local->hw, &sdata->vif, req);
176 trace_drv_return_int(local, ret);
165 return ret; 177 return ret;
166} 178}
167 179
@@ -169,18 +181,20 @@ static inline void drv_sw_scan_start(struct ieee80211_local *local)
169{ 181{
170 might_sleep(); 182 might_sleep();
171 183
184 trace_drv_sw_scan_start(local);
172 if (local->ops->sw_scan_start) 185 if (local->ops->sw_scan_start)
173 local->ops->sw_scan_start(&local->hw); 186 local->ops->sw_scan_start(&local->hw);
174 trace_drv_sw_scan_start(local); 187 trace_drv_return_void(local);
175} 188}
176 189
177static inline void drv_sw_scan_complete(struct ieee80211_local *local) 190static inline void drv_sw_scan_complete(struct ieee80211_local *local)
178{ 191{
179 might_sleep(); 192 might_sleep();
180 193
194 trace_drv_sw_scan_complete(local);
181 if (local->ops->sw_scan_complete) 195 if (local->ops->sw_scan_complete)
182 local->ops->sw_scan_complete(&local->hw); 196 local->ops->sw_scan_complete(&local->hw);
183 trace_drv_sw_scan_complete(local); 197 trace_drv_return_void(local);
184} 198}
185 199
186static inline int drv_get_stats(struct ieee80211_local *local, 200static inline int drv_get_stats(struct ieee80211_local *local,
@@ -212,9 +226,10 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local,
212 226
213 might_sleep(); 227 might_sleep();
214 228
229 trace_drv_set_rts_threshold(local, value);
215 if (local->ops->set_rts_threshold) 230 if (local->ops->set_rts_threshold)
216 ret = local->ops->set_rts_threshold(&local->hw, value); 231 ret = local->ops->set_rts_threshold(&local->hw, value);
217 trace_drv_set_rts_threshold(local, value, ret); 232 trace_drv_return_int(local, ret);
218 return ret; 233 return ret;
219} 234}
220 235
@@ -224,12 +239,13 @@ static inline int drv_set_coverage_class(struct ieee80211_local *local,
224 int ret = 0; 239 int ret = 0;
225 might_sleep(); 240 might_sleep();
226 241
242 trace_drv_set_coverage_class(local, value);
227 if (local->ops->set_coverage_class) 243 if (local->ops->set_coverage_class)
228 local->ops->set_coverage_class(&local->hw, value); 244 local->ops->set_coverage_class(&local->hw, value);
229 else 245 else
230 ret = -EOPNOTSUPP; 246 ret = -EOPNOTSUPP;
231 247
232 trace_drv_set_coverage_class(local, value, ret); 248 trace_drv_return_int(local, ret);
233 return ret; 249 return ret;
234} 250}
235 251
@@ -238,9 +254,10 @@ static inline void drv_sta_notify(struct ieee80211_local *local,
238 enum sta_notify_cmd cmd, 254 enum sta_notify_cmd cmd,
239 struct ieee80211_sta *sta) 255 struct ieee80211_sta *sta)
240{ 256{
257 trace_drv_sta_notify(local, sdata, cmd, sta);
241 if (local->ops->sta_notify) 258 if (local->ops->sta_notify)
242 local->ops->sta_notify(&local->hw, &sdata->vif, cmd, sta); 259 local->ops->sta_notify(&local->hw, &sdata->vif, cmd, sta);
243 trace_drv_sta_notify(local, sdata, cmd, sta); 260 trace_drv_return_void(local);
244} 261}
245 262
246static inline int drv_sta_add(struct ieee80211_local *local, 263static inline int drv_sta_add(struct ieee80211_local *local,
@@ -251,13 +268,11 @@ static inline int drv_sta_add(struct ieee80211_local *local,
251 268
252 might_sleep(); 269 might_sleep();
253 270
271 trace_drv_sta_add(local, sdata, sta);
254 if (local->ops->sta_add) 272 if (local->ops->sta_add)
255 ret = local->ops->sta_add(&local->hw, &sdata->vif, sta); 273 ret = local->ops->sta_add(&local->hw, &sdata->vif, sta);
256 else if (local->ops->sta_notify)
257 local->ops->sta_notify(&local->hw, &sdata->vif,
258 STA_NOTIFY_ADD, sta);
259 274
260 trace_drv_sta_add(local, sdata, sta, ret); 275 trace_drv_return_int(local, ret);
261 276
262 return ret; 277 return ret;
263} 278}
@@ -268,13 +283,11 @@ static inline void drv_sta_remove(struct ieee80211_local *local,
268{ 283{
269 might_sleep(); 284 might_sleep();
270 285
286 trace_drv_sta_remove(local, sdata, sta);
271 if (local->ops->sta_remove) 287 if (local->ops->sta_remove)
272 local->ops->sta_remove(&local->hw, &sdata->vif, sta); 288 local->ops->sta_remove(&local->hw, &sdata->vif, sta);
273 else if (local->ops->sta_notify)
274 local->ops->sta_notify(&local->hw, &sdata->vif,
275 STA_NOTIFY_REMOVE, sta);
276 289
277 trace_drv_sta_remove(local, sdata, sta); 290 trace_drv_return_void(local);
278} 291}
279 292
280static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue, 293static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue,
@@ -284,9 +297,10 @@ static inline int drv_conf_tx(struct ieee80211_local *local, u16 queue,
284 297
285 might_sleep(); 298 might_sleep();
286 299
300 trace_drv_conf_tx(local, queue, params);
287 if (local->ops->conf_tx) 301 if (local->ops->conf_tx)
288 ret = local->ops->conf_tx(&local->hw, queue, params); 302 ret = local->ops->conf_tx(&local->hw, queue, params);
289 trace_drv_conf_tx(local, queue, params, ret); 303 trace_drv_return_int(local, ret);
290 return ret; 304 return ret;
291} 305}
292 306
@@ -296,9 +310,10 @@ static inline u64 drv_get_tsf(struct ieee80211_local *local)
296 310
297 might_sleep(); 311 might_sleep();
298 312
313 trace_drv_get_tsf(local);
299 if (local->ops->get_tsf) 314 if (local->ops->get_tsf)
300 ret = local->ops->get_tsf(&local->hw); 315 ret = local->ops->get_tsf(&local->hw);
301 trace_drv_get_tsf(local, ret); 316 trace_drv_return_u64(local, ret);
302 return ret; 317 return ret;
303} 318}
304 319
@@ -306,18 +321,20 @@ static inline void drv_set_tsf(struct ieee80211_local *local, u64 tsf)
306{ 321{
307 might_sleep(); 322 might_sleep();
308 323
324 trace_drv_set_tsf(local, tsf);
309 if (local->ops->set_tsf) 325 if (local->ops->set_tsf)
310 local->ops->set_tsf(&local->hw, tsf); 326 local->ops->set_tsf(&local->hw, tsf);
311 trace_drv_set_tsf(local, tsf); 327 trace_drv_return_void(local);
312} 328}
313 329
314static inline void drv_reset_tsf(struct ieee80211_local *local) 330static inline void drv_reset_tsf(struct ieee80211_local *local)
315{ 331{
316 might_sleep(); 332 might_sleep();
317 333
334 trace_drv_reset_tsf(local);
318 if (local->ops->reset_tsf) 335 if (local->ops->reset_tsf)
319 local->ops->reset_tsf(&local->hw); 336 local->ops->reset_tsf(&local->hw);
320 trace_drv_reset_tsf(local); 337 trace_drv_return_void(local);
321} 338}
322 339
323static inline int drv_tx_last_beacon(struct ieee80211_local *local) 340static inline int drv_tx_last_beacon(struct ieee80211_local *local)
@@ -326,9 +343,10 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local)
326 343
327 might_sleep(); 344 might_sleep();
328 345
346 trace_drv_tx_last_beacon(local);
329 if (local->ops->tx_last_beacon) 347 if (local->ops->tx_last_beacon)
330 ret = local->ops->tx_last_beacon(&local->hw); 348 ret = local->ops->tx_last_beacon(&local->hw);
331 trace_drv_tx_last_beacon(local, ret); 349 trace_drv_return_int(local, ret);
332 return ret; 350 return ret;
333} 351}
334 352
@@ -339,13 +357,34 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
339 u16 *ssn) 357 u16 *ssn)
340{ 358{
341 int ret = -EOPNOTSUPP; 359 int ret = -EOPNOTSUPP;
360
361 might_sleep();
362
363 trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn);
364
342 if (local->ops->ampdu_action) 365 if (local->ops->ampdu_action)
343 ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, 366 ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action,
344 sta, tid, ssn); 367 sta, tid, ssn);
345 trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, ret); 368
369 trace_drv_return_int(local, ret);
370
346 return ret; 371 return ret;
347} 372}
348 373
374static inline int drv_get_survey(struct ieee80211_local *local, int idx,
375 struct survey_info *survey)
376{
377 int ret = -EOPNOTSUPP;
378
379 trace_drv_get_survey(local, idx, survey);
380
381 if (local->ops->get_survey)
382 ret = local->ops->get_survey(&local->hw, idx, survey);
383
384 trace_drv_return_int(local, ret);
385
386 return ret;
387}
349 388
350static inline void drv_rfkill_poll(struct ieee80211_local *local) 389static inline void drv_rfkill_poll(struct ieee80211_local *local)
351{ 390{
@@ -362,5 +401,17 @@ static inline void drv_flush(struct ieee80211_local *local, bool drop)
362 trace_drv_flush(local, drop); 401 trace_drv_flush(local, drop);
363 if (local->ops->flush) 402 if (local->ops->flush)
364 local->ops->flush(&local->hw, drop); 403 local->ops->flush(&local->hw, drop);
404 trace_drv_return_void(local);
365} 405}
406
407static inline void drv_channel_switch(struct ieee80211_local *local,
408 struct ieee80211_channel_switch *ch_switch)
409{
410 might_sleep();
411
412 trace_drv_channel_switch(local, ch_switch);
413 local->ops->channel_switch(&local->hw, ch_switch);
414 trace_drv_return_void(local);
415}
416
366#endif /* __MAC80211_DRIVER_OPS */ 417#endif /* __MAC80211_DRIVER_OPS */
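Editorial note: every driver-ops.h hunk above applies the same shape: emit the call tracepoint before invoking the driver callback, then emit one of the shared drv_return_* tracepoints with the result afterwards, so the return value no longer has to be threaded into each per-call tracepoint. A minimal user-space sketch of that wrapper shape, with the tracepoints stubbed out as printf and all names illustrative rather than the mac80211 API:

#include <stdio.h>

/* stand-ins for the real tracepoints */
static void trace_drv_conf_tx(int queue) { printf("drv_conf_tx queue:%d\n", queue); }
static void trace_drv_return_int(int ret) { printf("drv_return_int - %d\n", ret); }

struct ops { int (*conf_tx)(int queue); };

/* wrapper: trace the call first, the (shared) return event last */
static int drv_conf_tx(const struct ops *ops, int queue)
{
	int ret = -95; /* stand-in for -EOPNOTSUPP when the driver lacks the hook */

	trace_drv_conf_tx(queue);
	if (ops->conf_tx)
		ret = ops->conf_tx(queue);
	trace_drv_return_int(ret);
	return ret;
}

static int my_conf_tx(int queue) { return 0; }

int main(void)
{
	struct ops ops = { .conf_tx = my_conf_tx };
	return drv_conf_tx(&ops, 1);
}

Splitting call and return into separate events also means the call is visible in the trace even if the driver op sleeps for a long time.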
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 41baf730a5c7..5d5d2a974668 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -32,20 +32,62 @@ static inline void trace_ ## name(proto) {}
32#define VIF_PR_FMT " vif:%s(%d)" 32#define VIF_PR_FMT " vif:%s(%d)"
33#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type 33#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type
34 34
35TRACE_EVENT(drv_start, 35/*
36 TP_PROTO(struct ieee80211_local *local, int ret), 36 * Tracing for driver callbacks.
37 */
37 38
38 TP_ARGS(local, ret), 39TRACE_EVENT(drv_return_void,
40 TP_PROTO(struct ieee80211_local *local),
41 TP_ARGS(local),
42 TP_STRUCT__entry(
43 LOCAL_ENTRY
44 ),
45 TP_fast_assign(
46 LOCAL_ASSIGN;
47 ),
48 TP_printk(LOCAL_PR_FMT, LOCAL_PR_ARG)
49);
39 50
51TRACE_EVENT(drv_return_int,
52 TP_PROTO(struct ieee80211_local *local, int ret),
53 TP_ARGS(local, ret),
40 TP_STRUCT__entry( 54 TP_STRUCT__entry(
41 LOCAL_ENTRY 55 LOCAL_ENTRY
42 __field(int, ret) 56 __field(int, ret)
43 ), 57 ),
58 TP_fast_assign(
59 LOCAL_ASSIGN;
60 __entry->ret = ret;
61 ),
62 TP_printk(LOCAL_PR_FMT " - %d", LOCAL_PR_ARG, __entry->ret)
63);
44 64
65TRACE_EVENT(drv_return_u64,
66 TP_PROTO(struct ieee80211_local *local, u64 ret),
67 TP_ARGS(local, ret),
68 TP_STRUCT__entry(
69 LOCAL_ENTRY
70 __field(u64, ret)
71 ),
45 TP_fast_assign( 72 TP_fast_assign(
46 LOCAL_ASSIGN; 73 LOCAL_ASSIGN;
47 __entry->ret = ret; 74 __entry->ret = ret;
48 ), 75 ),
76 TP_printk(LOCAL_PR_FMT " - %llu", LOCAL_PR_ARG, __entry->ret)
77);
78
79TRACE_EVENT(drv_start,
80 TP_PROTO(struct ieee80211_local *local),
81
82 TP_ARGS(local),
83
84 TP_STRUCT__entry(
85 LOCAL_ENTRY
86 ),
87
88 TP_fast_assign(
89 LOCAL_ASSIGN;
90 ),
49 91
50 TP_printk( 92 TP_printk(
51 LOCAL_PR_FMT, LOCAL_PR_ARG 93 LOCAL_PR_FMT, LOCAL_PR_ARG
@@ -72,28 +114,25 @@ TRACE_EVENT(drv_stop,
72 114
73TRACE_EVENT(drv_add_interface, 115TRACE_EVENT(drv_add_interface,
74 TP_PROTO(struct ieee80211_local *local, 116 TP_PROTO(struct ieee80211_local *local,
75 struct ieee80211_sub_if_data *sdata, 117 struct ieee80211_sub_if_data *sdata),
76 int ret),
77 118
78 TP_ARGS(local, sdata, ret), 119 TP_ARGS(local, sdata),
79 120
80 TP_STRUCT__entry( 121 TP_STRUCT__entry(
81 LOCAL_ENTRY 122 LOCAL_ENTRY
82 VIF_ENTRY 123 VIF_ENTRY
83 __array(char, addr, 6) 124 __array(char, addr, 6)
84 __field(int, ret)
85 ), 125 ),
86 126
87 TP_fast_assign( 127 TP_fast_assign(
88 LOCAL_ASSIGN; 128 LOCAL_ASSIGN;
89 VIF_ASSIGN; 129 VIF_ASSIGN;
90 memcpy(__entry->addr, sdata->vif.addr, 6); 130 memcpy(__entry->addr, sdata->vif.addr, 6);
91 __entry->ret = ret;
92 ), 131 ),
93 132
94 TP_printk( 133 TP_printk(
95 LOCAL_PR_FMT VIF_PR_FMT " addr:%pM ret:%d", 134 LOCAL_PR_FMT VIF_PR_FMT " addr:%pM",
96 LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr, __entry->ret 135 LOCAL_PR_ARG, VIF_PR_ARG, __entry->addr
97 ) 136 )
98); 137);
99 138
@@ -122,15 +161,13 @@ TRACE_EVENT(drv_remove_interface,
122 161
123TRACE_EVENT(drv_config, 162TRACE_EVENT(drv_config,
124 TP_PROTO(struct ieee80211_local *local, 163 TP_PROTO(struct ieee80211_local *local,
125 u32 changed, 164 u32 changed),
126 int ret),
127 165
128 TP_ARGS(local, changed, ret), 166 TP_ARGS(local, changed),
129 167
130 TP_STRUCT__entry( 168 TP_STRUCT__entry(
131 LOCAL_ENTRY 169 LOCAL_ENTRY
132 __field(u32, changed) 170 __field(u32, changed)
133 __field(int, ret)
134 __field(u32, flags) 171 __field(u32, flags)
135 __field(int, power_level) 172 __field(int, power_level)
136 __field(int, dynamic_ps_timeout) 173 __field(int, dynamic_ps_timeout)
@@ -146,7 +183,6 @@ TRACE_EVENT(drv_config,
146 TP_fast_assign( 183 TP_fast_assign(
147 LOCAL_ASSIGN; 184 LOCAL_ASSIGN;
148 __entry->changed = changed; 185 __entry->changed = changed;
149 __entry->ret = ret;
150 __entry->flags = local->hw.conf.flags; 186 __entry->flags = local->hw.conf.flags;
151 __entry->power_level = local->hw.conf.power_level; 187 __entry->power_level = local->hw.conf.power_level;
152 __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; 188 __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout;
@@ -160,8 +196,8 @@ TRACE_EVENT(drv_config,
160 ), 196 ),
161 197
162 TP_printk( 198 TP_printk(
163 LOCAL_PR_FMT " ch:%#x freq:%d ret:%d", 199 LOCAL_PR_FMT " ch:%#x freq:%d",
164 LOCAL_PR_ARG, __entry->changed, __entry->center_freq, __entry->ret 200 LOCAL_PR_ARG, __entry->changed, __entry->center_freq
165 ) 201 )
166); 202);
167 203
@@ -216,26 +252,23 @@ TRACE_EVENT(drv_bss_info_changed,
216); 252);
217 253
218TRACE_EVENT(drv_prepare_multicast, 254TRACE_EVENT(drv_prepare_multicast,
219 TP_PROTO(struct ieee80211_local *local, int mc_count, u64 ret), 255 TP_PROTO(struct ieee80211_local *local, int mc_count),
220 256
221 TP_ARGS(local, mc_count, ret), 257 TP_ARGS(local, mc_count),
222 258
223 TP_STRUCT__entry( 259 TP_STRUCT__entry(
224 LOCAL_ENTRY 260 LOCAL_ENTRY
225 __field(int, mc_count) 261 __field(int, mc_count)
226 __field(u64, ret)
227 ), 262 ),
228 263
229 TP_fast_assign( 264 TP_fast_assign(
230 LOCAL_ASSIGN; 265 LOCAL_ASSIGN;
231 __entry->mc_count = mc_count; 266 __entry->mc_count = mc_count;
232 __entry->ret = ret;
233 ), 267 ),
234 268
235 TP_printk( 269 TP_printk(
236 LOCAL_PR_FMT " prepare mc (%d): %llx", 270 LOCAL_PR_FMT " prepare mc (%d)",
237 LOCAL_PR_ARG, __entry->mc_count, 271 LOCAL_PR_ARG, __entry->mc_count
238 (unsigned long long) __entry->ret
239 ) 272 )
240); 273);
241 274
@@ -269,27 +302,25 @@ TRACE_EVENT(drv_configure_filter,
269 302
270TRACE_EVENT(drv_set_tim, 303TRACE_EVENT(drv_set_tim,
271 TP_PROTO(struct ieee80211_local *local, 304 TP_PROTO(struct ieee80211_local *local,
272 struct ieee80211_sta *sta, bool set, int ret), 305 struct ieee80211_sta *sta, bool set),
273 306
274 TP_ARGS(local, sta, set, ret), 307 TP_ARGS(local, sta, set),
275 308
276 TP_STRUCT__entry( 309 TP_STRUCT__entry(
277 LOCAL_ENTRY 310 LOCAL_ENTRY
278 STA_ENTRY 311 STA_ENTRY
279 __field(bool, set) 312 __field(bool, set)
280 __field(int, ret)
281 ), 313 ),
282 314
283 TP_fast_assign( 315 TP_fast_assign(
284 LOCAL_ASSIGN; 316 LOCAL_ASSIGN;
285 STA_ASSIGN; 317 STA_ASSIGN;
286 __entry->set = set; 318 __entry->set = set;
287 __entry->ret = ret;
288 ), 319 ),
289 320
290 TP_printk( 321 TP_printk(
291 LOCAL_PR_FMT STA_PR_FMT " set:%d ret:%d", 322 LOCAL_PR_FMT STA_PR_FMT " set:%d",
292 LOCAL_PR_ARG, STA_PR_ARG, __entry->set, __entry->ret 323 LOCAL_PR_ARG, STA_PR_ARG, __entry->set
293 ) 324 )
294); 325);
295 326
@@ -297,9 +328,9 @@ TRACE_EVENT(drv_set_key,
297 TP_PROTO(struct ieee80211_local *local, 328 TP_PROTO(struct ieee80211_local *local,
298 enum set_key_cmd cmd, struct ieee80211_sub_if_data *sdata, 329 enum set_key_cmd cmd, struct ieee80211_sub_if_data *sdata,
299 struct ieee80211_sta *sta, 330 struct ieee80211_sta *sta,
300 struct ieee80211_key_conf *key, int ret), 331 struct ieee80211_key_conf *key),
301 332
302 TP_ARGS(local, cmd, sdata, sta, key, ret), 333 TP_ARGS(local, cmd, sdata, sta, key),
303 334
304 TP_STRUCT__entry( 335 TP_STRUCT__entry(
305 LOCAL_ENTRY 336 LOCAL_ENTRY
@@ -309,7 +340,6 @@ TRACE_EVENT(drv_set_key,
309 __field(u8, hw_key_idx) 340 __field(u8, hw_key_idx)
310 __field(u8, flags) 341 __field(u8, flags)
311 __field(s8, keyidx) 342 __field(s8, keyidx)
312 __field(int, ret)
313 ), 343 ),
314 344
315 TP_fast_assign( 345 TP_fast_assign(
@@ -320,12 +350,11 @@ TRACE_EVENT(drv_set_key,
320 __entry->flags = key->flags; 350 __entry->flags = key->flags;
321 __entry->keyidx = key->keyidx; 351 __entry->keyidx = key->keyidx;
322 __entry->hw_key_idx = key->hw_key_idx; 352 __entry->hw_key_idx = key->hw_key_idx;
323 __entry->ret = ret;
324 ), 353 ),
325 354
326 TP_printk( 355 TP_printk(
327 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ret:%d", 356 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT,
328 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ret 357 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
329 ) 358 )
330); 359);
331 360
@@ -359,23 +388,24 @@ TRACE_EVENT(drv_update_tkip_key,
359 388
360TRACE_EVENT(drv_hw_scan, 389TRACE_EVENT(drv_hw_scan,
361 TP_PROTO(struct ieee80211_local *local, 390 TP_PROTO(struct ieee80211_local *local,
362 struct cfg80211_scan_request *req, int ret), 391 struct ieee80211_sub_if_data *sdata,
392 struct cfg80211_scan_request *req),
363 393
364 TP_ARGS(local, req, ret), 394 TP_ARGS(local, sdata, req),
365 395
366 TP_STRUCT__entry( 396 TP_STRUCT__entry(
367 LOCAL_ENTRY 397 LOCAL_ENTRY
368 __field(int, ret) 398 VIF_ENTRY
369 ), 399 ),
370 400
371 TP_fast_assign( 401 TP_fast_assign(
372 LOCAL_ASSIGN; 402 LOCAL_ASSIGN;
373 __entry->ret = ret; 403 VIF_ASSIGN;
374 ), 404 ),
375 405
376 TP_printk( 406 TP_printk(
377 LOCAL_PR_FMT " ret:%d", 407 LOCAL_PR_FMT VIF_PR_FMT,
378 LOCAL_PR_ARG, __entry->ret 408 LOCAL_PR_ARG, VIF_PR_ARG
379 ) 409 )
380); 410);
381 411
@@ -472,48 +502,44 @@ TRACE_EVENT(drv_get_tkip_seq,
472); 502);
473 503
474TRACE_EVENT(drv_set_rts_threshold, 504TRACE_EVENT(drv_set_rts_threshold,
475 TP_PROTO(struct ieee80211_local *local, u32 value, int ret), 505 TP_PROTO(struct ieee80211_local *local, u32 value),
476 506
477 TP_ARGS(local, value, ret), 507 TP_ARGS(local, value),
478 508
479 TP_STRUCT__entry( 509 TP_STRUCT__entry(
480 LOCAL_ENTRY 510 LOCAL_ENTRY
481 __field(u32, value) 511 __field(u32, value)
482 __field(int, ret)
483 ), 512 ),
484 513
485 TP_fast_assign( 514 TP_fast_assign(
486 LOCAL_ASSIGN; 515 LOCAL_ASSIGN;
487 __entry->ret = ret;
488 __entry->value = value; 516 __entry->value = value;
489 ), 517 ),
490 518
491 TP_printk( 519 TP_printk(
492 LOCAL_PR_FMT " value:%d ret:%d", 520 LOCAL_PR_FMT " value:%d",
493 LOCAL_PR_ARG, __entry->value, __entry->ret 521 LOCAL_PR_ARG, __entry->value
494 ) 522 )
495); 523);
496 524
497TRACE_EVENT(drv_set_coverage_class, 525TRACE_EVENT(drv_set_coverage_class,
498 TP_PROTO(struct ieee80211_local *local, u8 value, int ret), 526 TP_PROTO(struct ieee80211_local *local, u8 value),
499 527
500 TP_ARGS(local, value, ret), 528 TP_ARGS(local, value),
501 529
502 TP_STRUCT__entry( 530 TP_STRUCT__entry(
503 LOCAL_ENTRY 531 LOCAL_ENTRY
504 __field(u8, value) 532 __field(u8, value)
505 __field(int, ret)
506 ), 533 ),
507 534
508 TP_fast_assign( 535 TP_fast_assign(
509 LOCAL_ASSIGN; 536 LOCAL_ASSIGN;
510 __entry->ret = ret;
511 __entry->value = value; 537 __entry->value = value;
512 ), 538 ),
513 539
514 TP_printk( 540 TP_printk(
515 LOCAL_PR_FMT " value:%d ret:%d", 541 LOCAL_PR_FMT " value:%d",
516 LOCAL_PR_ARG, __entry->value, __entry->ret 542 LOCAL_PR_ARG, __entry->value
517 ) 543 )
518); 544);
519 545
@@ -548,27 +574,25 @@ TRACE_EVENT(drv_sta_notify,
548TRACE_EVENT(drv_sta_add, 574TRACE_EVENT(drv_sta_add,
549 TP_PROTO(struct ieee80211_local *local, 575 TP_PROTO(struct ieee80211_local *local,
550 struct ieee80211_sub_if_data *sdata, 576 struct ieee80211_sub_if_data *sdata,
551 struct ieee80211_sta *sta, int ret), 577 struct ieee80211_sta *sta),
552 578
553 TP_ARGS(local, sdata, sta, ret), 579 TP_ARGS(local, sdata, sta),
554 580
555 TP_STRUCT__entry( 581 TP_STRUCT__entry(
556 LOCAL_ENTRY 582 LOCAL_ENTRY
557 VIF_ENTRY 583 VIF_ENTRY
558 STA_ENTRY 584 STA_ENTRY
559 __field(int, ret)
560 ), 585 ),
561 586
562 TP_fast_assign( 587 TP_fast_assign(
563 LOCAL_ASSIGN; 588 LOCAL_ASSIGN;
564 VIF_ASSIGN; 589 VIF_ASSIGN;
565 STA_ASSIGN; 590 STA_ASSIGN;
566 __entry->ret = ret;
567 ), 591 ),
568 592
569 TP_printk( 593 TP_printk(
570 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ret:%d", 594 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT,
571 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ret 595 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG
572 ) 596 )
573); 597);
574 598
@@ -599,10 +623,9 @@ TRACE_EVENT(drv_sta_remove,
599 623
600TRACE_EVENT(drv_conf_tx, 624TRACE_EVENT(drv_conf_tx,
601 TP_PROTO(struct ieee80211_local *local, u16 queue, 625 TP_PROTO(struct ieee80211_local *local, u16 queue,
602 const struct ieee80211_tx_queue_params *params, 626 const struct ieee80211_tx_queue_params *params),
603 int ret),
604 627
605 TP_ARGS(local, queue, params, ret), 628 TP_ARGS(local, queue, params),
606 629
607 TP_STRUCT__entry( 630 TP_STRUCT__entry(
608 LOCAL_ENTRY 631 LOCAL_ENTRY
@@ -611,13 +634,11 @@ TRACE_EVENT(drv_conf_tx,
611 __field(u16, cw_min) 634 __field(u16, cw_min)
612 __field(u16, cw_max) 635 __field(u16, cw_max)
613 __field(u8, aifs) 636 __field(u8, aifs)
614 __field(int, ret)
615 ), 637 ),
616 638
617 TP_fast_assign( 639 TP_fast_assign(
618 LOCAL_ASSIGN; 640 LOCAL_ASSIGN;
619 __entry->queue = queue; 641 __entry->queue = queue;
620 __entry->ret = ret;
621 __entry->txop = params->txop; 642 __entry->txop = params->txop;
622 __entry->cw_max = params->cw_max; 643 __entry->cw_max = params->cw_max;
623 __entry->cw_min = params->cw_min; 644 __entry->cw_min = params->cw_min;
@@ -625,29 +646,27 @@ TRACE_EVENT(drv_conf_tx,
625 ), 646 ),
626 647
627 TP_printk( 648 TP_printk(
628 LOCAL_PR_FMT " queue:%d ret:%d", 649 LOCAL_PR_FMT " queue:%d",
629 LOCAL_PR_ARG, __entry->queue, __entry->ret 650 LOCAL_PR_ARG, __entry->queue
630 ) 651 )
631); 652);
632 653
633TRACE_EVENT(drv_get_tsf, 654TRACE_EVENT(drv_get_tsf,
634 TP_PROTO(struct ieee80211_local *local, u64 ret), 655 TP_PROTO(struct ieee80211_local *local),
635 656
636 TP_ARGS(local, ret), 657 TP_ARGS(local),
637 658
638 TP_STRUCT__entry( 659 TP_STRUCT__entry(
639 LOCAL_ENTRY 660 LOCAL_ENTRY
640 __field(u64, ret)
641 ), 661 ),
642 662
643 TP_fast_assign( 663 TP_fast_assign(
644 LOCAL_ASSIGN; 664 LOCAL_ASSIGN;
645 __entry->ret = ret;
646 ), 665 ),
647 666
648 TP_printk( 667 TP_printk(
649 LOCAL_PR_FMT " ret:%llu", 668 LOCAL_PR_FMT,
650 LOCAL_PR_ARG, (unsigned long long)__entry->ret 669 LOCAL_PR_ARG
651 ) 670 )
652); 671);
653 672
@@ -691,23 +710,21 @@ TRACE_EVENT(drv_reset_tsf,
691); 710);
692 711
693TRACE_EVENT(drv_tx_last_beacon, 712TRACE_EVENT(drv_tx_last_beacon,
694 TP_PROTO(struct ieee80211_local *local, int ret), 713 TP_PROTO(struct ieee80211_local *local),
695 714
696 TP_ARGS(local, ret), 715 TP_ARGS(local),
697 716
698 TP_STRUCT__entry( 717 TP_STRUCT__entry(
699 LOCAL_ENTRY 718 LOCAL_ENTRY
700 __field(int, ret)
701 ), 719 ),
702 720
703 TP_fast_assign( 721 TP_fast_assign(
704 LOCAL_ASSIGN; 722 LOCAL_ASSIGN;
705 __entry->ret = ret;
706 ), 723 ),
707 724
708 TP_printk( 725 TP_printk(
709 LOCAL_PR_FMT " ret:%d", 726 LOCAL_PR_FMT,
710 LOCAL_PR_ARG, __entry->ret 727 LOCAL_PR_ARG
711 ) 728 )
712); 729);
713 730
@@ -716,9 +733,9 @@ TRACE_EVENT(drv_ampdu_action,
716 struct ieee80211_sub_if_data *sdata, 733 struct ieee80211_sub_if_data *sdata,
717 enum ieee80211_ampdu_mlme_action action, 734 enum ieee80211_ampdu_mlme_action action,
718 struct ieee80211_sta *sta, u16 tid, 735 struct ieee80211_sta *sta, u16 tid,
719 u16 *ssn, int ret), 736 u16 *ssn),
720 737
721 TP_ARGS(local, sdata, action, sta, tid, ssn, ret), 738 TP_ARGS(local, sdata, action, sta, tid, ssn),
722 739
723 TP_STRUCT__entry( 740 TP_STRUCT__entry(
724 LOCAL_ENTRY 741 LOCAL_ENTRY
@@ -726,7 +743,6 @@ TRACE_EVENT(drv_ampdu_action,
726 __field(u32, action) 743 __field(u32, action)
727 __field(u16, tid) 744 __field(u16, tid)
728 __field(u16, ssn) 745 __field(u16, ssn)
729 __field(int, ret)
730 VIF_ENTRY 746 VIF_ENTRY
731 ), 747 ),
732 748
@@ -734,15 +750,36 @@ TRACE_EVENT(drv_ampdu_action,
734 LOCAL_ASSIGN; 750 LOCAL_ASSIGN;
735 VIF_ASSIGN; 751 VIF_ASSIGN;
736 STA_ASSIGN; 752 STA_ASSIGN;
737 __entry->ret = ret;
738 __entry->action = action; 753 __entry->action = action;
739 __entry->tid = tid; 754 __entry->tid = tid;
740 __entry->ssn = ssn ? *ssn : 0; 755 __entry->ssn = ssn ? *ssn : 0;
741 ), 756 ),
742 757
743 TP_printk( 758 TP_printk(
744 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d ret:%d", 759 LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d",
745 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid, __entry->ret 760 LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, __entry->tid
761 )
762);
763
764TRACE_EVENT(drv_get_survey,
765 TP_PROTO(struct ieee80211_local *local, int idx,
766 struct survey_info *survey),
767
768 TP_ARGS(local, idx, survey),
769
770 TP_STRUCT__entry(
771 LOCAL_ENTRY
772 __field(int, idx)
773 ),
774
775 TP_fast_assign(
776 LOCAL_ASSIGN;
777 __entry->idx = idx;
778 ),
779
780 TP_printk(
781 LOCAL_PR_FMT " idx:%d",
782 LOCAL_PR_ARG, __entry->idx
746 ) 783 )
747); 784);
748 785
@@ -766,6 +803,324 @@ TRACE_EVENT(drv_flush,
766 LOCAL_PR_ARG, __entry->drop 803 LOCAL_PR_ARG, __entry->drop
767 ) 804 )
768); 805);
806
807TRACE_EVENT(drv_channel_switch,
808 TP_PROTO(struct ieee80211_local *local,
809 struct ieee80211_channel_switch *ch_switch),
810
811 TP_ARGS(local, ch_switch),
812
813 TP_STRUCT__entry(
814 LOCAL_ENTRY
815 __field(u64, timestamp)
816 __field(bool, block_tx)
817 __field(u16, freq)
818 __field(u8, count)
819 ),
820
821 TP_fast_assign(
822 LOCAL_ASSIGN;
823 __entry->timestamp = ch_switch->timestamp;
824 __entry->block_tx = ch_switch->block_tx;
825 __entry->freq = ch_switch->channel->center_freq;
826 __entry->count = ch_switch->count;
827 ),
828
829 TP_printk(
830 LOCAL_PR_FMT " new freq:%u count:%d",
831 LOCAL_PR_ARG, __entry->freq, __entry->count
832 )
833);
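Editorial note: the new drv_channel_switch trace event snapshots the announced switch (target frequency, countdown in beacons, whether TX is blocked), and the driver-ops.h wrapper above calls local->ops->channel_switch unconditionally because mac80211 only reaches it when the driver advertised the callback. A tiny sketch of the captured data; the struct layout here is illustrative, not copied from mac80211.h:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* illustrative stand-in for struct ieee80211_channel_switch */
struct ch_switch {
	uint64_t timestamp; /* TSF when the switch announcement was received */
	bool     block_tx;  /* stop transmitting until the switch completes */
	uint16_t freq;      /* target channel center frequency, in MHz */
	uint8_t  count;     /* beacons remaining until the switch happens */
};

int main(void)
{
	struct ch_switch cs = { .timestamp = 123456789ULL, .block_tx = true,
	                        .freq = 5200, .count = 5 };
	/* same fields the trace event prints */
	printf("new freq:%u count:%d\n", cs.freq, cs.count);
	return 0;
}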
834
835/*
836 * Tracing for API calls that drivers call.
837 */
838
839TRACE_EVENT(api_start_tx_ba_session,
840 TP_PROTO(struct ieee80211_sta *sta, u16 tid),
841
842 TP_ARGS(sta, tid),
843
844 TP_STRUCT__entry(
845 STA_ENTRY
846 __field(u16, tid)
847 ),
848
849 TP_fast_assign(
850 STA_ASSIGN;
851 __entry->tid = tid;
852 ),
853
854 TP_printk(
855 STA_PR_FMT " tid:%d",
856 STA_PR_ARG, __entry->tid
857 )
858);
859
860TRACE_EVENT(api_start_tx_ba_cb,
861 TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
862
863 TP_ARGS(sdata, ra, tid),
864
865 TP_STRUCT__entry(
866 VIF_ENTRY
867 __array(u8, ra, ETH_ALEN)
868 __field(u16, tid)
869 ),
870
871 TP_fast_assign(
872 VIF_ASSIGN;
873 memcpy(__entry->ra, ra, ETH_ALEN);
874 __entry->tid = tid;
875 ),
876
877 TP_printk(
878 VIF_PR_FMT " ra:%pM tid:%d",
879 VIF_PR_ARG, __entry->ra, __entry->tid
880 )
881);
882
883TRACE_EVENT(api_stop_tx_ba_session,
884 TP_PROTO(struct ieee80211_sta *sta, u16 tid),
885
886 TP_ARGS(sta, tid),
887
888 TP_STRUCT__entry(
889 STA_ENTRY
890 __field(u16, tid)
891 ),
892
893 TP_fast_assign(
894 STA_ASSIGN;
895 __entry->tid = tid;
896 ),
897
898 TP_printk(
899 STA_PR_FMT " tid:%d",
900 STA_PR_ARG, __entry->tid
901 )
902);
903
904TRACE_EVENT(api_stop_tx_ba_cb,
905 TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
906
907 TP_ARGS(sdata, ra, tid),
908
909 TP_STRUCT__entry(
910 VIF_ENTRY
911 __array(u8, ra, ETH_ALEN)
912 __field(u16, tid)
913 ),
914
915 TP_fast_assign(
916 VIF_ASSIGN;
917 memcpy(__entry->ra, ra, ETH_ALEN);
918 __entry->tid = tid;
919 ),
920
921 TP_printk(
922 VIF_PR_FMT " ra:%pM tid:%d",
923 VIF_PR_ARG, __entry->ra, __entry->tid
924 )
925);
926
927TRACE_EVENT(api_restart_hw,
928 TP_PROTO(struct ieee80211_local *local),
929
930 TP_ARGS(local),
931
932 TP_STRUCT__entry(
933 LOCAL_ENTRY
934 ),
935
936 TP_fast_assign(
937 LOCAL_ASSIGN;
938 ),
939
940 TP_printk(
941 LOCAL_PR_FMT,
942 LOCAL_PR_ARG
943 )
944);
945
946TRACE_EVENT(api_beacon_loss,
947 TP_PROTO(struct ieee80211_sub_if_data *sdata),
948
949 TP_ARGS(sdata),
950
951 TP_STRUCT__entry(
952 VIF_ENTRY
953 ),
954
955 TP_fast_assign(
956 VIF_ASSIGN;
957 ),
958
959 TP_printk(
960 VIF_PR_FMT,
961 VIF_PR_ARG
962 )
963);
964
965TRACE_EVENT(api_connection_loss,
966 TP_PROTO(struct ieee80211_sub_if_data *sdata),
967
968 TP_ARGS(sdata),
969
970 TP_STRUCT__entry(
971 VIF_ENTRY
972 ),
973
974 TP_fast_assign(
975 VIF_ASSIGN;
976 ),
977
978 TP_printk(
979 VIF_PR_FMT,
980 VIF_PR_ARG
981 )
982);
983
984TRACE_EVENT(api_cqm_rssi_notify,
985 TP_PROTO(struct ieee80211_sub_if_data *sdata,
986 enum nl80211_cqm_rssi_threshold_event rssi_event),
987
988 TP_ARGS(sdata, rssi_event),
989
990 TP_STRUCT__entry(
991 VIF_ENTRY
992 __field(u32, rssi_event)
993 ),
994
995 TP_fast_assign(
996 VIF_ASSIGN;
997 __entry->rssi_event = rssi_event;
998 ),
999
1000 TP_printk(
1001 VIF_PR_FMT " event:%d",
1002 VIF_PR_ARG, __entry->rssi_event
1003 )
1004);
1005
1006TRACE_EVENT(api_scan_completed,
1007 TP_PROTO(struct ieee80211_local *local, bool aborted),
1008
1009 TP_ARGS(local, aborted),
1010
1011 TP_STRUCT__entry(
1012 LOCAL_ENTRY
1013 __field(bool, aborted)
1014 ),
1015
1016 TP_fast_assign(
1017 LOCAL_ASSIGN;
1018 __entry->aborted = aborted;
1019 ),
1020
1021 TP_printk(
1022 LOCAL_PR_FMT " aborted:%d",
1023 LOCAL_PR_ARG, __entry->aborted
1024 )
1025);
1026
1027TRACE_EVENT(api_sta_block_awake,
1028 TP_PROTO(struct ieee80211_local *local,
1029 struct ieee80211_sta *sta, bool block),
1030
1031 TP_ARGS(local, sta, block),
1032
1033 TP_STRUCT__entry(
1034 LOCAL_ENTRY
1035 STA_ENTRY
1036 __field(bool, block)
1037 ),
1038
1039 TP_fast_assign(
1040 LOCAL_ASSIGN;
1041 STA_ASSIGN;
1042 __entry->block = block;
1043 ),
1044
1045 TP_printk(
1046 LOCAL_PR_FMT STA_PR_FMT " block:%d",
1047 LOCAL_PR_ARG, STA_PR_ARG, __entry->block
1048 )
1049);
1050
1051TRACE_EVENT(api_chswitch_done,
1052 TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success),
1053
1054 TP_ARGS(sdata, success),
1055
1056 TP_STRUCT__entry(
1057 VIF_ENTRY
1058 __field(bool, success)
1059 ),
1060
1061 TP_fast_assign(
1062 VIF_ASSIGN;
1063 __entry->success = success;
1064 ),
1065
1066 TP_printk(
1067 VIF_PR_FMT " success:%d",
1068 VIF_PR_ARG, __entry->success
1069 )
1070);
1071
1072/*
1073 * Tracing for internal functions
1074 * (which may also be called in response to driver calls)
1075 */
1076
1077TRACE_EVENT(wake_queue,
1078 TP_PROTO(struct ieee80211_local *local, u16 queue,
1079 enum queue_stop_reason reason),
1080
1081 TP_ARGS(local, queue, reason),
1082
1083 TP_STRUCT__entry(
1084 LOCAL_ENTRY
1085 __field(u16, queue)
1086 __field(u32, reason)
1087 ),
1088
1089 TP_fast_assign(
1090 LOCAL_ASSIGN;
1091 __entry->queue = queue;
1092 __entry->reason = reason;
1093 ),
1094
1095 TP_printk(
1096 LOCAL_PR_FMT " queue:%d, reason:%d",
1097 LOCAL_PR_ARG, __entry->queue, __entry->reason
1098 )
1099);
1100
1101TRACE_EVENT(stop_queue,
1102 TP_PROTO(struct ieee80211_local *local, u16 queue,
1103 enum queue_stop_reason reason),
1104
1105 TP_ARGS(local, queue, reason),
1106
1107 TP_STRUCT__entry(
1108 LOCAL_ENTRY
1109 __field(u16, queue)
1110 __field(u32, reason)
1111 ),
1112
1113 TP_fast_assign(
1114 LOCAL_ASSIGN;
1115 __entry->queue = queue;
1116 __entry->reason = reason;
1117 ),
1118
1119 TP_printk(
1120 LOCAL_PR_FMT " queue:%d, reason:%d",
1121 LOCAL_PR_ARG, __entry->queue, __entry->reason
1122 )
1123);
769#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ 1124#endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
770 1125
771#undef TRACE_INCLUDE_PATH 1126#undef TRACE_INCLUDE_PATH
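Editorial note: the net effect of the driver-trace.h changes is that every drv_* event now records only its inputs, while results are logged by one of three shared return events (void/int/u64), which removes the duplicated __field(int, ret) boilerplate from each TP_STRUCT__entry. A toy C illustration of the idea, with TRACE_EVENT replaced by printf stubs and all names hypothetical:

#include <stdio.h>

/* one shared "return" event replaces a per-event ret field */
#define TRACE_CALL(name, fmt, ...)  printf(#name " " fmt "\n", ##__VA_ARGS__)
#define TRACE_RET_INT(ret)          printf("drv_return_int - %d\n", (ret))
#define TRACE_RET_VOID()            printf("drv_return_void\n")

static int set_rts_threshold(unsigned value)
{
	TRACE_CALL(drv_set_rts_threshold, "value:%u", value);
	int ret = 0;            /* pretend the hardware accepted the value */
	TRACE_RET_INT(ret);     /* result carried by the shared event */
	return ret;
}

int main(void)
{
	return set_rts_threshold(2347);
}

A trace consumer pairs each call event with the next return event from the same context, so no information is lost by the split.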
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index bb677a73b7c9..9d101fb33861 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -6,7 +6,7 @@
6 * Copyright 2005-2006, Devicescape Software, Inc. 6 * Copyright 2005-2006, Devicescape Software, Inc.
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net> 8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2007-2008, Intel Corporation 9 * Copyright 2007-2010, Intel Corporation
10 * 10 *
11 * This program is free software; you can redistribute it and/or modify 11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as 12 * it under the terms of the GNU General Public License version 2 as
@@ -29,7 +29,7 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
29 29
30 memset(ht_cap, 0, sizeof(*ht_cap)); 30 memset(ht_cap, 0, sizeof(*ht_cap));
31 31
32 if (!ht_cap_ie) 32 if (!ht_cap_ie || !sband->ht_cap.ht_supported)
33 return; 33 return;
34 34
35 ht_cap->ht_supported = true; 35 ht_cap->ht_supported = true;
@@ -105,6 +105,8 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta)
105{ 105{
106 int i; 106 int i;
107 107
108 cancel_work_sync(&sta->ampdu_mlme.work);
109
108 for (i = 0; i < STA_TID_NUM; i++) { 110 for (i = 0; i < STA_TID_NUM; i++) {
109 __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR); 111 __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR);
110 __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, 112 __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
@@ -112,6 +114,43 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta)
112 } 114 }
113} 115}
114 116
117void ieee80211_ba_session_work(struct work_struct *work)
118{
119 struct sta_info *sta =
120 container_of(work, struct sta_info, ampdu_mlme.work);
121 struct tid_ampdu_tx *tid_tx;
122 int tid;
123
124 /*
125 * When this flag is set, new sessions should be
126 * blocked, and existing sessions will be torn
127 * down by the code that set the flag, so this
128 * need not run.
129 */
130 if (test_sta_flags(sta, WLAN_STA_BLOCK_BA))
131 return;
132
133 mutex_lock(&sta->ampdu_mlme.mtx);
134 for (tid = 0; tid < STA_TID_NUM; tid++) {
135 if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired))
136 ___ieee80211_stop_rx_ba_session(
137 sta, tid, WLAN_BACK_RECIPIENT,
138 WLAN_REASON_QSTA_TIMEOUT);
139
140 tid_tx = sta->ampdu_mlme.tid_tx[tid];
141 if (!tid_tx)
142 continue;
143
144 if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state))
145 ieee80211_tx_ba_session_handle_start(sta, tid);
146 else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP,
147 &tid_tx->state))
148 ___ieee80211_stop_tx_ba_session(sta, tid,
149 WLAN_BACK_INITIATOR);
150 }
151 mutex_unlock(&sta->ampdu_mlme.mtx);
152}
153
115void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, 154void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
116 const u8 *da, u16 tid, 155 const u8 *da, u16 tid,
117 u16 initiator, u16 reason_code) 156 u16 initiator, u16 reason_code)
@@ -175,15 +214,9 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
175#endif /* CONFIG_MAC80211_HT_DEBUG */ 214#endif /* CONFIG_MAC80211_HT_DEBUG */
176 215
177 if (initiator == WLAN_BACK_INITIATOR) 216 if (initiator == WLAN_BACK_INITIATOR)
178 ieee80211_sta_stop_rx_ba_session(sdata, sta->sta.addr, tid, 217 __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0);
179 WLAN_BACK_INITIATOR, 0); 218 else
180 else { /* WLAN_BACK_RECIPIENT */ 219 __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT);
181 spin_lock_bh(&sta->lock);
182 if (sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK)
183 ___ieee80211_stop_tx_ba_session(sta, tid,
184 WLAN_BACK_RECIPIENT);
185 spin_unlock_bh(&sta->lock);
186 }
187} 220}
188 221
189int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, 222int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
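Editorial note: the new ieee80211_ba_session_work() above consolidates per-TID aggregation handling into one work item: timers and callers set a per-TID bit (or a tid_tx state flag) and queue the work, which then walks all TIDs under ampdu_mlme.mtx. A stripped-down sketch of that dispatch loop in user-space C; names are hypothetical and the kernel uses atomic bitops plus a mutex:

#include <stdio.h>

#define STA_TID_NUM 16

/* test-and-clear on a plain bitmask; the kernel uses atomic bitops */
static int test_and_clear(unsigned long *mask, int bit)
{
	unsigned long old = *mask & (1UL << bit);
	*mask &= ~(1UL << bit);
	return old != 0;
}

int main(void)
{
	unsigned long rx_timer_expired = (1UL << 3) | (1UL << 7);

	/* one work item handles every TID instead of one message per event */
	for (int tid = 0; tid < STA_TID_NUM; tid++)
		if (test_and_clear(&rx_timer_expired, tid))
			printf("stop RX BA session, tid %d (timeout)\n", tid);

	return 0;
}

This is also why ieee80211_sta_tear_down_BA_sessions() now starts with cancel_work_sync(): the work must not run concurrently with (or after) teardown.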
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index e2976da4e0d9..c691780725a7 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -43,6 +43,8 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
43{ 43{
44 u16 auth_alg, auth_transaction, status_code; 44 u16 auth_alg, auth_transaction, status_code;
45 45
46 lockdep_assert_held(&sdata->u.ibss.mtx);
47
46 if (len < 24 + 6) 48 if (len < 24 + 6)
47 return; 49 return;
48 50
@@ -78,6 +80,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
78 u32 bss_change; 80 u32 bss_change;
79 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; 81 u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
80 82
83 lockdep_assert_held(&ifibss->mtx);
84
81 /* Reset own TSF to allow time synchronization work. */ 85 /* Reset own TSF to allow time synchronization work. */
82 drv_reset_tsf(local); 86 drv_reset_tsf(local);
83 87
@@ -92,12 +96,18 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
92 if (memcmp(ifibss->bssid, bssid, ETH_ALEN)) 96 if (memcmp(ifibss->bssid, bssid, ETH_ALEN))
93 sta_info_flush(sdata->local, sdata); 97 sta_info_flush(sdata->local, sdata);
94 98
99 /* if merging, indicate to driver that we leave the old IBSS */
100 if (sdata->vif.bss_conf.ibss_joined) {
101 sdata->vif.bss_conf.ibss_joined = false;
102 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS);
103 }
104
95 memcpy(ifibss->bssid, bssid, ETH_ALEN); 105 memcpy(ifibss->bssid, bssid, ETH_ALEN);
96 106
97 sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; 107 sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
98 108
99 local->oper_channel = chan; 109 local->oper_channel = chan;
100 local->oper_channel_type = NL80211_CHAN_NO_HT; 110 WARN_ON(!ieee80211_set_channel_type(local, sdata, NL80211_CHAN_NO_HT));
101 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 111 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
102 112
103 sband = local->hw.wiphy->bands[chan->band]; 113 sband = local->hw.wiphy->bands[chan->band];
@@ -166,11 +176,15 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
166 rcu_assign_pointer(ifibss->presp, skb); 176 rcu_assign_pointer(ifibss->presp, skb);
167 177
168 sdata->vif.bss_conf.beacon_int = beacon_int; 178 sdata->vif.bss_conf.beacon_int = beacon_int;
179 sdata->vif.bss_conf.basic_rates = basic_rates;
169 bss_change = BSS_CHANGED_BEACON_INT; 180 bss_change = BSS_CHANGED_BEACON_INT;
170 bss_change |= ieee80211_reset_erp_info(sdata); 181 bss_change |= ieee80211_reset_erp_info(sdata);
171 bss_change |= BSS_CHANGED_BSSID; 182 bss_change |= BSS_CHANGED_BSSID;
172 bss_change |= BSS_CHANGED_BEACON; 183 bss_change |= BSS_CHANGED_BEACON;
173 bss_change |= BSS_CHANGED_BEACON_ENABLED; 184 bss_change |= BSS_CHANGED_BEACON_ENABLED;
185 bss_change |= BSS_CHANGED_BASIC_RATES;
186 bss_change |= BSS_CHANGED_IBSS;
187 sdata->vif.bss_conf.ibss_joined = true;
174 ieee80211_bss_info_change_notify(sdata, bss_change); 188 ieee80211_bss_info_change_notify(sdata, bss_change);
175 189
176 ieee80211_sta_def_wmm_params(sdata, sband->n_bitrates, supp_rates); 190 ieee80211_sta_def_wmm_params(sdata, sband->n_bitrates, supp_rates);
@@ -195,6 +209,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
195 int i, j; 209 int i, j;
196 u16 beacon_int = cbss->beacon_interval; 210 u16 beacon_int = cbss->beacon_interval;
197 211
212 lockdep_assert_held(&sdata->u.ibss.mtx);
213
198 if (beacon_int < 10) 214 if (beacon_int < 10)
199 beacon_int = 10; 215 beacon_int = 10;
200 216
@@ -265,17 +281,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
265 sta->sta.supp_rates[band] = supp_rates | 281 sta->sta.supp_rates[band] = supp_rates |
266 ieee80211_mandatory_rates(local, band); 282 ieee80211_mandatory_rates(local, band);
267 283
284 if (sta->sta.supp_rates[band] != prev_rates) {
268#ifdef CONFIG_MAC80211_IBSS_DEBUG 285#ifdef CONFIG_MAC80211_IBSS_DEBUG
269 if (sta->sta.supp_rates[band] != prev_rates)
270 printk(KERN_DEBUG "%s: updated supp_rates set " 286 printk(KERN_DEBUG "%s: updated supp_rates set "
271 "for %pM based on beacon info (0x%llx | " 287 "for %pM based on beacon/probe_response "
272 "0x%llx -> 0x%llx)\n", 288 "(0x%x -> 0x%x)\n",
273 sdata->name, 289 sdata->name, sta->sta.addr,
274 sta->sta.addr, 290 prev_rates, sta->sta.supp_rates[band]);
275 (unsigned long long) prev_rates,
276 (unsigned long long) supp_rates,
277 (unsigned long long) sta->sta.supp_rates[band]);
278#endif 291#endif
292 rate_control_rate_init(sta);
293 }
279 rcu_read_unlock(); 294 rcu_read_unlock();
280 } else { 295 } else {
281 rcu_read_unlock(); 296 rcu_read_unlock();
@@ -371,6 +386,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
371 sdata->name, mgmt->bssid); 386 sdata->name, mgmt->bssid);
372#endif 387#endif
373 ieee80211_sta_join_ibss(sdata, bss); 388 ieee80211_sta_join_ibss(sdata, bss);
389 supp_rates = ieee80211_sta_get_rates(local, elems, band);
374 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 390 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
375 supp_rates, GFP_KERNEL); 391 supp_rates, GFP_KERNEL);
376 } 392 }
@@ -439,6 +455,8 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
439 int active = 0; 455 int active = 0;
440 struct sta_info *sta; 456 struct sta_info *sta;
441 457
458 lockdep_assert_held(&sdata->u.ibss.mtx);
459
442 rcu_read_lock(); 460 rcu_read_lock();
443 461
444 list_for_each_entry_rcu(sta, &local->sta_list, list) { 462 list_for_each_entry_rcu(sta, &local->sta_list, list) {
@@ -463,6 +481,8 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
463{ 481{
464 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 482 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
465 483
484 lockdep_assert_held(&ifibss->mtx);
485
466 mod_timer(&ifibss->timer, 486 mod_timer(&ifibss->timer,
467 round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); 487 round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));
468 488
@@ -481,7 +501,9 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
481 printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " 501 printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other "
482 "IBSS networks with same SSID (merge)\n", sdata->name); 502 "IBSS networks with same SSID (merge)\n", sdata->name);
483 503
484 ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len); 504 ieee80211_request_internal_scan(sdata,
505 ifibss->ssid, ifibss->ssid_len,
506 ifibss->fixed_channel ? ifibss->channel : NULL);
485} 507}
486 508
487static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) 509static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
@@ -493,6 +515,8 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
493 u16 capability; 515 u16 capability;
494 int i; 516 int i;
495 517
518 lockdep_assert_held(&ifibss->mtx);
519
496 if (ifibss->fixed_bssid) { 520 if (ifibss->fixed_bssid) {
497 memcpy(bssid, ifibss->bssid, ETH_ALEN); 521 memcpy(bssid, ifibss->bssid, ETH_ALEN);
498 } else { 522 } else {
@@ -519,7 +543,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
519 sdata->drop_unencrypted = 0; 543 sdata->drop_unencrypted = 0;
520 544
521 __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, 545 __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int,
522 ifibss->channel, 3, /* first two are basic */ 546 ifibss->channel, ifibss->basic_rates,
523 capability, 0); 547 capability, 0);
524} 548}
525 549
@@ -537,6 +561,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
537 int active_ibss; 561 int active_ibss;
538 u16 capability; 562 u16 capability;
539 563
564 lockdep_assert_held(&ifibss->mtx);
565
540 active_ibss = ieee80211_sta_active_ibss(sdata); 566 active_ibss = ieee80211_sta_active_ibss(sdata);
541#ifdef CONFIG_MAC80211_IBSS_DEBUG 567#ifdef CONFIG_MAC80211_IBSS_DEBUG
542 printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", 568 printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n",
@@ -588,8 +614,9 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
588 printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " 614 printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to "
589 "join\n", sdata->name); 615 "join\n", sdata->name);
590 616
591 ieee80211_request_internal_scan(sdata, ifibss->ssid, 617 ieee80211_request_internal_scan(sdata,
592 ifibss->ssid_len); 618 ifibss->ssid, ifibss->ssid_len,
619 ifibss->fixed_channel ? ifibss->channel : NULL);
593 } else { 620 } else {
594 int interval = IEEE80211_SCAN_INTERVAL; 621 int interval = IEEE80211_SCAN_INTERVAL;
595 622
@@ -624,6 +651,8 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
624 struct ieee80211_mgmt *resp; 651 struct ieee80211_mgmt *resp;
625 u8 *pos, *end; 652 u8 *pos, *end;
626 653
654 lockdep_assert_held(&ifibss->mtx);
655
627 if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || 656 if (ifibss->state != IEEE80211_IBSS_MLME_JOINED ||
628 len < 24 + 2 || !ifibss->presp) 657 len < 24 + 2 || !ifibss->presp)
629 return; 658 return;
@@ -716,8 +745,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
716 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); 745 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true);
717} 746}
718 747
719static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, 748void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
720 struct sk_buff *skb) 749 struct sk_buff *skb)
721{ 750{
722 struct ieee80211_rx_status *rx_status; 751 struct ieee80211_rx_status *rx_status;
723 struct ieee80211_mgmt *mgmt; 752 struct ieee80211_mgmt *mgmt;
@@ -727,6 +756,8 @@ static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
727 mgmt = (struct ieee80211_mgmt *) skb->data; 756 mgmt = (struct ieee80211_mgmt *) skb->data;
728 fc = le16_to_cpu(mgmt->frame_control); 757 fc = le16_to_cpu(mgmt->frame_control);
729 758
759 mutex_lock(&sdata->u.ibss.mtx);
760
730 switch (fc & IEEE80211_FCTL_STYPE) { 761 switch (fc & IEEE80211_FCTL_STYPE) {
731 case IEEE80211_STYPE_PROBE_REQ: 762 case IEEE80211_STYPE_PROBE_REQ:
732 ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len); 763 ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len);
@@ -744,35 +775,22 @@ static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
744 break; 775 break;
745 } 776 }
746 777
747 kfree_skb(skb); 778 mutex_unlock(&sdata->u.ibss.mtx);
748} 779}
749 780
750static void ieee80211_ibss_work(struct work_struct *work) 781void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata)
751{ 782{
752 struct ieee80211_sub_if_data *sdata = 783 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
753 container_of(work, struct ieee80211_sub_if_data, u.ibss.work);
754 struct ieee80211_local *local = sdata->local;
755 struct ieee80211_if_ibss *ifibss;
756 struct sk_buff *skb;
757
758 if (WARN_ON(local->suspended))
759 return;
760
761 if (!ieee80211_sdata_running(sdata))
762 return;
763
764 if (local->scanning)
765 return;
766
767 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_ADHOC))
768 return;
769 ifibss = &sdata->u.ibss;
770 784
771 while ((skb = skb_dequeue(&ifibss->skb_queue))) 785 mutex_lock(&ifibss->mtx);
772 ieee80211_ibss_rx_queued_mgmt(sdata, skb);
773 786
774 if (!test_and_clear_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request)) 787 /*
775 return; 788 * Work could be scheduled after scan or similar
789 * when we aren't even joined (or trying to join)
790 * a network.
791 */
792 if (!ifibss->ssid_len)
793 goto out;
776 794
777 switch (ifibss->state) { 795 switch (ifibss->state) {
778 case IEEE80211_IBSS_MLME_SEARCH: 796 case IEEE80211_IBSS_MLME_SEARCH:
@@ -785,6 +803,9 @@ static void ieee80211_ibss_work(struct work_struct *work)
785 WARN_ON(1); 803 WARN_ON(1);
786 break; 804 break;
787 } 805 }
806
807 out:
808 mutex_unlock(&ifibss->mtx);
788} 809}
789 810
790static void ieee80211_ibss_timer(unsigned long data) 811static void ieee80211_ibss_timer(unsigned long data)
@@ -799,8 +820,7 @@ static void ieee80211_ibss_timer(unsigned long data)
799 return; 820 return;
800 } 821 }
801 822
802 set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); 823 ieee80211_queue_work(&local->hw, &sdata->work);
803 ieee80211_queue_work(&local->hw, &ifibss->work);
804} 824}
805 825
806#ifdef CONFIG_PM 826#ifdef CONFIG_PM
@@ -808,7 +828,6 @@ void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata)
808{ 828{
809 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 829 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
810 830
811 cancel_work_sync(&ifibss->work);
812 if (del_timer_sync(&ifibss->timer)) 831 if (del_timer_sync(&ifibss->timer))
813 ifibss->timer_running = true; 832 ifibss->timer_running = true;
814} 833}
@@ -828,10 +847,9 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
828{ 847{
829 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; 848 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
830 849
831 INIT_WORK(&ifibss->work, ieee80211_ibss_work);
832 setup_timer(&ifibss->timer, ieee80211_ibss_timer, 850 setup_timer(&ifibss->timer, ieee80211_ibss_timer,
833 (unsigned long) sdata); 851 (unsigned long) sdata);
834 skb_queue_head_init(&ifibss->skb_queue); 852 mutex_init(&ifibss->mtx);
835} 853}
836 854
837/* scan finished notification */ 855/* scan finished notification */
@@ -845,45 +863,28 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
845 continue; 863 continue;
846 if (sdata->vif.type != NL80211_IFTYPE_ADHOC) 864 if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
847 continue; 865 continue;
848 if (!sdata->u.ibss.ssid_len)
849 continue;
850 sdata->u.ibss.last_scan_completed = jiffies; 866 sdata->u.ibss.last_scan_completed = jiffies;
851 mod_timer(&sdata->u.ibss.timer, 0); 867 ieee80211_queue_work(&local->hw, &sdata->work);
852 } 868 }
853 mutex_unlock(&local->iflist_mtx); 869 mutex_unlock(&local->iflist_mtx);
854} 870}
855 871
856ieee80211_rx_result
857ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
858{
859 struct ieee80211_local *local = sdata->local;
860 struct ieee80211_mgmt *mgmt;
861 u16 fc;
862
863 if (skb->len < 24)
864 return RX_DROP_MONITOR;
865
866 mgmt = (struct ieee80211_mgmt *) skb->data;
867 fc = le16_to_cpu(mgmt->frame_control);
868
869 switch (fc & IEEE80211_FCTL_STYPE) {
870 case IEEE80211_STYPE_PROBE_RESP:
871 case IEEE80211_STYPE_BEACON:
872 case IEEE80211_STYPE_PROBE_REQ:
873 case IEEE80211_STYPE_AUTH:
874 skb_queue_tail(&sdata->u.ibss.skb_queue, skb);
875 ieee80211_queue_work(&local->hw, &sdata->u.ibss.work);
876 return RX_QUEUED;
877 }
878
879 return RX_DROP_MONITOR;
880}
881
882int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, 872int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
883 struct cfg80211_ibss_params *params) 873 struct cfg80211_ibss_params *params)
884{ 874{
885 struct sk_buff *skb; 875 struct sk_buff *skb;
886 876
877 skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom +
878 36 /* bitrates */ +
879 34 /* SSID */ +
880 3 /* DS params */ +
881 4 /* IBSS params */ +
882 params->ie_len);
883 if (!skb)
884 return -ENOMEM;
885
886 mutex_lock(&sdata->u.ibss.mtx);
887
887 if (params->bssid) { 888 if (params->bssid) {
888 memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); 889 memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN);
889 sdata->u.ibss.fixed_bssid = true; 890 sdata->u.ibss.fixed_bssid = true;
@@ -891,12 +892,20 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
891 sdata->u.ibss.fixed_bssid = false; 892 sdata->u.ibss.fixed_bssid = false;
892 893
893 sdata->u.ibss.privacy = params->privacy; 894 sdata->u.ibss.privacy = params->privacy;
895 sdata->u.ibss.basic_rates = params->basic_rates;
894 896
895 sdata->vif.bss_conf.beacon_int = params->beacon_interval; 897 sdata->vif.bss_conf.beacon_int = params->beacon_interval;
896 898
897 sdata->u.ibss.channel = params->channel; 899 sdata->u.ibss.channel = params->channel;
898 sdata->u.ibss.fixed_channel = params->channel_fixed; 900 sdata->u.ibss.fixed_channel = params->channel_fixed;
899 901
902 /* fix ourselves to that channel now already */
903 if (params->channel_fixed) {
904 sdata->local->oper_channel = params->channel;
905 WARN_ON(!ieee80211_set_channel_type(sdata->local, sdata,
906 NL80211_CHAN_NO_HT));
907 }
908
900 if (params->ie) { 909 if (params->ie) {
901 sdata->u.ibss.ie = kmemdup(params->ie, params->ie_len, 910 sdata->u.ibss.ie = kmemdup(params->ie, params->ie_len,
902 GFP_KERNEL); 911 GFP_KERNEL);
@@ -904,34 +913,18 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
904 sdata->u.ibss.ie_len = params->ie_len; 913 sdata->u.ibss.ie_len = params->ie_len;
905 } 914 }
906 915
907 skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom +
908 36 /* bitrates */ +
909 34 /* SSID */ +
910 3 /* DS params */ +
911 4 /* IBSS params */ +
912 params->ie_len);
913 if (!skb)
914 return -ENOMEM;
915
916 sdata->u.ibss.skb = skb; 916 sdata->u.ibss.skb = skb;
917 sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH; 917 sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH;
918 sdata->u.ibss.ibss_join_req = jiffies; 918 sdata->u.ibss.ibss_join_req = jiffies;
919 919
920 memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); 920 memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN);
921
922 /*
923 * The ssid_len setting below is used to see whether
924 * we are active, and we need all other settings
925 * before that may get visible.
926 */
927 mb();
928
929 sdata->u.ibss.ssid_len = params->ssid_len; 921 sdata->u.ibss.ssid_len = params->ssid_len;
930 922
931 ieee80211_recalc_idle(sdata->local); 923 ieee80211_recalc_idle(sdata->local);
932 924
933 set_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); 925 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
934 ieee80211_queue_work(&sdata->local->hw, &sdata->u.ibss.work); 926
927 mutex_unlock(&sdata->u.ibss.mtx);
935 928
936 return 0; 929 return 0;
937} 930}
@@ -939,11 +932,33 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
939int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) 932int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
940{ 933{
941 struct sk_buff *skb; 934 struct sk_buff *skb;
935 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
936 struct ieee80211_local *local = sdata->local;
937 struct cfg80211_bss *cbss;
938 u16 capability;
939 int active_ibss;
942 940
943 del_timer_sync(&sdata->u.ibss.timer); 941 mutex_lock(&sdata->u.ibss.mtx);
944 clear_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); 942
945 cancel_work_sync(&sdata->u.ibss.work); 943 active_ibss = ieee80211_sta_active_ibss(sdata);
946 clear_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); 944
945 if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) {
946 capability = WLAN_CAPABILITY_IBSS;
947
948 if (ifibss->privacy)
949 capability |= WLAN_CAPABILITY_PRIVACY;
950
951 cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->channel,
952 ifibss->bssid, ifibss->ssid,
953 ifibss->ssid_len, WLAN_CAPABILITY_IBSS |
954 WLAN_CAPABILITY_PRIVACY,
955 capability);
956
957 if (cbss) {
958 cfg80211_unlink_bss(local->hw.wiphy, cbss);
959 cfg80211_put_bss(cbss);
960 }
961 }
947 962
948 sta_info_flush(sdata->local, sdata); 963 sta_info_flush(sdata->local, sdata);
949 964
@@ -951,14 +966,20 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
951 kfree(sdata->u.ibss.ie); 966 kfree(sdata->u.ibss.ie);
952 skb = sdata->u.ibss.presp; 967 skb = sdata->u.ibss.presp;
953 rcu_assign_pointer(sdata->u.ibss.presp, NULL); 968 rcu_assign_pointer(sdata->u.ibss.presp, NULL);
954 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); 969 sdata->vif.bss_conf.ibss_joined = false;
970 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
971 BSS_CHANGED_IBSS);
955 synchronize_rcu(); 972 synchronize_rcu();
956 kfree_skb(skb); 973 kfree_skb(skb);
957 974
958 skb_queue_purge(&sdata->u.ibss.skb_queue); 975 skb_queue_purge(&sdata->skb_queue);
959 memset(sdata->u.ibss.bssid, 0, ETH_ALEN); 976 memset(sdata->u.ibss.bssid, 0, ETH_ALEN);
960 sdata->u.ibss.ssid_len = 0; 977 sdata->u.ibss.ssid_len = 0;
961 978
979 del_timer_sync(&sdata->u.ibss.timer);
980
981 mutex_unlock(&sdata->u.ibss.mtx);
982
962 ieee80211_recalc_idle(sdata->local); 983 ieee80211_recalc_idle(sdata->local);
963 984
964 return 0; 985 return 0;
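Editorial note: the ibss.c rework drops the private work struct, skb queue, and request bits in favour of the shared sdata->work, and serializes all IBSS MLME state behind u.ibss.mtx; the internal helpers now document their locking contract with lockdep_assert_held(). A minimal pthread sketch of the same discipline, simplified and with assumed names; lockdep performs this checking automatically in the kernel:

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static int mtx_held;                 /* stand-in for lockdep's held state */

static void assert_held(void) { assert(mtx_held); }

/* internal helper: caller must already hold the mutex */
static void find_ibss(void)
{
	assert_held();
	printf("searching for an IBSS to join\n");
}

/* entry point (the work function): takes the mutex, then calls helpers */
static void ibss_work(void)
{
	pthread_mutex_lock(&mtx);
	mtx_held = 1;
	find_ibss();
	mtx_held = 0;
	pthread_mutex_unlock(&mtx);
}

int main(void)
{
	ibss_work();
	return 0;
}

Taking the mutex in the work function is also what allows the mb() barrier and the IEEE80211_IBSS_REQ_RUN bit to be deleted from the join path.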
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 241533e1bc03..65e0ed6c2975 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -238,6 +238,7 @@ enum ieee80211_work_type {
238 IEEE80211_WORK_ABORT, 238 IEEE80211_WORK_ABORT,
239 IEEE80211_WORK_DIRECT_PROBE, 239 IEEE80211_WORK_DIRECT_PROBE,
240 IEEE80211_WORK_AUTH, 240 IEEE80211_WORK_AUTH,
241 IEEE80211_WORK_ASSOC_BEACON_WAIT,
241 IEEE80211_WORK_ASSOC, 242 IEEE80211_WORK_ASSOC,
242 IEEE80211_WORK_REMAIN_ON_CHANNEL, 243 IEEE80211_WORK_REMAIN_ON_CHANNEL,
243}; 244};
@@ -317,6 +318,7 @@ enum ieee80211_sta_flags {
317 IEEE80211_STA_MFP_ENABLED = BIT(6), 318 IEEE80211_STA_MFP_ENABLED = BIT(6),
318 IEEE80211_STA_UAPSD_ENABLED = BIT(7), 319 IEEE80211_STA_UAPSD_ENABLED = BIT(7),
319 IEEE80211_STA_NULLFUNC_ACKED = BIT(8), 320 IEEE80211_STA_NULLFUNC_ACKED = BIT(8),
321 IEEE80211_STA_RESET_SIGNAL_AVE = BIT(9),
320}; 322};
321 323
322struct ieee80211_if_managed { 324struct ieee80211_if_managed {
@@ -324,10 +326,9 @@ struct ieee80211_if_managed {
324 struct timer_list conn_mon_timer; 326 struct timer_list conn_mon_timer;
325 struct timer_list bcn_mon_timer; 327 struct timer_list bcn_mon_timer;
326 struct timer_list chswitch_timer; 328 struct timer_list chswitch_timer;
327 struct work_struct work;
328 struct work_struct monitor_work; 329 struct work_struct monitor_work;
329 struct work_struct chswitch_work; 330 struct work_struct chswitch_work;
330 struct work_struct beacon_loss_work; 331 struct work_struct beacon_connection_loss_work;
331 332
332 unsigned long probe_timeout; 333 unsigned long probe_timeout;
333 int probe_send_count; 334 int probe_send_count;
@@ -339,8 +340,6 @@ struct ieee80211_if_managed {
339 340
340 u16 aid; 341 u16 aid;
341 342
342 struct sk_buff_head skb_queue;
343
344 unsigned long timers_running; /* used for quiesce/restart */ 343 unsigned long timers_running; /* used for quiesce/restart */
345 bool powersave; /* powersave requested for this iface */ 344 bool powersave; /* powersave requested for this iface */
346 enum ieee80211_smps_mode req_smps, /* requested smps mode */ 345 enum ieee80211_smps_mode req_smps, /* requested smps mode */
@@ -359,21 +358,35 @@ struct ieee80211_if_managed {
359 int wmm_last_param_set; 358 int wmm_last_param_set;
360 359
361 u8 use_4addr; 360 u8 use_4addr;
362};
363 361
364enum ieee80211_ibss_request { 362 /* Signal strength from the last Beacon frame in the current BSS. */
365 IEEE80211_IBSS_REQ_RUN = 0, 363 int last_beacon_signal;
364
365 /*
366 * Weighted average of the signal strength from Beacon frames in the
367 * current BSS. This is in units of 1/16 of the signal unit to maintain
368 * accuracy and to speed up calculations, i.e., the value needs to be
369 * divided by 16 to get the actual value.
370 */
371 int ave_beacon_signal;
372
373 /*
374 * Last Beacon frame signal strength average (ave_beacon_signal / 16)
375 * that triggered a cqm event. 0 indicates that no event has been
376 * generated for the current association.
377 */
378 int last_cqm_event_signal;
366}; 379};
367 380
368struct ieee80211_if_ibss { 381struct ieee80211_if_ibss {
369 struct timer_list timer; 382 struct timer_list timer;
370 struct work_struct work;
371 383
372 struct sk_buff_head skb_queue; 384 struct mutex mtx;
373 385
374 unsigned long request;
375 unsigned long last_scan_completed; 386 unsigned long last_scan_completed;
376 387
388 u32 basic_rates;
389
377 bool timer_running; 390 bool timer_running;
378 391
379 bool fixed_bssid; 392 bool fixed_bssid;
@@ -397,11 +410,9 @@ struct ieee80211_if_ibss {
397}; 410};
398 411
399struct ieee80211_if_mesh { 412struct ieee80211_if_mesh {
400 struct work_struct work;
401 struct timer_list housekeeping_timer; 413 struct timer_list housekeeping_timer;
402 struct timer_list mesh_path_timer; 414 struct timer_list mesh_path_timer;
403 struct timer_list mesh_path_root_timer; 415 struct timer_list mesh_path_root_timer;
404 struct sk_buff_head skb_queue;
405 416
406 unsigned long timers_running; 417 unsigned long timers_running;
407 418
@@ -498,6 +509,11 @@ struct ieee80211_sub_if_data {
498 509
499 u16 sequence_number; 510 u16 sequence_number;
500 511
512 struct work_struct work;
513 struct sk_buff_head skb_queue;
514
515 bool arp_filter_state;
516
501 /* 517 /*
502 * AP this belongs to: self in AP mode and 518 * AP this belongs to: self in AP mode and
503 * corresponding AP in VLAN mode, NULL for 519 * corresponding AP in VLAN mode, NULL for
@@ -550,11 +566,15 @@ ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata,
550#endif 566#endif
551} 567}
552 568
569enum sdata_queue_type {
570 IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0,
571 IEEE80211_SDATA_QUEUE_AGG_START = 1,
572 IEEE80211_SDATA_QUEUE_AGG_STOP = 2,
573};
574
553enum { 575enum {
554 IEEE80211_RX_MSG = 1, 576 IEEE80211_RX_MSG = 1,
555 IEEE80211_TX_STATUS_MSG = 2, 577 IEEE80211_TX_STATUS_MSG = 2,
556 IEEE80211_DELBA_MSG = 3,
557 IEEE80211_ADDBA_MSG = 4,
558}; 578};
559 579
560enum queue_stop_reason { 580enum queue_stop_reason {
@@ -646,8 +666,7 @@ struct ieee80211_local {
646 struct work_struct recalc_smps; 666 struct work_struct recalc_smps;
647 667
648 /* aggregated multicast list */ 668 /* aggregated multicast list */
649 struct dev_addr_list *mc_list; 669 struct netdev_hw_addr_list mc_list;
650 int mc_count;
651 670
652 bool tim_in_locked_section; /* see ieee80211_beacon_get() */ 671 bool tim_in_locked_section; /* see ieee80211_beacon_get() */
653 672
@@ -706,13 +725,7 @@ struct ieee80211_local {
706 struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; 725 struct sk_buff_head pending[IEEE80211_MAX_QUEUES];
707 struct tasklet_struct tx_pending_tasklet; 726 struct tasklet_struct tx_pending_tasklet;
708 727
709 /* 728 atomic_t agg_queue_stop[IEEE80211_MAX_QUEUES];
710 * This lock is used to prevent concurrent A-MPDU
711 * session start/stop processing, this thus also
712 * synchronises the ->ampdu_action() callback to
713 * drivers and limits it to one at a time.
714 */
715 spinlock_t ampdu_lock;
716 729
717 /* number of interfaces with corresponding IFF_ flags */ 730 /* number of interfaces with corresponding IFF_ flags */
718 atomic_t iff_allmultis, iff_promiscs; 731 atomic_t iff_allmultis, iff_promiscs;
@@ -728,10 +741,10 @@ struct ieee80211_local {
728 struct mutex iflist_mtx; 741 struct mutex iflist_mtx;
729 742
730 /* 743 /*
731 * Key lock, protects sdata's key_list and sta_info's 744 * Key mutex, protects sdata's key_list and sta_info's
732 * key pointers (write access, they're RCU.) 745 * key pointers (write access, they're RCU.)
733 */ 746 */
734 spinlock_t key_lock; 747 struct mutex key_mtx;
735 748
736 749
737 /* Scanning and BSS list */ 750 /* Scanning and BSS list */
@@ -745,10 +758,11 @@ struct ieee80211_local {
745 int scan_channel_idx; 758 int scan_channel_idx;
746 int scan_ies_len; 759 int scan_ies_len;
747 760
761 unsigned long leave_oper_channel_time;
748 enum mac80211_scan_state next_scan_state; 762 enum mac80211_scan_state next_scan_state;
749 struct delayed_work scan_work; 763 struct delayed_work scan_work;
750 struct ieee80211_sub_if_data *scan_sdata; 764 struct ieee80211_sub_if_data *scan_sdata;
751 enum nl80211_channel_type oper_channel_type; 765 enum nl80211_channel_type _oper_channel_type;
752 struct ieee80211_channel *oper_channel, *csa_channel; 766 struct ieee80211_channel *oper_channel, *csa_channel;
753 767
754 /* Temporary remain-on-channel for off-channel operations */ 768 /* Temporary remain-on-channel for off-channel operations */
@@ -832,6 +846,15 @@ struct ieee80211_local {
832 struct work_struct dynamic_ps_disable_work; 846 struct work_struct dynamic_ps_disable_work;
833 struct timer_list dynamic_ps_timer; 847 struct timer_list dynamic_ps_timer;
834 struct notifier_block network_latency_notifier; 848 struct notifier_block network_latency_notifier;
849 struct notifier_block ifa_notifier;
850
851 /*
852 * The dynamic ps timeout configured from user space via WEXT -
853 * this will override whatever chosen by mac80211 internally.
854 */
855 int dynamic_ps_forced_timeout;
856 int dynamic_ps_user_timeout;
857 bool disable_dynamic_ps;
835 858
836 int user_power_level; /* in dBm */ 859 int user_power_level; /* in dBm */
837 int power_constr_level; /* in dBm */ 860 int power_constr_level; /* in dBm */
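
The forced timeout is a user override: when set (>= 0) it wins, otherwise mac80211 derives a value itself. A minimal sketch of the selection logic, assuming the latency-based fallback in ieee80211_recalc_ps(); the threshold and defaults below are illustrative, not the exact in-tree values:

	if (local->dynamic_ps_forced_timeout >= 0)
		timeout = local->dynamic_ps_forced_timeout;	/* user override */
	else if (latency < 50000)	/* strict PM QoS latency request (assumed) */
		timeout = 50;
	else
		timeout = 100;		/* relaxed default (assumed) */
	local->dynamic_ps_user_timeout = timeout;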
@@ -855,9 +878,8 @@ IEEE80211_DEV_TO_SUB_IF(struct net_device *dev)
855 return netdev_priv(dev); 878 return netdev_priv(dev);
856} 879}
857 880
858/* this struct represents 802.11n's RA/TID combination along with our vif */ 881/* this struct represents 802.11n's RA/TID combination */
859struct ieee80211_ra_tid { 882struct ieee80211_ra_tid {
860 struct ieee80211_vif *vif;
861 u8 ra[ETH_ALEN]; 883 u8 ra[ETH_ALEN];
862 u16 tid; 884 u16 tid;
863}; 885};
@@ -966,28 +988,25 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
966int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, 988int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
967 struct cfg80211_disassoc_request *req, 989 struct cfg80211_disassoc_request *req,
968 void *cookie); 990 void *cookie);
969int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata,
970 struct ieee80211_channel *chan,
971 enum nl80211_channel_type channel_type,
972 const u8 *buf, size_t len, u64 *cookie);
973ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata,
974 struct sk_buff *skb);
975void ieee80211_send_pspoll(struct ieee80211_local *local, 991void ieee80211_send_pspoll(struct ieee80211_local *local,
976 struct ieee80211_sub_if_data *sdata); 992 struct ieee80211_sub_if_data *sdata);
977void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); 993void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency);
978int ieee80211_max_network_latency(struct notifier_block *nb, 994int ieee80211_max_network_latency(struct notifier_block *nb,
979 unsigned long data, void *dummy); 995 unsigned long data, void *dummy);
996int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata);
980void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, 997void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
981 struct ieee80211_channel_sw_ie *sw_elem, 998 struct ieee80211_channel_sw_ie *sw_elem,
982 struct ieee80211_bss *bss); 999 struct ieee80211_bss *bss,
1000 u64 timestamp);
983void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); 1001void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata);
984void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); 1002void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
1003void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
1004void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1005 struct sk_buff *skb);
985 1006
986/* IBSS code */ 1007/* IBSS code */
987void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); 1008void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
988void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); 1009void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata);
989ieee80211_rx_result
990ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb);
991struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, 1010struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
992 u8 *bssid, u8 *addr, u32 supp_rates, 1011 u8 *bssid, u8 *addr, u32 supp_rates,
993 gfp_t gfp); 1012 gfp_t gfp);
@@ -996,11 +1015,20 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
996int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); 1015int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata);
997void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata); 1016void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata);
998void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata); 1017void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata);
1018void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata);
1019void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1020 struct sk_buff *skb);
1021
1022/* mesh code */
1023void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata);
1024void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1025 struct sk_buff *skb);
999 1026
1000/* scan/BSS handling */ 1027/* scan/BSS handling */
1001void ieee80211_scan_work(struct work_struct *work); 1028void ieee80211_scan_work(struct work_struct *work);
1002int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, 1029int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
1003 const u8 *ssid, u8 ssid_len); 1030 const u8 *ssid, u8 ssid_len,
1031 struct ieee80211_channel *chan);
1004int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, 1032int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
1005 struct cfg80211_scan_request *req); 1033 struct cfg80211_scan_request *req);
1006void ieee80211_scan_cancel(struct ieee80211_local *local); 1034void ieee80211_scan_cancel(struct ieee80211_local *local);
@@ -1063,7 +1091,7 @@ struct ieee80211_tx_status_rtap_hdr {
1063 u8 padding_for_rate; 1091 u8 padding_for_rate;
1064 __le16 tx_flags; 1092 __le16 tx_flags;
1065 u8 data_retries; 1093 u8 data_retries;
1066} __attribute__ ((packed)); 1094} __packed;
1067 1095
1068 1096
1069/* HT */ 1097/* HT */
@@ -1078,8 +1106,8 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
1078 enum ieee80211_smps_mode smps, const u8 *da, 1106 enum ieee80211_smps_mode smps, const u8 *da,
1079 const u8 *bssid); 1107 const u8 *bssid);
1080 1108
1081void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, 1109void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
1082 u16 tid, u16 initiator, u16 reason); 1110 u16 initiator, u16 reason);
1083void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 1111void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
1084 u16 initiator, u16 reason); 1112 u16 initiator, u16 reason);
1085void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta); 1113void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta);
@@ -1099,6 +1127,10 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
1099 enum ieee80211_back_parties initiator); 1127 enum ieee80211_back_parties initiator);
1100int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 1128int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
1101 enum ieee80211_back_parties initiator); 1129 enum ieee80211_back_parties initiator);
1130void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
1131void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
1132void ieee80211_ba_session_work(struct work_struct *work);
1133void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
1102 1134
1103/* Spectrum management */ 1135/* Spectrum management */
1104void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, 1136void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1155,7 +1187,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
1155 int powersave); 1187 int powersave);
1156void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, 1188void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1157 struct ieee80211_hdr *hdr); 1189 struct ieee80211_hdr *hdr);
1158void ieee80211_beacon_loss_work(struct work_struct *work); 1190void ieee80211_beacon_connection_loss_work(struct work_struct *work);
1159 1191
1160void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, 1192void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
1161 enum queue_stop_reason reason); 1193 enum queue_stop_reason reason);
@@ -1210,6 +1242,20 @@ int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1210int ieee80211_wk_cancel_remain_on_channel( 1242int ieee80211_wk_cancel_remain_on_channel(
1211 struct ieee80211_sub_if_data *sdata, u64 cookie); 1243 struct ieee80211_sub_if_data *sdata, u64 cookie);
1212 1244
1245/* channel management */
1246enum ieee80211_chan_mode {
1247 CHAN_MODE_UNDEFINED,
1248 CHAN_MODE_HOPPING,
1249 CHAN_MODE_FIXED,
1250};
1251
1252enum ieee80211_chan_mode
1253ieee80211_get_channel_mode(struct ieee80211_local *local,
1254 struct ieee80211_sub_if_data *ignore);
1255bool ieee80211_set_channel_type(struct ieee80211_local *local,
1256 struct ieee80211_sub_if_data *sdata,
1257 enum nl80211_channel_type chantype);
1258
1213#ifdef CONFIG_MAC80211_NOINLINE 1259#ifdef CONFIG_MAC80211_NOINLINE
1214#define debug_noinline noinline 1260#define debug_noinline noinline
1215#else 1261#else
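
A hypothetical caller of the new channel-management helpers, e.g. a cfg80211 op that must not move the hardware off a channel some other interface already fixed (chan and channel_type are assumed local variables):

	enum ieee80211_chan_mode mode =
		ieee80211_get_channel_mode(local, sdata);

	if (mode == CHAN_MODE_FIXED && chan != local->oper_channel)
		return -EBUSY;	/* another vif pinned the channel */

	if (!ieee80211_set_channel_type(local, sdata, channel_type))
		return -EBUSY;	/* HT40+/HT40- conflict with another vif */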
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index e08fa8eda1b3..ebbe264e2b0b 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -249,6 +249,8 @@ static int ieee80211_open(struct net_device *dev)
249 local->fif_other_bss++; 249 local->fif_other_bss++;
250 250
251 ieee80211_configure_filter(local); 251 ieee80211_configure_filter(local);
252
253 netif_carrier_on(dev);
252 break; 254 break;
253 default: 255 default:
254 res = drv_add_interface(local, &sdata->vif); 256 res = drv_add_interface(local, &sdata->vif);
@@ -268,7 +270,6 @@ static int ieee80211_open(struct net_device *dev)
268 270
269 changed |= ieee80211_reset_erp_info(sdata); 271 changed |= ieee80211_reset_erp_info(sdata);
270 ieee80211_bss_info_change_notify(sdata, changed); 272 ieee80211_bss_info_change_notify(sdata, changed);
271 ieee80211_enable_keys(sdata);
272 273
273 if (sdata->vif.type == NL80211_IFTYPE_STATION) 274 if (sdata->vif.type == NL80211_IFTYPE_STATION)
274 netif_carrier_off(dev); 275 netif_carrier_off(dev);
@@ -321,15 +322,6 @@ static int ieee80211_open(struct net_device *dev)
321 322
322 ieee80211_recalc_ps(local, -1); 323 ieee80211_recalc_ps(local, -1);
323 324
324 /*
325 * ieee80211_sta_work is disabled while network interface
326 * is down. Therefore, some configuration changes may not
327 * yet be effective. Trigger execution of ieee80211_sta_work
328 * to fix this.
329 */
330 if (sdata->vif.type == NL80211_IFTYPE_STATION)
331 ieee80211_queue_work(&local->hw, &sdata->u.mgd.work);
332
333 netif_tx_start_all_queues(dev); 325 netif_tx_start_all_queues(dev);
334 326
335 return 0; 327 return 0;
@@ -349,7 +341,6 @@ static int ieee80211_stop(struct net_device *dev)
349{ 341{
350 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 342 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
351 struct ieee80211_local *local = sdata->local; 343 struct ieee80211_local *local = sdata->local;
352 struct sta_info *sta;
353 unsigned long flags; 344 unsigned long flags;
354 struct sk_buff *skb, *tmp; 345 struct sk_buff *skb, *tmp;
355 u32 hw_reconf_flags = 0; 346 u32 hw_reconf_flags = 0;
@@ -366,18 +357,6 @@ static int ieee80211_stop(struct net_device *dev)
366 ieee80211_work_purge(sdata); 357 ieee80211_work_purge(sdata);
367 358
368 /* 359 /*
369 * Now delete all active aggregation sessions.
370 */
371 rcu_read_lock();
372
373 list_for_each_entry_rcu(sta, &local->sta_list, list) {
374 if (sta->sdata == sdata)
375 ieee80211_sta_tear_down_BA_sessions(sta);
376 }
377
378 rcu_read_unlock();
379
380 /*
381 * Remove all stations associated with this interface. 360 * Remove all stations associated with this interface.
382 * 361 *
383 * This must be done before calling ops->remove_interface() 362 * This must be done before calling ops->remove_interface()
@@ -413,8 +392,7 @@ static int ieee80211_stop(struct net_device *dev)
413 392
414 netif_addr_lock_bh(dev); 393 netif_addr_lock_bh(dev);
415 spin_lock_bh(&local->filter_lock); 394 spin_lock_bh(&local->filter_lock);
416 __dev_addr_unsync(&local->mc_list, &local->mc_count, 395 __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len);
417 &dev->mc_list, &dev->mc_count);
418 spin_unlock_bh(&local->filter_lock); 396 spin_unlock_bh(&local->filter_lock);
419 netif_addr_unlock_bh(dev); 397 netif_addr_unlock_bh(dev);
420 398
@@ -484,27 +462,14 @@ static int ieee80211_stop(struct net_device *dev)
484 * whether the interface is running, which, at this point, 462 * whether the interface is running, which, at this point,
485 * it no longer is. 463 * it no longer is.
486 */ 464 */
487 cancel_work_sync(&sdata->u.mgd.work);
488 cancel_work_sync(&sdata->u.mgd.chswitch_work); 465 cancel_work_sync(&sdata->u.mgd.chswitch_work);
489 cancel_work_sync(&sdata->u.mgd.monitor_work); 466 cancel_work_sync(&sdata->u.mgd.monitor_work);
490 cancel_work_sync(&sdata->u.mgd.beacon_loss_work); 467 cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work);
491 468
492 /*
493 * When we get here, the interface is marked down.
494 * Call synchronize_rcu() to wait for the RX path
495 * should it be using the interface and enqueuing
496 * frames at this very time on another CPU.
497 */
498 synchronize_rcu();
499 skb_queue_purge(&sdata->u.mgd.skb_queue);
500 /* fall through */ 469 /* fall through */
501 case NL80211_IFTYPE_ADHOC: 470 case NL80211_IFTYPE_ADHOC:
502 if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { 471 if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
503 del_timer_sync(&sdata->u.ibss.timer); 472 del_timer_sync(&sdata->u.ibss.timer);
504 cancel_work_sync(&sdata->u.ibss.work);
505 synchronize_rcu();
506 skb_queue_purge(&sdata->u.ibss.skb_queue);
507 }
508 /* fall through */ 473 /* fall through */
509 case NL80211_IFTYPE_MESH_POINT: 474 case NL80211_IFTYPE_MESH_POINT:
510 if (ieee80211_vif_is_mesh(&sdata->vif)) { 475 if (ieee80211_vif_is_mesh(&sdata->vif)) {
@@ -519,6 +484,16 @@ static int ieee80211_stop(struct net_device *dev)
519 } 484 }
520 /* fall through */ 485 /* fall through */
521 default: 486 default:
487 flush_work(&sdata->work);
488 /*
489 * When we get here, the interface is marked down.
490 * Call synchronize_rcu() to wait for the RX path
491 * should it be using the interface and enqueuing
492 * frames at this very time on another CPU.
493 */
494 synchronize_rcu();
495 skb_queue_purge(&sdata->skb_queue);
496
522 if (local->scan_sdata == sdata) 497 if (local->scan_sdata == sdata)
523 ieee80211_scan_cancel(local); 498 ieee80211_scan_cancel(local);
524 499
@@ -532,8 +507,8 @@ static int ieee80211_stop(struct net_device *dev)
532 BSS_CHANGED_BEACON_ENABLED); 507 BSS_CHANGED_BEACON_ENABLED);
533 } 508 }
534 509
535 /* disable all keys for as long as this netdev is down */ 510 /* free all remaining keys, there shouldn't be any */
536 ieee80211_disable_keys(sdata); 511 ieee80211_free_keys(sdata);
537 drv_remove_interface(local, &sdata->vif); 512 drv_remove_interface(local, &sdata->vif);
538 } 513 }
539 514
@@ -597,8 +572,7 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
597 sdata->flags ^= IEEE80211_SDATA_PROMISC; 572 sdata->flags ^= IEEE80211_SDATA_PROMISC;
598 } 573 }
599 spin_lock_bh(&local->filter_lock); 574 spin_lock_bh(&local->filter_lock);
600 __dev_addr_sync(&local->mc_list, &local->mc_count, 575 __hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
601 &dev->mc_list, &dev->mc_count);
602 spin_unlock_bh(&local->filter_lock); 576 spin_unlock_bh(&local->filter_lock);
603 ieee80211_queue_work(&local->hw, &local->reconfig_filter); 577 ieee80211_queue_work(&local->hw, &local->reconfig_filter);
604} 578}
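
With mc_list now a struct netdev_hw_addr_list, drivers get the list handed to prepare_multicast() directly instead of a count plus dev_addr_list. A driver-side sketch; xyz_priv and add_to_hw_filter() are hypothetical:

	static u64 xyz_prepare_multicast(struct ieee80211_hw *hw,
					 struct netdev_hw_addr_list *mc_list)
	{
		struct xyz_priv *priv = hw->priv;	/* hypothetical driver private data */
		struct netdev_hw_addr *ha;

		netdev_hw_addr_list_for_each(ha, mc_list)
			add_to_hw_filter(priv, ha->addr);	/* hypothetical helper */

		return netdev_hw_addr_list_count(mc_list);
	}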
@@ -729,6 +703,136 @@ static void ieee80211_if_setup(struct net_device *dev)
729 dev->destructor = free_netdev; 703 dev->destructor = free_netdev;
730} 704}
731 705
706static void ieee80211_iface_work(struct work_struct *work)
707{
708 struct ieee80211_sub_if_data *sdata =
709 container_of(work, struct ieee80211_sub_if_data, work);
710 struct ieee80211_local *local = sdata->local;
711 struct sk_buff *skb;
712 struct sta_info *sta;
713 struct ieee80211_ra_tid *ra_tid;
714
715 if (!ieee80211_sdata_running(sdata))
716 return;
717
718 if (local->scanning)
719 return;
720
721 /*
 722 * ieee80211_queue_work() should have picked up most cases;
 723 * here we pick up the rest.
724 */
725 if (WARN(local->suspended,
726 "interface work scheduled while going to suspend\n"))
727 return;
728
729 /* first process frames */
730 while ((skb = skb_dequeue(&sdata->skb_queue))) {
731 struct ieee80211_mgmt *mgmt = (void *)skb->data;
732
733 if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_START) {
734 ra_tid = (void *)&skb->cb;
735 ieee80211_start_tx_ba_cb(&sdata->vif, ra_tid->ra,
736 ra_tid->tid);
737 } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_STOP) {
738 ra_tid = (void *)&skb->cb;
739 ieee80211_stop_tx_ba_cb(&sdata->vif, ra_tid->ra,
740 ra_tid->tid);
741 } else if (ieee80211_is_action(mgmt->frame_control) &&
742 mgmt->u.action.category == WLAN_CATEGORY_BACK) {
743 int len = skb->len;
744
745 mutex_lock(&local->sta_mtx);
746 sta = sta_info_get_bss(sdata, mgmt->sa);
747 if (sta) {
748 switch (mgmt->u.action.u.addba_req.action_code) {
749 case WLAN_ACTION_ADDBA_REQ:
750 ieee80211_process_addba_request(
751 local, sta, mgmt, len);
752 break;
753 case WLAN_ACTION_ADDBA_RESP:
754 ieee80211_process_addba_resp(local, sta,
755 mgmt, len);
756 break;
757 case WLAN_ACTION_DELBA:
758 ieee80211_process_delba(sdata, sta,
759 mgmt, len);
760 break;
761 default:
762 WARN_ON(1);
763 break;
764 }
765 }
766 mutex_unlock(&local->sta_mtx);
767 } else if (ieee80211_is_data_qos(mgmt->frame_control)) {
768 struct ieee80211_hdr *hdr = (void *)mgmt;
769 /*
 770 * The frame isn't management, but frame_control
 771 * sits at the same offset in the header, so the
 772 * check above is still valid.
 773 *
 774 * Warn if other data frame types show up here;
 775 * they must never reach this point.
776 */
777 WARN_ON(hdr->frame_control &
778 cpu_to_le16(IEEE80211_STYPE_NULLFUNC));
779 WARN_ON(!(hdr->seq_ctrl &
780 cpu_to_le16(IEEE80211_SCTL_FRAG)));
781 /*
782 * This was a fragment of a frame, received while
783 * a block-ack session was active. That cannot be
784 * right, so terminate the session.
785 */
786 mutex_lock(&local->sta_mtx);
787 sta = sta_info_get_bss(sdata, mgmt->sa);
788 if (sta) {
789 u16 tid = *ieee80211_get_qos_ctl(hdr) &
790 IEEE80211_QOS_CTL_TID_MASK;
791
792 __ieee80211_stop_rx_ba_session(
793 sta, tid, WLAN_BACK_RECIPIENT,
794 WLAN_REASON_QSTA_REQUIRE_SETUP);
795 }
796 mutex_unlock(&local->sta_mtx);
797 } else switch (sdata->vif.type) {
798 case NL80211_IFTYPE_STATION:
799 ieee80211_sta_rx_queued_mgmt(sdata, skb);
800 break;
801 case NL80211_IFTYPE_ADHOC:
802 ieee80211_ibss_rx_queued_mgmt(sdata, skb);
803 break;
804 case NL80211_IFTYPE_MESH_POINT:
805 if (!ieee80211_vif_is_mesh(&sdata->vif))
806 break;
807 ieee80211_mesh_rx_queued_mgmt(sdata, skb);
808 break;
809 default:
810 WARN(1, "frame for unexpected interface type");
811 break;
812 }
813
814 kfree_skb(skb);
815 }
816
817 /* then other type-dependent work */
818 switch (sdata->vif.type) {
819 case NL80211_IFTYPE_STATION:
820 ieee80211_sta_work(sdata);
821 break;
822 case NL80211_IFTYPE_ADHOC:
823 ieee80211_ibss_work(sdata);
824 break;
825 case NL80211_IFTYPE_MESH_POINT:
826 if (!ieee80211_vif_is_mesh(&sdata->vif))
827 break;
828 ieee80211_mesh_work(sdata);
829 break;
830 default:
831 break;
832 }
833}
834
835
732/* 836/*
733 * Helper function to initialise an interface to a specific type. 837 * Helper function to initialise an interface to a specific type.
734 */ 838 */
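
For plain frames, ieee80211_iface_work() above pairs with a producer in the RX path; a sketch of that handoff (the rx.c side is outside this excerpt):

	/* hand the management frame to the per-interface work */
	skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
	skb_queue_tail(&sdata->skb_queue, skb);
	ieee80211_queue_work(&local->hw, &sdata->work);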
@@ -746,6 +850,9 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
746 /* only monitor differs */ 850 /* only monitor differs */
747 sdata->dev->type = ARPHRD_ETHER; 851 sdata->dev->type = ARPHRD_ETHER;
748 852
853 skb_queue_head_init(&sdata->skb_queue);
854 INIT_WORK(&sdata->work, ieee80211_iface_work);
855
749 switch (type) { 856 switch (type) {
750 case NL80211_IFTYPE_AP: 857 case NL80211_IFTYPE_AP:
751 skb_queue_head_init(&sdata->u.ap.ps_bc_buf); 858 skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
@@ -816,6 +923,118 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
816 return 0; 923 return 0;
817} 924}
818 925
926static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
927 struct net_device *dev,
928 enum nl80211_iftype type)
929{
930 struct ieee80211_sub_if_data *sdata;
931 u64 mask, start, addr, val, inc;
932 u8 *m;
933 u8 tmp_addr[ETH_ALEN];
934 int i;
935
936 /* default ... something at least */
937 memcpy(dev->perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
938
939 if (is_zero_ether_addr(local->hw.wiphy->addr_mask) &&
940 local->hw.wiphy->n_addresses <= 1)
941 return;
942
943
944 mutex_lock(&local->iflist_mtx);
945
946 switch (type) {
947 case NL80211_IFTYPE_MONITOR:
948 /* doesn't matter */
949 break;
950 case NL80211_IFTYPE_WDS:
951 case NL80211_IFTYPE_AP_VLAN:
952 /* match up with an AP interface */
953 list_for_each_entry(sdata, &local->interfaces, list) {
954 if (sdata->vif.type != NL80211_IFTYPE_AP)
955 continue;
956 memcpy(dev->perm_addr, sdata->vif.addr, ETH_ALEN);
957 break;
958 }
959 /* keep default if no AP interface present */
960 break;
961 default:
962 /* assign a new address if possible -- try n_addresses first */
963 for (i = 0; i < local->hw.wiphy->n_addresses; i++) {
964 bool used = false;
965
966 list_for_each_entry(sdata, &local->interfaces, list) {
967 if (memcmp(local->hw.wiphy->addresses[i].addr,
968 sdata->vif.addr, ETH_ALEN) == 0) {
969 used = true;
970 break;
971 }
972 }
973
974 if (!used) {
975 memcpy(dev->perm_addr,
976 local->hw.wiphy->addresses[i].addr,
977 ETH_ALEN);
978 break;
979 }
980 }
981
982 /* try mask if available */
983 if (is_zero_ether_addr(local->hw.wiphy->addr_mask))
984 break;
985
986 m = local->hw.wiphy->addr_mask;
987 mask = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
988 ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
989 ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
990
991 if (__ffs64(mask) + hweight64(mask) != fls64(mask)) {
992 /* not a contiguous mask ... not handled now! */
993 printk(KERN_DEBUG "not contiguous\n");
994 break;
995 }
996
997 m = local->hw.wiphy->perm_addr;
998 start = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
999 ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
1000 ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
1001
1002 inc = 1ULL<<__ffs64(mask);
1003 val = (start & mask);
1004 addr = (start & ~mask) | (val & mask);
1005 do {
1006 bool used = false;
1007
1008 tmp_addr[5] = addr >> 0*8;
1009 tmp_addr[4] = addr >> 1*8;
1010 tmp_addr[3] = addr >> 2*8;
1011 tmp_addr[2] = addr >> 3*8;
1012 tmp_addr[1] = addr >> 4*8;
1013 tmp_addr[0] = addr >> 5*8;
1014
1015 val += inc;
1016
1017 list_for_each_entry(sdata, &local->interfaces, list) {
1018 if (memcmp(tmp_addr, sdata->vif.addr,
1019 ETH_ALEN) == 0) {
1020 used = true;
1021 break;
1022 }
1023 }
1024
1025 if (!used) {
1026 memcpy(dev->perm_addr, tmp_addr, ETH_ALEN);
1027 break;
1028 }
1029 addr = (start & ~mask) | (val & mask);
1030 } while (addr != start);
1031
1032 break;
1033 }
1034
1035 mutex_unlock(&local->iflist_mtx);
1036}
1037
819int ieee80211_if_add(struct ieee80211_local *local, const char *name, 1038int ieee80211_if_add(struct ieee80211_local *local, const char *name,
820 struct net_device **new_dev, enum nl80211_iftype type, 1039 struct net_device **new_dev, enum nl80211_iftype type,
821 struct vif_params *params) 1040 struct vif_params *params)
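
The contiguity test in ieee80211_assign_perm_addr() relies on a bit identity: for a single run of set bits, the index of the lowest bit plus the population count equals the index just past the highest bit. A worked example for an assumed addr_mask of 00:00:00:00:00:03:

	u64 mask = 0x03;	/* two low address bits may vary */

	/* __ffs64(mask) == 0, hweight64(mask) == 2, fls64(mask) == 2,
	 * so 0 + 2 == 2 -> contiguous; 0x05 gives 0 + 2 != 3 and is
	 * rejected by the check above. */
	bool contiguous = __ffs64(mask) + hweight64(mask) == fls64(mask);

	/* the loop then steps by the lowest maskable bit (inc == 1 here)
	 * and cycles through the four addresses differing in those bits */
	u64 inc = 1ULL << __ffs64(mask);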
@@ -845,8 +1064,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
845 if (ret < 0) 1064 if (ret < 0)
846 goto fail; 1065 goto fail;
847 1066
848 memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); 1067 ieee80211_assign_perm_addr(local, ndev, type);
849 memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN); 1068 memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN);
850 SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); 1069 SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
851 1070
852 /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ 1071 /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */
@@ -859,6 +1078,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
859 sdata->wdev.wiphy = local->hw.wiphy; 1078 sdata->wdev.wiphy = local->hw.wiphy;
860 sdata->local = local; 1079 sdata->local = local;
861 sdata->dev = ndev; 1080 sdata->dev = ndev;
1081#ifdef CONFIG_INET
1082 sdata->arp_filter_state = true;
1083#endif
862 1084
863 for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) 1085 for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
864 skb_queue_head_init(&sdata->fragments[i].skb_list); 1086 skb_queue_head_init(&sdata->fragments[i].skb_list);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index e8f6e3b252d8..1b9d87ed143a 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -36,80 +36,20 @@
36 * There is currently no way of knowing this except by looking into 36 * There is currently no way of knowing this except by looking into
37 * debugfs. 37 * debugfs.
38 * 38 *
39 * All key operations are protected internally so you can call them at 39 * All key operations are protected internally.
40 * any time.
41 * 40 *
42 * Within mac80211, key references are, just as STA structure references, 41 * Within mac80211, key references are, just as STA structure references,
43 * protected by RCU. Note, however, that some things are unprotected, 42 * protected by RCU. Note, however, that some things are unprotected,
44 * namely the key->sta dereferences within the hardware acceleration 43 * namely the key->sta dereferences within the hardware acceleration
 45 * functions. This means that sta_info_destroy() must flush the key todo 44 * functions. This means that sta_info_destroy() must remove the key,
 46 * list. 45 * which in turn waits for an RCU grace period.
47 *
48 * All the direct key list manipulation functions must not sleep because
49 * they can operate on STA info structs that are protected by RCU.
50 */ 46 */
51 47
52static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; 48static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
53 49
54/* key mutex: used to synchronise todo runners */ 50static void assert_key_lock(struct ieee80211_local *local)
55static DEFINE_MUTEX(key_mutex);
56static DEFINE_SPINLOCK(todo_lock);
57static LIST_HEAD(todo_list);
58
59static void key_todo(struct work_struct *work)
60{
61 ieee80211_key_todo();
62}
63
64static DECLARE_WORK(todo_work, key_todo);
65
66/**
67 * add_todo - add todo item for a key
68 *
69 * @key: key to add to do item for
70 * @flag: todo flag(s)
71 *
72 * Must be called with IRQs or softirqs disabled.
73 */
74static void add_todo(struct ieee80211_key *key, u32 flag)
75{
76 if (!key)
77 return;
78
79 spin_lock(&todo_lock);
80 key->flags |= flag;
81 /*
82 * Remove again if already on the list so that we move it to the end.
83 */
84 if (!list_empty(&key->todo))
85 list_del(&key->todo);
86 list_add_tail(&key->todo, &todo_list);
87 schedule_work(&todo_work);
88 spin_unlock(&todo_lock);
89}
90
91/**
92 * ieee80211_key_lock - lock the mac80211 key operation lock
93 *
94 * This locks the (global) mac80211 key operation lock, all
95 * key operations must be done under this lock.
96 */
97static void ieee80211_key_lock(void)
98{
99 mutex_lock(&key_mutex);
100}
101
102/**
103 * ieee80211_key_unlock - unlock the mac80211 key operation lock
104 */
105static void ieee80211_key_unlock(void)
106{
107 mutex_unlock(&key_mutex);
108}
109
110static void assert_key_lock(void)
111{ 51{
112 WARN_ON(!mutex_is_locked(&key_mutex)); 52 WARN_ON(!mutex_is_locked(&local->key_mtx));
113} 53}
114 54
115static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key) 55static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
@@ -126,12 +66,13 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
126 struct ieee80211_sta *sta; 66 struct ieee80211_sta *sta;
127 int ret; 67 int ret;
128 68
129 assert_key_lock();
130 might_sleep(); 69 might_sleep();
131 70
132 if (!key->local->ops->set_key) 71 if (!key->local->ops->set_key)
133 return; 72 return;
134 73
74 assert_key_lock(key->local);
75
135 sta = get_sta_for_key(key); 76 sta = get_sta_for_key(key);
136 77
137 sdata = key->sdata; 78 sdata = key->sdata;
@@ -142,11 +83,8 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
142 83
143 ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); 84 ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf);
144 85
145 if (!ret) { 86 if (!ret)
146 spin_lock_bh(&todo_lock);
147 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; 87 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
148 spin_unlock_bh(&todo_lock);
149 }
150 88
151 if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) 89 if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP)
152 printk(KERN_ERR "mac80211-%s: failed to set key " 90 printk(KERN_ERR "mac80211-%s: failed to set key "
@@ -161,18 +99,15 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
161 struct ieee80211_sta *sta; 99 struct ieee80211_sta *sta;
162 int ret; 100 int ret;
163 101
164 assert_key_lock();
165 might_sleep(); 102 might_sleep();
166 103
167 if (!key || !key->local->ops->set_key) 104 if (!key || !key->local->ops->set_key)
168 return; 105 return;
169 106
170 spin_lock_bh(&todo_lock); 107 assert_key_lock(key->local);
171 if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { 108
172 spin_unlock_bh(&todo_lock); 109 if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE))
173 return; 110 return;
174 }
175 spin_unlock_bh(&todo_lock);
176 111
177 sta = get_sta_for_key(key); 112 sta = get_sta_for_key(key);
178 sdata = key->sdata; 113 sdata = key->sdata;
@@ -191,9 +126,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
191 wiphy_name(key->local->hw.wiphy), 126 wiphy_name(key->local->hw.wiphy),
192 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); 127 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
193 128
194 spin_lock_bh(&todo_lock);
195 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; 129 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
196 spin_unlock_bh(&todo_lock);
197} 130}
198 131
199static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, 132static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
@@ -201,22 +134,24 @@ static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
201{ 134{
202 struct ieee80211_key *key = NULL; 135 struct ieee80211_key *key = NULL;
203 136
137 assert_key_lock(sdata->local);
138
204 if (idx >= 0 && idx < NUM_DEFAULT_KEYS) 139 if (idx >= 0 && idx < NUM_DEFAULT_KEYS)
205 key = sdata->keys[idx]; 140 key = sdata->keys[idx];
206 141
207 rcu_assign_pointer(sdata->default_key, key); 142 rcu_assign_pointer(sdata->default_key, key);
208 143
209 if (key) 144 if (key) {
210 add_todo(key, KEY_FLAG_TODO_DEFKEY); 145 ieee80211_debugfs_key_remove_default(key->sdata);
146 ieee80211_debugfs_key_add_default(key->sdata);
147 }
211} 148}
212 149
213void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) 150void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx)
214{ 151{
215 unsigned long flags; 152 mutex_lock(&sdata->local->key_mtx);
216
217 spin_lock_irqsave(&sdata->local->key_lock, flags);
218 __ieee80211_set_default_key(sdata, idx); 153 __ieee80211_set_default_key(sdata, idx);
219 spin_unlock_irqrestore(&sdata->local->key_lock, flags); 154 mutex_unlock(&sdata->local->key_mtx);
220} 155}
221 156
222static void 157static void
@@ -224,24 +159,26 @@ __ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx)
224{ 159{
225 struct ieee80211_key *key = NULL; 160 struct ieee80211_key *key = NULL;
226 161
162 assert_key_lock(sdata->local);
163
227 if (idx >= NUM_DEFAULT_KEYS && 164 if (idx >= NUM_DEFAULT_KEYS &&
228 idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) 165 idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
229 key = sdata->keys[idx]; 166 key = sdata->keys[idx];
230 167
231 rcu_assign_pointer(sdata->default_mgmt_key, key); 168 rcu_assign_pointer(sdata->default_mgmt_key, key);
232 169
233 if (key) 170 if (key) {
234 add_todo(key, KEY_FLAG_TODO_DEFMGMTKEY); 171 ieee80211_debugfs_key_remove_mgmt_default(key->sdata);
172 ieee80211_debugfs_key_add_mgmt_default(key->sdata);
173 }
235} 174}
236 175
237void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, 176void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
238 int idx) 177 int idx)
239{ 178{
240 unsigned long flags; 179 mutex_lock(&sdata->local->key_mtx);
241
242 spin_lock_irqsave(&sdata->local->key_lock, flags);
243 __ieee80211_set_default_mgmt_key(sdata, idx); 180 __ieee80211_set_default_mgmt_key(sdata, idx);
244 spin_unlock_irqrestore(&sdata->local->key_lock, flags); 181 mutex_unlock(&sdata->local->key_mtx);
245} 182}
246 183
247 184
@@ -336,7 +273,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
336 key->conf.iv_len = CCMP_HDR_LEN; 273 key->conf.iv_len = CCMP_HDR_LEN;
337 key->conf.icv_len = CCMP_MIC_LEN; 274 key->conf.icv_len = CCMP_MIC_LEN;
338 if (seq) { 275 if (seq) {
339 for (i = 0; i < NUM_RX_DATA_QUEUES; i++) 276 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++)
340 for (j = 0; j < CCMP_PN_LEN; j++) 277 for (j = 0; j < CCMP_PN_LEN; j++)
341 key->u.ccmp.rx_pn[i][j] = 278 key->u.ccmp.rx_pn[i][j] =
342 seq[CCMP_PN_LEN - j - 1]; 279 seq[CCMP_PN_LEN - j - 1];
@@ -352,7 +289,6 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
352 } 289 }
353 memcpy(key->conf.key, key_data, key_len); 290 memcpy(key->conf.key, key_data, key_len);
354 INIT_LIST_HEAD(&key->list); 291 INIT_LIST_HEAD(&key->list);
355 INIT_LIST_HEAD(&key->todo);
356 292
357 if (alg == ALG_CCMP) { 293 if (alg == ALG_CCMP) {
358 /* 294 /*
@@ -382,12 +318,29 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
382 return key; 318 return key;
383} 319}
384 320
321static void __ieee80211_key_destroy(struct ieee80211_key *key)
322{
323 if (!key)
324 return;
325
326 if (key->local)
327 ieee80211_key_disable_hw_accel(key);
328
329 if (key->conf.alg == ALG_CCMP)
330 ieee80211_aes_key_free(key->u.ccmp.tfm);
331 if (key->conf.alg == ALG_AES_CMAC)
332 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
333 if (key->local)
334 ieee80211_debugfs_key_remove(key);
335
336 kfree(key);
337}
338
385void ieee80211_key_link(struct ieee80211_key *key, 339void ieee80211_key_link(struct ieee80211_key *key,
386 struct ieee80211_sub_if_data *sdata, 340 struct ieee80211_sub_if_data *sdata,
387 struct sta_info *sta) 341 struct sta_info *sta)
388{ 342{
389 struct ieee80211_key *old_key; 343 struct ieee80211_key *old_key;
390 unsigned long flags;
391 int idx; 344 int idx;
392 345
393 BUG_ON(!sdata); 346 BUG_ON(!sdata);
@@ -431,7 +384,7 @@ void ieee80211_key_link(struct ieee80211_key *key,
431 } 384 }
432 } 385 }
433 386
434 spin_lock_irqsave(&sdata->local->key_lock, flags); 387 mutex_lock(&sdata->local->key_mtx);
435 388
436 if (sta) 389 if (sta)
437 old_key = sta->key; 390 old_key = sta->key;
@@ -439,15 +392,13 @@ void ieee80211_key_link(struct ieee80211_key *key,
439 old_key = sdata->keys[idx]; 392 old_key = sdata->keys[idx];
440 393
441 __ieee80211_key_replace(sdata, sta, old_key, key); 394 __ieee80211_key_replace(sdata, sta, old_key, key);
395 __ieee80211_key_destroy(old_key);
442 396
443 /* free old key later */ 397 ieee80211_debugfs_key_add(key);
444 add_todo(old_key, KEY_FLAG_TODO_DELETE);
445 398
446 add_todo(key, KEY_FLAG_TODO_ADD_DEBUGFS); 399 ieee80211_key_enable_hw_accel(key);
447 if (ieee80211_sdata_running(sdata))
448 add_todo(key, KEY_FLAG_TODO_HWACCEL_ADD);
449 400
450 spin_unlock_irqrestore(&sdata->local->key_lock, flags); 401 mutex_unlock(&sdata->local->key_mtx);
451} 402}
452 403
453static void __ieee80211_key_free(struct ieee80211_key *key) 404static void __ieee80211_key_free(struct ieee80211_key *key)
@@ -458,170 +409,62 @@ static void __ieee80211_key_free(struct ieee80211_key *key)
458 if (key->sdata) 409 if (key->sdata)
459 __ieee80211_key_replace(key->sdata, key->sta, 410 __ieee80211_key_replace(key->sdata, key->sta,
460 key, NULL); 411 key, NULL);
461 412 __ieee80211_key_destroy(key);
462 add_todo(key, KEY_FLAG_TODO_DELETE);
463} 413}
464 414
465void ieee80211_key_free(struct ieee80211_key *key) 415void ieee80211_key_free(struct ieee80211_local *local,
416 struct ieee80211_key *key)
466{ 417{
467 unsigned long flags;
468
469 if (!key) 418 if (!key)
470 return; 419 return;
471 420
472 if (!key->sdata) { 421 mutex_lock(&local->key_mtx);
473 /* The key has not been linked yet, simply free it
474 * and don't Oops */
475 if (key->conf.alg == ALG_CCMP)
476 ieee80211_aes_key_free(key->u.ccmp.tfm);
477 kfree(key);
478 return;
479 }
480
481 spin_lock_irqsave(&key->sdata->local->key_lock, flags);
482 __ieee80211_key_free(key); 422 __ieee80211_key_free(key);
483 spin_unlock_irqrestore(&key->sdata->local->key_lock, flags); 423 mutex_unlock(&local->key_mtx);
484} 424}
485 425
486/* 426void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
487 * To be safe against concurrent manipulations of the list (which shouldn't
488 * actually happen) we need to hold the spinlock. But under the spinlock we
489 * can't actually do much, so we defer processing to the todo list. Then run
490 * the todo list to be sure the operation and possibly previously pending
491 * operations are completed.
492 */
493static void ieee80211_todo_for_each_key(struct ieee80211_sub_if_data *sdata,
494 u32 todo_flags)
495{ 427{
496 struct ieee80211_key *key; 428 struct ieee80211_key *key;
497 unsigned long flags;
498
499 might_sleep();
500
501 spin_lock_irqsave(&sdata->local->key_lock, flags);
502 list_for_each_entry(key, &sdata->key_list, list)
503 add_todo(key, todo_flags);
504 spin_unlock_irqrestore(&sdata->local->key_lock, flags);
505 429
506 ieee80211_key_todo();
507}
508
509void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
510{
511 ASSERT_RTNL(); 430 ASSERT_RTNL();
512 431
513 if (WARN_ON(!ieee80211_sdata_running(sdata))) 432 if (WARN_ON(!ieee80211_sdata_running(sdata)))
514 return; 433 return;
515 434
516 ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_ADD); 435 mutex_lock(&sdata->local->key_mtx);
517}
518 436
519void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) 437 list_for_each_entry(key, &sdata->key_list, list)
520{ 438 ieee80211_key_enable_hw_accel(key);
521 ASSERT_RTNL();
522
523 ieee80211_todo_for_each_key(sdata, KEY_FLAG_TODO_HWACCEL_REMOVE);
524}
525
526static void __ieee80211_key_destroy(struct ieee80211_key *key)
527{
528 if (!key)
529 return;
530
531 ieee80211_key_disable_hw_accel(key);
532
533 if (key->conf.alg == ALG_CCMP)
534 ieee80211_aes_key_free(key->u.ccmp.tfm);
535 if (key->conf.alg == ALG_AES_CMAC)
536 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
537 ieee80211_debugfs_key_remove(key);
538 439
539 kfree(key); 440 mutex_unlock(&sdata->local->key_mtx);
540} 441}
541 442
542static void __ieee80211_key_todo(void) 443void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata)
543{ 444{
544 struct ieee80211_key *key; 445 struct ieee80211_key *key;
545 bool work_done;
546 u32 todoflags;
547 446
548 /* 447 ASSERT_RTNL();
549 * NB: sta_info_destroy relies on this!
550 */
551 synchronize_rcu();
552
553 spin_lock_bh(&todo_lock);
554 while (!list_empty(&todo_list)) {
555 key = list_first_entry(&todo_list, struct ieee80211_key, todo);
556 list_del_init(&key->todo);
557 todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS |
558 KEY_FLAG_TODO_DEFKEY |
559 KEY_FLAG_TODO_DEFMGMTKEY |
560 KEY_FLAG_TODO_HWACCEL_ADD |
561 KEY_FLAG_TODO_HWACCEL_REMOVE |
562 KEY_FLAG_TODO_DELETE);
563 key->flags &= ~todoflags;
564 spin_unlock_bh(&todo_lock);
565
566 work_done = false;
567
568 if (todoflags & KEY_FLAG_TODO_ADD_DEBUGFS) {
569 ieee80211_debugfs_key_add(key);
570 work_done = true;
571 }
572 if (todoflags & KEY_FLAG_TODO_DEFKEY) {
573 ieee80211_debugfs_key_remove_default(key->sdata);
574 ieee80211_debugfs_key_add_default(key->sdata);
575 work_done = true;
576 }
577 if (todoflags & KEY_FLAG_TODO_DEFMGMTKEY) {
578 ieee80211_debugfs_key_remove_mgmt_default(key->sdata);
579 ieee80211_debugfs_key_add_mgmt_default(key->sdata);
580 work_done = true;
581 }
582 if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) {
583 ieee80211_key_enable_hw_accel(key);
584 work_done = true;
585 }
586 if (todoflags & KEY_FLAG_TODO_HWACCEL_REMOVE) {
587 ieee80211_key_disable_hw_accel(key);
588 work_done = true;
589 }
590 if (todoflags & KEY_FLAG_TODO_DELETE) {
591 __ieee80211_key_destroy(key);
592 work_done = true;
593 }
594 448
595 WARN_ON(!work_done); 449 mutex_lock(&sdata->local->key_mtx);
596 450
597 spin_lock_bh(&todo_lock); 451 list_for_each_entry(key, &sdata->key_list, list)
598 } 452 ieee80211_key_disable_hw_accel(key);
599 spin_unlock_bh(&todo_lock);
600}
601 453
602void ieee80211_key_todo(void) 454 mutex_unlock(&sdata->local->key_mtx);
603{
604 ieee80211_key_lock();
605 __ieee80211_key_todo();
606 ieee80211_key_unlock();
607} 455}
608 456
609void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) 457void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
610{ 458{
611 struct ieee80211_key *key, *tmp; 459 struct ieee80211_key *key, *tmp;
612 unsigned long flags;
613 460
614 ieee80211_key_lock(); 461 mutex_lock(&sdata->local->key_mtx);
615 462
616 ieee80211_debugfs_key_remove_default(sdata); 463 ieee80211_debugfs_key_remove_default(sdata);
617 ieee80211_debugfs_key_remove_mgmt_default(sdata); 464 ieee80211_debugfs_key_remove_mgmt_default(sdata);
618 465
619 spin_lock_irqsave(&sdata->local->key_lock, flags);
620 list_for_each_entry_safe(key, tmp, &sdata->key_list, list) 466 list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
621 __ieee80211_key_free(key); 467 __ieee80211_key_free(key);
622 spin_unlock_irqrestore(&sdata->local->key_lock, flags);
623
624 __ieee80211_key_todo();
625 468
626 ieee80211_key_unlock(); 469 mutex_unlock(&sdata->local->key_mtx);
627} 470}
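
With the todo machinery gone, writers simply take local->key_mtx while readers stay on RCU. A sketch of the resulting split, assuming a cfg80211 del_key-style caller for the new ieee80211_key_free() signature:

	/* writer: takes key_mtx internally, destroys synchronously */
	ieee80211_key_free(sdata->local, sdata->keys[idx]);

	/* reader (TX/RX path): RCU only, no mutex */
	rcu_read_lock();
	key = rcu_dereference(sdata->default_key);
	if (key)
		keyidx = key->conf.keyidx;
	rcu_read_unlock();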
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index bdc2968c2bbe..b665bbb7a471 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -38,25 +38,9 @@ struct sta_info;
38 * 38 *
39 * @KEY_FLAG_UPLOADED_TO_HARDWARE: Indicates that this key is present 39 * @KEY_FLAG_UPLOADED_TO_HARDWARE: Indicates that this key is present
40 * in the hardware for TX crypto hardware acceleration. 40 * in the hardware for TX crypto hardware acceleration.
41 * @KEY_FLAG_TODO_DELETE: Key is marked for deletion and will, after an
42 * RCU grace period, no longer be reachable other than from the
43 * todo list.
44 * @KEY_FLAG_TODO_HWACCEL_ADD: Key needs to be added to hardware acceleration.
45 * @KEY_FLAG_TODO_HWACCEL_REMOVE: Key needs to be removed from hardware
46 * acceleration.
47 * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated.
48 * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs.
49 * @KEY_FLAG_TODO_DEFMGMTKEY: Key is default management key and debugfs needs
50 * to be updated.
51 */ 41 */
52enum ieee80211_internal_key_flags { 42enum ieee80211_internal_key_flags {
53 KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), 43 KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0),
54 KEY_FLAG_TODO_DELETE = BIT(1),
55 KEY_FLAG_TODO_HWACCEL_ADD = BIT(2),
56 KEY_FLAG_TODO_HWACCEL_REMOVE = BIT(3),
57 KEY_FLAG_TODO_DEFKEY = BIT(4),
58 KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5),
59 KEY_FLAG_TODO_DEFMGMTKEY = BIT(6),
60}; 44};
61 45
62enum ieee80211_internal_tkip_state { 46enum ieee80211_internal_tkip_state {
@@ -79,10 +63,8 @@ struct ieee80211_key {
79 63
80 /* for sdata list */ 64 /* for sdata list */
81 struct list_head list; 65 struct list_head list;
82 /* for todo list */
83 struct list_head todo;
84 66
85 /* protected by todo lock! */ 67 /* protected by key mutex */
86 unsigned int flags; 68 unsigned int flags;
87 69
88 union { 70 union {
@@ -95,7 +77,13 @@ struct ieee80211_key {
95 } tkip; 77 } tkip;
96 struct { 78 struct {
97 u8 tx_pn[6]; 79 u8 tx_pn[6];
98 u8 rx_pn[NUM_RX_DATA_QUEUES][6]; 80 /*
81 * Last received packet number. The first
82 * NUM_RX_DATA_QUEUES counters are used with Data
83 * frames and the last counter is used with Robust
84 * Management frames.
85 */
86 u8 rx_pn[NUM_RX_DATA_QUEUES + 1][6];
99 struct crypto_cipher *tfm; 87 struct crypto_cipher *tfm;
100 u32 replays; /* dot11RSNAStatsCCMPReplays */ 88 u32 replays; /* dot11RSNAStatsCCMPReplays */
101 /* scratch buffers for virt_to_page() (crypto API) */ 89 /* scratch buffers for virt_to_page() (crypto API) */
@@ -147,7 +135,8 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
147void ieee80211_key_link(struct ieee80211_key *key, 135void ieee80211_key_link(struct ieee80211_key *key,
148 struct ieee80211_sub_if_data *sdata, 136 struct ieee80211_sub_if_data *sdata,
149 struct sta_info *sta); 137 struct sta_info *sta);
150void ieee80211_key_free(struct ieee80211_key *key); 138void ieee80211_key_free(struct ieee80211_local *local,
139 struct ieee80211_key *key);
151void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); 140void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx);
152void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, 141void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
153 int idx); 142 int idx);
@@ -155,6 +144,4 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata);
155void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); 144void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
156void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); 145void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata);
157 146
158void ieee80211_key_todo(void);
159
160#endif /* IEEE80211_KEY_H */ 147#endif /* IEEE80211_KEY_H */
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b887e484ae04..798a91b100cc 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -20,6 +20,7 @@
20#include <linux/rtnetlink.h> 20#include <linux/rtnetlink.h>
21#include <linux/bitmap.h> 21#include <linux/bitmap.h>
22#include <linux/pm_qos_params.h> 22#include <linux/pm_qos_params.h>
23#include <linux/inetdevice.h>
23#include <net/net_namespace.h> 24#include <net/net_namespace.h>
24#include <net/cfg80211.h> 25#include <net/cfg80211.h>
25 26
@@ -71,7 +72,7 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
71 spin_lock_bh(&local->filter_lock); 72 spin_lock_bh(&local->filter_lock);
72 changed_flags = local->filter_flags ^ new_flags; 73 changed_flags = local->filter_flags ^ new_flags;
73 74
74 mc = drv_prepare_multicast(local, local->mc_count, local->mc_list); 75 mc = drv_prepare_multicast(local, &local->mc_list);
75 spin_unlock_bh(&local->filter_lock); 76 spin_unlock_bh(&local->filter_lock);
76 77
77 /* be a bit nasty */ 78 /* be a bit nasty */
@@ -106,12 +107,15 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
106 if (scan_chan) { 107 if (scan_chan) {
107 chan = scan_chan; 108 chan = scan_chan;
108 channel_type = NL80211_CHAN_NO_HT; 109 channel_type = NL80211_CHAN_NO_HT;
110 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
109 } else if (local->tmp_channel) { 111 } else if (local->tmp_channel) {
110 chan = scan_chan = local->tmp_channel; 112 chan = scan_chan = local->tmp_channel;
111 channel_type = local->tmp_channel_type; 113 channel_type = local->tmp_channel_type;
114 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
112 } else { 115 } else {
113 chan = local->oper_channel; 116 chan = local->oper_channel;
114 channel_type = local->oper_channel_type; 117 channel_type = local->_oper_channel_type;
118 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
115 } 119 }
116 120
117 if (chan != local->hw.conf.channel || 121 if (chan != local->hw.conf.channel ||
@@ -259,7 +263,6 @@ static void ieee80211_tasklet_handler(unsigned long data)
259{ 263{
260 struct ieee80211_local *local = (struct ieee80211_local *) data; 264 struct ieee80211_local *local = (struct ieee80211_local *) data;
261 struct sk_buff *skb; 265 struct sk_buff *skb;
262 struct ieee80211_ra_tid *ra_tid;
263 266
264 while ((skb = skb_dequeue(&local->skb_queue)) || 267 while ((skb = skb_dequeue(&local->skb_queue)) ||
265 (skb = skb_dequeue(&local->skb_queue_unreliable))) { 268 (skb = skb_dequeue(&local->skb_queue_unreliable))) {
@@ -274,18 +277,6 @@ static void ieee80211_tasklet_handler(unsigned long data)
274 skb->pkt_type = 0; 277 skb->pkt_type = 0;
275 ieee80211_tx_status(local_to_hw(local), skb); 278 ieee80211_tx_status(local_to_hw(local), skb);
276 break; 279 break;
277 case IEEE80211_DELBA_MSG:
278 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
279 ieee80211_stop_tx_ba_cb(ra_tid->vif, ra_tid->ra,
280 ra_tid->tid);
281 dev_kfree_skb(skb);
282 break;
283 case IEEE80211_ADDBA_MSG:
284 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
285 ieee80211_start_tx_ba_cb(ra_tid->vif, ra_tid->ra,
286 ra_tid->tid);
287 dev_kfree_skb(skb);
 288 break;
289 default: 280 default:
290 WARN(1, "mac80211: Packet is of unknown type %d\n", 281 WARN(1, "mac80211: Packet is of unknown type %d\n",
291 skb->pkt_type); 282 skb->pkt_type);
@@ -309,6 +300,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
309{ 300{
310 struct ieee80211_local *local = hw_to_local(hw); 301 struct ieee80211_local *local = hw_to_local(hw);
311 302
303 trace_api_restart_hw(local);
304
312 /* use this reason, __ieee80211_resume will unblock it */ 305 /* use this reason, __ieee80211_resume will unblock it */
313 ieee80211_stop_queues_by_reason(hw, 306 ieee80211_stop_queues_by_reason(hw,
314 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 307 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
@@ -327,6 +320,76 @@ static void ieee80211_recalc_smps_work(struct work_struct *work)
327 mutex_unlock(&local->iflist_mtx); 320 mutex_unlock(&local->iflist_mtx);
328} 321}
329 322
323#ifdef CONFIG_INET
324static int ieee80211_ifa_changed(struct notifier_block *nb,
325 unsigned long data, void *arg)
326{
327 struct in_ifaddr *ifa = arg;
328 struct ieee80211_local *local =
329 container_of(nb, struct ieee80211_local,
330 ifa_notifier);
331 struct net_device *ndev = ifa->ifa_dev->dev;
332 struct wireless_dev *wdev = ndev->ieee80211_ptr;
333 struct in_device *idev;
334 struct ieee80211_sub_if_data *sdata;
335 struct ieee80211_bss_conf *bss_conf;
336 struct ieee80211_if_managed *ifmgd;
337 int c = 0;
338
339 if (!netif_running(ndev))
340 return NOTIFY_DONE;
341
342 /* Make sure it's our interface that got changed */
343 if (!wdev)
344 return NOTIFY_DONE;
345
346 if (wdev->wiphy != local->hw.wiphy)
347 return NOTIFY_DONE;
348
349 sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
350 bss_conf = &sdata->vif.bss_conf;
351
352 /* ARP filtering is only supported in managed mode */
353 if (sdata->vif.type != NL80211_IFTYPE_STATION)
354 return NOTIFY_DONE;
355
356 idev = sdata->dev->ip_ptr;
357 if (!idev)
358 return NOTIFY_DONE;
359
360 ifmgd = &sdata->u.mgd;
361 mutex_lock(&ifmgd->mtx);
362
363 /* Copy the addresses to the bss_conf list */
364 ifa = idev->ifa_list;
365 while (c < IEEE80211_BSS_ARP_ADDR_LIST_LEN && ifa) {
366 bss_conf->arp_addr_list[c] = ifa->ifa_address;
367 ifa = ifa->ifa_next;
368 c++;
369 }
370
371 /* If not all addresses fit the list, disable filtering */
372 if (ifa) {
373 sdata->arp_filter_state = false;
374 c = 0;
375 } else {
376 sdata->arp_filter_state = true;
377 }
378 bss_conf->arp_addr_cnt = c;
379
380 /* Configure driver only if associated */
381 if (ifmgd->associated) {
382 bss_conf->arp_filter_enabled = sdata->arp_filter_state;
383 ieee80211_bss_info_change_notify(sdata,
384 BSS_CHANGED_ARP_FILTER);
385 }
386
387 mutex_unlock(&ifmgd->mtx);
388
389 return NOTIFY_DONE;
390}
391#endif
392
330struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, 393struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
331 const struct ieee80211_ops *ops) 394 const struct ieee80211_ops *ops)
332{ 395{
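
On the driver side, the ARP address list is consumed from bss_info_changed() when BSS_CHANGED_ARP_FILTER is signalled; a hypothetical sketch (hw_set_arp_filter() and hw_enable_arp_filter() are invented for illustration):

	static void xyz_bss_info_changed(struct ieee80211_hw *hw,
					 struct ieee80211_vif *vif,
					 struct ieee80211_bss_conf *info,
					 u32 changed)
	{
		int i;

		if (!(changed & BSS_CHANGED_ARP_FILTER))
			return;

		for (i = 0; i < info->arp_addr_cnt; i++)
			hw_set_arp_filter(hw->priv, i, info->arp_addr_list[i]);
		hw_enable_arp_filter(hw->priv, info->arp_filter_enabled);
	}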
@@ -388,10 +451,13 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
388 local->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; 451 local->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;
389 452
390 INIT_LIST_HEAD(&local->interfaces); 453 INIT_LIST_HEAD(&local->interfaces);
454
455 __hw_addr_init(&local->mc_list);
456
391 mutex_init(&local->iflist_mtx); 457 mutex_init(&local->iflist_mtx);
392 mutex_init(&local->scan_mtx); 458 mutex_init(&local->scan_mtx);
393 459
394 spin_lock_init(&local->key_lock); 460 mutex_init(&local->key_mtx);
395 spin_lock_init(&local->filter_lock); 461 spin_lock_init(&local->filter_lock);
396 spin_lock_init(&local->queue_stop_reason_lock); 462 spin_lock_init(&local->queue_stop_reason_lock);
397 463
@@ -414,8 +480,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
414 480
415 sta_info_init(local); 481 sta_info_init(local);
416 482
417 for (i = 0; i < IEEE80211_MAX_QUEUES; i++) 483 for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
418 skb_queue_head_init(&local->pending[i]); 484 skb_queue_head_init(&local->pending[i]);
485 atomic_set(&local->agg_queue_stop[i], 0);
486 }
419 tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, 487 tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending,
420 (unsigned long)local); 488 (unsigned long)local);
421 489
@@ -426,8 +494,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
426 skb_queue_head_init(&local->skb_queue); 494 skb_queue_head_init(&local->skb_queue);
427 skb_queue_head_init(&local->skb_queue_unreliable); 495 skb_queue_head_init(&local->skb_queue_unreliable);
428 496
429 spin_lock_init(&local->ampdu_lock);
430
431 return local_to_hw(local); 497 return local_to_hw(local);
432} 498}
433EXPORT_SYMBOL(ieee80211_alloc_hw); 499EXPORT_SYMBOL(ieee80211_alloc_hw);
@@ -437,7 +503,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
437 struct ieee80211_local *local = hw_to_local(hw); 503 struct ieee80211_local *local = hw_to_local(hw);
438 int result; 504 int result;
439 enum ieee80211_band band; 505 enum ieee80211_band band;
440 int channels, i, j, max_bitrates; 506 int channels, max_bitrates;
441 bool supp_ht; 507 bool supp_ht;
442 static const u32 cipher_suites[] = { 508 static const u32 cipher_suites[] = {
443 WLAN_CIPHER_SUITE_WEP40, 509 WLAN_CIPHER_SUITE_WEP40,
@@ -567,16 +633,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
567 633
568 local->hw.conf.listen_interval = local->hw.max_listen_interval; 634 local->hw.conf.listen_interval = local->hw.max_listen_interval;
569 635
636 local->dynamic_ps_forced_timeout = -1;
637
570 result = sta_info_start(local); 638 result = sta_info_start(local);
571 if (result < 0) 639 if (result < 0)
572 goto fail_sta_info; 640 goto fail_sta_info;
573 641
574 result = ieee80211_wep_init(local); 642 result = ieee80211_wep_init(local);
575 if (result < 0) { 643 if (result < 0)
576 printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", 644 printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n",
577 wiphy_name(local->hw.wiphy), result); 645 wiphy_name(local->hw.wiphy), result);
578 goto fail_wep;
579 }
580 646
581 rtnl_lock(); 647 rtnl_lock();
582 648
@@ -601,40 +667,36 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
601 667
602 ieee80211_led_init(local); 668 ieee80211_led_init(local);
603 669
604 /* alloc internal scan request */
605 i = 0;
606 local->int_scan_req->ssids = &local->scan_ssid;
607 local->int_scan_req->n_ssids = 1;
608 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
609 if (!hw->wiphy->bands[band])
610 continue;
611 for (j = 0; j < hw->wiphy->bands[band]->n_channels; j++) {
612 local->int_scan_req->channels[i] =
613 &hw->wiphy->bands[band]->channels[j];
614 i++;
615 }
616 }
617 local->int_scan_req->n_channels = i;
618
619 local->network_latency_notifier.notifier_call = 670 local->network_latency_notifier.notifier_call =
620 ieee80211_max_network_latency; 671 ieee80211_max_network_latency;
621 result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, 672 result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY,
622 &local->network_latency_notifier); 673 &local->network_latency_notifier);
623
624 if (result) { 674 if (result) {
625 rtnl_lock(); 675 rtnl_lock();
626 goto fail_pm_qos; 676 goto fail_pm_qos;
627 } 677 }
628 678
679#ifdef CONFIG_INET
680 local->ifa_notifier.notifier_call = ieee80211_ifa_changed;
681 result = register_inetaddr_notifier(&local->ifa_notifier);
682 if (result)
683 goto fail_ifa;
684#endif
685
629 return 0; 686 return 0;
630 687
688#ifdef CONFIG_INET
689 fail_ifa:
690 pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY,
691 &local->network_latency_notifier);
692 rtnl_lock();
693#endif
631 fail_pm_qos: 694 fail_pm_qos:
632 ieee80211_led_exit(local); 695 ieee80211_led_exit(local);
633 ieee80211_remove_interfaces(local); 696 ieee80211_remove_interfaces(local);
634 fail_rate: 697 fail_rate:
635 rtnl_unlock(); 698 rtnl_unlock();
636 ieee80211_wep_free(local); 699 ieee80211_wep_free(local);
637 fail_wep:
638 sta_info_stop(local); 700 sta_info_stop(local);
639 fail_sta_info: 701 fail_sta_info:
640 destroy_workqueue(local->workqueue); 702 destroy_workqueue(local->workqueue);
@@ -655,6 +717,9 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 
 	pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY,
 			       &local->network_latency_notifier);
+#ifdef CONFIG_INET
+	unregister_inetaddr_notifier(&local->ifa_notifier);
+#endif
 
 	rtnl_lock();
 
@@ -712,6 +777,10 @@ static int __init ieee80211_init(void)
 	if (ret)
 		return ret;
 
+	ret = rc80211_minstrel_ht_init();
+	if (ret)
+		goto err_minstrel;
+
 	ret = rc80211_pid_init();
 	if (ret)
 		goto err_pid;
@@ -724,6 +793,8 @@ static int __init ieee80211_init(void)
 err_netdev:
 	rc80211_pid_exit();
 err_pid:
+	rc80211_minstrel_ht_exit();
+err_minstrel:
 	rc80211_minstrel_exit();
 
 	return ret;
@@ -732,6 +803,7 @@ static int __init ieee80211_init(void)
 static void __exit ieee80211_exit(void)
 {
 	rc80211_pid_exit();
+	rc80211_minstrel_ht_exit();
 	rc80211_minstrel_exit();
 
 	/*
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 859ee5f3d941..c8a4f19ed13b 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -54,7 +54,7 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data)
 		return;
 	}
 
-	ieee80211_queue_work(&local->hw, &ifmsh->work);
+	ieee80211_queue_work(&local->hw, &sdata->work);
 }
 
 /**
@@ -287,8 +287,6 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	*pos++ |= sdata->u.mesh.accepting_plinks ?
 			MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
 	*pos++ = 0x00;
-
-	return;
 }
 
 u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl)
@@ -347,7 +345,7 @@ static void ieee80211_mesh_path_timer(unsigned long data)
 		return;
 	}
 
-	ieee80211_queue_work(&local->hw, &ifmsh->work);
+	ieee80211_queue_work(&local->hw, &sdata->work);
 }
 
 static void ieee80211_mesh_path_root_timer(unsigned long data)
@@ -364,7 +362,7 @@ static void ieee80211_mesh_path_root_timer(unsigned long data)
 		return;
 	}
 
-	ieee80211_queue_work(&local->hw, &ifmsh->work);
+	ieee80211_queue_work(&local->hw, &sdata->work);
 }
 
 void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh)
@@ -486,9 +484,6 @@ void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 
-	/* might restart the timer but that doesn't matter */
-	cancel_work_sync(&ifmsh->work);
-
 	/* use atomic bitops in case both timers fire at the same time */
 
 	if (del_timer_sync(&ifmsh->housekeeping_timer))
@@ -520,7 +515,7 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
 
 	set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
 	ieee80211_mesh_root_setup(ifmsh);
-	ieee80211_queue_work(&local->hw, &ifmsh->work);
+	ieee80211_queue_work(&local->hw, &sdata->work);
 	sdata->vif.bss_conf.beacon_int = MESH_DEFAULT_BEACON_INTERVAL;
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON |
 					 BSS_CHANGED_BEACON_ENABLED |
@@ -538,16 +533,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 	 * whether the interface is running, which, at this point,
 	 * it no longer is.
 	 */
-	cancel_work_sync(&sdata->u.mesh.work);
-
-	/*
-	 * When we get here, the interface is marked down.
-	 * Call synchronize_rcu() to wait for the RX path
-	 * should it be using the interface and enqueuing
-	 * frames at this very time on another CPU.
-	 */
-	rcu_barrier(); /* Wait for RX path and call_rcu()'s */
-	skb_queue_purge(&sdata->u.mesh.skb_queue);
+	cancel_work_sync(&sdata->work);
 }
 
 static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
@@ -601,17 +587,17 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 					  struct ieee80211_rx_status *rx_status)
 {
 	switch (mgmt->u.action.category) {
-	case MESH_PLINK_CATEGORY:
+	case WLAN_CATEGORY_MESH_PLINK:
 		mesh_rx_plink_frame(sdata, mgmt, len, rx_status);
 		break;
-	case MESH_PATH_SEL_CATEGORY:
+	case WLAN_CATEGORY_MESH_PATH_SEL:
 		mesh_rx_path_sel_frame(sdata, mgmt, len);
 		break;
 	}
 }
 
-static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
-					  struct sk_buff *skb)
+void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
+				   struct sk_buff *skb)
 {
 	struct ieee80211_rx_status *rx_status;
 	struct ieee80211_if_mesh *ifmsh;
@@ -634,26 +620,11 @@ static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 		ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status);
 		break;
 	}
-
-	kfree_skb(skb);
 }
 
-static void ieee80211_mesh_work(struct work_struct *work)
+void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata)
 {
-	struct ieee80211_sub_if_data *sdata =
-		container_of(work, struct ieee80211_sub_if_data, u.mesh.work);
-	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
-	struct sk_buff *skb;
-
-	if (!ieee80211_sdata_running(sdata))
-		return;
-
-	if (local->scanning)
-		return;
-
-	while ((skb = skb_dequeue(&ifmsh->skb_queue)))
-		ieee80211_mesh_rx_queued_mgmt(sdata, skb);
 
 	if (ifmsh->preq_queue_len &&
 	    time_after(jiffies,
@@ -680,7 +651,7 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdata, &local->interfaces, list)
 		if (ieee80211_vif_is_mesh(&sdata->vif))
-			ieee80211_queue_work(&local->hw, &sdata->u.mesh.work);
+			ieee80211_queue_work(&local->hw, &sdata->work);
 	rcu_read_unlock();
 }
 
@@ -688,11 +659,9 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 
-	INIT_WORK(&ifmsh->work, ieee80211_mesh_work);
 	setup_timer(&ifmsh->housekeeping_timer,
 		    ieee80211_mesh_housekeeping_timer,
 		    (unsigned long) sdata);
-	skb_queue_head_init(&sdata->u.mesh.skb_queue);
 
 	ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T;
 	ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T;
@@ -733,29 +702,3 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 	INIT_LIST_HEAD(&ifmsh->preq_queue.list);
 	spin_lock_init(&ifmsh->mesh_preq_queue_lock);
 }
-
-ieee80211_rx_result
-ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
-	struct ieee80211_mgmt *mgmt;
-	u16 fc;
-
-	if (skb->len < 24)
-		return RX_DROP_MONITOR;
-
-	mgmt = (struct ieee80211_mgmt *) skb->data;
-	fc = le16_to_cpu(mgmt->frame_control);
-
-	switch (fc & IEEE80211_FCTL_STYPE) {
-	case IEEE80211_STYPE_ACTION:
-	case IEEE80211_STYPE_PROBE_RESP:
-	case IEEE80211_STYPE_BEACON:
-		skb_queue_tail(&ifmsh->skb_queue, skb);
-		ieee80211_queue_work(&local->hw, &ifmsh->work);
-		return RX_QUEUED;
-	}
-
-	return RX_CONTINUE;
-}
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 85562c59d7d6..ebd3f1d9d889 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -209,8 +209,6 @@ struct mesh_rmc {
 #define MESH_MAX_MPATHS		1024
 
 /* Pending ANA approval */
-#define MESH_PLINK_CATEGORY	30
-#define MESH_PATH_SEL_CATEGORY	32
 #define MESH_PATH_SEL_ACTION	0
 
 /* PERR reason codes */
@@ -239,8 +237,6 @@ void ieee80211s_update_metric(struct ieee80211_local *local,
 		struct sta_info *stainfo, struct sk_buff *skb);
 void ieee80211s_stop(void);
 void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata);
-ieee80211_rx_result
-ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb);
 void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata);
 void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata);
 void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index fefc45c4b4e8..829e08a657d0 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -132,7 +132,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	/* BSSID == SA */
 	memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
-	mgmt->u.action.category = MESH_PATH_SEL_CATEGORY;
+	mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL;
 	mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION;
 
 	switch (action) {
@@ -225,7 +225,7 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
 	memcpy(mgmt->da, ra, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	/* BSSID is left zeroed, wildcard value */
-	mgmt->u.action.category = MESH_PATH_SEL_CATEGORY;
+	mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL;
 	mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION;
 	ie_len = 15;
 	pos = skb_put(skb, 2 + ie_len);
@@ -624,7 +624,6 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 fail:
 	rcu_read_unlock();
 	sdata->u.mesh.mshstats.dropped_frames_no_route++;
-	return;
 }
 
 static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
@@ -806,14 +805,14 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags)
 	spin_unlock(&ifmsh->mesh_preq_queue_lock);
 
 	if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata)))
-		ieee80211_queue_work(&sdata->local->hw, &ifmsh->work);
+		ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 
 	else if (time_before(jiffies, ifmsh->last_preq)) {
 		/* avoid long wait if did not send preqs for a long time
 		 * and jiffies wrapped around
 		 */
 		ifmsh->last_preq = jiffies - min_preq_int_jiff(sdata) - 1;
-		ieee80211_queue_work(&sdata->local->hw, &ifmsh->work);
+		ieee80211_queue_work(&sdata->local->hw, &sdata->work);
 	} else
 		mod_timer(&ifmsh->mesh_path_timer, ifmsh->last_preq +
 				  min_preq_int_jiff(sdata));
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 181ffd6efd81..349e466cf08b 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -315,7 +315,7 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata)
 	read_unlock(&pathtbl_resize_lock);
 	if (grow) {
 		set_bit(MESH_WORK_GROW_MPATH_TABLE, &ifmsh->wrkq_flags);
-		ieee80211_queue_work(&local->hw, &ifmsh->work);
+		ieee80211_queue_work(&local->hw, &sdata->work);
 	}
 	return 0;
 
@@ -425,7 +425,7 @@ int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata)
 	read_unlock(&pathtbl_resize_lock);
 	if (grow) {
 		set_bit(MESH_WORK_GROW_MPP_TABLE, &ifmsh->wrkq_flags);
-		ieee80211_queue_work(&local->hw, &ifmsh->work);
+		ieee80211_queue_work(&local->hw, &sdata->work);
 	}
 	return 0;
 
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 7b7080e2b49f..ea13a80a476c 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -65,7 +65,6 @@ void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata)
 {
 	atomic_inc(&sdata->u.mesh.mshstats.estab_plinks);
 	mesh_accept_plinks_update(sdata);
-	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
 }
 
 static inline
@@ -73,7 +72,6 @@ void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
 {
 	atomic_dec(&sdata->u.mesh.mshstats.estab_plinks);
 	mesh_accept_plinks_update(sdata);
-	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
 }
 
 /**
@@ -115,7 +113,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata,
 }
 
 /**
- * mesh_plink_deactivate - deactivate mesh peer link
+ * __mesh_plink_deactivate - deactivate mesh peer link
  *
  * @sta: mesh peer link to deactivate
  *
@@ -123,18 +121,23 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata,
  *
  * Locking: the caller must hold sta->lock
  */
-static void __mesh_plink_deactivate(struct sta_info *sta)
+static bool __mesh_plink_deactivate(struct sta_info *sta)
 {
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	bool deactivated = false;
 
-	if (sta->plink_state == PLINK_ESTAB)
+	if (sta->plink_state == PLINK_ESTAB) {
 		mesh_plink_dec_estab_count(sdata);
+		deactivated = true;
+	}
 	sta->plink_state = PLINK_BLOCKED;
 	mesh_path_flush_by_nexthop(sta);
+
+	return deactivated;
 }
 
 /**
- * __mesh_plink_deactivate - deactivate mesh peer link
+ * mesh_plink_deactivate - deactivate mesh peer link
  *
  * @sta: mesh peer link to deactivate
  *
@@ -142,9 +145,15 @@ static void __mesh_plink_deactivate(struct sta_info *sta)
  */
 void mesh_plink_deactivate(struct sta_info *sta)
 {
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	bool deactivated;
+
 	spin_lock_bh(&sta->lock);
-	__mesh_plink_deactivate(sta);
+	deactivated = __mesh_plink_deactivate(sta);
 	spin_unlock_bh(&sta->lock);
+
+	if (deactivated)
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
 }
 
 static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
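The refactor above moves the BSS_CHANGED_BEACON notification out from under sta->lock: the helper only records whether an established link was actually torn down, and the caller notifies after dropping the spinlock. A sketch of that pattern using pthreads (names hypothetical; the kernel code uses a spinlock and ieee80211_bss_info_change_notify()):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int established;

    /* Caller must hold `lock`; reports whether state really changed. */
    static bool __deactivate(void)
    {
        bool deactivated = false;

        if (established) {
            established = 0;
            deactivated = true;
        }
        return deactivated;
    }

    /* Stand-in for a notifier that may sleep or take other locks. */
    static void notify(void) { puts("beacon changed"); }

    void deactivate(void)
    {
        bool deactivated;

        pthread_mutex_lock(&lock);
        deactivated = __deactivate();
        pthread_mutex_unlock(&lock);

        /* Notify without the lock held, and only on a real change. */
        if (deactivated)
            notify();
    }

    int main(void)
    {
        established = 1;
        deactivate();
        return 0;
    }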
@@ -172,7 +181,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	/* BSSID is left zeroed, wildcard value */
-	mgmt->u.action.category = MESH_PLINK_CATEGORY;
+	mgmt->u.action.category = WLAN_CATEGORY_MESH_PLINK;
 	mgmt->u.action.u.plink_action.action_code = action;
 
 	if (action == PLINK_CLOSE)
@@ -381,10 +390,16 @@ int mesh_plink_open(struct sta_info *sta)
 
 void mesh_plink_block(struct sta_info *sta)
 {
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	bool deactivated;
+
 	spin_lock_bh(&sta->lock);
-	__mesh_plink_deactivate(sta);
+	deactivated = __mesh_plink_deactivate(sta);
 	sta->plink_state = PLINK_BLOCKED;
 	spin_unlock_bh(&sta->lock);
+
+	if (deactivated)
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
 }
 
 
@@ -397,6 +412,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 	enum plink_event event;
 	enum plink_frame_type ftype;
 	size_t baselen;
+	bool deactivated;
 	u8 ie_len;
 	u8 *baseaddr;
 	__le16 plid, llid, reason;
@@ -651,8 +667,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 		case CNF_ACPT:
 			del_timer(&sta->plink_timer);
 			sta->plink_state = PLINK_ESTAB;
-			mesh_plink_inc_estab_count(sdata);
 			spin_unlock_bh(&sta->lock);
+			mesh_plink_inc_estab_count(sdata);
+			ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
 			mpl_dbg("Mesh plink with %pM ESTABLISHED\n",
 				sta->sta.addr);
 			break;
@@ -684,8 +701,9 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 		case OPN_ACPT:
 			del_timer(&sta->plink_timer);
 			sta->plink_state = PLINK_ESTAB;
-			mesh_plink_inc_estab_count(sdata);
 			spin_unlock_bh(&sta->lock);
+			mesh_plink_inc_estab_count(sdata);
+			ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
 			mpl_dbg("Mesh plink with %pM ESTABLISHED\n",
 				sta->sta.addr);
 			mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid,
@@ -702,11 +720,13 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
 		case CLS_ACPT:
 			reason = cpu_to_le16(MESH_CLOSE_RCVD);
 			sta->reason = reason;
-			__mesh_plink_deactivate(sta);
+			deactivated = __mesh_plink_deactivate(sta);
 			sta->plink_state = PLINK_HOLDING;
 			llid = sta->llid;
 			mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
 			spin_unlock_bh(&sta->lock);
+			if (deactivated)
+				ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON);
 			mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid,
 					    plid, reason);
 			break;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 4aefa6dc3091..b6c163ac22da 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -47,6 +47,13 @@
  */
 #define IEEE80211_PROBE_WAIT		(HZ / 2)
 
+/*
+ * Weight given to the latest Beacon frame when calculating average signal
+ * strength for Beacon frames received in the current BSS. This must be
+ * between 1 and 15.
+ */
+#define IEEE80211_SIGNAL_AVE_WEIGHT	3
+
 #define TMR_RUNNING_TIMER	0
 #define TMR_RUNNING_CHANSW	1
 
@@ -130,11 +137,14 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 	struct sta_info *sta;
 	u32 changed = 0;
 	u16 ht_opmode;
-	bool enable_ht = true, ht_changed;
+	bool enable_ht = true;
+	enum nl80211_channel_type prev_chantype;
 	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
+	prev_chantype = sdata->vif.bss_conf.channel_type;
+
 	/* HT is not supported */
 	if (!sband->ht_cap.ht_supported)
 		enable_ht = false;
@@ -165,38 +175,37 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	ht_changed = conf_is_ht(&local->hw.conf) != enable_ht ||
-		     channel_type != local->hw.conf.channel_type;
-
 	if (local->tmp_channel)
 		local->tmp_channel_type = channel_type;
-	local->oper_channel_type = channel_type;
 
-	if (ht_changed) {
-		/* channel_type change automatically detected */
-		ieee80211_hw_config(local, 0);
+	if (!ieee80211_set_channel_type(local, sdata, channel_type)) {
+		/* can only fail due to HT40+/- mismatch */
+		channel_type = NL80211_CHAN_HT20;
+		WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type));
+	}
 
+	/* channel_type change automatically detected */
+	ieee80211_hw_config(local, 0);
+
+	if (prev_chantype != channel_type) {
 		rcu_read_lock();
 		sta = sta_info_get(sdata, bssid);
 		if (sta)
 			rate_control_rate_update(local, sband, sta,
 						 IEEE80211_RC_HT_CHANGED,
-						 local->oper_channel_type);
+						 channel_type);
 		rcu_read_unlock();
 	}
-
-	/* disable HT */
-	if (!enable_ht)
-		return 0;
 
 	ht_opmode = le16_to_cpu(hti->operation_mode);
 
 	/* if bss configuration changed store the new one */
-	if (!sdata->ht_opmode_valid ||
-	    sdata->vif.bss_conf.ht_operation_mode != ht_opmode) {
+	if (sdata->ht_opmode_valid != enable_ht ||
+	    sdata->vif.bss_conf.ht_operation_mode != ht_opmode ||
+	    prev_chantype != channel_type) {
 		changed |= BSS_CHANGED_HT;
 		sdata->vif.bss_conf.ht_operation_mode = ht_opmode;
-		sdata->ht_opmode_valid = true;
+		sdata->ht_opmode_valid = enable_ht;
 	}
 
 	return changed;
@@ -206,7 +215,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 
 static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 					   const u8 *bssid, u16 stype, u16 reason,
-					   void *cookie)
+					   void *cookie, bool send_frame)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -243,7 +252,11 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 		cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len);
 	if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED))
 		IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
-	ieee80211_tx_skb(sdata, skb);
+
+	if (send_frame)
+		ieee80211_tx_skb(sdata, skb);
+	else
+		kfree_skb(skb);
 }
 
 void ieee80211_send_pspoll(struct ieee80211_local *local,
@@ -329,7 +342,11 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 		goto out;
 
 	sdata->local->oper_channel = sdata->local->csa_channel;
-	ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL);
+	if (!sdata->local->ops->channel_switch) {
+		/* call "hw_config" only if doing sw channel switch */
+		ieee80211_hw_config(sdata->local,
+				    IEEE80211_CONF_CHANGE_CHANNEL);
+	}
 
 	/* XXX: shouldn't really modify cfg80211-owned data! */
 	ifmgd->associated->channel = sdata->local->oper_channel;
@@ -341,6 +358,29 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 	mutex_unlock(&ifmgd->mtx);
 }
 
+void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_if_managed *ifmgd;
+
+	sdata = vif_to_sdata(vif);
+	ifmgd = &sdata->u.mgd;
+
+	trace_api_chswitch_done(sdata, success);
+	if (!success) {
+		/*
+		 * If the channel switch was not successful, stay
+		 * around on the old channel. We currently lack
+		 * good handling of this situation, possibly we
+		 * should just drop the association.
+		 */
+		sdata->local->csa_channel = sdata->local->oper_channel;
+	}
+
+	ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
+}
+EXPORT_SYMBOL(ieee80211_chswitch_done);
+
 static void ieee80211_chswitch_timer(unsigned long data)
 {
 	struct ieee80211_sub_if_data *sdata =
@@ -357,7 +397,8 @@ static void ieee80211_chswitch_timer(unsigned long data)
 
 void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 				      struct ieee80211_channel_sw_ie *sw_elem,
-				      struct ieee80211_bss *bss)
+				      struct ieee80211_bss *bss,
+				      u64 timestamp)
 {
 	struct cfg80211_bss *cbss =
 		container_of((void *)bss, struct cfg80211_bss, priv);
@@ -385,10 +426,29 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 
 	sdata->local->csa_channel = new_ch;
 
+	if (sdata->local->ops->channel_switch) {
+		/* use driver's channel switch callback */
+		struct ieee80211_channel_switch ch_switch;
+		memset(&ch_switch, 0, sizeof(ch_switch));
+		ch_switch.timestamp = timestamp;
+		if (sw_elem->mode) {
+			ch_switch.block_tx = true;
+			ieee80211_stop_queues_by_reason(&sdata->local->hw,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+		}
+		ch_switch.channel = new_ch;
+		ch_switch.count = sw_elem->count;
+		ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
+		drv_channel_switch(sdata->local, &ch_switch);
+		return;
+	}
+
+	/* channel switch handled in software */
 	if (sw_elem->count <= 1) {
 		ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
 	} else {
-		ieee80211_stop_queues_by_reason(&sdata->local->hw,
+		if (sw_elem->mode)
+			ieee80211_stop_queues_by_reason(&sdata->local->hw,
 					IEEE80211_QUEUE_STOP_REASON_CSA);
 		ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
 		mod_timer(&ifmgd->chswitch_timer,
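The hunk above branches on whether the driver implements a channel_switch op: if it does, mac80211 packs the CSA parameters into one struct and hands the whole switch to the hardware; otherwise it falls back to the timer-driven software path. A compact restatement of that dispatch (the struct fields mirror what the diff fills in, but the surrounding type and function names are illustrative, not the mac80211 API):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct channel { int center_freq; };

    struct channel_switch {        /* mirrors the fields filled above */
        uint64_t timestamp;        /* TSF when the CSA frame arrived */
        bool block_tx;             /* mode 1: stop TX until the switch */
        struct channel *channel;   /* target channel */
        uint8_t count;             /* beacons remaining until switch */
    };

    struct ops {
        void (*channel_switch)(const struct channel_switch *cs);
    };

    static void sw_switch(struct channel *ch)
    {
        printf("software switch to %d MHz\n", ch->center_freq);
    }

    static void process_csa(const struct ops *ops, struct channel *new_ch,
                            uint64_t tsf, uint8_t mode, uint8_t count)
    {
        if (ops->channel_switch) {
            struct channel_switch cs = {
                .timestamp = tsf,
                .block_tx = mode != 0,
                .channel = new_ch,
                .count = count,
            };
            ops->channel_switch(&cs);  /* driver handles the timing */
            return;
        }
        sw_switch(new_ch);             /* handled in software */
    }

    int main(void)
    {
        struct ops ops = { .channel_switch = NULL };
        struct channel ch = { 5200 };

        process_csa(&ops, &ch, 0, 1, 2);
        return 0;
    }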
@@ -418,6 +478,39 @@ static void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
 	}
 }
 
+void ieee80211_enable_dyn_ps(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_conf *conf = &local->hw.conf;
+
+	WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION ||
+		!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) ||
+		(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS));
+
+	local->disable_dynamic_ps = false;
+	conf->dynamic_ps_timeout = local->dynamic_ps_user_timeout;
+}
+EXPORT_SYMBOL(ieee80211_enable_dyn_ps);
+
+void ieee80211_disable_dyn_ps(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_conf *conf = &local->hw.conf;
+
+	WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION ||
+		!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS) ||
+		(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS));
+
+	local->disable_dynamic_ps = true;
+	conf->dynamic_ps_timeout = 0;
+	del_timer_sync(&local->dynamic_ps_timer);
+	ieee80211_queue_work(&local->hw,
+			     &local->dynamic_ps_enable_work);
+}
+EXPORT_SYMBOL(ieee80211_disable_dyn_ps);
+
 /* powersave */
 static void ieee80211_enable_ps(struct ieee80211_local *local,
 				struct ieee80211_sub_if_data *sdata)
@@ -467,6 +560,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 {
 	struct ieee80211_sub_if_data *sdata, *found = NULL;
 	int count = 0;
+	int timeout;
 
 	if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) {
 		local->ps_sdata = NULL;
@@ -492,14 +586,34 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 	    found->u.mgd.associated->beacon_ies &&
 	    !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL |
 				    IEEE80211_STA_CONNECTION_POLL))) {
+		struct ieee80211_conf *conf = &local->hw.conf;
 		s32 beaconint_us;
 
 		if (latency < 0)
-			latency = pm_qos_requirement(PM_QOS_NETWORK_LATENCY);
+			latency = pm_qos_request(PM_QOS_NETWORK_LATENCY);
 
 		beaconint_us = ieee80211_tu_to_usec(
 					found->vif.bss_conf.beacon_int);
 
+		timeout = local->dynamic_ps_forced_timeout;
+		if (timeout < 0) {
+			/*
+			 * Go to full PSM if the user configures a very low
+			 * latency requirement.
+			 * The 2 second value is there for compatibility until
+			 * the PM_QOS_NETWORK_LATENCY is configured with real
+			 * values.
+			 */
+			if (latency > 1900000000 && latency != 2000000000)
+				timeout = 0;
+			else
+				timeout = 100;
+		}
+		local->dynamic_ps_user_timeout = timeout;
+		if (!local->disable_dynamic_ps)
+			conf->dynamic_ps_timeout =
+				local->dynamic_ps_user_timeout;
+
 		if (beaconint_us > latency) {
 			local->ps_sdata = NULL;
 		} else {
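The timeout selection above reads as a small pure function: a forced value (when set) wins; otherwise an effectively unconstrained PM-QoS latency (above 1.9 s, except the 2 s compatibility default) selects full PSM with no dynamic timeout, and anything else gets the 100 ms dynamic timeout. A standalone restatement of exactly that logic, with the latency in microseconds as PM_QOS_NETWORK_LATENCY reports it:

    #include <assert.h>

    /*
     * `forced` mirrors dynamic_ps_forced_timeout (-1 when unset);
     * 2000000000 is the PM_QOS_NETWORK_LATENCY "no requirement" default.
     */
    static int dynamic_ps_timeout(int forced, long latency_us)
    {
        if (forced >= 0)
            return forced;                 /* explicit setting wins */
        if (latency_us > 1900000000L && latency_us != 2000000000L)
            return 0;                      /* full PSM, no dynamic timeout */
        return 100;                        /* 100 ms of idle before sleep */
    }

    int main(void)
    {
        assert(dynamic_ps_timeout(50, 0) == 50);             /* forced */
        assert(dynamic_ps_timeout(-1, 2000000000L) == 100);  /* default */
        assert(dynamic_ps_timeout(-1, 1950000000L) == 0);    /* full PSM */
        return 0;
    }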
@@ -584,14 +698,18 @@ void ieee80211_dynamic_ps_timer(unsigned long data)
 
 /* MLME */
 static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
-				     struct ieee80211_if_managed *ifmgd,
+				     struct ieee80211_sub_if_data *sdata,
 				     u8 *wmm_param, size_t wmm_param_len)
 {
 	struct ieee80211_tx_queue_params params;
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	size_t left;
 	int count;
 	u8 *pos, uapsd_queues = 0;
 
+	if (!local->ops->conf_tx)
+		return;
+
 	if (local->hw.queues < 4)
 		return;
 
@@ -666,11 +784,15 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 		       params.aifs, params.cw_min, params.cw_max, params.txop,
 		       params.uapsd);
 #endif
-		if (drv_conf_tx(local, queue, &params) && local->ops->conf_tx)
+		if (drv_conf_tx(local, queue, &params))
 			printk(KERN_DEBUG "%s: failed to set TX queue "
 			       "parameters for queue %d\n",
 			       wiphy_name(local->hw.wiphy), queue);
 	}
+
+	/* enable WMM or activate new settings */
+	sdata->vif.bss_conf.qos = true;
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_QOS);
 }
 
 static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
@@ -718,11 +840,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_bss *bss = (void *)cbss->priv;
 	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
 
 	bss_info_changed |= BSS_CHANGED_ASSOC;
 	/* set timing information */
-	sdata->vif.bss_conf.beacon_int = cbss->beacon_interval;
-	sdata->vif.bss_conf.timestamp = cbss->tsf;
+	bss_conf->beacon_int = cbss->beacon_interval;
+	bss_conf->timestamp = cbss->tsf;
 
 	bss_info_changed |= BSS_CHANGED_BEACON_INT;
 	bss_info_changed |= ieee80211_handle_bss_capability(sdata,
@@ -731,6 +854,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	sdata->u.mgd.associated = cbss;
 	memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN);
 
+	sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE;
+
 	/* just to be sure */
 	sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL |
 				IEEE80211_STA_BEACON_POLL);
@@ -745,7 +870,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_led_assoc(local, 1);
 
-	sdata->vif.bss_conf.assoc = 1;
+	if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD)
+		bss_conf->dtim_period = bss->dtim_period;
+	else
+		bss_conf->dtim_period = 0;
+
+	bss_conf->assoc = 1;
 	/*
 	 * For now just always ask the driver to update the basic rateset
 	 * when we have associated, we aren't checking whether it actually
@@ -756,6 +886,17 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	/* And the BSSID changed - we're associated now */
 	bss_info_changed |= BSS_CHANGED_BSSID;
 
+	/* Tell the driver to monitor connection quality (if supported) */
+	if ((local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI) &&
+	    bss_conf->cqm_rssi_thold)
+		bss_info_changed |= BSS_CHANGED_CQM;
+
+	/* Enable ARP filtering */
+	if (bss_conf->arp_filter_enabled != sdata->arp_filter_state) {
+		bss_conf->arp_filter_enabled = sdata->arp_filter_state;
+		bss_info_changed |= BSS_CHANGED_ARP_FILTER;
+	}
+
 	ieee80211_bss_info_change_notify(sdata, bss_info_changed);
 
 	mutex_lock(&local->iflist_mtx);
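Both the associate path above and the disassociate path below accumulate a `changed` bitmask and issue a single bss_info_change_notify() at the end, flagging each field only when it actually transitions, rather than calling into the driver once per field. The idiom reduced to its essentials (flag values and struct layout hypothetical):

    #include <stdio.h>

    #define CHANGED_ASSOC       (1u << 0)
    #define CHANGED_BEACON_INT  (1u << 1)
    #define CHANGED_ARP_FILTER  (1u << 2)

    struct bss_conf { int assoc; int beacon_int; int arp_filter; };

    static void notify(unsigned changed)
    {
        /* one driver callback, however many fields changed */
        printf("notify 0x%x\n", changed);
    }

    static void set_associated(struct bss_conf *conf, int beacon_int, int arp)
    {
        unsigned changed = CHANGED_ASSOC;   /* association always changes */

        conf->assoc = 1;
        conf->beacon_int = beacon_int;
        changed |= CHANGED_BEACON_INT;

        if (conf->arp_filter != arp) {      /* only flag real transitions */
            conf->arp_filter = arp;
            changed |= CHANGED_ARP_FILTER;
        }

        notify(changed);
    }

    int main(void)
    {
        struct bss_conf conf = { 0, 0, 0 };

        set_associated(&conf, 100, 1);
        return 0;
    }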
@@ -767,7 +908,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	netif_carrier_on(sdata->dev);
 }
 
-static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata)
+static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
+				   bool remove_sta)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_local *local = sdata->local;
@@ -802,13 +944,13 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata)
 	netif_tx_stop_all_queues(sdata->dev);
 	netif_carrier_off(sdata->dev);
 
-	rcu_read_lock();
+	mutex_lock(&local->sta_mtx);
 	sta = sta_info_get(sdata, bssid);
 	if (sta) {
-		set_sta_flags(sta, WLAN_STA_DISASSOC);
+		set_sta_flags(sta, WLAN_STA_BLOCK_BA);
 		ieee80211_sta_tear_down_BA_sessions(sta);
 	}
-	rcu_read_unlock();
+	mutex_unlock(&local->sta_mtx);
 
 	changed |= ieee80211_reset_erp_info(sdata);
 
@@ -819,7 +961,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata)
 	ieee80211_set_wmm_default(sdata);
 
 	/* channel(_type) changes are handled by ieee80211_hw_config */
-	local->oper_channel_type = NL80211_CHAN_NO_HT;
+	WARN_ON(!ieee80211_set_channel_type(local, sdata, NL80211_CHAN_NO_HT));
 
 	/* on the next assoc, re-program HT parameters */
 	sdata->ht_opmode_valid = false;
@@ -836,11 +978,18 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata)
 
 	ieee80211_hw_config(local, config_changed);
 
-	/* And the BSSID changed -- not very interesting here */
-	changed |= BSS_CHANGED_BSSID;
+	/* Disable ARP filtering */
+	if (sdata->vif.bss_conf.arp_filter_enabled) {
+		sdata->vif.bss_conf.arp_filter_enabled = false;
+		changed |= BSS_CHANGED_ARP_FILTER;
+	}
+
+	/* The BSSID (not really interesting) and HT changed */
+	changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT;
 	ieee80211_bss_info_change_notify(sdata, changed);
 
-	sta_info_destroy_addr(sdata, bssid);
+	if (remove_sta)
+		sta_info_destroy_addr(sdata, bssid);
 }
 
 void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
@@ -857,6 +1006,9 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
 	if (is_multicast_ether_addr(hdr->addr1))
 		return;
 
+	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+		return;
+
 	mod_timer(&sdata->u.mgd.conn_mon_timer,
 		  round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
 }
@@ -934,23 +1086,72 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
 	mutex_unlock(&ifmgd->mtx);
 }
 
-void ieee80211_beacon_loss_work(struct work_struct *work)
+static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	struct ieee80211_local *local = sdata->local;
+	u8 bssid[ETH_ALEN];
+
+	mutex_lock(&ifmgd->mtx);
+	if (!ifmgd->associated) {
+		mutex_unlock(&ifmgd->mtx);
+		return;
+	}
+
+	memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
+
+	printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid);
+
+	ieee80211_set_disassoc(sdata, true);
+	ieee80211_recalc_idle(local);
+	mutex_unlock(&ifmgd->mtx);
+	/*
+	 * must be outside lock due to cfg80211,
+	 * but that's not a problem.
+	 */
+	ieee80211_send_deauth_disassoc(sdata, bssid,
+				       IEEE80211_STYPE_DEAUTH,
+				       WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
+				       NULL, true);
+}
+
+void ieee80211_beacon_connection_loss_work(struct work_struct *work)
 {
 	struct ieee80211_sub_if_data *sdata =
 		container_of(work, struct ieee80211_sub_if_data,
-			     u.mgd.beacon_loss_work);
+			     u.mgd.beacon_connection_loss_work);
 
-	ieee80211_mgd_probe_ap(sdata, true);
+	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+		__ieee80211_connection_loss(sdata);
+	else
+		ieee80211_mgd_probe_ap(sdata, true);
 }
 
 void ieee80211_beacon_loss(struct ieee80211_vif *vif)
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_hw *hw = &sdata->local->hw;
 
-	ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_loss_work);
+	trace_api_beacon_loss(sdata);
+
+	WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR);
+	ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
 }
 EXPORT_SYMBOL(ieee80211_beacon_loss);
 
+void ieee80211_connection_loss(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_hw *hw = &sdata->local->hw;
+
+	trace_api_connection_loss(sdata);
+
+	WARN_ON(!(hw->flags & IEEE80211_HW_CONNECTION_MONITOR));
+	ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
+}
+EXPORT_SYMBOL(ieee80211_connection_loss);
+
+
 static enum rx_mgmt_action __must_check
 ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 			 struct ieee80211_mgmt *mgmt, size_t len)
@@ -971,7 +1172,7 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: deauthenticated from %pM (Reason: %u)\n",
 	       sdata->name, bssid, reason_code);
 
-	ieee80211_set_disassoc(sdata);
+	ieee80211_set_disassoc(sdata, true);
 	ieee80211_recalc_idle(sdata->local);
 
 	return RX_MGMT_CFG80211_DEAUTH;
@@ -1001,7 +1202,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: disassociated from %pM (Reason: %u)\n",
 	       sdata->name, mgmt->sa, reason_code);
 
-	ieee80211_set_disassoc(sdata);
+	ieee80211_set_disassoc(sdata, true);
 	ieee80211_recalc_idle(sdata->local);
 	return RX_MGMT_CFG80211_DISASSOC;
 }
@@ -1130,7 +1331,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 	}
 
 	if (elems.wmm_param)
-		ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param,
+		ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
 					 elems.wmm_param_len);
 	else
 		ieee80211_set_wmm_default(sdata);
@@ -1215,7 +1416,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		    ETH_ALEN) == 0)) {
 		struct ieee80211_channel_sw_ie *sw_elem =
 			(struct ieee80211_channel_sw_ie *)elems->ch_switch_elem;
-		ieee80211_sta_process_chanswitch(sdata, sw_elem, bss);
+		ieee80211_sta_process_chanswitch(sdata, sw_elem,
+						 bss, rx_status->mactime);
 	}
 }
 
@@ -1254,12 +1456,17 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 	mutex_lock(&sdata->local->iflist_mtx);
 	ieee80211_recalc_ps(sdata->local, -1);
 	mutex_unlock(&sdata->local->iflist_mtx);
+
+	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+		return;
+
 	/*
 	 * We've received a probe response, but are not sure whether
 	 * we have or will be receiving any beacons or data, so let's
 	 * schedule the timers again, just in case.
 	 */
 	mod_beacon_timer(sdata);
+
 	mod_timer(&ifmgd->conn_mon_timer,
 		  round_jiffies_up(jiffies +
 				   IEEE80211_CONNECTION_IDLE_TIME));
@@ -1293,6 +1500,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_rx_status *rx_status)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
 	size_t baselen;
 	struct ieee802_11_elems elems;
 	struct ieee80211_local *local = sdata->local;
@@ -1328,6 +1536,41 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	if (memcmp(bssid, mgmt->bssid, ETH_ALEN) != 0)
 		return;
 
+	/* Track average RSSI from the Beacon frames of the current AP */
+	ifmgd->last_beacon_signal = rx_status->signal;
+	if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
+		ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
+		ifmgd->ave_beacon_signal = rx_status->signal;
+		ifmgd->last_cqm_event_signal = 0;
+	} else {
+		ifmgd->ave_beacon_signal =
+			(IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 +
+			 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) *
+			 ifmgd->ave_beacon_signal) / 16;
+	}
+	if (bss_conf->cqm_rssi_thold &&
+	    !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) {
+		int sig = ifmgd->ave_beacon_signal / 16;
+		int last_event = ifmgd->last_cqm_event_signal;
+		int thold = bss_conf->cqm_rssi_thold;
+		int hyst = bss_conf->cqm_rssi_hyst;
+		if (sig < thold &&
+		    (last_event == 0 || sig < last_event - hyst)) {
+			ifmgd->last_cqm_event_signal = sig;
+			ieee80211_cqm_rssi_notify(
+				&sdata->vif,
+				NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
+				GFP_KERNEL);
+		} else if (sig > thold &&
+			   (last_event == 0 || sig > last_event + hyst)) {
+			ifmgd->last_cqm_event_signal = sig;
+			ieee80211_cqm_rssi_notify(
+				&sdata->vif,
+				NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
+				GFP_KERNEL);
+		}
+	}
+
 	if (ifmgd->flags & IEEE80211_STA_BEACON_POLL) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		if (net_ratelimit()) {
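The averaging added above is a fixed-point exponential moving average: the state is kept scaled by 16, each beacon contributes IEEE80211_SIGNAL_AVE_WEIGHT/16 of its signal, and a CQM event fires only when the average crosses the threshold by more than the hysteresis since the last reported value. A self-contained demo of both pieces (the signal values, threshold, and hysteresis are made-up test inputs):

    #include <stdio.h>

    #define AVE_WEIGHT 3   /* matches IEEE80211_SIGNAL_AVE_WEIGHT */

    static int ave;            /* average signal, kept scaled by 16 */
    static int last_event;     /* signal at the last CQM event, 0 = none */

    static void beacon(int signal, int thold, int hyst)
    {
        int sig;

        /* new_ave = (w * sig * 16 + (16 - w) * old_ave) / 16 */
        ave = (AVE_WEIGHT * signal * 16 + (16 - AVE_WEIGHT) * ave) / 16;

        sig = ave / 16;        /* back to dBm for the comparison */
        if (sig < thold && (last_event == 0 || sig < last_event - hyst)) {
            last_event = sig;
            printf("RSSI LOW event at %d dBm\n", sig);
        } else if (sig > thold &&
                   (last_event == 0 || sig > last_event + hyst)) {
            last_event = sig;
            printf("RSSI HIGH event at %d dBm\n", sig);
        }
    }

    int main(void)
    {
        int i;

        ave = -60 * 16;   /* seeded from the first beacon, scaled by 16 */
        for (i = 0; i < 20; i++)
            beacon(-85, -70, 4);   /* falling: one LOW per hysteresis step */
        for (i = 0; i < 20; i++)
            beacon(-55, -70, 4);   /* recovering: HIGH events likewise */
        return 0;
    }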
@@ -1360,7 +1603,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems,
 				      true);
 
-		ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param,
+		ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
 					 elems.wmm_param_len);
 	}
 
@@ -1442,35 +1685,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	ieee80211_bss_info_change_notify(sdata, changed);
 }
 
-ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata,
-					  struct sk_buff *skb)
-{
-	struct ieee80211_local *local = sdata->local;
-	struct ieee80211_mgmt *mgmt;
-	u16 fc;
-
-	if (skb->len < 24)
-		return RX_DROP_MONITOR;
-
-	mgmt = (struct ieee80211_mgmt *) skb->data;
-	fc = le16_to_cpu(mgmt->frame_control);
-
-	switch (fc & IEEE80211_FCTL_STYPE) {
-	case IEEE80211_STYPE_PROBE_RESP:
-	case IEEE80211_STYPE_BEACON:
-	case IEEE80211_STYPE_DEAUTH:
-	case IEEE80211_STYPE_DISASSOC:
-	case IEEE80211_STYPE_ACTION:
-		skb_queue_tail(&sdata->u.mgd.skb_queue, skb);
-		ieee80211_queue_work(&local->hw, &sdata->u.mgd.work);
-		return RX_QUEUED;
-	}
-
-	return RX_DROP_MONITOR;
-}
-
-static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
-					 struct sk_buff *skb)
+void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
+				  struct sk_buff *skb)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_rx_status *rx_status;
@@ -1501,13 +1717,14 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 		rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len);
 		break;
 	case IEEE80211_STYPE_ACTION:
-		if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT)
+		switch (mgmt->u.action.category) {
+		case WLAN_CATEGORY_SPECTRUM_MGMT:
+			ieee80211_sta_process_chanswitch(sdata,
+					&mgmt->u.action.u.chan_switch.sw_elem,
+					(void *)ifmgd->associated->priv,
+					rx_status->mactime);
 			break;
-
-		ieee80211_sta_process_chanswitch(sdata,
-				&mgmt->u.action.u.chan_switch.sw_elem,
-				(void *)ifmgd->associated->priv);
-		break;
+		}
 	}
 	mutex_unlock(&ifmgd->mtx);
 
@@ -1524,17 +1741,52 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 		default:
 			WARN(1, "unexpected: %d", rma);
 		}
-		goto out;
+		return;
 	}
 
 	mutex_unlock(&ifmgd->mtx);
 
 	if (skb->len >= 24 + 2 /* mgmt + deauth reason */ &&
-	    (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH)
-		cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len);
-
-out:
-	kfree_skb(skb);
+	    (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_DEAUTH) {
+		struct ieee80211_local *local = sdata->local;
+		struct ieee80211_work *wk;
+
+		mutex_lock(&local->work_mtx);
+		list_for_each_entry(wk, &local->work_list, list) {
+			if (wk->sdata != sdata)
+				continue;
+
+			if (wk->type != IEEE80211_WORK_ASSOC &&
+			    wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT)
+				continue;
+
+			if (memcmp(mgmt->bssid, wk->filter_ta, ETH_ALEN))
+				continue;
+			if (memcmp(mgmt->sa, wk->filter_ta, ETH_ALEN))
+				continue;
+
+			/*
+			 * Printing the message only here means we can't
+			 * spuriously print it, but it also means that it
+			 * won't be printed when the frame comes in before
+			 * we even tried to associate or in similar cases.
+			 *
+			 * Ultimately, I suspect cfg80211 should print the
+			 * messages instead.
+			 */
+			printk(KERN_DEBUG
+			       "%s: deauthenticated from %pM (Reason: %u)\n",
+			       sdata->name, mgmt->bssid,
+			       le16_to_cpu(mgmt->u.deauth.reason_code));
+
+			list_del_rcu(&wk->list);
+			free_work(wk);
+			break;
+		}
+		mutex_unlock(&local->work_mtx);
+
+		cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len);
+	}
 }
 
 static void ieee80211_sta_timer(unsigned long data)
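The new deauth handling above walks the pending-work list and frees any association attempt whose filter address matches the deauthenticating peer, so a deauth racing with association does not leave stale work queued. A sketch of that scan over a plain singly linked list (types and helpers hypothetical, standing in for the kernel's list_for_each_entry/list_del_rcu):

    #include <stdlib.h>
    #include <string.h>
    #include <stdio.h>

    enum work_type { WORK_SCAN, WORK_ASSOC };

    struct work {
        struct work *next;
        enum work_type type;
        unsigned char filter_ta[6];   /* peer this work is waiting on */
    };

    static struct work *work_list;

    /* Drop the first pending association aimed at `bssid`, if any. */
    static void flush_assoc_work(const unsigned char *bssid)
    {
        struct work **pp = &work_list, *wk;

        for (wk = work_list; wk; pp = &wk->next, wk = wk->next) {
            if (wk->type != WORK_ASSOC)
                continue;
            if (memcmp(wk->filter_ta, bssid, 6))
                continue;
            *pp = wk->next;       /* unlink and free the matched item */
            free(wk);
            break;
        }
    }

    int main(void)
    {
        unsigned char ap[6] = { 0, 1, 2, 3, 4, 5 };
        struct work *wk = calloc(1, sizeof(*wk));

        wk->type = WORK_ASSOC;
        memcpy(wk->filter_ta, ap, 6);
        work_list = wk;

        flush_assoc_work(ap);
        printf("pending: %s\n", work_list ? "yes" : "none");
        return 0;
    }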
@@ -1549,39 +1801,13 @@ static void ieee80211_sta_timer(unsigned long data)
1549 return; 1801 return;
1550 } 1802 }
1551 1803
1552 ieee80211_queue_work(&local->hw, &ifmgd->work); 1804 ieee80211_queue_work(&local->hw, &sdata->work);
1553} 1805}
1554 1806
1555static void ieee80211_sta_work(struct work_struct *work) 1807void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1556{ 1808{
1557 struct ieee80211_sub_if_data *sdata =
1558 container_of(work, struct ieee80211_sub_if_data, u.mgd.work);
1559 struct ieee80211_local *local = sdata->local; 1809 struct ieee80211_local *local = sdata->local;
1560 struct ieee80211_if_managed *ifmgd; 1810 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1561 struct sk_buff *skb;
1562
1563 if (!ieee80211_sdata_running(sdata))
1564 return;
1565
1566 if (local->scanning)
1567 return;
1568
1569 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
1570 return;
1571
1572 /*
1573 * ieee80211_queue_work() should have picked up most cases,
1574 * here we'll pick up the rest.
1575 */
1576 if (WARN(local->suspended, "STA MLME work scheduled while "
1577 "going to suspend\n"))
1578 return;
1579
1580 ifmgd = &sdata->u.mgd;
1581
1582 /* first process frames to avoid timing out while a frame is pending */
1583 while ((skb = skb_dequeue(&ifmgd->skb_queue)))
1584 ieee80211_sta_rx_queued_mgmt(sdata, skb);
1585 1811
1586 /* then process the rest of the work */ 1812 /* then process the rest of the work */
1587 mutex_lock(&ifmgd->mtx); 1813 mutex_lock(&ifmgd->mtx);
@@ -1613,7 +1839,7 @@ static void ieee80211_sta_work(struct work_struct *work)
1613 printk(KERN_DEBUG "No probe response from AP %pM" 1839 printk(KERN_DEBUG "No probe response from AP %pM"
1614 " after %dms, disconnecting.\n", 1840 " after %dms, disconnecting.\n",
1615 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); 1841 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
1616 ieee80211_set_disassoc(sdata); 1842 ieee80211_set_disassoc(sdata, true);
1617 ieee80211_recalc_idle(local); 1843 ieee80211_recalc_idle(local);
1618 mutex_unlock(&ifmgd->mtx); 1844 mutex_unlock(&ifmgd->mtx);
1619 /* 1845 /*
@@ -1623,7 +1849,7 @@ static void ieee80211_sta_work(struct work_struct *work)
1623 ieee80211_send_deauth_disassoc(sdata, bssid, 1849 ieee80211_send_deauth_disassoc(sdata, bssid,
1624 IEEE80211_STYPE_DEAUTH, 1850 IEEE80211_STYPE_DEAUTH,
1625 WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, 1851 WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
1626 NULL); 1852 NULL, true);
1627 mutex_lock(&ifmgd->mtx); 1853 mutex_lock(&ifmgd->mtx);
1628 } 1854 }
1629 } 1855 }
@@ -1640,7 +1866,8 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data)
1640 if (local->quiescing) 1866 if (local->quiescing)
1641 return; 1867 return;
1642 1868
1643 ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_loss_work); 1869 ieee80211_queue_work(&sdata->local->hw,
1870 &sdata->u.mgd.beacon_connection_loss_work);
1644} 1871}
1645 1872
1646static void ieee80211_sta_conn_mon_timer(unsigned long data) 1873static void ieee80211_sta_conn_mon_timer(unsigned long data)
@@ -1675,8 +1902,7 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
1675 ieee80211_queue_work(&sdata->local->hw, 1902 ieee80211_queue_work(&sdata->local->hw,
1676 &sdata->u.mgd.monitor_work); 1903 &sdata->u.mgd.monitor_work);
1677 /* and do all the other regular work too */ 1904 /* and do all the other regular work too */
1678 ieee80211_queue_work(&sdata->local->hw, 1905 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
1679 &sdata->u.mgd.work);
1680 } 1906 }
1681} 1907}
1682 1908
@@ -1691,8 +1917,7 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
1691 * time -- the code here is properly synchronised. 1917 * time -- the code here is properly synchronised.
1692 */ 1918 */
1693 1919
1694 cancel_work_sync(&ifmgd->work); 1920 cancel_work_sync(&ifmgd->beacon_connection_loss_work);
1695 cancel_work_sync(&ifmgd->beacon_loss_work);
1696 if (del_timer_sync(&ifmgd->timer)) 1921 if (del_timer_sync(&ifmgd->timer))
1697 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); 1922 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
1698 1923
@@ -1723,10 +1948,10 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
1723 struct ieee80211_if_managed *ifmgd; 1948 struct ieee80211_if_managed *ifmgd;
1724 1949
1725 ifmgd = &sdata->u.mgd; 1950 ifmgd = &sdata->u.mgd;
1726 INIT_WORK(&ifmgd->work, ieee80211_sta_work);
1727 INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work); 1951 INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work);
1728 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); 1952 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
1729 INIT_WORK(&ifmgd->beacon_loss_work, ieee80211_beacon_loss_work); 1953 INIT_WORK(&ifmgd->beacon_connection_loss_work,
1954 ieee80211_beacon_connection_loss_work);
1730 setup_timer(&ifmgd->timer, ieee80211_sta_timer, 1955 setup_timer(&ifmgd->timer, ieee80211_sta_timer,
1731 (unsigned long) sdata); 1956 (unsigned long) sdata);
1732 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 1957 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer,
@@ -1735,7 +1960,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
1735 (unsigned long) sdata); 1960 (unsigned long) sdata);
1736 setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, 1961 setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer,
1737 (unsigned long) sdata); 1962 (unsigned long) sdata);
1738 skb_queue_head_init(&ifmgd->skb_queue);
1739 1963
1740 ifmgd->flags = 0; 1964 ifmgd->flags = 0;
1741 1965
@@ -1805,11 +2029,16 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
1805 struct ieee80211_work *wk; 2029 struct ieee80211_work *wk;
1806 u16 auth_alg; 2030 u16 auth_alg;
1807 2031
2032 if (req->local_state_change)
2033 return 0; /* no need to update mac80211 state */
2034
1808 switch (req->auth_type) { 2035 switch (req->auth_type) {
1809 case NL80211_AUTHTYPE_OPEN_SYSTEM: 2036 case NL80211_AUTHTYPE_OPEN_SYSTEM:
1810 auth_alg = WLAN_AUTH_OPEN; 2037 auth_alg = WLAN_AUTH_OPEN;
1811 break; 2038 break;
1812 case NL80211_AUTHTYPE_SHARED_KEY: 2039 case NL80211_AUTHTYPE_SHARED_KEY:
2040 if (IS_ERR(sdata->local->wep_tx_tfm))
2041 return -EOPNOTSUPP;
1813 auth_alg = WLAN_AUTH_SHARED_KEY; 2042 auth_alg = WLAN_AUTH_SHARED_KEY;
1814 break; 2043 break;
1815 case NL80211_AUTHTYPE_FT: 2044 case NL80211_AUTHTYPE_FT:
@@ -1863,6 +2092,8 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk,
1863 struct sk_buff *skb) 2092 struct sk_buff *skb)
1864{ 2093{
1865 struct ieee80211_mgmt *mgmt; 2094 struct ieee80211_mgmt *mgmt;
2095 struct ieee80211_rx_status *rx_status;
2096 struct ieee802_11_elems elems;
1866 u16 status; 2097 u16 status;
1867 2098
1868 if (!skb) { 2099 if (!skb) {
@@ -1870,6 +2101,19 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk,
1870 return WORK_DONE_DESTROY; 2101 return WORK_DONE_DESTROY;
1871 } 2102 }
1872 2103
2104 if (wk->type == IEEE80211_WORK_ASSOC_BEACON_WAIT) {
2105 mutex_lock(&wk->sdata->u.mgd.mtx);
2106 rx_status = (void *) skb->cb;
2107 ieee802_11_parse_elems(skb->data + 24 + 12, skb->len - 24 - 12, &elems);
2108 ieee80211_rx_bss_info(wk->sdata, (void *)skb->data, skb->len, rx_status,
2109 &elems, true);
2110 mutex_unlock(&wk->sdata->u.mgd.mtx);
2111
2112 wk->type = IEEE80211_WORK_ASSOC;
2113 /* not really done yet */
2114 return WORK_DONE_REQUEUE;
2115 }
2116
1873 mgmt = (void *)skb->data; 2117 mgmt = (void *)skb->data;
1874 status = le16_to_cpu(mgmt->u.assoc_resp.status_code); 2118 status = le16_to_cpu(mgmt->u.assoc_resp.status_code);
1875 2119
@@ -1882,6 +2126,7 @@ static enum work_done_result ieee80211_assoc_done(struct ieee80211_work *wk,
1882 wk->filter_ta); 2126 wk->filter_ta);
1883 return WORK_DONE_DESTROY; 2127 return WORK_DONE_DESTROY;
1884 } 2128 }
2129
1885 mutex_unlock(&wk->sdata->u.mgd.mtx); 2130 mutex_unlock(&wk->sdata->u.mgd.mtx);
1886 } 2131 }
1887 2132
@@ -1913,7 +2158,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
1913 } 2158 }
1914 2159
1915 /* Trying to reassociate - clear previous association state */ 2160 /* Trying to reassociate - clear previous association state */
1916 ieee80211_set_disassoc(sdata); 2161 ieee80211_set_disassoc(sdata, true);
1917 } 2162 }
1918 mutex_unlock(&ifmgd->mtx); 2163 mutex_unlock(&ifmgd->mtx);
1919 2164
@@ -1982,10 +2227,14 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
1982 if (req->prev_bssid) 2227 if (req->prev_bssid)
1983 memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN); 2228 memcpy(wk->assoc.prev_bssid, req->prev_bssid, ETH_ALEN);
1984 2229
1985 wk->type = IEEE80211_WORK_ASSOC;
1986 wk->chan = req->bss->channel; 2230 wk->chan = req->bss->channel;
1987 wk->sdata = sdata; 2231 wk->sdata = sdata;
1988 wk->done = ieee80211_assoc_done; 2232 wk->done = ieee80211_assoc_done;
2233 if (!bss->dtim_period &&
2234 sdata->local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD)
2235 wk->type = IEEE80211_WORK_ASSOC_BEACON_WAIT;
2236 else
2237 wk->type = IEEE80211_WORK_ASSOC;
1989 2238
1990 if (req->use_mfp) { 2239 if (req->use_mfp) {
1991 ifmgd->mfp = IEEE80211_MFP_REQUIRED; 2240 ifmgd->mfp = IEEE80211_MFP_REQUIRED;
@@ -2011,14 +2260,16 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2011 struct ieee80211_local *local = sdata->local; 2260 struct ieee80211_local *local = sdata->local;
2012 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 2261 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2013 struct ieee80211_work *wk; 2262 struct ieee80211_work *wk;
2014 const u8 *bssid = req->bss->bssid; 2263 u8 bssid[ETH_ALEN];
2264 bool assoc_bss = false;
2015 2265
2016 mutex_lock(&ifmgd->mtx); 2266 mutex_lock(&ifmgd->mtx);
2017 2267
2268 memcpy(bssid, req->bss->bssid, ETH_ALEN);
2018 if (ifmgd->associated == req->bss) { 2269 if (ifmgd->associated == req->bss) {
2019 bssid = req->bss->bssid; 2270 ieee80211_set_disassoc(sdata, false);
2020 ieee80211_set_disassoc(sdata);
2021 mutex_unlock(&ifmgd->mtx); 2271 mutex_unlock(&ifmgd->mtx);
2272 assoc_bss = true;
2022 } else { 2273 } else {
2023 bool not_auth_yet = false; 2274 bool not_auth_yet = false;
2024 2275
@@ -2030,7 +2281,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2030 continue; 2281 continue;
2031 2282
2032 if (wk->type != IEEE80211_WORK_DIRECT_PROBE && 2283 if (wk->type != IEEE80211_WORK_DIRECT_PROBE &&
2033 wk->type != IEEE80211_WORK_AUTH) 2284 wk->type != IEEE80211_WORK_AUTH &&
2285 wk->type != IEEE80211_WORK_ASSOC &&
2286 wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT)
2034 continue; 2287 continue;
2035 2288
2036 if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN)) 2289 if (memcmp(req->bss->bssid, wk->filter_ta, ETH_ALEN))
@@ -2060,9 +2313,11 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2060 printk(KERN_DEBUG "%s: deauthenticating from %pM by local choice (reason=%d)\n", 2313 printk(KERN_DEBUG "%s: deauthenticating from %pM by local choice (reason=%d)\n",
2061 sdata->name, bssid, req->reason_code); 2314 sdata->name, bssid, req->reason_code);
2062 2315
2063 ieee80211_send_deauth_disassoc(sdata, bssid, 2316 ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH,
2064 IEEE80211_STYPE_DEAUTH, req->reason_code, 2317 req->reason_code, cookie,
2065 cookie); 2318 !req->local_state_change);
2319 if (assoc_bss)
2320 sta_info_destroy_addr(sdata, bssid);
2066 2321
2067 ieee80211_recalc_idle(sdata->local); 2322 ieee80211_recalc_idle(sdata->local);
2068 2323
@@ -2074,6 +2329,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
2074 void *cookie) 2329 void *cookie)
2075{ 2330{
2076 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 2331 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2332 u8 bssid[ETH_ALEN];
2077 2333
2078 mutex_lock(&ifmgd->mtx); 2334 mutex_lock(&ifmgd->mtx);
2079 2335
@@ -2091,50 +2347,29 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
2091 printk(KERN_DEBUG "%s: disassociating from %pM by local choice (reason=%d)\n", 2347 printk(KERN_DEBUG "%s: disassociating from %pM by local choice (reason=%d)\n",
2092 sdata->name, req->bss->bssid, req->reason_code); 2348 sdata->name, req->bss->bssid, req->reason_code);
2093 2349
2094 ieee80211_set_disassoc(sdata); 2350 memcpy(bssid, req->bss->bssid, ETH_ALEN);
2351 ieee80211_set_disassoc(sdata, false);
2095 2352
2096 mutex_unlock(&ifmgd->mtx); 2353 mutex_unlock(&ifmgd->mtx);
2097 2354
2098 ieee80211_send_deauth_disassoc(sdata, req->bss->bssid, 2355 ieee80211_send_deauth_disassoc(sdata, req->bss->bssid,
2099 IEEE80211_STYPE_DISASSOC, req->reason_code, 2356 IEEE80211_STYPE_DISASSOC, req->reason_code,
2100 cookie); 2357 cookie, !req->local_state_change);
2358 sta_info_destroy_addr(sdata, bssid);
2101 2359
2102 ieee80211_recalc_idle(sdata->local); 2360 ieee80211_recalc_idle(sdata->local);
2103 2361
2104 return 0; 2362 return 0;
2105} 2363}
2106 2364
2107int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata, 2365void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif,
2108 struct ieee80211_channel *chan, 2366 enum nl80211_cqm_rssi_threshold_event rssi_event,
2109 enum nl80211_channel_type channel_type, 2367 gfp_t gfp)
2110 const u8 *buf, size_t len, u64 *cookie)
2111{ 2368{
2112 struct ieee80211_local *local = sdata->local; 2369 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
2113 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2114 struct sk_buff *skb;
2115
2116 /* Check that we are on the requested channel for transmission */
2117 if ((chan != local->tmp_channel ||
2118 channel_type != local->tmp_channel_type) &&
2119 (chan != local->oper_channel ||
2120 channel_type != local->oper_channel_type))
2121 return -EBUSY;
2122
2123 skb = dev_alloc_skb(local->hw.extra_tx_headroom + len);
2124 if (!skb)
2125 return -ENOMEM;
2126 skb_reserve(skb, local->hw.extra_tx_headroom);
2127
2128 memcpy(skb_put(skb, len), buf, len);
2129 2370
2130 if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED)) 2371 trace_api_cqm_rssi_notify(sdata, rssi_event);
2131 IEEE80211_SKB_CB(skb)->flags |=
2132 IEEE80211_TX_INTFL_DONT_ENCRYPT;
2133 IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX |
2134 IEEE80211_TX_CTL_REQ_TX_STATUS;
2135 skb->dev = sdata->dev;
2136 ieee80211_tx_skb(sdata, skb);
2137 2372
2138 *cookie = (unsigned long) skb; 2373 cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp);
2139 return 0;
2140} 2374}
2375EXPORT_SYMBOL(ieee80211_cqm_rssi_notify);
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 0e64484e861c..d287fde0431d 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -40,22 +40,14 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
40 list_for_each_entry(sdata, &local->interfaces, list) 40 list_for_each_entry(sdata, &local->interfaces, list)
41 ieee80211_disable_keys(sdata); 41 ieee80211_disable_keys(sdata);
42 42
43 /* Tear down aggregation sessions */ 43 /* tear down aggregation sessions and remove STAs */
44 44 mutex_lock(&local->sta_mtx);
45 rcu_read_lock(); 45 list_for_each_entry(sta, &local->sta_list, list) {
46 46 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
47 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { 47 set_sta_flags(sta, WLAN_STA_BLOCK_BA);
48 list_for_each_entry_rcu(sta, &local->sta_list, list) {
49 set_sta_flags(sta, WLAN_STA_SUSPEND);
50 ieee80211_sta_tear_down_BA_sessions(sta); 48 ieee80211_sta_tear_down_BA_sessions(sta);
51 } 49 }
52 }
53 50
54 rcu_read_unlock();
55
56 /* remove STAs */
57 mutex_lock(&local->sta_mtx);
58 list_for_each_entry(sta, &local->sta_list, list) {
59 if (sta->uploaded) { 51 if (sta->uploaded) {
60 sdata = sta->sdata; 52 sdata = sta->sdata;
61 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 53 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
@@ -72,6 +64,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
72 64
73 /* remove all interfaces */ 65 /* remove all interfaces */
74 list_for_each_entry(sdata, &local->interfaces, list) { 66 list_for_each_entry(sdata, &local->interfaces, list) {
67 cancel_work_sync(&sdata->work);
68
75 switch(sdata->vif.type) { 69 switch(sdata->vif.type) {
76 case NL80211_IFTYPE_STATION: 70 case NL80211_IFTYPE_STATION:
77 ieee80211_sta_quiesce(sdata); 71 ieee80211_sta_quiesce(sdata);
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 065a96190e32..168427b0ffdc 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -147,5 +147,18 @@ static inline void rc80211_minstrel_exit(void)
147} 147}
148#endif 148#endif
149 149
150#ifdef CONFIG_MAC80211_RC_MINSTREL_HT
151extern int rc80211_minstrel_ht_init(void);
152extern void rc80211_minstrel_ht_exit(void);
153#else
154static inline int rc80211_minstrel_ht_init(void)
155{
156 return 0;
157}
158static inline void rc80211_minstrel_ht_exit(void)
159{
160}
161#endif
162
150 163
151#endif /* IEEE80211_RATE_H */ 164#endif /* IEEE80211_RATE_H */
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 818abfae9007..778c604d7939 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -67,7 +67,6 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix)
67 for (i = rix; i >= 0; i--) 67 for (i = rix; i >= 0; i--)
68 if (mi->r[i].rix == rix) 68 if (mi->r[i].rix == rix)
69 break; 69 break;
70 WARN_ON(i < 0);
71 return i; 70 return i;
72} 71}
73 72
@@ -542,7 +541,7 @@ minstrel_free(void *priv)
542 kfree(priv); 541 kfree(priv);
543} 542}
544 543
545static struct rate_control_ops mac80211_minstrel = { 544struct rate_control_ops mac80211_minstrel = {
546 .name = "minstrel", 545 .name = "minstrel",
547 .tx_status = minstrel_tx_status, 546 .tx_status = minstrel_tx_status,
548 .get_rate = minstrel_get_rate, 547 .get_rate = minstrel_get_rate,
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 38bf4168fc3a..0f5a83370aa6 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -80,7 +80,18 @@ struct minstrel_priv {
80 unsigned int lookaround_rate_mrr; 80 unsigned int lookaround_rate_mrr;
81}; 81};
82 82
83struct minstrel_debugfs_info {
84 size_t len;
85 char buf[];
86};
87
88extern struct rate_control_ops mac80211_minstrel;
83void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); 89void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
84void minstrel_remove_sta_debugfs(void *priv, void *priv_sta); 90void minstrel_remove_sta_debugfs(void *priv, void *priv_sta);
85 91
92/* debugfs */
93int minstrel_stats_open(struct inode *inode, struct file *file);
94ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos);
95int minstrel_stats_release(struct inode *inode, struct file *file);
96
86#endif 97#endif
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 0e1f12b1b6dd..241e76f3fdf2 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -53,21 +53,15 @@
53#include <net/mac80211.h> 53#include <net/mac80211.h>
54#include "rc80211_minstrel.h" 54#include "rc80211_minstrel.h"
55 55
56struct minstrel_stats_info { 56int
57 struct minstrel_sta_info *mi;
58 char buf[4096];
59 size_t len;
60};
61
62static int
63minstrel_stats_open(struct inode *inode, struct file *file) 57minstrel_stats_open(struct inode *inode, struct file *file)
64{ 58{
65 struct minstrel_sta_info *mi = inode->i_private; 59 struct minstrel_sta_info *mi = inode->i_private;
66 struct minstrel_stats_info *ms; 60 struct minstrel_debugfs_info *ms;
67 unsigned int i, tp, prob, eprob; 61 unsigned int i, tp, prob, eprob;
68 char *p; 62 char *p;
69 63
70 ms = kmalloc(sizeof(*ms), GFP_KERNEL); 64 ms = kmalloc(sizeof(*ms) + 4096, GFP_KERNEL);
71 if (!ms) 65 if (!ms)
72 return -ENOMEM; 66 return -ENOMEM;
73 67
@@ -107,36 +101,19 @@ minstrel_stats_open(struct inode *inode, struct file *file)
107 return 0; 101 return 0;
108} 102}
109 103
110static ssize_t 104ssize_t
111minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *o) 105minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
112{ 106{
113 struct minstrel_stats_info *ms; 107 struct minstrel_debugfs_info *ms;
114 char *src;
115 108
116 ms = file->private_data; 109 ms = file->private_data;
117 src = ms->buf; 110 return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len);
118
119 len = min(len, ms->len);
120 if (len <= *o)
121 return 0;
122
123 src += *o;
124 len -= *o;
125 *o += len;
126
127 if (copy_to_user(buf, src, len))
128 return -EFAULT;
129
130 return len;
131} 111}
132 112
133static int 113int
134minstrel_stats_release(struct inode *inode, struct file *file) 114minstrel_stats_release(struct inode *inode, struct file *file)
135{ 115{
136 struct minstrel_stats_info *ms = file->private_data; 116 kfree(file->private_data);
137
138 kfree(ms);
139
140 return 0; 117 return 0;
141} 118}
142 119
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
new file mode 100644
index 000000000000..c5b465904e3b
--- /dev/null
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -0,0 +1,827 @@
1/*
2 * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/netdevice.h>
9#include <linux/types.h>
10#include <linux/skbuff.h>
11#include <linux/debugfs.h>
12#include <linux/random.h>
13#include <linux/ieee80211.h>
14#include <net/mac80211.h>
15#include "rate.h"
16#include "rc80211_minstrel.h"
17#include "rc80211_minstrel_ht.h"
18
19#define AVG_PKT_SIZE 1200
20#define SAMPLE_COLUMNS 10
21#define EWMA_LEVEL 75
22
23/* Number of bits for an average sized packet */
24#define MCS_NBITS (AVG_PKT_SIZE << 3)
25
26/* Number of symbols for a packet with (bps) bits per symbol */
27#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps))
28
29/* Transmission time for a packet containing (syms) symbols */
30#define MCS_SYMBOL_TIME(sgi, syms) \
31 (sgi ? \
32 ((syms) * 18 + 4) / 5 : /* syms * 3.6 us */ \
33 (syms) << 2 /* syms * 4 us */ \
34 )
35
36/* Transmit duration for the raw data part of an average sized packet */
37#define MCS_DURATION(streams, sgi, bps) MCS_SYMBOL_TIME(sgi, MCS_NSYMS((streams) * (bps)))
38
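
These duration macros are pure integer arithmetic, so they can be checked in isolation. A minimal userspace sketch (constants copied from above; the main() is illustrative only) reproducing the first duration[] entry of the 1-stream, long-GI, 20 MHz group, where MCS0 carries 26 bits per OFDM symbol:

#include <stdio.h>

#define AVG_PKT_SIZE 1200
#define MCS_NBITS (AVG_PKT_SIZE << 3)                    /* 9600 bits */
#define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) /* ceiling division */
#define MCS_SYMBOL_TIME(sgi, syms) \
	(sgi ? ((syms) * 18 + 4) / 5 : (syms) << 2)

int main(void)
{
	int syms = MCS_NSYMS(1 * 26);   /* ceil(9600 / 26) = 370 symbols */

	/* 370 symbols * 4 us = 1480 us, the duration[0] entry of MCS_GROUP(1, 0, 0) */
	printf("%d symbols, %d us\n", syms, MCS_SYMBOL_TIME(0, syms));
	return 0;
}
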
39/* MCS rate information for an MCS group */
40#define MCS_GROUP(_streams, _sgi, _ht40) { \
41 .streams = _streams, \
42 .flags = \
43 (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \
44 (_ht40 ? IEEE80211_TX_RC_40_MHZ_WIDTH : 0), \
45 .duration = { \
46 MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26), \
47 MCS_DURATION(_streams, _sgi, _ht40 ? 108 : 52), \
48 MCS_DURATION(_streams, _sgi, _ht40 ? 162 : 78), \
49 MCS_DURATION(_streams, _sgi, _ht40 ? 216 : 104), \
50 MCS_DURATION(_streams, _sgi, _ht40 ? 324 : 156), \
51 MCS_DURATION(_streams, _sgi, _ht40 ? 432 : 208), \
52 MCS_DURATION(_streams, _sgi, _ht40 ? 486 : 234), \
53 MCS_DURATION(_streams, _sgi, _ht40 ? 540 : 260) \
54 } \
55}
56
57/*
58 * To enable sufficiently targeted rate sampling, MCS rates are divided into
59 * groups, based on the number of streams and flags (HT40, SGI) that they
60 * use.
61 */
62const struct mcs_group minstrel_mcs_groups[] = {
63 MCS_GROUP(1, 0, 0),
64 MCS_GROUP(2, 0, 0),
65#if MINSTREL_MAX_STREAMS >= 3
66 MCS_GROUP(3, 0, 0),
67#endif
68
69 MCS_GROUP(1, 1, 0),
70 MCS_GROUP(2, 1, 0),
71#if MINSTREL_MAX_STREAMS >= 3
72 MCS_GROUP(3, 1, 0),
73#endif
74
75 MCS_GROUP(1, 0, 1),
76 MCS_GROUP(2, 0, 1),
77#if MINSTREL_MAX_STREAMS >= 3
78 MCS_GROUP(3, 0, 1),
79#endif
80
81 MCS_GROUP(1, 1, 1),
82 MCS_GROUP(2, 1, 1),
83#if MINSTREL_MAX_STREAMS >= 3
84 MCS_GROUP(3, 1, 1),
85#endif
86};
87
88static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES];
89
90/*
91 * Perform EWMA (Exponentially Weighted Moving Average) calculation
92 */
93static int
94minstrel_ewma(int old, int new, int weight)
95{
96 return (new * (100 - weight) + old * weight) / 100;
97}
98
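
With EWMA_LEVEL at 75, each new sampling interval contributes 25% and history keeps 75%. A small userspace sketch with hypothetical numbers, showing how the delivery estimate decays when a previously perfect link drops to 50% delivery:

#include <stdio.h>

#define EWMA_LEVEL 75   /* same weight minstrel_ht uses above */

static int minstrel_ewma(int old, int new, int weight)
{
	return (new * (100 - weight) + old * weight) / 100;
}

int main(void)
{
	int prob = 100, i;

	for (i = 1; i <= 8; i++) {
		prob = minstrel_ewma(prob, 50, EWMA_LEVEL);
		printf("interval %d: %d%%\n", i, prob);
	}
	/* decays 87, 77, 70, 65, 61, 58, 56, 54 -- closing on 50 */
	return 0;
}
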
99/*
100 * Look up an MCS group index based on mac80211 rate information
101 */
102static int
103minstrel_ht_get_group_idx(struct ieee80211_tx_rate *rate)
104{
105 int streams = (rate->idx / MCS_GROUP_RATES) + 1;
106 u32 flags = IEEE80211_TX_RC_SHORT_GI | IEEE80211_TX_RC_40_MHZ_WIDTH;
107 int i;
108
109 for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) {
110 if (minstrel_mcs_groups[i].streams != streams)
111 continue;
112 if (minstrel_mcs_groups[i].flags != (rate->flags & flags))
113 continue;
114
115 return i;
116 }
117
118 WARN_ON(1);
119 return 0;
120}
121
122static inline struct minstrel_rate_stats *
123minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index)
124{
125 return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES];
126}
127
128
129/*
130 * Recalculate success probabilities and counters for a rate using EWMA
131 */
132static void
133minstrel_calc_rate_ewma(struct minstrel_priv *mp, struct minstrel_rate_stats *mr)
134{
135 if (unlikely(mr->attempts > 0)) {
136 mr->sample_skipped = 0;
137 mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts);
138 if (!mr->att_hist)
139 mr->probability = mr->cur_prob;
140 else
141 mr->probability = minstrel_ewma(mr->probability,
142 mr->cur_prob, EWMA_LEVEL);
143 mr->att_hist += mr->attempts;
144 mr->succ_hist += mr->success;
145 } else {
146 mr->sample_skipped++;
147 }
148 mr->last_success = mr->success;
149 mr->last_attempts = mr->attempts;
150 mr->success = 0;
151 mr->attempts = 0;
152}
153
154/*
155 * Calculate throughput based on the average A-MPDU length, taking into account
156 * the expected number of retransmissions and their expected length
157 */
158static void
159minstrel_ht_calc_tp(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
160 int group, int rate)
161{
162 struct minstrel_rate_stats *mr;
163 unsigned int usecs;
164
165 mr = &mi->groups[group].rates[rate];
166
167 if (mr->probability < MINSTREL_FRAC(1, 10)) {
168 mr->cur_tp = 0;
169 return;
170 }
171
172 usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
173 usecs += minstrel_mcs_groups[group].duration[rate];
174 mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability);
175}
176
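
Plugging assumed numbers into the formula above makes the units concrete: usecs is the expected airtime per subframe (per-frame overhead amortised over the average A-MPDU length, plus the raw MCS duration), and cur_tp comes out as expected delivered packets per second. A sketch assuming a 1480 us MCS0 duration, 60 us of per-frame overhead, an average aggregate of 8 subframes and 90% delivery:

#include <stdio.h>

#define MINSTREL_SCALE 16
#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)

int main(void)
{
	unsigned int probability = MINSTREL_FRAC(90, 100); /* 0.9 in 16.16 */
	unsigned int usecs = 60 / 8 + 1480;                /* 1487 us, integer math as above */
	unsigned int cur_tp = MINSTREL_TRUNC((1000000 / usecs) * probability);

	/* prints "604 pkts/s, ~5798 kbit/s" -- plausibly near MCS0's 6.5 Mbit/s */
	printf("%u pkts/s, ~%u kbit/s at 1200-byte frames\n",
	       cur_tp, cur_tp * 1200 * 8 / 1000);
	return 0;
}
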
177/*
178 * Update rate statistics and select new primary rates
179 *
180 * Rules for rate selection:
181 * - max_prob_rate must use only one stream, as a tradeoff between delivery
182 * probability and throughput during strong fluctuations
183 * - as long as the max prob rate has a probability of more than 3/4, pick
 184 * higher throughput rates, even if the probability is a bit lower
185 */
186static void
187minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
188{
189 struct minstrel_mcs_group_data *mg;
190 struct minstrel_rate_stats *mr;
191 int cur_prob, cur_prob_tp, cur_tp, cur_tp2;
192 int group, i, index;
193
194 if (mi->ampdu_packets > 0) {
195 mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len,
196 MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL);
197 mi->ampdu_len = 0;
198 mi->ampdu_packets = 0;
199 }
200
201 mi->sample_slow = 0;
202 mi->sample_count = 0;
203 mi->max_tp_rate = 0;
204 mi->max_tp_rate2 = 0;
205 mi->max_prob_rate = 0;
206
207 for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
208 cur_prob = 0;
209 cur_prob_tp = 0;
210 cur_tp = 0;
211 cur_tp2 = 0;
212
213 mg = &mi->groups[group];
214 if (!mg->supported)
215 continue;
216
217 mg->max_tp_rate = 0;
218 mg->max_tp_rate2 = 0;
219 mg->max_prob_rate = 0;
220 mi->sample_count++;
221
222 for (i = 0; i < MCS_GROUP_RATES; i++) {
223 if (!(mg->supported & BIT(i)))
224 continue;
225
226 mr = &mg->rates[i];
227 mr->retry_updated = false;
228 index = MCS_GROUP_RATES * group + i;
229 minstrel_calc_rate_ewma(mp, mr);
230 minstrel_ht_calc_tp(mp, mi, group, i);
231
232 if (!mr->cur_tp)
233 continue;
234
235 /* ignore the lowest rate of each single-stream group */
236 if (!i && minstrel_mcs_groups[group].streams == 1)
237 continue;
238
239 if ((mr->cur_tp > cur_prob_tp && mr->probability >
240 MINSTREL_FRAC(3, 4)) || mr->probability > cur_prob) {
241 mg->max_prob_rate = index;
242 cur_prob = mr->probability;
243 cur_prob_tp = mr->cur_tp;
244 }
245
246 if (mr->cur_tp > cur_tp) {
247 swap(index, mg->max_tp_rate);
248 cur_tp = mr->cur_tp;
249 mr = minstrel_get_ratestats(mi, index);
250 }
251
252 if (index >= mg->max_tp_rate)
253 continue;
254
255 if (mr->cur_tp > cur_tp2) {
256 mg->max_tp_rate2 = index;
257 cur_tp2 = mr->cur_tp;
258 }
259 }
260 }
261
 262 /* try to sample up to half of the available rates during each interval */
263 mi->sample_count *= 4;
264
265 cur_prob = 0;
266 cur_prob_tp = 0;
267 cur_tp = 0;
268 cur_tp2 = 0;
269 for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
270 mg = &mi->groups[group];
271 if (!mg->supported)
272 continue;
273
274 mr = minstrel_get_ratestats(mi, mg->max_prob_rate);
275 if (cur_prob_tp < mr->cur_tp &&
276 minstrel_mcs_groups[group].streams == 1) {
277 mi->max_prob_rate = mg->max_prob_rate;
278 cur_prob = mr->cur_prob;
279 cur_prob_tp = mr->cur_tp;
280 }
281
282 mr = minstrel_get_ratestats(mi, mg->max_tp_rate);
283 if (cur_tp < mr->cur_tp) {
284 mi->max_tp_rate = mg->max_tp_rate;
285 cur_tp = mr->cur_tp;
286 }
287
288 mr = minstrel_get_ratestats(mi, mg->max_tp_rate2);
289 if (cur_tp2 < mr->cur_tp) {
290 mi->max_tp_rate2 = mg->max_tp_rate2;
291 cur_tp2 = mr->cur_tp;
292 }
293 }
294
295 mi->stats_update = jiffies;
296}
297
298static bool
299minstrel_ht_txstat_valid(struct ieee80211_tx_rate *rate)
300{
301 if (!rate->count)
302 return false;
303
304 if (rate->idx < 0)
305 return false;
306
307 return !!(rate->flags & IEEE80211_TX_RC_MCS);
308}
309
310static void
311minstrel_next_sample_idx(struct minstrel_ht_sta *mi)
312{
313 struct minstrel_mcs_group_data *mg;
314
315 for (;;) {
316 mi->sample_group++;
317 mi->sample_group %= ARRAY_SIZE(minstrel_mcs_groups);
318 mg = &mi->groups[mi->sample_group];
319
320 if (!mg->supported)
321 continue;
322
323 if (++mg->index >= MCS_GROUP_RATES) {
324 mg->index = 0;
325 if (++mg->column >= ARRAY_SIZE(sample_table))
326 mg->column = 0;
327 }
328 break;
329 }
330}
331
332static void
333minstrel_downgrade_rate(struct minstrel_ht_sta *mi, unsigned int *idx,
334 bool primary)
335{
336 int group, orig_group;
337
338 orig_group = group = *idx / MCS_GROUP_RATES;
339 while (group > 0) {
340 group--;
341
342 if (!mi->groups[group].supported)
343 continue;
344
345 if (minstrel_mcs_groups[group].streams >
346 minstrel_mcs_groups[orig_group].streams)
347 continue;
348
349 if (primary)
350 *idx = mi->groups[group].max_tp_rate;
351 else
352 *idx = mi->groups[group].max_tp_rate2;
353 break;
354 }
355}
356
357static void
358minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, struct sk_buff *skb)
359{
360 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
361 struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
362 u16 tid;
363
364 if (unlikely(!ieee80211_is_data_qos(hdr->frame_control)))
365 return;
366
367 if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
368 return;
369
370 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
371 if (likely(sta->ampdu_mlme.tid_tx[tid]))
372 return;
373
374 ieee80211_start_tx_ba_session(pubsta, tid);
375}
376
377static void
378minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
379 struct ieee80211_sta *sta, void *priv_sta,
380 struct sk_buff *skb)
381{
382 struct minstrel_ht_sta_priv *msp = priv_sta;
383 struct minstrel_ht_sta *mi = &msp->ht;
384 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
385 struct ieee80211_tx_rate *ar = info->status.rates;
386 struct minstrel_rate_stats *rate, *rate2;
387 struct minstrel_priv *mp = priv;
388 bool last = false;
389 int group;
390 int i = 0;
391
392 if (!msp->is_ht)
393 return mac80211_minstrel.tx_status(priv, sband, sta, &msp->legacy, skb);
394
395 /* This packet was aggregated but doesn't carry status info */
396 if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
397 !(info->flags & IEEE80211_TX_STAT_AMPDU))
398 return;
399
400 if (!info->status.ampdu_len) {
401 info->status.ampdu_ack_len = 1;
402 info->status.ampdu_len = 1;
403 }
404
405 mi->ampdu_packets++;
406 mi->ampdu_len += info->status.ampdu_len;
407
408 if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) {
409 mi->sample_wait = 4 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len);
410 mi->sample_tries = 3;
411 mi->sample_count--;
412 }
413
414 if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) {
415 mi->sample_packets += info->status.ampdu_len;
416 minstrel_next_sample_idx(mi);
417 }
418
419 for (i = 0; !last; i++) {
420 last = (i == IEEE80211_TX_MAX_RATES - 1) ||
421 !minstrel_ht_txstat_valid(&ar[i + 1]);
422
423 if (!minstrel_ht_txstat_valid(&ar[i]))
424 break;
425
426 group = minstrel_ht_get_group_idx(&ar[i]);
427 rate = &mi->groups[group].rates[ar[i].idx % 8];
428
429 if (last && (info->flags & IEEE80211_TX_STAT_ACK))
430 rate->success += info->status.ampdu_ack_len;
431
432 rate->attempts += ar[i].count * info->status.ampdu_len;
433 }
434
435 /*
436 * check for sudden death of spatial multiplexing,
437 * downgrade to a lower number of streams if necessary.
438 */
439 rate = minstrel_get_ratestats(mi, mi->max_tp_rate);
440 if (rate->attempts > 30 &&
441 MINSTREL_FRAC(rate->success, rate->attempts) <
442 MINSTREL_FRAC(20, 100))
443 minstrel_downgrade_rate(mi, &mi->max_tp_rate, true);
444
445 rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2);
446 if (rate2->attempts > 30 &&
447 MINSTREL_FRAC(rate2->success, rate2->attempts) <
448 MINSTREL_FRAC(20, 100))
449 minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false);
450
451 if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) {
452 minstrel_ht_update_stats(mp, mi);
453 minstrel_aggr_check(mp, sta, skb);
454 }
455}
456
457static void
458minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
459 int index)
460{
461 struct minstrel_rate_stats *mr;
462 const struct mcs_group *group;
463 unsigned int tx_time, tx_time_rtscts, tx_time_data;
464 unsigned int cw = mp->cw_min;
465 unsigned int t_slot = 9; /* FIXME */
466 unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len);
467
468 mr = minstrel_get_ratestats(mi, index);
469 if (mr->probability < MINSTREL_FRAC(1, 10)) {
470 mr->retry_count = 1;
471 mr->retry_count_rtscts = 1;
472 return;
473 }
474
475 mr->retry_count = 2;
476 mr->retry_count_rtscts = 2;
477 mr->retry_updated = true;
478
479 group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
480 tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len;
481 tx_time = 2 * (t_slot + mi->overhead + tx_time_data);
482 tx_time_rtscts = 2 * (t_slot + mi->overhead_rtscts + tx_time_data);
483 do {
484 cw = (cw << 1) | 1;
485 cw = min(cw, mp->cw_max);
486 tx_time += cw + t_slot + mi->overhead;
487 tx_time_rtscts += cw + t_slot + mi->overhead_rtscts;
488 if (tx_time_rtscts < mp->segment_size)
489 mr->retry_count_rtscts++;
490 } while ((tx_time < mp->segment_size) &&
491 (++mr->retry_count < mp->max_retry));
492}
493
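
The loop above keeps granting one more retry (plus its backoff slot) until the whole chain would exceed mp->segment_size worth of airtime. A standalone trace with assumed numbers: 6000 us segment, 9 us slots, CW doubling from 15, 100 us overhead, 1480 us of data time:

#include <stdio.h>

int main(void)
{
	unsigned int segment_size = 6000, t_slot = 9, cw_max = 1023;
	unsigned int overhead = 100, tx_time_data = 1480;
	unsigned int cw = 15;
	unsigned int tx_time = 2 * (t_slot + overhead + tx_time_data);
	unsigned int retry_count = 2, max_retry = 7;

	do {
		cw = (cw << 1) | 1;     /* binary exponential backoff */
		if (cw > cw_max)
			cw = cw_max;
		tx_time += cw + t_slot + overhead;
	} while (tx_time < segment_size && ++retry_count < max_retry);

	/* with these inputs: 7 retries budgeted, 4710 us of chain airtime */
	printf("%u retries fit in %u us\n", retry_count, tx_time);
	return 0;
}
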
494
495static void
496minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
497 struct ieee80211_tx_rate *rate, int index,
498 struct ieee80211_tx_rate_control *txrc,
499 bool sample, bool rtscts)
500{
501 const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
502 struct minstrel_rate_stats *mr;
503
504 mr = minstrel_get_ratestats(mi, index);
505 if (!mr->retry_updated)
506 minstrel_calc_retransmit(mp, mi, index);
507
508 if (mr->probability < MINSTREL_FRAC(20, 100))
509 rate->count = 2;
510 else if (rtscts)
511 rate->count = mr->retry_count_rtscts;
512 else
513 rate->count = mr->retry_count;
514
515 rate->flags = IEEE80211_TX_RC_MCS | group->flags;
516 if (txrc->short_preamble)
517 rate->flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE;
518 if (txrc->rts || rtscts)
519 rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS;
520 rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES;
521}
522
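
The index arithmetic here round-trips with minstrel_ht_get_group_idx() above. In a MINSTREL_MAX_STREAMS == 3 build, global index 18 is group 2 (the 3-stream, long-GI, 20 MHz group), rate 2, so rate->idx = 18 % 8 + (3 - 1) * 8 = 18, i.e. MCS18; the reverse lookup recovers streams = 18 / 8 + 1 = 3 and then matches on the SGI/HT40 flags.
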
523static inline int
524minstrel_get_duration(int index)
525{
526 const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
527 return group->duration[index % MCS_GROUP_RATES];
528}
529
530static int
531minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
532{
533 struct minstrel_rate_stats *mr;
534 struct minstrel_mcs_group_data *mg;
535 int sample_idx = 0;
536
537 if (mi->sample_wait > 0) {
538 mi->sample_wait--;
539 return -1;
540 }
541
542 if (!mi->sample_tries)
543 return -1;
544
545 mi->sample_tries--;
546 mg = &mi->groups[mi->sample_group];
547 sample_idx = sample_table[mg->column][mg->index];
548 mr = &mg->rates[sample_idx];
549 sample_idx += mi->sample_group * MCS_GROUP_RATES;
550
551 /*
552 * When not using MRR, do not sample if the probability is already
553 * higher than 95% to avoid wasting airtime
554 */
555 if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100)))
556 goto next;
557
558 /*
559 * Make sure that lower rates get sampled only occasionally,
560 * if the link is working perfectly.
561 */
562 if (minstrel_get_duration(sample_idx) >
563 minstrel_get_duration(mi->max_tp_rate)) {
564 if (mr->sample_skipped < 10)
565 goto next;
566
567 if (mi->sample_slow++ > 2)
568 goto next;
569 }
570
571 return sample_idx;
572
573next:
574 minstrel_next_sample_idx(mi);
575 return -1;
576}
577
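
Read together with the reload in minstrel_ht_tx_status() above, the throttle works out to concrete numbers: with an average A-MPDU length around 8, sampling happens in bursts of sample_tries = 3 frames, separated by sample_wait = 4 + 2 * 8 = 20 tx-status reports, and only for as long as the per-interval sample_count budget set in minstrel_ht_update_stats() lasts.
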
578static void
579minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
580 struct ieee80211_tx_rate_control *txrc)
581{
582 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);
583 struct ieee80211_tx_rate *ar = info->status.rates;
584 struct minstrel_ht_sta_priv *msp = priv_sta;
585 struct minstrel_ht_sta *mi = &msp->ht;
586 struct minstrel_priv *mp = priv;
587 int sample_idx;
588
589 if (rate_control_send_low(sta, priv_sta, txrc))
590 return;
591
592 if (!msp->is_ht)
593 return mac80211_minstrel.get_rate(priv, sta, &msp->legacy, txrc);
594
595 info->flags |= mi->tx_flags;
596 sample_idx = minstrel_get_sample_rate(mp, mi);
597 if (sample_idx >= 0) {
598 minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx,
599 txrc, true, false);
600 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate,
601 txrc, false, true);
602 info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
603 } else {
604 minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate,
605 txrc, false, false);
606 minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2,
607 txrc, false, true);
608 }
609 minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, txrc, false, true);
610
611 ar[3].count = 0;
612 ar[3].idx = -1;
613
614 mi->total_packets++;
615
616 /* wraparound */
617 if (mi->total_packets == ~0) {
618 mi->total_packets = 0;
619 mi->sample_packets = 0;
620 }
621}
622
623static void
624minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
625 struct ieee80211_sta *sta, void *priv_sta,
626 enum nl80211_channel_type oper_chan_type)
627{
628 struct minstrel_priv *mp = priv;
629 struct minstrel_ht_sta_priv *msp = priv_sta;
630 struct minstrel_ht_sta *mi = &msp->ht;
631 struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
632 struct ieee80211_local *local = hw_to_local(mp->hw);
633 u16 sta_cap = sta->ht_cap.cap;
634 int ack_dur;
635 int stbc;
636 int i;
637
638 /* fall back to the old minstrel for legacy stations */
639 if (!sta->ht_cap.ht_supported) {
640 msp->is_ht = false;
641 memset(&msp->legacy, 0, sizeof(msp->legacy));
642 msp->legacy.r = msp->ratelist;
643 msp->legacy.sample_table = msp->sample_table;
644 return mac80211_minstrel.rate_init(priv, sband, sta, &msp->legacy);
645 }
646
647 BUILD_BUG_ON(ARRAY_SIZE(minstrel_mcs_groups) !=
648 MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS);
649
650 msp->is_ht = true;
651 memset(mi, 0, sizeof(*mi));
652 mi->stats_update = jiffies;
653
654 ack_dur = ieee80211_frame_duration(local, 10, 60, 1, 1);
655 mi->overhead = ieee80211_frame_duration(local, 0, 60, 1, 1) + ack_dur;
656 mi->overhead_rtscts = mi->overhead + 2 * ack_dur;
657
658 mi->avg_ampdu_len = MINSTREL_FRAC(1, 1);
659
660 /* When using MRR, sample more on the first attempt, without delay */
661 if (mp->has_mrr) {
662 mi->sample_count = 16;
663 mi->sample_wait = 0;
664 } else {
665 mi->sample_count = 8;
666 mi->sample_wait = 8;
667 }
668 mi->sample_tries = 4;
669
670 stbc = (sta_cap & IEEE80211_HT_CAP_RX_STBC) >>
671 IEEE80211_HT_CAP_RX_STBC_SHIFT;
672 mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT;
673
674 if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING)
675 mi->tx_flags |= IEEE80211_TX_CTL_LDPC;
676
677 if (oper_chan_type != NL80211_CHAN_HT40MINUS &&
678 oper_chan_type != NL80211_CHAN_HT40PLUS)
679 sta_cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40;
680
681 for (i = 0; i < ARRAY_SIZE(mi->groups); i++) {
682 u16 req = 0;
683
684 mi->groups[i].supported = 0;
685 if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI) {
686 if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
687 req |= IEEE80211_HT_CAP_SGI_40;
688 else
689 req |= IEEE80211_HT_CAP_SGI_20;
690 }
691
692 if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
693 req |= IEEE80211_HT_CAP_SUP_WIDTH_20_40;
694
695 if ((sta_cap & req) != req)
696 continue;
697
698 mi->groups[i].supported =
699 mcs->rx_mask[minstrel_mcs_groups[i].streams - 1];
700 }
701}
702
703static void
704minstrel_ht_rate_init(void *priv, struct ieee80211_supported_band *sband,
705 struct ieee80211_sta *sta, void *priv_sta)
706{
707 struct minstrel_priv *mp = priv;
708
709 minstrel_ht_update_caps(priv, sband, sta, priv_sta, mp->hw->conf.channel_type);
710}
711
712static void
713minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband,
714 struct ieee80211_sta *sta, void *priv_sta,
715 u32 changed, enum nl80211_channel_type oper_chan_type)
716{
717 minstrel_ht_update_caps(priv, sband, sta, priv_sta, oper_chan_type);
718}
719
720static void *
721minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp)
722{
723 struct ieee80211_supported_band *sband;
724 struct minstrel_ht_sta_priv *msp;
725 struct minstrel_priv *mp = priv;
726 struct ieee80211_hw *hw = mp->hw;
727 int max_rates = 0;
728 int i;
729
730 for (i = 0; i < IEEE80211_NUM_BANDS; i++) {
731 sband = hw->wiphy->bands[i];
732 if (sband && sband->n_bitrates > max_rates)
733 max_rates = sband->n_bitrates;
734 }
735
736 msp = kzalloc(sizeof(struct minstrel_ht_sta), gfp);
737 if (!msp)
738 return NULL;
739
740 msp->ratelist = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp);
741 if (!msp->ratelist)
742 goto error;
743
744 msp->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp);
745 if (!msp->sample_table)
746 goto error1;
747
748 return msp;
749
750error1:
751 kfree(msp->ratelist);
752error:
753 kfree(msp);
754 return NULL;
755}
756
757static void
758minstrel_ht_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta)
759{
760 struct minstrel_ht_sta_priv *msp = priv_sta;
761
762 kfree(msp->sample_table);
763 kfree(msp->ratelist);
764 kfree(msp);
765}
766
767static void *
768minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
769{
770 return mac80211_minstrel.alloc(hw, debugfsdir);
771}
772
773static void
774minstrel_ht_free(void *priv)
775{
776 mac80211_minstrel.free(priv);
777}
778
779static struct rate_control_ops mac80211_minstrel_ht = {
780 .name = "minstrel_ht",
781 .tx_status = minstrel_ht_tx_status,
782 .get_rate = minstrel_ht_get_rate,
783 .rate_init = minstrel_ht_rate_init,
784 .rate_update = minstrel_ht_rate_update,
785 .alloc_sta = minstrel_ht_alloc_sta,
786 .free_sta = minstrel_ht_free_sta,
787 .alloc = minstrel_ht_alloc,
788 .free = minstrel_ht_free,
789#ifdef CONFIG_MAC80211_DEBUGFS
790 .add_sta_debugfs = minstrel_ht_add_sta_debugfs,
791 .remove_sta_debugfs = minstrel_ht_remove_sta_debugfs,
792#endif
793};
794
795
796static void
797init_sample_table(void)
798{
799 int col, i, new_idx;
800 u8 rnd[MCS_GROUP_RATES];
801
802 memset(sample_table, 0xff, sizeof(sample_table));
803 for (col = 0; col < SAMPLE_COLUMNS; col++) {
804 for (i = 0; i < MCS_GROUP_RATES; i++) {
805 get_random_bytes(rnd, sizeof(rnd));
806 new_idx = (i + rnd[i]) % MCS_GROUP_RATES;
807
808 while (sample_table[col][new_idx] != 0xff)
809 new_idx = (new_idx + 1) % MCS_GROUP_RATES;
810
811 sample_table[col][new_idx] = i;
812 }
813 }
814}
815
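
init_sample_table() makes every column a random permutation of 0..MCS_GROUP_RATES-1, so walking one column samples each rate of a group exactly once in shuffled order before moving on. The same construction as a userspace sketch, with rand() standing in for get_random_bytes():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SAMPLE_COLUMNS 10
#define MCS_GROUP_RATES 8

static unsigned char sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES];

int main(void)
{
	int col, i, new_idx;

	memset(sample_table, 0xff, sizeof(sample_table));
	for (col = 0; col < SAMPLE_COLUMNS; col++) {
		for (i = 0; i < MCS_GROUP_RATES; i++) {
			new_idx = (i + rand() % MCS_GROUP_RATES) % MCS_GROUP_RATES;
			/* linear probing keeps the column collision-free */
			while (sample_table[col][new_idx] != 0xff)
				new_idx = (new_idx + 1) % MCS_GROUP_RATES;
			sample_table[col][new_idx] = i;
		}
	}

	for (i = 0; i < MCS_GROUP_RATES; i++)
		printf("%d ", sample_table[0][i]);
	printf("\n");   /* a permutation of 0..7, e.g. "6 3 0 7 2 4 1 5" */
	return 0;
}
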
816int __init
817rc80211_minstrel_ht_init(void)
818{
819 init_sample_table();
820 return ieee80211_rate_control_register(&mac80211_minstrel_ht);
821}
822
823void
824rc80211_minstrel_ht_exit(void)
825{
826 ieee80211_rate_control_unregister(&mac80211_minstrel_ht);
827}
diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h
new file mode 100644
index 000000000000..462d2b227ed5
--- /dev/null
+++ b/net/mac80211/rc80211_minstrel_ht.h
@@ -0,0 +1,130 @@
1/*
2 * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#ifndef __RC_MINSTREL_HT_H
10#define __RC_MINSTREL_HT_H
11
12/*
13 * The number of streams can be changed to 2 to reduce code
14 * size and memory footprint.
15 */
16#define MINSTREL_MAX_STREAMS 3
17#define MINSTREL_STREAM_GROUPS 4
18
19/* scaled fraction values */
20#define MINSTREL_SCALE 16
 21#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
22#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
23
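
These are 16.16 fixed-point helpers: MINSTREL_FRAC(3, 4) evaluates to (3 << 16) / 4 = 49152, i.e. 0.75 scaled by 2^16, and MINSTREL_TRUNC() shifts the scale back out, so MINSTREL_TRUNC(49152 * 2) = 1. The probability fields below store values in this representation.
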
24#define MCS_GROUP_RATES 8
25
26struct mcs_group {
27 u32 flags;
28 unsigned int streams;
29 unsigned int duration[MCS_GROUP_RATES];
30};
31
32extern const struct mcs_group minstrel_mcs_groups[];
33
34struct minstrel_rate_stats {
35 /* current / last sampling period attempts/success counters */
36 unsigned int attempts, last_attempts;
37 unsigned int success, last_success;
38
39 /* total attempts/success counters */
40 u64 att_hist, succ_hist;
41
42 /* current throughput */
43 unsigned int cur_tp;
44
45 /* packet delivery probabilities */
46 unsigned int cur_prob, probability;
47
48 /* maximum retry counts */
49 unsigned int retry_count;
50 unsigned int retry_count_rtscts;
51
52 bool retry_updated;
53 u8 sample_skipped;
54};
55
56struct minstrel_mcs_group_data {
57 u8 index;
58 u8 column;
59
60 /* bitfield of supported MCS rates of this group */
61 u8 supported;
62
63 /* selected primary rates */
64 unsigned int max_tp_rate;
65 unsigned int max_tp_rate2;
66 unsigned int max_prob_rate;
67
68 /* MCS rate statistics */
69 struct minstrel_rate_stats rates[MCS_GROUP_RATES];
70};
71
72struct minstrel_ht_sta {
73 /* ampdu length (average, per sampling interval) */
74 unsigned int ampdu_len;
75 unsigned int ampdu_packets;
76
77 /* ampdu length (EWMA) */
78 unsigned int avg_ampdu_len;
79
80 /* best throughput rate */
81 unsigned int max_tp_rate;
82
83 /* second best throughput rate */
84 unsigned int max_tp_rate2;
85
86 /* best probability rate */
87 unsigned int max_prob_rate;
88
89 /* time of last status update */
90 unsigned long stats_update;
91
92 /* overhead time in usec for each frame */
93 unsigned int overhead;
94 unsigned int overhead_rtscts;
95
96 unsigned int total_packets;
97 unsigned int sample_packets;
98
99 /* tx flags to add for frames for this sta */
100 u32 tx_flags;
101
102 u8 sample_wait;
103 u8 sample_tries;
104 u8 sample_count;
105 u8 sample_slow;
106
107 /* current MCS group to be sampled */
108 u8 sample_group;
109
110 /* MCS rate group info and statistics */
111 struct minstrel_mcs_group_data groups[MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS];
112};
113
114struct minstrel_ht_sta_priv {
115 union {
116 struct minstrel_ht_sta ht;
117 struct minstrel_sta_info legacy;
118 };
119#ifdef CONFIG_MAC80211_DEBUGFS
120 struct dentry *dbg_stats;
121#endif
122 void *ratelist;
123 void *sample_table;
124 bool is_ht;
125};
126
127void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
128void minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta);
129
130#endif
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
new file mode 100644
index 000000000000..4a5a4b3e7799
--- /dev/null
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -0,0 +1,118 @@
1/*
2 * Copyright (C) 2010 Felix Fietkau <nbd@openwrt.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/netdevice.h>
9#include <linux/types.h>
10#include <linux/skbuff.h>
11#include <linux/debugfs.h>
12#include <linux/ieee80211.h>
13#include <net/mac80211.h>
14#include "rc80211_minstrel.h"
15#include "rc80211_minstrel_ht.h"
16
17static int
18minstrel_ht_stats_open(struct inode *inode, struct file *file)
19{
20 struct minstrel_ht_sta_priv *msp = inode->i_private;
21 struct minstrel_ht_sta *mi = &msp->ht;
22 struct minstrel_debugfs_info *ms;
23 unsigned int i, j, tp, prob, eprob;
24 char *p;
25 int ret;
26
27 if (!msp->is_ht) {
28 inode->i_private = &msp->legacy;
29 ret = minstrel_stats_open(inode, file);
30 inode->i_private = msp;
31 return ret;
32 }
33
34 ms = kmalloc(sizeof(*ms) + 8192, GFP_KERNEL);
35 if (!ms)
36 return -ENOMEM;
37
38 file->private_data = ms;
39 p = ms->buf;
40 p += sprintf(p, "type rate throughput ewma prob this prob "
41 "this succ/attempt success attempts\n");
42 for (i = 0; i < MINSTREL_MAX_STREAMS * MINSTREL_STREAM_GROUPS; i++) {
43 char htmode = '2';
44 char gimode = 'L';
45
46 if (!mi->groups[i].supported)
47 continue;
48
49 if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
50 htmode = '4';
51 if (minstrel_mcs_groups[i].flags & IEEE80211_TX_RC_SHORT_GI)
52 gimode = 'S';
53
54 for (j = 0; j < MCS_GROUP_RATES; j++) {
55 struct minstrel_rate_stats *mr = &mi->groups[i].rates[j];
56 int idx = i * MCS_GROUP_RATES + j;
57
58 if (!(mi->groups[i].supported & BIT(j)))
59 continue;
60
61 p += sprintf(p, "HT%c0/%cGI ", htmode, gimode);
62
63 *(p++) = (idx == mi->max_tp_rate) ? 'T' : ' ';
64 *(p++) = (idx == mi->max_tp_rate2) ? 't' : ' ';
65 *(p++) = (idx == mi->max_prob_rate) ? 'P' : ' ';
66 p += sprintf(p, "MCS%-2u", (minstrel_mcs_groups[i].streams - 1) *
67 MCS_GROUP_RATES + j);
68
69 tp = mr->cur_tp / 10;
70 prob = MINSTREL_TRUNC(mr->cur_prob * 1000);
71 eprob = MINSTREL_TRUNC(mr->probability * 1000);
72
73 p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u "
74 "%3u(%3u) %8llu %8llu\n",
75 tp / 10, tp % 10,
76 eprob / 10, eprob % 10,
77 prob / 10, prob % 10,
78 mr->last_success,
79 mr->last_attempts,
80 (unsigned long long)mr->succ_hist,
81 (unsigned long long)mr->att_hist);
82 }
83 }
 84 p += sprintf(p, "\nTotal packet count: ideal %d "
85 "lookaround %d\n",
86 max(0, (int) mi->total_packets - (int) mi->sample_packets),
87 mi->sample_packets);
88 p += sprintf(p, "Average A-MPDU length: %d.%d\n",
89 MINSTREL_TRUNC(mi->avg_ampdu_len),
90 MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10);
91 ms->len = p - ms->buf;
92
93 return 0;
94}
95
96static const struct file_operations minstrel_ht_stat_fops = {
97 .owner = THIS_MODULE,
98 .open = minstrel_ht_stats_open,
99 .read = minstrel_stats_read,
100 .release = minstrel_stats_release,
101};
102
103void
104minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir)
105{
106 struct minstrel_ht_sta_priv *msp = priv_sta;
107
108 msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp,
109 &minstrel_ht_stat_fops);
110}
111
112void
113minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta)
114{
115 struct minstrel_ht_sta_priv *msp = priv_sta;
116
117 debugfs_remove(msp->dbg_stats);
118}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 04ea07f0e78a..fa0f37e4afe4 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -39,7 +39,7 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local,
39{ 39{
40 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) { 40 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) {
41 if (likely(skb->len > FCS_LEN)) 41 if (likely(skb->len > FCS_LEN))
42 skb_trim(skb, skb->len - FCS_LEN); 42 __pskb_trim(skb, skb->len - FCS_LEN);
43 else { 43 else {
44 /* driver bug */ 44 /* driver bug */
45 WARN_ON(1); 45 WARN_ON(1);
@@ -81,8 +81,6 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
81 len += 8; 81 len += 8;
82 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 82 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
83 len += 1; 83 len += 1;
84 if (local->hw.flags & IEEE80211_HW_NOISE_DBM)
85 len += 1;
86 84
87 if (len & 1) /* padding for RX_FLAGS if necessary */ 85 if (len & 1) /* padding for RX_FLAGS if necessary */
88 len++; 86 len++;
@@ -179,14 +177,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
179 pos++; 177 pos++;
180 } 178 }
181 179
182 /* IEEE80211_RADIOTAP_DBM_ANTNOISE */
183 if (local->hw.flags & IEEE80211_HW_NOISE_DBM) {
184 *pos = status->noise;
185 rthdr->it_present |=
186 cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTNOISE);
187 pos++;
188 }
189
190 /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */ 180 /* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */
191 181
192 /* IEEE80211_RADIOTAP_ANTENNA */ 182 /* IEEE80211_RADIOTAP_ANTENNA */
@@ -236,6 +226,12 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
236 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) 226 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
237 present_fcs_len = FCS_LEN; 227 present_fcs_len = FCS_LEN;
238 228
229 /* make sure hdr->frame_control is on the linear part */
230 if (!pskb_may_pull(origskb, 2)) {
231 dev_kfree_skb(origskb);
232 return NULL;
233 }
234
239 if (!local->monitors) { 235 if (!local->monitors) {
240 if (should_drop_frame(origskb, present_fcs_len)) { 236 if (should_drop_frame(origskb, present_fcs_len)) {
241 dev_kfree_skb(origskb); 237 dev_kfree_skb(origskb);
@@ -297,7 +293,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
297 skb2 = skb_clone(skb, GFP_ATOMIC); 293 skb2 = skb_clone(skb, GFP_ATOMIC);
298 if (skb2) { 294 if (skb2) {
299 skb2->dev = prev_dev; 295 skb2->dev = prev_dev;
300 netif_rx(skb2); 296 netif_receive_skb(skb2);
301 } 297 }
302 } 298 }
303 299
@@ -308,7 +304,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
308 304
309 if (prev_dev) { 305 if (prev_dev) {
310 skb->dev = prev_dev; 306 skb->dev = prev_dev;
311 netif_rx(skb); 307 netif_receive_skb(skb);
312 } else 308 } else
313 dev_kfree_skb(skb); 309 dev_kfree_skb(skb);
314 310
@@ -493,7 +489,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
493 489
494 if (ieee80211_is_action(hdr->frame_control)) { 490 if (ieee80211_is_action(hdr->frame_control)) {
495 mgmt = (struct ieee80211_mgmt *)hdr; 491 mgmt = (struct ieee80211_mgmt *)hdr;
496 if (mgmt->u.action.category != MESH_PLINK_CATEGORY) 492 if (mgmt->u.action.category != WLAN_CATEGORY_MESH_PLINK)
497 return RX_DROP_MONITOR; 493 return RX_DROP_MONITOR;
498 return RX_CONTINUE; 494 return RX_CONTINUE;
499 } 495 }
@@ -723,11 +719,10 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
723 719
724 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; 720 tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
725 721
726 if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) 722 tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
723 if (!tid_agg_rx)
727 goto dont_reorder; 724 goto dont_reorder;
728 725
729 tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
730
731 /* qos null data frames are excluded */ 726 /* qos null data frames are excluded */
732 if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) 727 if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC)))
733 goto dont_reorder; 728 goto dont_reorder;
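
The tid_rx lookup above is the standard RCU pattern: readers fetch the pointer once with rcu_dereference() under rcu_read_lock() (which the RX path already holds) and treat NULL as "no session", instead of checking a separate state variable that can race with teardown. The usual writer-side pairing, sketched with plain kfree() rather than the actual mac80211 teardown path:

	/* publish removal, then wait out all readers before freeing */
	rcu_assign_pointer(sta->ampdu_mlme.tid_rx[tid], NULL);
	synchronize_rcu();
	kfree(tid_agg_rx);
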
@@ -742,12 +737,19 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
742 /* if this mpdu is fragmented - terminate rx aggregation session */ 737 /* if this mpdu is fragmented - terminate rx aggregation session */
743 sc = le16_to_cpu(hdr->seq_ctrl); 738 sc = le16_to_cpu(hdr->seq_ctrl);
744 if (sc & IEEE80211_SCTL_FRAG) { 739 if (sc & IEEE80211_SCTL_FRAG) {
745 ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, 740 skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
746 tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP); 741 skb_queue_tail(&rx->sdata->skb_queue, skb);
747 dev_kfree_skb(skb); 742 ieee80211_queue_work(&local->hw, &rx->sdata->work);
748 return; 743 return;
749 } 744 }
750 745
746 /*
747 * No locking needed -- we will only ever process one
748 * RX packet at a time, and thus own tid_agg_rx. All
749 * other code manipulating it needs to (and does) make
750 * sure that we cannot get to it any more before doing
751 * anything with it.
752 */
751 if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) 753 if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames))
752 return; 754 return;
753 755
@@ -822,6 +824,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
822 ieee80211_rx_result result = RX_DROP_UNUSABLE; 824 ieee80211_rx_result result = RX_DROP_UNUSABLE;
823 struct ieee80211_key *stakey = NULL; 825 struct ieee80211_key *stakey = NULL;
824 int mmie_keyidx = -1; 826 int mmie_keyidx = -1;
827 __le16 fc;
825 828
826 /* 829 /*
827 * Key selection 101 830 * Key selection 101
@@ -863,13 +866,15 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
863 if (rx->sta) 866 if (rx->sta)
864 stakey = rcu_dereference(rx->sta->key); 867 stakey = rcu_dereference(rx->sta->key);
865 868
866 if (!ieee80211_has_protected(hdr->frame_control)) 869 fc = hdr->frame_control;
870
871 if (!ieee80211_has_protected(fc))
867 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); 872 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
868 873
869 if (!is_multicast_ether_addr(hdr->addr1) && stakey) { 874 if (!is_multicast_ether_addr(hdr->addr1) && stakey) {
870 rx->key = stakey; 875 rx->key = stakey;
871 /* Skip decryption if the frame is not protected. */ 876 /* Skip decryption if the frame is not protected. */
872 if (!ieee80211_has_protected(hdr->frame_control)) 877 if (!ieee80211_has_protected(fc))
873 return RX_CONTINUE; 878 return RX_CONTINUE;
874 } else if (mmie_keyidx >= 0) { 879 } else if (mmie_keyidx >= 0) {
875 /* Broadcast/multicast robust management frame / BIP */ 880 /* Broadcast/multicast robust management frame / BIP */
@@ -881,7 +886,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
881 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) 886 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
882 return RX_DROP_MONITOR; /* unexpected BIP keyidx */ 887 return RX_DROP_MONITOR; /* unexpected BIP keyidx */
883 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); 888 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
884 } else if (!ieee80211_has_protected(hdr->frame_control)) { 889 } else if (!ieee80211_has_protected(fc)) {
885 /* 890 /*
886 * The frame was not protected, so skip decryption. However, we 891 * The frame was not protected, so skip decryption. However, we
887 * need to set rx->key if there is a key that could have been 892 * need to set rx->key if there is a key that could have been
@@ -889,7 +894,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
889 * have been expected. 894 * have been expected.
890 */ 895 */
891 struct ieee80211_key *key = NULL; 896 struct ieee80211_key *key = NULL;
892 if (ieee80211_is_mgmt(hdr->frame_control) && 897 if (ieee80211_is_mgmt(fc) &&
893 is_multicast_ether_addr(hdr->addr1) && 898 is_multicast_ether_addr(hdr->addr1) &&
894 (key = rcu_dereference(rx->sdata->default_mgmt_key))) 899 (key = rcu_dereference(rx->sdata->default_mgmt_key)))
895 rx->key = key; 900 rx->key = key;
@@ -897,6 +902,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
897 rx->key = key; 902 rx->key = key;
898 return RX_CONTINUE; 903 return RX_CONTINUE;
899 } else { 904 } else {
905 u8 keyid;
900 /* 906 /*
901 * The device doesn't give us the IV so we won't be 907 * The device doesn't give us the IV so we won't be
902 * able to look up the key. That's ok though, we 908 * able to look up the key. That's ok though, we
@@ -910,7 +916,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
910 (status->flag & RX_FLAG_IV_STRIPPED)) 916 (status->flag & RX_FLAG_IV_STRIPPED))
911 return RX_CONTINUE; 917 return RX_CONTINUE;
912 918
913 hdrlen = ieee80211_hdrlen(hdr->frame_control); 919 hdrlen = ieee80211_hdrlen(fc);
914 920
915 if (rx->skb->len < 8 + hdrlen) 921 if (rx->skb->len < 8 + hdrlen)
916 return RX_DROP_UNUSABLE; /* TODO: count this? */ 922 return RX_DROP_UNUSABLE; /* TODO: count this? */
@@ -919,7 +925,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
919 * no need to call ieee80211_wep_get_keyidx, 925 * no need to call ieee80211_wep_get_keyidx,
920 * it verifies a bunch of things we've done already 926 * it verifies a bunch of things we've done already
921 */ 927 */
922 keyidx = rx->skb->data[hdrlen + 3] >> 6; 928 skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
929 keyidx = keyid >> 6;
923 930
924 rx->key = rcu_dereference(rx->sdata->keys[keyidx]); 931 rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
925 932
@@ -940,16 +947,19 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
940 return RX_DROP_MONITOR; 947 return RX_DROP_MONITOR;
941 } 948 }
942 949
943 /* Check for weak IVs if possible */ 950 if (skb_linearize(rx->skb))
944 if (rx->sta && rx->key->conf.alg == ALG_WEP && 951 return RX_DROP_UNUSABLE;
945 ieee80211_is_data(hdr->frame_control) && 952 /* the hdr variable is invalid now! */
946 (!(status->flag & RX_FLAG_IV_STRIPPED) ||
947 !(status->flag & RX_FLAG_DECRYPTED)) &&
948 ieee80211_wep_is_weak_iv(rx->skb, rx->key))
949 rx->sta->wep_weak_iv_count++;
950 953
951 switch (rx->key->conf.alg) { 954 switch (rx->key->conf.alg) {
952 case ALG_WEP: 955 case ALG_WEP:
956 /* Check for weak IVs if possible */
957 if (rx->sta && ieee80211_is_data(fc) &&
958 (!(status->flag & RX_FLAG_IV_STRIPPED) ||
959 !(status->flag & RX_FLAG_DECRYPTED)) &&
960 ieee80211_wep_is_weak_iv(rx->skb, rx->key))
961 rx->sta->wep_weak_iv_count++;
962
953 result = ieee80211_crypto_wep_decrypt(rx); 963 result = ieee80211_crypto_wep_decrypt(rx);
954 break; 964 break;
955 case ALG_TKIP: 965 case ALG_TKIP:
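Annotation: two defensive changes meet in this hunk. The key index is read with skb_copy_bits() so the lookup works even when the IV still sits in a paged fragment, and once skb_linearize() succeeds the earlier hdr pointer is dead, because linearizing may reallocate skb->data. The general rule, as a short sketch:

    /* Any pointer into skb->data computed before skb_linearize()
     * must be recomputed afterwards -- the data area may move. */
    if (skb_linearize(skb))
            return RX_DROP_UNUSABLE;
    hdr = (struct ieee80211_hdr *)skb->data;        /* refresh */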
@@ -1078,7 +1088,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1078 sta->rx_fragments++; 1088 sta->rx_fragments++;
1079 sta->rx_bytes += rx->skb->len; 1089 sta->rx_bytes += rx->skb->len;
1080 sta->last_signal = status->signal; 1090 sta->last_signal = status->signal;
1081 sta->last_noise = status->noise;
1082 1091
1083 /* 1092 /*
1084 * Change STA power saving mode only at the end of a frame 1093 * Change STA power saving mode only at the end of a frame
@@ -1241,6 +1250,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1241 } 1250 }
1242 I802_DEBUG_INC(rx->local->rx_handlers_fragments); 1251 I802_DEBUG_INC(rx->local->rx_handlers_fragments);
1243 1252
1253 if (skb_linearize(rx->skb))
1254 return RX_DROP_UNUSABLE;
1255
1256 /*
1257 * skb_linearize() might change the skb->data and
1258 * previously cached variables (in this case, hdr) need to
1259 * be refreshed with the new data.
1260 */
1261 hdr = (struct ieee80211_hdr *)rx->skb->data;
1244 seq = (sc & IEEE80211_SCTL_SEQ) >> 4; 1262 seq = (sc & IEEE80211_SCTL_SEQ) >> 4;
1245 1263
1246 if (frag == 0) { 1264 if (frag == 0) {
@@ -1249,11 +1267,13 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1249 rx->queue, &(rx->skb)); 1267 rx->queue, &(rx->skb));
1250 if (rx->key && rx->key->conf.alg == ALG_CCMP && 1268 if (rx->key && rx->key->conf.alg == ALG_CCMP &&
1251 ieee80211_has_protected(fc)) { 1269 ieee80211_has_protected(fc)) {
1270 int queue = ieee80211_is_mgmt(fc) ?
1271 NUM_RX_DATA_QUEUES : rx->queue;
1252 /* Store CCMP PN so that we can verify that the next 1272 /* Store CCMP PN so that we can verify that the next
1253 * fragment has a sequential PN value. */ 1273 * fragment has a sequential PN value. */
1254 entry->ccmp = 1; 1274 entry->ccmp = 1;
1255 memcpy(entry->last_pn, 1275 memcpy(entry->last_pn,
1256 rx->key->u.ccmp.rx_pn[rx->queue], 1276 rx->key->u.ccmp.rx_pn[queue],
1257 CCMP_PN_LEN); 1277 CCMP_PN_LEN);
1258 } 1278 }
1259 return RX_QUEUED; 1279 return RX_QUEUED;
@@ -1273,6 +1293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1273 if (entry->ccmp) { 1293 if (entry->ccmp) {
1274 int i; 1294 int i;
1275 u8 pn[CCMP_PN_LEN], *rpn; 1295 u8 pn[CCMP_PN_LEN], *rpn;
1296 int queue;
1276 if (!rx->key || rx->key->conf.alg != ALG_CCMP) 1297 if (!rx->key || rx->key->conf.alg != ALG_CCMP)
1277 return RX_DROP_UNUSABLE; 1298 return RX_DROP_UNUSABLE;
1278 memcpy(pn, entry->last_pn, CCMP_PN_LEN); 1299 memcpy(pn, entry->last_pn, CCMP_PN_LEN);
@@ -1281,7 +1302,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1281 if (pn[i]) 1302 if (pn[i])
1282 break; 1303 break;
1283 } 1304 }
1284 rpn = rx->key->u.ccmp.rx_pn[rx->queue]; 1305 queue = ieee80211_is_mgmt(fc) ?
1306 NUM_RX_DATA_QUEUES : rx->queue;
1307 rpn = rx->key->u.ccmp.rx_pn[queue];
1285 if (memcmp(pn, rpn, CCMP_PN_LEN)) 1308 if (memcmp(pn, rpn, CCMP_PN_LEN))
1286 return RX_DROP_UNUSABLE; 1309 return RX_DROP_UNUSABLE;
1287 memcpy(entry->last_pn, pn, CCMP_PN_LEN); 1310 memcpy(entry->last_pn, pn, CCMP_PN_LEN);
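Annotation: the CCMP replay rule for fragments is that every fragment after the first must carry a packet number exactly one greater than its predecessor; the loop above implements the +1 as a big-endian carry over the 6-byte PN. A standalone sketch of the same arithmetic:

    #include <stdint.h>
    #include <string.h>

    #define CCMP_PN_LEN 6

    /* Increment a big-endian packet number in place (the carry loop
     * mirrors the hunk above). */
    static void pn_increment(uint8_t pn[CCMP_PN_LEN])
    {
            int i;

            for (i = CCMP_PN_LEN - 1; i >= 0; i--) {
                    pn[i]++;
                    if (pn[i])      /* no wrap, carry stops here */
                            break;
            }
    }

    /* A fragment is acceptable iff its PN equals last_pn + 1. */
    static int pn_is_next(uint8_t last_pn[CCMP_PN_LEN], const uint8_t *rpn)
    {
            pn_increment(last_pn);
            return memcmp(last_pn, rpn, CCMP_PN_LEN) == 0;
    }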
@@ -1406,21 +1429,24 @@ static int
1406ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) 1429ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
1407{ 1430{
1408 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 1431 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
1432 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1409 __le16 fc = hdr->frame_control; 1433 __le16 fc = hdr->frame_control;
1410 int res;
1411 1434
1412 res = ieee80211_drop_unencrypted(rx, fc); 1435 /*
1413 if (unlikely(res)) 1436 * Pass through unencrypted frames if the hardware has
1414 return res; 1437 * decrypted them already.
1438 */
1439 if (status->flag & RX_FLAG_DECRYPTED)
1440 return 0;
1415 1441
1416 if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) { 1442 if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) {
1417 if (unlikely(ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && 1443 if (unlikely(!ieee80211_has_protected(fc) &&
1444 ieee80211_is_unicast_robust_mgmt_frame(rx->skb) &&
1418 rx->key)) 1445 rx->key))
1419 return -EACCES; 1446 return -EACCES;
1420 /* BIP does not use Protected field, so need to check MMIE */ 1447 /* BIP does not use Protected field, so need to check MMIE */
1421 if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && 1448 if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) &&
1422 ieee80211_get_mmie_keyidx(rx->skb) < 0 && 1449 ieee80211_get_mmie_keyidx(rx->skb) < 0))
1423 rx->key))
1424 return -EACCES; 1450 return -EACCES;
1425 /* 1451 /*
1426 * When using MFP, Action frames are not allowed prior to 1452 * When using MFP, Action frames are not allowed prior to
@@ -1552,7 +1578,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1552 /* deliver to local stack */ 1578 /* deliver to local stack */
1553 skb->protocol = eth_type_trans(skb, dev); 1579 skb->protocol = eth_type_trans(skb, dev);
1554 memset(skb->cb, 0, sizeof(skb->cb)); 1580 memset(skb->cb, 0, sizeof(skb->cb));
1555 netif_rx(skb); 1581 netif_receive_skb(skb);
1556 } 1582 }
1557 } 1583 }
1558 1584
@@ -1598,6 +1624,9 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
1598 skb->dev = dev; 1624 skb->dev = dev;
1599 __skb_queue_head_init(&frame_list); 1625 __skb_queue_head_init(&frame_list);
1600 1626
1627 if (skb_linearize(skb))
1628 return RX_DROP_UNUSABLE;
1629
1601 ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, 1630 ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
1602 rx->sdata->vif.type, 1631 rx->sdata->vif.type,
1603 rx->local->hw.extra_tx_headroom); 1632 rx->local->hw.extra_tx_headroom);
@@ -1794,15 +1823,24 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
1794 return RX_CONTINUE; 1823 return RX_CONTINUE;
1795 1824
1796 if (ieee80211_is_back_req(bar->frame_control)) { 1825 if (ieee80211_is_back_req(bar->frame_control)) {
1826 struct {
1827 __le16 control, start_seq_num;
1828 } __packed bar_data;
1829
1797 if (!rx->sta) 1830 if (!rx->sta)
1798 return RX_DROP_MONITOR; 1831 return RX_DROP_MONITOR;
1799 tid = le16_to_cpu(bar->control) >> 12; 1832
1800 if (rx->sta->ampdu_mlme.tid_state_rx[tid] 1833 if (skb_copy_bits(skb, offsetof(struct ieee80211_bar, control),
1801 != HT_AGG_STATE_OPERATIONAL) 1834 &bar_data, sizeof(bar_data)))
1802 return RX_DROP_MONITOR; 1835 return RX_DROP_MONITOR;
1803 tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid];
1804 1836
1805 start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; 1837 tid = le16_to_cpu(bar_data.control) >> 12;
1838
1839 tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]);
1840 if (!tid_agg_rx)
1841 return RX_DROP_MONITOR;
1842
1843 start_seq_num = le16_to_cpu(bar_data.start_seq_num) >> 4;
1806 1844
1807 /* reset session timer */ 1845 /* reset session timer */
1808 if (tid_agg_rx->timeout) 1846 if (tid_agg_rx->timeout)
@@ -1816,7 +1854,12 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
1816 return RX_QUEUED; 1854 return RX_QUEUED;
1817 } 1855 }
1818 1856
1819 return RX_CONTINUE; 1857 /*
1858 * After this point, we only want management frames,
1859 * so we can drop all remaining control frames to
1860 * cooked monitor interfaces.
1861 */
1862 return RX_DROP_MONITOR;
1820} 1863}
1821 1864
1822static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, 1865static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
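Annotation: reading the BAR through a __packed local instead of casting skb->data serves two purposes: skb_copy_bits() copes with non-linear skbs and returns non-zero if the requested range runs past the frame, doubling as a length check, and the local copy avoids unaligned loads. The TID then comes out of bits 12..15 of the Control field, hence the >> 12. A plain-C illustration of that extraction:

    #include <stdint.h>

    /* BlockAckReq Control field, in host order after le16_to_cpu():
     * bits 12..15 carry the TID, e.g. bar_control_to_tid(0x5004) == 5. */
    static unsigned int bar_control_to_tid(uint16_t control)
    {
            return control >> 12;
    }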
@@ -1912,23 +1955,23 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1912 case WLAN_ACTION_ADDBA_REQ: 1955 case WLAN_ACTION_ADDBA_REQ:
1913 if (len < (IEEE80211_MIN_ACTION_SIZE + 1956 if (len < (IEEE80211_MIN_ACTION_SIZE +
1914 sizeof(mgmt->u.action.u.addba_req))) 1957 sizeof(mgmt->u.action.u.addba_req)))
1915 return RX_DROP_MONITOR; 1958 goto invalid;
1916 ieee80211_process_addba_request(local, rx->sta, mgmt, len); 1959 break;
1917 goto handled;
1918 case WLAN_ACTION_ADDBA_RESP: 1960 case WLAN_ACTION_ADDBA_RESP:
1919 if (len < (IEEE80211_MIN_ACTION_SIZE + 1961 if (len < (IEEE80211_MIN_ACTION_SIZE +
1920 sizeof(mgmt->u.action.u.addba_resp))) 1962 sizeof(mgmt->u.action.u.addba_resp)))
1921 break; 1963 goto invalid;
1922 ieee80211_process_addba_resp(local, rx->sta, mgmt, len); 1964 break;
1923 goto handled;
1924 case WLAN_ACTION_DELBA: 1965 case WLAN_ACTION_DELBA:
1925 if (len < (IEEE80211_MIN_ACTION_SIZE + 1966 if (len < (IEEE80211_MIN_ACTION_SIZE +
1926 sizeof(mgmt->u.action.u.delba))) 1967 sizeof(mgmt->u.action.u.delba)))
1927 break; 1968 goto invalid;
1928 ieee80211_process_delba(sdata, rx->sta, mgmt, len); 1969 break;
1929 goto handled; 1970 default:
1971 goto invalid;
1930 } 1972 }
1931 break; 1973
1974 goto queue;
1932 case WLAN_CATEGORY_SPECTRUM_MGMT: 1975 case WLAN_CATEGORY_SPECTRUM_MGMT:
1933 if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) 1976 if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ)
1934 break; 1977 break;
@@ -1958,7 +2001,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1958 if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN)) 2001 if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN))
1959 break; 2002 break;
1960 2003
1961 return ieee80211_sta_rx_mgmt(sdata, rx->skb); 2004 goto queue;
1962 } 2005 }
1963 break; 2006 break;
1964 case WLAN_CATEGORY_SA_QUERY: 2007 case WLAN_CATEGORY_SA_QUERY:
@@ -1974,13 +2017,14 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1974 goto handled; 2017 goto handled;
1975 } 2018 }
1976 break; 2019 break;
1977 case MESH_PLINK_CATEGORY: 2020 case WLAN_CATEGORY_MESH_PLINK:
1978 case MESH_PATH_SEL_CATEGORY: 2021 case WLAN_CATEGORY_MESH_PATH_SEL:
1979 if (ieee80211_vif_is_mesh(&sdata->vif)) 2022 if (!ieee80211_vif_is_mesh(&sdata->vif))
1980 return ieee80211_mesh_rx_mgmt(sdata, rx->skb); 2023 break;
1981 break; 2024 goto queue;
1982 } 2025 }
1983 2026
2027 invalid:
1984 /* 2028 /*
1985 * For AP mode, hostapd is responsible for handling any action 2029 * For AP mode, hostapd is responsible for handling any action
1986 * frames that we didn't handle, including returning unknown 2030 * frames that we didn't handle, including returning unknown
@@ -2000,8 +2044,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2000 */ 2044 */
2001 status = IEEE80211_SKB_RXCB(rx->skb); 2045 status = IEEE80211_SKB_RXCB(rx->skb);
2002 2046
2003 if (sdata->vif.type == NL80211_IFTYPE_STATION && 2047 if (cfg80211_rx_action(rx->sdata->dev, status->freq,
2004 cfg80211_rx_action(rx->sdata->dev, status->freq,
2005 rx->skb->data, rx->skb->len, 2048 rx->skb->data, rx->skb->len,
2006 GFP_ATOMIC)) 2049 GFP_ATOMIC))
2007 goto handled; 2050 goto handled;
@@ -2013,11 +2056,11 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2013 nskb = skb_copy_expand(rx->skb, local->hw.extra_tx_headroom, 0, 2056 nskb = skb_copy_expand(rx->skb, local->hw.extra_tx_headroom, 0,
2014 GFP_ATOMIC); 2057 GFP_ATOMIC);
2015 if (nskb) { 2058 if (nskb) {
2016 struct ieee80211_mgmt *mgmt = (void *)nskb->data; 2059 struct ieee80211_mgmt *nmgmt = (void *)nskb->data;
2017 2060
2018 mgmt->u.action.category |= 0x80; 2061 nmgmt->u.action.category |= 0x80;
2019 memcpy(mgmt->da, mgmt->sa, ETH_ALEN); 2062 memcpy(nmgmt->da, nmgmt->sa, ETH_ALEN);
2020 memcpy(mgmt->sa, rx->sdata->vif.addr, ETH_ALEN); 2063 memcpy(nmgmt->sa, rx->sdata->vif.addr, ETH_ALEN);
2021 2064
2022 memset(nskb->cb, 0, sizeof(nskb->cb)); 2065 memset(nskb->cb, 0, sizeof(nskb->cb));
2023 2066
@@ -2029,6 +2072,14 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2029 rx->sta->rx_packets++; 2072 rx->sta->rx_packets++;
2030 dev_kfree_skb(rx->skb); 2073 dev_kfree_skb(rx->skb);
2031 return RX_QUEUED; 2074 return RX_QUEUED;
2075
2076 queue:
2077 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2078 skb_queue_tail(&sdata->skb_queue, rx->skb);
2079 ieee80211_queue_work(&local->hw, &sdata->work);
2080 if (rx->sta)
2081 rx->sta->rx_packets++;
2082 return RX_QUEUED;
2032} 2083}
2033 2084
2034static ieee80211_rx_result debug_noinline 2085static ieee80211_rx_result debug_noinline
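Annotation: aggregation action frames are no longer handled in the RX tasklet at all; they are tagged, parked on the per-interface queue, and picked up by sdata->work in process context, where the new ampdu_mlme.mtx can be taken and the code may sleep. A hypothetical consumer shape (the real handler lives elsewhere in this series), just to show why the pkt_type cookie exists:

    /* Hypothetical work handler: drains sdata->skb_queue and
     * dispatches queued RX frames. Apart from the queue, the work
     * item and the pkt_type cookie, the names are illustrative. */
    static void sdata_work_sketch(struct work_struct *work)
    {
            struct ieee80211_sub_if_data *sdata =
                    container_of(work, struct ieee80211_sub_if_data, work);
            struct sk_buff *skb;

            while ((skb = skb_dequeue(&sdata->skb_queue)) != NULL) {
                    if (skb->pkt_type == IEEE80211_SDATA_QUEUE_TYPE_FRAME)
                            ;       /* dispatch by frame type; may sleep */
                    dev_kfree_skb(skb);
            }
    }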
@@ -2036,10 +2087,15 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
2036{ 2087{
2037 struct ieee80211_sub_if_data *sdata = rx->sdata; 2088 struct ieee80211_sub_if_data *sdata = rx->sdata;
2038 ieee80211_rx_result rxs; 2089 ieee80211_rx_result rxs;
2090 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data;
2091 __le16 stype;
2039 2092
2040 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 2093 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
2041 return RX_DROP_MONITOR; 2094 return RX_DROP_MONITOR;
2042 2095
2096 if (rx->skb->len < 24)
2097 return RX_DROP_MONITOR;
2098
2043 if (ieee80211_drop_unencrypted_mgmt(rx)) 2099 if (ieee80211_drop_unencrypted_mgmt(rx))
2044 return RX_DROP_UNUSABLE; 2100 return RX_DROP_UNUSABLE;
2045 2101
@@ -2047,16 +2103,42 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
2047 if (rxs != RX_CONTINUE) 2103 if (rxs != RX_CONTINUE)
2048 return rxs; 2104 return rxs;
2049 2105
2050 if (ieee80211_vif_is_mesh(&sdata->vif)) 2106 stype = mgmt->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE);
2051 return ieee80211_mesh_rx_mgmt(sdata, rx->skb);
2052 2107
2053 if (sdata->vif.type == NL80211_IFTYPE_ADHOC) 2108 if (!ieee80211_vif_is_mesh(&sdata->vif) &&
2054 return ieee80211_ibss_rx_mgmt(sdata, rx->skb); 2109 sdata->vif.type != NL80211_IFTYPE_ADHOC &&
2110 sdata->vif.type != NL80211_IFTYPE_STATION)
2111 return RX_DROP_MONITOR;
2112
2113 switch (stype) {
2114 case cpu_to_le16(IEEE80211_STYPE_BEACON):
2115 case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP):
2116 /* process for all: mesh, mlme, ibss */
2117 break;
2118 case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
2119 case cpu_to_le16(IEEE80211_STYPE_DISASSOC):
2120 /* process only for station */
2121 if (sdata->vif.type != NL80211_IFTYPE_STATION)
2122 return RX_DROP_MONITOR;
2123 break;
2124 case cpu_to_le16(IEEE80211_STYPE_PROBE_REQ):
2125 case cpu_to_le16(IEEE80211_STYPE_AUTH):
2126 /* process only for ibss */
2127 if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
2128 return RX_DROP_MONITOR;
2129 break;
2130 default:
2131 return RX_DROP_MONITOR;
2132 }
2055 2133
2056 if (sdata->vif.type == NL80211_IFTYPE_STATION) 2134 /* queue up frame and kick off work to process it */
2057 return ieee80211_sta_rx_mgmt(sdata, rx->skb); 2135 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2136 skb_queue_tail(&sdata->skb_queue, rx->skb);
2137 ieee80211_queue_work(&rx->local->hw, &sdata->work);
2138 if (rx->sta)
2139 rx->sta->rx_packets++;
2058 2140
2059 return RX_DROP_MONITOR; 2141 return RX_QUEUED;
2060} 2142}
2061 2143
2062static void ieee80211_rx_michael_mic_report(struct ieee80211_hdr *hdr, 2144static void ieee80211_rx_michael_mic_report(struct ieee80211_hdr *hdr,
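Annotation: the new subtype switch compares in wire byte order. frame_control stays __le16 and the case constants are wrapped in cpu_to_le16(), which the compiler folds at build time, so the per-frame cost is one mask plus compare on both little- and big-endian hosts:

    /* Convert the constants, never the packet field: */
    __le16 stype = mgmt->frame_control & cpu_to_le16(IEEE80211_FCTL_STYPE);

    switch (stype) {
    case cpu_to_le16(IEEE80211_STYPE_BEACON):       /* folded at build time */
            /* ... */
            break;
    }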
@@ -2112,7 +2194,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2112 u8 rate_or_pad; 2194 u8 rate_or_pad;
2113 __le16 chan_freq; 2195 __le16 chan_freq;
2114 __le16 chan_flags; 2196 __le16 chan_flags;
2115 } __attribute__ ((packed)) *rthdr; 2197 } __packed *rthdr;
2116 struct sk_buff *skb = rx->skb, *skb2; 2198 struct sk_buff *skb = rx->skb, *skb2;
2117 struct net_device *prev_dev = NULL; 2199 struct net_device *prev_dev = NULL;
2118 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2200 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
@@ -2162,7 +2244,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2162 skb2 = skb_clone(skb, GFP_ATOMIC); 2244 skb2 = skb_clone(skb, GFP_ATOMIC);
2163 if (skb2) { 2245 if (skb2) {
2164 skb2->dev = prev_dev; 2246 skb2->dev = prev_dev;
2165 netif_rx(skb2); 2247 netif_receive_skb(skb2);
2166 } 2248 }
2167 } 2249 }
2168 2250
@@ -2173,7 +2255,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2173 2255
2174 if (prev_dev) { 2256 if (prev_dev) {
2175 skb->dev = prev_dev; 2257 skb->dev = prev_dev;
2176 netif_rx(skb); 2258 netif_receive_skb(skb);
2177 skb = NULL; 2259 skb = NULL;
2178 } else 2260 } else
2179 goto out_free_skb; 2261 goto out_free_skb;
@@ -2372,29 +2454,42 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2372 struct ieee80211_local *local = hw_to_local(hw); 2454 struct ieee80211_local *local = hw_to_local(hw);
2373 struct ieee80211_sub_if_data *sdata; 2455 struct ieee80211_sub_if_data *sdata;
2374 struct ieee80211_hdr *hdr; 2456 struct ieee80211_hdr *hdr;
2457 __le16 fc;
2375 struct ieee80211_rx_data rx; 2458 struct ieee80211_rx_data rx;
2376 int prepares; 2459 int prepares;
2377 struct ieee80211_sub_if_data *prev = NULL; 2460 struct ieee80211_sub_if_data *prev = NULL;
2378 struct sk_buff *skb_new; 2461 struct sk_buff *skb_new;
2379 struct sta_info *sta, *tmp; 2462 struct sta_info *sta, *tmp;
2380 bool found_sta = false; 2463 bool found_sta = false;
2464 int err = 0;
2381 2465
2382 hdr = (struct ieee80211_hdr *)skb->data; 2466 fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
2383 memset(&rx, 0, sizeof(rx)); 2467 memset(&rx, 0, sizeof(rx));
2384 rx.skb = skb; 2468 rx.skb = skb;
2385 rx.local = local; 2469 rx.local = local;
2386 2470
2387 if (ieee80211_is_data(hdr->frame_control) || ieee80211_is_mgmt(hdr->frame_control)) 2471 if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
2388 local->dot11ReceivedFragmentCount++; 2472 local->dot11ReceivedFragmentCount++;
2389 2473
2390 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || 2474 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) ||
2391 test_bit(SCAN_OFF_CHANNEL, &local->scanning))) 2475 test_bit(SCAN_OFF_CHANNEL, &local->scanning)))
2392 rx.flags |= IEEE80211_RX_IN_SCAN; 2476 rx.flags |= IEEE80211_RX_IN_SCAN;
2393 2477
2478 if (ieee80211_is_mgmt(fc))
2479 err = skb_linearize(skb);
2480 else
2481 err = !pskb_may_pull(skb, ieee80211_hdrlen(fc));
2482
2483 if (err) {
2484 dev_kfree_skb(skb);
2485 return;
2486 }
2487
2488 hdr = (struct ieee80211_hdr *)skb->data;
2394 ieee80211_parse_qos(&rx); 2489 ieee80211_parse_qos(&rx);
2395 ieee80211_verify_alignment(&rx); 2490 ieee80211_verify_alignment(&rx);
2396 2491
2397 if (ieee80211_is_data(hdr->frame_control)) { 2492 if (ieee80211_is_data(fc)) {
2398 for_each_sta_info(local, hdr->addr2, sta, tmp) { 2493 for_each_sta_info(local, hdr->addr2, sta, tmp) {
2399 rx.sta = sta; 2494 rx.sta = sta;
2400 found_sta = true; 2495 found_sta = true;
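Annotation: __ieee80211_rx_handle_packet() now establishes a two-tier guarantee before anything parses the header: management frames are fully linearized (they are small and inspected all over), while data frames merely need the 802.11 header pulled into the linear area. pskb_may_pull(skb, n) returns false when the skb is shorter than n or the pull fails, which the '!' converts into an error. A minimal sketch:

    /* After this, the first ieee80211_hdrlen(fc) bytes are
     * guaranteed to be directly addressable via skb->data. */
    if (!pskb_may_pull(skb, ieee80211_hdrlen(fc)))
            goto drop;      /* runt frame or pull failure */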
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 85507bd9e341..872d7b6ef6b3 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -14,6 +14,8 @@
14 14
15#include <linux/if_arp.h> 15#include <linux/if_arp.h>
16#include <linux/rtnetlink.h> 16#include <linux/rtnetlink.h>
17#include <linux/pm_qos_params.h>
18#include <net/sch_generic.h>
17#include <linux/slab.h> 19#include <linux/slab.h>
18#include <net/mac80211.h> 20#include <net/mac80211.h>
19 21
@@ -83,7 +85,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
83{ 85{
84 struct cfg80211_bss *cbss; 86 struct cfg80211_bss *cbss;
85 struct ieee80211_bss *bss; 87 struct ieee80211_bss *bss;
86 int clen; 88 int clen, srlen;
87 s32 signal = 0; 89 s32 signal = 0;
88 90
89 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 91 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
@@ -112,23 +114,28 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
112 bss->dtim_period = tim_ie->dtim_period; 114 bss->dtim_period = tim_ie->dtim_period;
113 } 115 }
114 116
115 bss->supp_rates_len = 0; 117 /* If the beacon had no TIM IE, or it was invalid, use 1 */
118 if (beacon && !bss->dtim_period)
119 bss->dtim_period = 1;
120
121 /* replace old supported rates if we get new values */
122 srlen = 0;
116 if (elems->supp_rates) { 123 if (elems->supp_rates) {
117 clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; 124 clen = IEEE80211_MAX_SUPP_RATES;
118 if (clen > elems->supp_rates_len) 125 if (clen > elems->supp_rates_len)
119 clen = elems->supp_rates_len; 126 clen = elems->supp_rates_len;
120 memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates, 127 memcpy(bss->supp_rates, elems->supp_rates, clen);
121 clen); 128 srlen += clen;
122 bss->supp_rates_len += clen;
123 } 129 }
124 if (elems->ext_supp_rates) { 130 if (elems->ext_supp_rates) {
125 clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; 131 clen = IEEE80211_MAX_SUPP_RATES - srlen;
126 if (clen > elems->ext_supp_rates_len) 132 if (clen > elems->ext_supp_rates_len)
127 clen = elems->ext_supp_rates_len; 133 clen = elems->ext_supp_rates_len;
128 memcpy(&bss->supp_rates[bss->supp_rates_len], 134 memcpy(bss->supp_rates + srlen, elems->ext_supp_rates, clen);
129 elems->ext_supp_rates, clen); 135 srlen += clen;
130 bss->supp_rates_len += clen;
131 } 136 }
137 if (srlen)
138 bss->supp_rates_len = srlen;
132 139
133 bss->wmm_used = elems->wmm_param || elems->wmm_info; 140 bss->wmm_used = elems->wmm_param || elems->wmm_info;
134 bss->uapsd_supported = is_uapsd_supported(elems); 141 bss->uapsd_supported = is_uapsd_supported(elems);
@@ -246,6 +253,8 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
246 struct ieee80211_local *local = hw_to_local(hw); 253 struct ieee80211_local *local = hw_to_local(hw);
247 bool was_hw_scan; 254 bool was_hw_scan;
248 255
256 trace_api_scan_completed(local, aborted);
257
249 mutex_lock(&local->scan_mtx); 258 mutex_lock(&local->scan_mtx);
250 259
251 /* 260 /*
@@ -322,6 +331,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
322 331
323 ieee80211_offchannel_stop_beaconing(local); 332 ieee80211_offchannel_stop_beaconing(local);
324 333
334 local->leave_oper_channel_time = 0;
325 local->next_scan_state = SCAN_DECISION; 335 local->next_scan_state = SCAN_DECISION;
326 local->scan_channel_idx = 0; 336 local->scan_channel_idx = 0;
327 337
@@ -390,28 +400,14 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
390 else 400 else
391 __set_bit(SCAN_SW_SCANNING, &local->scanning); 401 __set_bit(SCAN_SW_SCANNING, &local->scanning);
392 402
393 /*
394 * Kicking off the scan need not be protected,
395 * only the scan variable stuff, since now
396 * local->scan_req is assigned and other callers
397 * will abort their scan attempts.
398 *
399 * This avoids too many locking dependencies
400 * so that the scan completed calls have more
401 * locking freedom.
402 */
403
404 ieee80211_recalc_idle(local); 403 ieee80211_recalc_idle(local);
405 mutex_unlock(&local->scan_mtx);
406 404
407 if (local->ops->hw_scan) { 405 if (local->ops->hw_scan) {
408 WARN_ON(!ieee80211_prep_hw_scan(local)); 406 WARN_ON(!ieee80211_prep_hw_scan(local));
409 rc = drv_hw_scan(local, local->hw_scan_req); 407 rc = drv_hw_scan(local, sdata, local->hw_scan_req);
410 } else 408 } else
411 rc = ieee80211_start_sw_scan(local); 409 rc = ieee80211_start_sw_scan(local);
412 410
413 mutex_lock(&local->scan_mtx);
414
415 if (rc) { 411 if (rc) {
416 kfree(local->hw_scan_req); 412 kfree(local->hw_scan_req);
417 local->hw_scan_req = NULL; 413 local->hw_scan_req = NULL;
@@ -426,11 +422,28 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
426 return rc; 422 return rc;
427} 423}
428 424
425static unsigned long
426ieee80211_scan_get_channel_time(struct ieee80211_channel *chan)
427{
428 /*
429 * TODO: channel switching also consumes quite some time,
430 * add that delay as well to get a better estimation
431 */
432 if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN)
433 return IEEE80211_PASSIVE_CHANNEL_TIME;
434 return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME;
435}
436
429static int ieee80211_scan_state_decision(struct ieee80211_local *local, 437static int ieee80211_scan_state_decision(struct ieee80211_local *local,
430 unsigned long *next_delay) 438 unsigned long *next_delay)
431{ 439{
432 bool associated = false; 440 bool associated = false;
441 bool tx_empty = true;
442 bool bad_latency;
443 bool listen_int_exceeded;
444 unsigned long min_beacon_int = 0;
433 struct ieee80211_sub_if_data *sdata; 445 struct ieee80211_sub_if_data *sdata;
446 struct ieee80211_channel *next_chan;
434 447
435 /* if no more bands/channels left, complete scan and advance to the idle state */ 448 /* if no more bands/channels left, complete scan and advance to the idle state */
436 if (local->scan_channel_idx >= local->scan_req->n_channels) { 449 if (local->scan_channel_idx >= local->scan_req->n_channels) {
@@ -438,7 +451,11 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
438 return 1; 451 return 1;
439 } 452 }
440 453
441 /* check if at least one STA interface is associated */ 454 /*
455 * check if at least one STA interface is associated,
456 * check if at least one STA interface has pending tx frames
457 * and grab the lowest used beacon interval
458 */
442 mutex_lock(&local->iflist_mtx); 459 mutex_lock(&local->iflist_mtx);
443 list_for_each_entry(sdata, &local->interfaces, list) { 460 list_for_each_entry(sdata, &local->interfaces, list) {
444 if (!ieee80211_sdata_running(sdata)) 461 if (!ieee80211_sdata_running(sdata))
@@ -447,7 +464,16 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
447 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 464 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
448 if (sdata->u.mgd.associated) { 465 if (sdata->u.mgd.associated) {
449 associated = true; 466 associated = true;
450 break; 467
468 if (sdata->vif.bss_conf.beacon_int <
469 min_beacon_int || min_beacon_int == 0)
470 min_beacon_int =
471 sdata->vif.bss_conf.beacon_int;
472
473 if (!qdisc_all_tx_empty(sdata->dev)) {
474 tx_empty = false;
475 break;
476 }
451 } 477 }
452 } 478 }
453 } 479 }
@@ -456,11 +482,34 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
456 if (local->scan_channel) { 482 if (local->scan_channel) {
457 /* 483 /*
458 * we're currently scanning a different channel, let's 484 * we're currently scanning a different channel, let's
459 * switch back to the operating channel now if at least 485 * see if we can scan another channel without interfering
460 * one interface is associated. Otherwise just scan the 486 * with the current traffic situation.
461 * next channel 487 *
488 * Since we don't know if the AP has pending frames for us
489 * we can only check for our tx queues and use the current
490 * pm_qos requirements for rx. Hence, if no tx traffic occurs
491 * at all we will scan as many channels in a row as the pm_qos
492 * latency allows us to. Additionally we also check for the
493 * currently negotiated listen interval to prevent losing
494 * frames unnecessarily.
495 *
496 * Otherwise switch back to the operating channel.
462 */ 497 */
463 if (associated) 498 next_chan = local->scan_req->channels[local->scan_channel_idx];
499
500 bad_latency = time_after(jiffies +
501 ieee80211_scan_get_channel_time(next_chan),
502 local->leave_oper_channel_time +
503 usecs_to_jiffies(pm_qos_request(PM_QOS_NETWORK_LATENCY)));
504
505 listen_int_exceeded = time_after(jiffies +
506 ieee80211_scan_get_channel_time(next_chan),
507 local->leave_oper_channel_time +
508 usecs_to_jiffies(min_beacon_int * 1024) *
509 local->hw.conf.listen_interval);
510
511 if (associated && ( !tx_empty || bad_latency ||
512 listen_int_exceeded))
464 local->next_scan_state = SCAN_ENTER_OPER_CHANNEL; 513 local->next_scan_state = SCAN_ENTER_OPER_CHANNEL;
465 else 514 else
466 local->next_scan_state = SCAN_SET_CHANNEL; 515 local->next_scan_state = SCAN_SET_CHANNEL;
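Annotation: the decision point compares "now plus the time the next channel will cost" against "when we left the operating channel plus the allowed budget". Two budgets apply: the system-wide pm_qos network-latency request (in µs) and the negotiated listen interval expressed in beacon intervals, where beacon_int is in TUs and 1 TU = 1024 µs, hence the * 1024. A sketch with hypothetical figures:

    /* Hypothetical numbers: 20 ms already spent off-channel, the
     * next channel would cost ~60 ms, pm_qos allows 50 ms. */
    unsigned long next_cost = msecs_to_jiffies(60);
    unsigned long budget    = usecs_to_jiffies(50 * 1000);

    /* 20 + 60 > 50, so bad_latency is true and the scan returns
     * to the operating channel before continuing: */
    bool bad_latency = time_after(jiffies + next_cost,
                                  local->leave_oper_channel_time + budget);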
@@ -492,6 +541,9 @@ static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *loca
492 else 541 else
493 *next_delay = HZ / 10; 542 *next_delay = HZ / 10;
494 543
544 /* remember when we left the operating channel */
545 local->leave_oper_channel_time = jiffies;
546
495 /* advance to the next channel to be scanned */ 547 /* advance to the next channel to be scanned */
496 local->next_scan_state = SCAN_SET_CHANNEL; 548 local->next_scan_state = SCAN_SET_CHANNEL;
497} 549}
@@ -594,7 +646,7 @@ void ieee80211_scan_work(struct work_struct *work)
594 } 646 }
595 647
596 if (local->hw_scan_req) { 648 if (local->hw_scan_req) {
597 int rc = drv_hw_scan(local, local->hw_scan_req); 649 int rc = drv_hw_scan(local, sdata, local->hw_scan_req);
598 mutex_unlock(&local->scan_mtx); 650 mutex_unlock(&local->scan_mtx);
599 if (rc) 651 if (rc)
600 ieee80211_scan_completed(&local->hw, true); 652 ieee80211_scan_completed(&local->hw, true);
@@ -667,10 +719,12 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
667} 719}
668 720
669int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, 721int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
670 const u8 *ssid, u8 ssid_len) 722 const u8 *ssid, u8 ssid_len,
723 struct ieee80211_channel *chan)
671{ 724{
672 struct ieee80211_local *local = sdata->local; 725 struct ieee80211_local *local = sdata->local;
673 int ret = -EBUSY; 726 int ret = -EBUSY;
727 enum ieee80211_band band;
674 728
675 mutex_lock(&local->scan_mtx); 729 mutex_lock(&local->scan_mtx);
676 730
@@ -678,6 +732,30 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
678 if (local->scan_req) 732 if (local->scan_req)
679 goto unlock; 733 goto unlock;
680 734
735 /* fill internal scan request */
736 if (!chan) {
737 int i, nchan = 0;
738
739 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
740 if (!local->hw.wiphy->bands[band])
741 continue;
742 for (i = 0;
743 i < local->hw.wiphy->bands[band]->n_channels;
744 i++) {
745 local->int_scan_req->channels[nchan] =
746 &local->hw.wiphy->bands[band]->channels[i];
747 nchan++;
748 }
749 }
750
751 local->int_scan_req->n_channels = nchan;
752 } else {
753 local->int_scan_req->channels[0] = chan;
754 local->int_scan_req->n_channels = 1;
755 }
756
757 local->int_scan_req->ssids = &local->scan_ssid;
758 local->int_scan_req->n_ssids = 1;
681 memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); 759 memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN);
682 local->int_scan_req->ssids[0].ssid_len = ssid_len; 760 local->int_scan_req->ssids[0].ssid_len = ssid_len;
683 761
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index fb12cec4d333..6d86f0c1ad04 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -235,6 +235,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
235 spin_lock_init(&sta->lock); 235 spin_lock_init(&sta->lock);
236 spin_lock_init(&sta->flaglock); 236 spin_lock_init(&sta->flaglock);
237 INIT_WORK(&sta->drv_unblock_wk, sta_unblock); 237 INIT_WORK(&sta->drv_unblock_wk, sta_unblock);
238 INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
239 mutex_init(&sta->ampdu_mlme.mtx);
238 240
239 memcpy(sta->sta.addr, addr, ETH_ALEN); 241 memcpy(sta->sta.addr, addr, ETH_ALEN);
240 sta->local = local; 242 sta->local = local;
@@ -246,23 +248,18 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
246 } 248 }
247 249
248 for (i = 0; i < STA_TID_NUM; i++) { 250 for (i = 0; i < STA_TID_NUM; i++) {
249 /* timer_to_tid must be initialized with identity mapping to 251 /*
250 * enable session_timer's data differentiation. refer to 252 * timer_to_tid must be initialized with identity mapping
251 * sta_rx_agg_session_timer_expired for useage */ 253 * to enable session_timer's data differentiation. See
254 * sta_rx_agg_session_timer_expired for usage.
255 */
252 sta->timer_to_tid[i] = i; 256 sta->timer_to_tid[i] = i;
253 /* rx */
254 sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE;
255 sta->ampdu_mlme.tid_rx[i] = NULL;
256 /* tx */
257 sta->ampdu_mlme.tid_state_tx[i] = HT_AGG_STATE_IDLE;
258 sta->ampdu_mlme.tid_tx[i] = NULL;
259 sta->ampdu_mlme.addba_req_num[i] = 0;
260 } 257 }
261 skb_queue_head_init(&sta->ps_tx_buf); 258 skb_queue_head_init(&sta->ps_tx_buf);
262 skb_queue_head_init(&sta->tx_filtered); 259 skb_queue_head_init(&sta->tx_filtered);
263 260
264 for (i = 0; i < NUM_RX_DATA_QUEUES; i++) 261 for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
265 sta->last_seq_ctrl[i] = cpu_to_le16(USHORT_MAX); 262 sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
266 263
267#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 264#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
268 printk(KERN_DEBUG "%s: Allocated STA %pM\n", 265 printk(KERN_DEBUG "%s: Allocated STA %pM\n",
@@ -578,7 +575,7 @@ static int sta_info_buffer_expired(struct sta_info *sta,
578} 575}
579 576
580 577
581static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, 578static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
582 struct sta_info *sta) 579 struct sta_info *sta)
583{ 580{
584 unsigned long flags; 581 unsigned long flags;
@@ -586,7 +583,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
586 struct ieee80211_sub_if_data *sdata; 583 struct ieee80211_sub_if_data *sdata;
587 584
588 if (skb_queue_empty(&sta->ps_tx_buf)) 585 if (skb_queue_empty(&sta->ps_tx_buf))
589 return; 586 return false;
590 587
591 for (;;) { 588 for (;;) {
592 spin_lock_irqsave(&sta->ps_tx_buf.lock, flags); 589 spin_lock_irqsave(&sta->ps_tx_buf.lock, flags);
@@ -611,6 +608,8 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
611 if (skb_queue_empty(&sta->ps_tx_buf)) 608 if (skb_queue_empty(&sta->ps_tx_buf))
612 sta_info_clear_tim_bit(sta); 609 sta_info_clear_tim_bit(sta);
613 } 610 }
611
612 return true;
614} 613}
615 614
616static int __must_check __sta_info_destroy(struct sta_info *sta) 615static int __must_check __sta_info_destroy(struct sta_info *sta)
@@ -619,7 +618,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
619 struct ieee80211_sub_if_data *sdata; 618 struct ieee80211_sub_if_data *sdata;
620 struct sk_buff *skb; 619 struct sk_buff *skb;
621 unsigned long flags; 620 unsigned long flags;
622 int ret, i; 621 int ret;
623 622
624 might_sleep(); 623 might_sleep();
625 624
@@ -629,6 +628,15 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
629 local = sta->local; 628 local = sta->local;
630 sdata = sta->sdata; 629 sdata = sta->sdata;
631 630
631 /*
632 * Before removing the station from the driver and
633 * rate control, it might still start new aggregation
634 * sessions -- block that to make sure the tear-down
635 * will be sufficient.
636 */
637 set_sta_flags(sta, WLAN_STA_BLOCK_BA);
638 ieee80211_sta_tear_down_BA_sessions(sta);
639
632 spin_lock_irqsave(&local->sta_lock, flags); 640 spin_lock_irqsave(&local->sta_lock, flags);
633 ret = sta_info_hash_del(local, sta); 641 ret = sta_info_hash_del(local, sta);
634 /* this might still be the pending list ... which is fine */ 642 /* this might still be the pending list ... which is fine */
@@ -639,18 +647,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
639 return ret; 647 return ret;
640 648
641 if (sta->key) { 649 if (sta->key) {
642 ieee80211_key_free(sta->key); 650 ieee80211_key_free(local, sta->key);
643 /*
644 * We have only unlinked the key, and actually destroying it
645 * may mean it is removed from hardware which requires that
646 * the key->sta pointer is still valid, so flush the key todo
647 * list here.
648 *
649 * ieee80211_key_todo() will synchronize_rcu() so after this
650 * nothing can reference this sta struct any more.
651 */
652 ieee80211_key_todo();
653
654 WARN_ON(sta->key); 651 WARN_ON(sta->key);
655 } 652 }
656 653
@@ -679,11 +676,17 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
679 sdata = sta->sdata; 676 sdata = sta->sdata;
680 } 677 }
681 678
679 /*
680 * At this point, after we wait for an RCU grace period,
681 * neither mac80211 nor the driver can reference this
682 * sta struct any more except by still existing timers
683 * associated with this station that we clean up below.
684 */
685 synchronize_rcu();
686
682#ifdef CONFIG_MAC80211_MESH 687#ifdef CONFIG_MAC80211_MESH
683 if (ieee80211_vif_is_mesh(&sdata->vif)) { 688 if (ieee80211_vif_is_mesh(&sdata->vif))
684 mesh_accept_plinks_update(sdata); 689 mesh_accept_plinks_update(sdata);
685 del_timer(&sta->plink_timer);
686 }
687#endif 690#endif
688 691
689#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 692#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
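Annotation: the destroy path is reordered into the canonical RCU removal sequence: block new BA sessions and tear existing ones down, unlink the station from hash and list, then a single synchronize_rcu() guarantees no reader still holds the pointer, after which leftover timers can be stopped and memory freed. The skeleton of that order, with a placeholder timer name:

    /* 1. unpublish -- after this, new lookups cannot find it */
    sta_info_hash_del(local, sta);
    list_del(&sta->list);

    /* 2. wait out every reader that might already have found it */
    synchronize_rcu();

    /* 3. private now: safe to kill timers and free
     *    (some_timer is a placeholder name) */
    del_timer_sync(&sta->some_timer);
    __sta_info_free(local, sta);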
@@ -710,50 +713,6 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
710 while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) 713 while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL)
711 dev_kfree_skb_any(skb); 714 dev_kfree_skb_any(skb);
712 715
713 for (i = 0; i < STA_TID_NUM; i++) {
714 struct tid_ampdu_rx *tid_rx;
715 struct tid_ampdu_tx *tid_tx;
716
717 spin_lock_bh(&sta->lock);
718 tid_rx = sta->ampdu_mlme.tid_rx[i];
719 /* Make sure timer won't free the tid_rx struct, see below */
720 if (tid_rx)
721 tid_rx->shutdown = true;
722
723 spin_unlock_bh(&sta->lock);
724
725 /*
726 * Outside spinlock - shutdown is true now so that the timer
727 * won't free tid_rx, we have to do that now. Can't let the
728 * timer do it because we have to sync the timer outside the
729 * lock that it takes itself.
730 */
731 if (tid_rx) {
732 del_timer_sync(&tid_rx->session_timer);
733 kfree(tid_rx);
734 }
735
736 /*
737 * No need to do such complications for TX agg sessions, the
738 * path leading to freeing the tid_tx struct goes via a call
739 * from the driver, and thus needs to look up the sta struct
740 * again, which cannot be found when we get here. Hence, we
741 * just need to delete the timer and free the aggregation
742 * info; we won't be telling the peer about it then but that
743 * doesn't matter if we're not talking to it again anyway.
744 */
745 tid_tx = sta->ampdu_mlme.tid_tx[i];
746 if (tid_tx) {
747 del_timer_sync(&tid_tx->addba_resp_timer);
748 /*
749 * STA removed while aggregation session being
750 * started? Bit odd, but purge frames anyway.
751 */
752 skb_queue_purge(&tid_tx->pending);
753 kfree(tid_tx);
754 }
755 }
756
757 __sta_info_free(local, sta); 716 __sta_info_free(local, sta);
758 717
759 return 0; 718 return 0;
@@ -790,15 +749,20 @@ static void sta_info_cleanup(unsigned long data)
790{ 749{
791 struct ieee80211_local *local = (struct ieee80211_local *) data; 750 struct ieee80211_local *local = (struct ieee80211_local *) data;
792 struct sta_info *sta; 751 struct sta_info *sta;
752 bool timer_needed = false;
793 753
794 rcu_read_lock(); 754 rcu_read_lock();
795 list_for_each_entry_rcu(sta, &local->sta_list, list) 755 list_for_each_entry_rcu(sta, &local->sta_list, list)
796 sta_info_cleanup_expire_buffered(local, sta); 756 if (sta_info_cleanup_expire_buffered(local, sta))
757 timer_needed = true;
797 rcu_read_unlock(); 758 rcu_read_unlock();
798 759
799 if (local->quiescing) 760 if (local->quiescing)
800 return; 761 return;
801 762
763 if (!timer_needed)
764 return;
765
802 local->sta_cleanup.expires = 766 local->sta_cleanup.expires =
803 round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL); 767 round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL);
804 add_timer(&local->sta_cleanup); 768 add_timer(&local->sta_cleanup);
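Annotation: sta_info_cleanup_expire_buffered() now reports whether any station still had PS-buffered frames, and the cleanup timer only rearms itself while that is true, so an idle system stops taking this periodic wakeup. The implicit contract is that whoever buffers a new frame must restart the timer. A compact sketch of the pattern (had_work() is a placeholder):

    /* Self-quiescing periodic timer: the callback re-arms only
     * while work remains; the producer side must mod_timer() when
     * it creates new work. */
    static void cleanup_cb(unsigned long data)
    {
            struct ieee80211_local *local = (void *)data;

            if (!had_work(local))
                    return;         /* let the timer die out */

            mod_timer(&local->sta_cleanup,
                      round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL));
    }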
@@ -883,8 +847,12 @@ struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw,
883 struct sta_info *sta, *nxt; 847 struct sta_info *sta, *nxt;
884 848
885 /* Just return a random station ... first in list ... */ 849 /* Just return a random station ... first in list ... */
886 for_each_sta_info(hw_to_local(hw), addr, sta, nxt) 850 for_each_sta_info(hw_to_local(hw), addr, sta, nxt) {
851 if (!sta->uploaded)
852 return NULL;
887 return &sta->sta; 853 return &sta->sta;
854 }
855
888 return NULL; 856 return NULL;
889} 857}
890EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); 858EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw);
@@ -892,14 +860,19 @@ EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw);
892struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, 860struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif,
893 const u8 *addr) 861 const u8 *addr)
894{ 862{
895 struct ieee80211_sub_if_data *sdata; 863 struct sta_info *sta;
896 864
897 if (!vif) 865 if (!vif)
898 return NULL; 866 return NULL;
899 867
900 sdata = vif_to_sdata(vif); 868 sta = sta_info_get_bss(vif_to_sdata(vif), addr);
869 if (!sta)
870 return NULL;
871
872 if (!sta->uploaded)
873 return NULL;
901 874
902 return ieee80211_find_sta_by_hw(&sdata->local->hw, addr); 875 return &sta->sta;
903} 876}
904EXPORT_SYMBOL(ieee80211_find_sta); 877EXPORT_SYMBOL(ieee80211_find_sta);
905 878
@@ -992,6 +965,8 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
992{ 965{
993 struct sta_info *sta = container_of(pubsta, struct sta_info, sta); 966 struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
994 967
968 trace_api_sta_block_awake(sta->local, pubsta, block);
969
995 if (block) 970 if (block)
996 set_sta_flags(sta, WLAN_STA_PS_DRIVER); 971 set_sta_flags(sta, WLAN_STA_PS_DRIVER);
997 else 972 else
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 822d84522937..54262e72376d 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -35,16 +35,13 @@
35 * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next 35 * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next
36 * frame to this station is transmitted. 36 * frame to this station is transmitted.
37 * @WLAN_STA_MFP: Management frame protection is used with this STA. 37 * @WLAN_STA_MFP: Management frame protection is used with this STA.
38 * @WLAN_STA_SUSPEND: Set/cleared during a suspend/resume cycle. 38 * @WLAN_STA_BLOCK_BA: Used to deny ADDBA requests (both TX and RX)
39 * Used to deny ADDBA requests (both TX and RX). 39 * during suspend/resume and station removal.
40 * @WLAN_STA_PS_DRIVER: driver requires keeping this station in 40 * @WLAN_STA_PS_DRIVER: driver requires keeping this station in
41 * power-save mode logically to flush frames that might still 41 * power-save mode logically to flush frames that might still
42 * be in the queues 42 * be in the queues
43 * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping 43 * @WLAN_STA_PSPOLL: Station sent PS-poll while driver was keeping
44 * station in power-save mode, reply when the driver unblocks. 44 * station in power-save mode, reply when the driver unblocks.
45 * @WLAN_STA_DISASSOC: Disassociation in progress.
46 * This is used to reject TX BA session requests when disassociation
47 * is in progress.
48 */ 45 */
49enum ieee80211_sta_info_flags { 46enum ieee80211_sta_info_flags {
50 WLAN_STA_AUTH = 1<<0, 47 WLAN_STA_AUTH = 1<<0,
@@ -57,41 +54,47 @@ enum ieee80211_sta_info_flags {
57 WLAN_STA_WDS = 1<<7, 54 WLAN_STA_WDS = 1<<7,
58 WLAN_STA_CLEAR_PS_FILT = 1<<9, 55 WLAN_STA_CLEAR_PS_FILT = 1<<9,
59 WLAN_STA_MFP = 1<<10, 56 WLAN_STA_MFP = 1<<10,
60 WLAN_STA_SUSPEND = 1<<11, 57 WLAN_STA_BLOCK_BA = 1<<11,
61 WLAN_STA_PS_DRIVER = 1<<12, 58 WLAN_STA_PS_DRIVER = 1<<12,
62 WLAN_STA_PSPOLL = 1<<13, 59 WLAN_STA_PSPOLL = 1<<13,
63 WLAN_STA_DISASSOC = 1<<14,
64}; 60};
65 61
66#define STA_TID_NUM 16 62#define STA_TID_NUM 16
67#define ADDBA_RESP_INTERVAL HZ 63#define ADDBA_RESP_INTERVAL HZ
68#define HT_AGG_MAX_RETRIES (0x3) 64#define HT_AGG_MAX_RETRIES 0x3
69 65
70#define HT_AGG_STATE_INITIATOR_SHIFT (4) 66#define HT_AGG_STATE_DRV_READY 0
71 67#define HT_AGG_STATE_RESPONSE_RECEIVED 1
72#define HT_ADDBA_REQUESTED_MSK BIT(0) 68#define HT_AGG_STATE_OPERATIONAL 2
73#define HT_ADDBA_DRV_READY_MSK BIT(1) 69#define HT_AGG_STATE_STOPPING 3
74#define HT_ADDBA_RECEIVED_MSK BIT(2) 70#define HT_AGG_STATE_WANT_START 4
75#define HT_AGG_STATE_REQ_STOP_BA_MSK BIT(3) 71#define HT_AGG_STATE_WANT_STOP 5
76#define HT_AGG_STATE_INITIATOR_MSK BIT(HT_AGG_STATE_INITIATOR_SHIFT)
77#define HT_AGG_STATE_IDLE (0x0)
78#define HT_AGG_STATE_OPERATIONAL (HT_ADDBA_REQUESTED_MSK | \
79 HT_ADDBA_DRV_READY_MSK | \
80 HT_ADDBA_RECEIVED_MSK)
81 72
82/** 73/**
83 * struct tid_ampdu_tx - TID aggregation information (Tx). 74 * struct tid_ampdu_tx - TID aggregation information (Tx).
84 * 75 *
76 * @rcu_head: rcu head for freeing structure
85 * @addba_resp_timer: timer for peer's response to addba request 77 * @addba_resp_timer: timer for peer's response to addba request
86 * @pending: pending frames queue -- use sta's spinlock to protect 78 * @pending: pending frames queue -- use sta's spinlock to protect
87 * @ssn: Starting Sequence Number expected to be aggregated.
88 * @dialog_token: dialog token for aggregation session 79 * @dialog_token: dialog token for aggregation session
80 * @state: session state (see above)
81 * @stop_initiator: initiator of a session stop
82 *
83 * This structure is protected by RCU and the per-station
84 * spinlock. Assignments to the array holding it must hold
85 * the spinlock, only the TX path can access it under RCU
86 * lock-free if, and only if, the state has the flag
87 * %HT_AGG_STATE_OPERATIONAL set. Otherwise, the TX path
88 * must also acquire the spinlock and re-check the state,
89 * see comments in the tx code touching it.
89 */ 90 */
90struct tid_ampdu_tx { 91struct tid_ampdu_tx {
92 struct rcu_head rcu_head;
91 struct timer_list addba_resp_timer; 93 struct timer_list addba_resp_timer;
92 struct sk_buff_head pending; 94 struct sk_buff_head pending;
93 u16 ssn; 95 unsigned long state;
94 u8 dialog_token; 96 u8 dialog_token;
97 u8 stop_initiator;
95}; 98};
96 99
97/** 100/**
@@ -106,9 +109,18 @@ struct tid_ampdu_tx {
106 * @buf_size: buffer size for incoming A-MPDUs 109 * @buf_size: buffer size for incoming A-MPDUs
107 * @timeout: reset timer value (in TUs). 110 * @timeout: reset timer value (in TUs).
108 * @dialog_token: dialog token for aggregation session 111 * @dialog_token: dialog token for aggregation session
109 * @shutdown: this session is being shut down due to STA removal 112 * @rcu_head: RCU head used for freeing this struct
113 *
114 * This structure is protected by RCU and the per-station
115 * spinlock. Assignments to the array holding it must hold
116 * the spinlock, only the RX path can access it under RCU
117 * lock-free. The RX path, since it is single-threaded,
118 * can even modify the structure without locking since the
119 * only other modifications to it are done when the struct
120 * can not yet or no longer be found by the RX path.
110 */ 121 */
111struct tid_ampdu_rx { 122struct tid_ampdu_rx {
123 struct rcu_head rcu_head;
112 struct sk_buff **reorder_buf; 124 struct sk_buff **reorder_buf;
113 unsigned long *reorder_time; 125 unsigned long *reorder_time;
114 struct timer_list session_timer; 126 struct timer_list session_timer;
@@ -118,10 +130,35 @@ struct tid_ampdu_rx {
118 u16 buf_size; 130 u16 buf_size;
119 u16 timeout; 131 u16 timeout;
120 u8 dialog_token; 132 u8 dialog_token;
121 bool shutdown;
122}; 133};
123 134
124/** 135/**
136 * struct sta_ampdu_mlme - STA aggregation information.
137 *
138 * @tid_rx: aggregation info for Rx per TID -- RCU protected
139 * @tid_tx: aggregation info for Tx per TID
140 * @addba_req_num: number of times addBA request has been sent.
141 * @dialog_token_allocator: dialog token enumerator for each new session;
142 * @work: work struct for starting/stopping aggregation
143 * @tid_rx_timer_expired: bitmap indicating on which TIDs the
144 * RX timer expired until the work for it runs
145 * @mtx: mutex to protect all TX data (except non-NULL assignments
146 * to tid_tx[idx], which are protected by the sta spinlock)
147 */
148struct sta_ampdu_mlme {
149 struct mutex mtx;
150 /* rx */
151 struct tid_ampdu_rx *tid_rx[STA_TID_NUM];
152 unsigned long tid_rx_timer_expired[BITS_TO_LONGS(STA_TID_NUM)];
153 /* tx */
154 struct work_struct work;
155 struct tid_ampdu_tx *tid_tx[STA_TID_NUM];
156 u8 addba_req_num[STA_TID_NUM];
157 u8 dialog_token_allocator;
158};
159
160
161/**
125 * enum plink_state - state of a mesh peer link finite state machine 162 * enum plink_state - state of a mesh peer link finite state machine
126 * 163 *
 127 * @PLINK_LISTEN: initial state, considered the implicit state of non-existent 164
@@ -145,28 +182,6 @@ enum plink_state {
145}; 182};
146 183
147/** 184/**
148 * struct sta_ampdu_mlme - STA aggregation information.
149 *
150 * @tid_state_rx: TID's state in Rx session state machine.
151 * @tid_rx: aggregation info for Rx per TID
152 * @tid_state_tx: TID's state in Tx session state machine.
153 * @tid_tx: aggregation info for Tx per TID
154 * @addba_req_num: number of times addBA request has been sent.
155 * @dialog_token_allocator: dialog token enumerator for each new session;
156 */
157struct sta_ampdu_mlme {
158 /* rx */
159 u8 tid_state_rx[STA_TID_NUM];
160 struct tid_ampdu_rx *tid_rx[STA_TID_NUM];
161 /* tx */
162 u8 tid_state_tx[STA_TID_NUM];
163 struct tid_ampdu_tx *tid_tx[STA_TID_NUM];
164 u8 addba_req_num[STA_TID_NUM];
165 u8 dialog_token_allocator;
166};
167
168
169/**
170 * struct sta_info - STA information 185 * struct sta_info - STA information
171 * 186 *
172 * This structure collects information about a station that 187 * This structure collects information about a station that
@@ -200,7 +215,6 @@ struct sta_ampdu_mlme {
200 * @rx_fragments: number of received MPDUs 215 * @rx_fragments: number of received MPDUs
201 * @rx_dropped: number of dropped MPDUs from this STA 216 * @rx_dropped: number of dropped MPDUs from this STA
202 * @last_signal: signal of last received frame from this STA 217 * @last_signal: signal of last received frame from this STA
203 * @last_noise: noise of last received frame from this STA
204 * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) 218 * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
205 * @tx_filtered_count: number of frames the hardware filtered for this STA 219 * @tx_filtered_count: number of frames the hardware filtered for this STA
206 * @tx_retry_failed: number of frames that failed retry 220 * @tx_retry_failed: number of frames that failed retry
@@ -267,7 +281,6 @@ struct sta_info {
267 unsigned long rx_fragments; 281 unsigned long rx_fragments;
268 unsigned long rx_dropped; 282 unsigned long rx_dropped;
269 int last_signal; 283 int last_signal;
270 int last_noise;
271 __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; 284 __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
272 285
273 /* Updated from TX status path only, no locking requirements */ 286 /* Updated from TX status path only, no locking requirements */
@@ -414,20 +427,20 @@ void for_each_sta_info_type_check(struct ieee80211_local *local,
414{ 427{
415} 428}
416 429
417#define for_each_sta_info(local, _addr, sta, nxt) \ 430#define for_each_sta_info(local, _addr, _sta, nxt) \
418 for ( /* initialise loop */ \ 431 for ( /* initialise loop */ \
419 sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\ 432 _sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\
420 nxt = sta ? rcu_dereference(sta->hnext) : NULL; \ 433 nxt = _sta ? rcu_dereference(_sta->hnext) : NULL; \
421 /* typecheck */ \ 434 /* typecheck */ \
422 for_each_sta_info_type_check(local, (_addr), sta, nxt), \ 435 for_each_sta_info_type_check(local, (_addr), _sta, nxt),\
423 /* continue condition */ \ 436 /* continue condition */ \
424 sta; \ 437 _sta; \
425 /* advance loop */ \ 438 /* advance loop */ \
426 sta = nxt, \ 439 _sta = nxt, \
427 nxt = sta ? rcu_dereference(sta->hnext) : NULL \ 440 nxt = _sta ? rcu_dereference(_sta->hnext) : NULL \
428 ) \ 441 ) \
429 /* compare address and run code only if it matches */ \ 442 /* compare address and run code only if it matches */ \
430 if (memcmp(sta->sta.addr, (_addr), ETH_ALEN) == 0) 443 if (memcmp(_sta->sta.addr, (_addr), ETH_ALEN) == 0)
431 444
432/* 445/*
433 * Get STA info by index, BROKEN! 446 * Get STA info by index, BROKEN!
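Annotation: the for_each_sta_info() rename is preprocessor hygiene: the macro's iterator arguments become _sta so that an argument expression which itself mentions a variable called sta cannot collide with the macro's internal uses. The same idiom appears in the classic type-safe MAX():

    /* Underscore-prefixed temporaries keep the expansion from
     * capturing identifiers the caller happens to use: */
    #define MAX(a, b) ({                            \
            typeof(a) _max_a = (a);                 \
            typeof(b) _max_b = (b);                 \
            _max_a > _max_b ? _max_a : _max_b; })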
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 56d5b9a6ec5b..10caec5ea8fa 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -47,7 +47,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
47 /* 47 /*
48 * This skb 'survived' a round-trip through the driver, and 48 * This skb 'survived' a round-trip through the driver, and
49 * hopefully the driver didn't mangle it too badly. However, 49 * hopefully the driver didn't mangle it too badly. However,
50 * we can definitely not rely on the the control information 50 * we can definitely not rely on the control information
51 * being correct. Clear it so we don't get junk there, and 51 * being correct. Clear it so we don't get junk there, and
52 * indicate that it needs new processing, but must not be 52 * indicate that it needs new processing, but must not be
53 * modified/encrypted again. 53 * modified/encrypted again.
@@ -171,13 +171,16 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
171 struct net_device *prev_dev = NULL; 171 struct net_device *prev_dev = NULL;
172 struct sta_info *sta, *tmp; 172 struct sta_info *sta, *tmp;
173 int retry_count = -1, i; 173 int retry_count = -1, i;
174 bool injected; 174 int rates_idx = -1;
175 bool send_to_cooked;
175 176
176 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { 177 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
177 /* the HW cannot have attempted that rate */ 178 /* the HW cannot have attempted that rate */
178 if (i >= hw->max_rates) { 179 if (i >= hw->max_rates) {
179 info->status.rates[i].idx = -1; 180 info->status.rates[i].idx = -1;
180 info->status.rates[i].count = 0; 181 info->status.rates[i].count = 0;
182 } else if (info->status.rates[i].idx >= 0) {
183 rates_idx = i;
181 } 184 }
182 185
183 retry_count += info->status.rates[i].count; 186 retry_count += info->status.rates[i].count;
@@ -206,6 +209,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
206 return; 209 return;
207 } 210 }
208 211
212 if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) &&
213 (rates_idx != -1))
214 sta->last_tx_rate = info->status.rates[rates_idx];
215
209 if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && 216 if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) &&
210 (ieee80211_is_data_qos(fc))) { 217 (ieee80211_is_data_qos(fc))) {
211 u16 tid, ssn; 218 u16 tid, ssn;
@@ -296,11 +303,15 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
296 /* this was a transmitted frame, but now we want to reuse it */ 303 /* this was a transmitted frame, but now we want to reuse it */
297 skb_orphan(skb); 304 skb_orphan(skb);
298 305
306 /* Need to make a copy before skb->cb gets cleared */
307 send_to_cooked = !!(info->flags & IEEE80211_TX_CTL_INJECTED) ||
308 (type != IEEE80211_FTYPE_DATA);
309
299 /* 310 /*
300 * This is a bit racy but we can avoid a lot of work 311 * This is a bit racy but we can avoid a lot of work
301 * with this test... 312 * with this test...
302 */ 313 */
303 if (!local->monitors && !local->cooked_mntrs) { 314 if (!local->monitors && (!send_to_cooked || !local->cooked_mntrs)) {
304 dev_kfree_skb(skb); 315 dev_kfree_skb(skb);
305 return; 316 return;
306 } 317 }
@@ -345,9 +356,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
345 /* for now report the total retry_count */ 356 /* for now report the total retry_count */
346 rthdr->data_retries = retry_count; 357 rthdr->data_retries = retry_count;
347 358
348 /* Need to make a copy before skb->cb gets cleared */
349 injected = !!(info->flags & IEEE80211_TX_CTL_INJECTED);
350
351 /* XXX: is this sufficient for BPF? */ 359 /* XXX: is this sufficient for BPF? */
352 skb_set_mac_header(skb, 0); 360 skb_set_mac_header(skb, 0);
353 skb->ip_summed = CHECKSUM_UNNECESSARY; 361 skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -362,15 +370,14 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
362 continue; 370 continue;
363 371
364 if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) && 372 if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) &&
365 !injected && 373 !send_to_cooked)
366 (type == IEEE80211_FTYPE_DATA))
367 continue; 374 continue;
368 375
369 if (prev_dev) { 376 if (prev_dev) {
370 skb2 = skb_clone(skb, GFP_ATOMIC); 377 skb2 = skb_clone(skb, GFP_ATOMIC);
371 if (skb2) { 378 if (skb2) {
372 skb2->dev = prev_dev; 379 skb2->dev = prev_dev;
373 netif_rx(skb2); 380 netif_receive_skb(skb2);
374 } 381 }
375 } 382 }
376 383
@@ -379,7 +386,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
379 } 386 }
380 if (prev_dev) { 387 if (prev_dev) {
381 skb->dev = prev_dev; 388 skb->dev = prev_dev;
382 netif_rx(skb); 389 netif_receive_skb(skb);
383 skb = NULL; 390 skb = NULL;
384 } 391 }
385 rcu_read_unlock(); 392 rcu_read_unlock();
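
The rates_idx bookkeeping added above remembers the last slot the hardware actually attempted, so its rate can be stored as sta->last_tx_rate when the device does its own rate control. A small self-contained sketch of that scan, with made-up values standing in for struct ieee80211_tx_info:

    #include <stdio.h>

    #define MAX_RATES 5

    struct tx_rate { int idx; int count; };

    int main(void)
    {
            struct tx_rate rates[MAX_RATES] = {
                    { 4, 2 }, { 2, 3 }, { -1, 0 }, { -1, 0 }, { -1, 0 }
            };
            int hw_max_rates = 2, rates_idx = -1, retry_count = -1, i;

            for (i = 0; i < MAX_RATES; i++) {
                    if (i >= hw_max_rates) {
                            /* the HW cannot have attempted that rate */
                            rates[i].idx = -1;
                            rates[i].count = 0;
                    } else if (rates[i].idx >= 0) {
                            rates_idx = i;  /* last valid entry wins */
                    }
                    retry_count += rates[i].count;
            }

            printf("final rate slot %d, %d retries\n", rates_idx, retry_count);
            return 0;
    }
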
diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c
index 7ef491e9d66d..e840c9cd46db 100644
--- a/net/mac80211/tkip.c
+++ b/net/mac80211/tkip.c
@@ -202,9 +202,9 @@ EXPORT_SYMBOL(ieee80211_get_tkip_key);
202 * @payload_len is the length of payload (_not_ including IV/ICV length). 202 * @payload_len is the length of payload (_not_ including IV/ICV length).
203 * @ta is the transmitter address. 203 * @ta is the transmitter address.
204 */ 204 */
205void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, 205int ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
206 struct ieee80211_key *key, 206 struct ieee80211_key *key,
207 u8 *pos, size_t payload_len, u8 *ta) 207 u8 *pos, size_t payload_len, u8 *ta)
208{ 208{
209 u8 rc4key[16]; 209 u8 rc4key[16];
210 struct tkip_ctx *ctx = &key->u.tkip.tx; 210 struct tkip_ctx *ctx = &key->u.tkip.tx;
@@ -216,7 +216,7 @@ void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
216 216
217 tkip_mixing_phase2(tk, ctx, ctx->iv16, rc4key); 217 tkip_mixing_phase2(tk, ctx, ctx->iv16, rc4key);
218 218
219 ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len); 219 return ieee80211_wep_encrypt_data(tfm, rc4key, 16, pos, payload_len);
220} 220}
221 221
222/* Decrypt packet payload with TKIP using @key. @pos is a pointer to the 222/* Decrypt packet payload with TKIP using @key. @pos is a pointer to the
diff --git a/net/mac80211/tkip.h b/net/mac80211/tkip.h
index d4714383f5fc..7e83dee976fa 100644
--- a/net/mac80211/tkip.h
+++ b/net/mac80211/tkip.h
@@ -15,7 +15,7 @@
15 15
16u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16); 16u8 *ieee80211_tkip_add_iv(u8 *pos, struct ieee80211_key *key, u16 iv16);
17 17
18void ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm, 18int ieee80211_tkip_encrypt_data(struct crypto_blkcipher *tfm,
19 struct ieee80211_key *key, 19 struct ieee80211_key *key,
20 u8 *pos, size_t payload_len, u8 *ta); 20 u8 *pos, size_t payload_len, u8 *ta);
21enum { 21enum {
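
Across tkip.c/tkip.h (and wep.c further down) the encrypt helpers change from void to int so that a failed cipher allocation can surface as a TX error instead of silently transmitting garbage. A compact sketch of the propagation pattern, with hypothetical function bodies in place of the real crypto:

    #include <stddef.h>
    #include <stdio.h>

    static int wep_encrypt_data(const unsigned char *key, size_t klen,
                                unsigned char *data, size_t len)
    {
            if (!key || !klen)
                    return -1;      /* e.g. the cipher was never allocated */
            /* ... RC4 over data | CRC32(data) would happen here ... */
            (void)data; (void)len;
            return 0;
    }

    static int tkip_encrypt_data(const unsigned char *rc4key,
                                 unsigned char *pos, size_t payload_len)
    {
            /* phase-2 key mixing elided; hand the result straight up */
            return wep_encrypt_data(rc4key, 16, pos, payload_len);
    }

    int main(void)
    {
            unsigned char buf[16] = { 0 }, key[16] = { 1 };

            printf("ok=%d fail=%d\n",
                   tkip_encrypt_data(key, buf, sizeof(buf)),
                   tkip_encrypt_data(NULL, buf, sizeof(buf)));
            return 0;
    }
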
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index cfc473e1b050..c54db966926b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -429,6 +429,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
429 struct sta_info *sta = tx->sta; 429 struct sta_info *sta = tx->sta;
430 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); 430 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
431 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; 431 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
432 struct ieee80211_local *local = tx->local;
432 u32 staflags; 433 u32 staflags;
433 434
434 if (unlikely(!sta || 435 if (unlikely(!sta ||
@@ -476,6 +477,12 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
476 info->control.vif = &tx->sdata->vif; 477 info->control.vif = &tx->sdata->vif;
477 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; 478 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
478 skb_queue_tail(&sta->ps_tx_buf, tx->skb); 479 skb_queue_tail(&sta->ps_tx_buf, tx->skb);
480
481 if (!timer_pending(&local->sta_cleanup))
482 mod_timer(&local->sta_cleanup,
483 round_jiffies(jiffies +
484 STA_INFO_CLEANUP_INTERVAL));
485
479 return TX_QUEUED; 486 return TX_QUEUED;
480 } 487 }
481#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG 488#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
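
Buffering a frame for a dozing station now also arms the periodic station-cleanup timer when it is not already pending, so buffered frames age out even if nothing else kicks the timer. A plain-C sketch of the arm-once idea; timer_pending()/mod_timer() are replaced by a flag and a deadline purely for illustration:

    #include <stdbool.h>
    #include <stdio.h>

    #define CLEANUP_INTERVAL 100    /* ticks; stand-in for the jiffies value */

    static bool timer_armed;
    static unsigned long timer_deadline;

    static void buffer_frame(unsigned long now)
    {
            /* ... queue the frame on ps_tx_buf ... */
            if (!timer_armed) {             /* cf. !timer_pending() */
                    timer_armed = true;     /* cf. mod_timer() */
                    timer_deadline = now + CLEANUP_INTERVAL;
            }
    }

    int main(void)
    {
            buffer_frame(5);
            buffer_frame(7);        /* second call leaves the deadline alone */
            printf("armed=%d deadline=%lu\n", timer_armed, timer_deadline);
            return 0;
    }
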
@@ -513,6 +520,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
513 else if (tx->sta && (key = rcu_dereference(tx->sta->key))) 520 else if (tx->sta && (key = rcu_dereference(tx->sta->key)))
514 tx->key = key; 521 tx->key = key;
515 else if (ieee80211_is_mgmt(hdr->frame_control) && 522 else if (ieee80211_is_mgmt(hdr->frame_control) &&
523 is_multicast_ether_addr(hdr->addr1) &&
524 ieee80211_is_robust_mgmt_frame(hdr) &&
516 (key = rcu_dereference(tx->sdata->default_mgmt_key))) 525 (key = rcu_dereference(tx->sdata->default_mgmt_key)))
517 tx->key = key; 526 tx->key = key;
518 else if ((key = rcu_dereference(tx->sdata->default_key))) 527 else if ((key = rcu_dereference(tx->sdata->default_key)))
@@ -567,24 +576,14 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
567} 576}
568 577
569static ieee80211_tx_result debug_noinline 578static ieee80211_tx_result debug_noinline
570ieee80211_tx_h_sta(struct ieee80211_tx_data *tx)
571{
572 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
573
574 if (tx->sta && tx->sta->uploaded)
575 info->control.sta = &tx->sta->sta;
576
577 return TX_CONTINUE;
578}
579
580static ieee80211_tx_result debug_noinline
581ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) 579ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
582{ 580{
583 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); 581 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
584 struct ieee80211_hdr *hdr = (void *)tx->skb->data; 582 struct ieee80211_hdr *hdr = (void *)tx->skb->data;
585 struct ieee80211_supported_band *sband; 583 struct ieee80211_supported_band *sband;
586 struct ieee80211_rate *rate; 584 struct ieee80211_rate *rate;
587 int i, len; 585 int i;
586 u32 len;
588 bool inval = false, rts = false, short_preamble = false; 587 bool inval = false, rts = false, short_preamble = false;
589 struct ieee80211_tx_rate_control txrc; 588 struct ieee80211_tx_rate_control txrc;
590 u32 sta_flags; 589 u32 sta_flags;
@@ -593,7 +592,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
593 592
594 sband = tx->local->hw.wiphy->bands[tx->channel->band]; 593 sband = tx->local->hw.wiphy->bands[tx->channel->band];
595 594
596 len = min_t(int, tx->skb->len + FCS_LEN, 595 len = min_t(u32, tx->skb->len + FCS_LEN,
597 tx->local->hw.wiphy->frag_threshold); 596 tx->local->hw.wiphy->frag_threshold);
598 597
599 /* set up the tx rate control struct we give the RC algo */ 598 /* set up the tx rate control struct we give the RC algo */
@@ -1082,6 +1081,59 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx,
1082 return true; 1081 return true;
1083} 1082}
1084 1083
1084static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
1085 struct sk_buff *skb,
1086 struct ieee80211_tx_info *info,
1087 struct tid_ampdu_tx *tid_tx,
1088 int tid)
1089{
1090 bool queued = false;
1091
1092 if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
1093 info->flags |= IEEE80211_TX_CTL_AMPDU;
1094 } else if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) {
1095 /*
1096 * nothing -- this aggregation session is being started
1097 * but that might still fail with the driver
1098 */
1099 } else {
1100 spin_lock(&tx->sta->lock);
1101 /*
1102 * Need to re-check now, because we may get here
1103 *
1104 * 1) in the window during which the setup is actually
1105 * already done, but not marked yet because not all
1106 * packets are spliced over to the driver pending
1107 * queue yet -- if this happened we acquire the lock
1108 * either before or after the splice happens, but
1109 * need to recheck which of these cases happened.
1110 *
1111 * 2) during session teardown, if the OPERATIONAL bit
1112 * was cleared due to the teardown but the pointer
1113 * hasn't been assigned NULL yet (or we loaded it
1114 * before it was assigned) -- in this case it may
1115 * now be NULL which means we should just let the
1116 * packet pass through because splicing the frames
1117 * back is already done.
1118 */
1119 tid_tx = tx->sta->ampdu_mlme.tid_tx[tid];
1120
1121 if (!tid_tx) {
1122 /* do nothing, let packet pass through */
1123 } else if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
1124 info->flags |= IEEE80211_TX_CTL_AMPDU;
1125 } else {
1126 queued = true;
1127 info->control.vif = &tx->sdata->vif;
1128 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
1129 __skb_queue_tail(&tid_tx->pending, skb);
1130 }
1131 spin_unlock(&tx->sta->lock);
1132 }
1133
1134 return queued;
1135}
1136
1085/* 1137/*
1086 * initialises @tx 1138 * initialises @tx
1087 */ 1139 */
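
The factored-out ieee80211_tx_prep_agg() keeps the classic double-checked shape: a lock-free look at the aggregation state, then a re-check under the lock, because the session can become operational, or be torn down, between the two reads. A pthreads sketch of that shape, with states and outcomes simplified:

    #include <pthread.h>
    #include <stdio.h>

    enum agg_state { AGG_IDLE, AGG_WANT_START, AGG_OPERATIONAL };

    static pthread_mutex_t sta_lock = PTHREAD_MUTEX_INITIALIZER;
    static enum agg_state state = AGG_WANT_START;

    /* returns 1 when the frame must be parked on the pending queue */
    static int agg_classify(void)
    {
            int queued = 0;

            /* lock-free fast paths, tolerated as racy like the original */
            if (state == AGG_OPERATIONAL)
                    return 0;       /* mark the frame A-MPDU and send */
            if (state == AGG_WANT_START)
                    return 0;       /* setup may still fail; pass through */

            pthread_mutex_lock(&sta_lock);
            /* the state may have moved between the reads above and here */
            if (state != AGG_OPERATIONAL && state != AGG_IDLE)
                    queued = 1;
            pthread_mutex_unlock(&sta_lock);
            return queued;
    }

    int main(void)
    {
            printf("queued=%d\n", agg_classify());
            return 0;
    }
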
@@ -1094,8 +1146,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
1094 struct ieee80211_hdr *hdr; 1146 struct ieee80211_hdr *hdr;
1095 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 1147 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1096 int hdrlen, tid; 1148 int hdrlen, tid;
1097 u8 *qc, *state; 1149 u8 *qc;
1098 bool queued = false;
1099 1150
1100 memset(tx, 0, sizeof(*tx)); 1151 memset(tx, 0, sizeof(*tx));
1101 tx->skb = skb; 1152 tx->skb = skb;
@@ -1142,41 +1193,21 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
1142 1193
1143 if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && 1194 if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
1144 (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) { 1195 (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) {
1145 unsigned long flags;
1146 struct tid_ampdu_tx *tid_tx; 1196 struct tid_ampdu_tx *tid_tx;
1147 1197
1148 qc = ieee80211_get_qos_ctl(hdr); 1198 qc = ieee80211_get_qos_ctl(hdr);
1149 tid = *qc & IEEE80211_QOS_CTL_TID_MASK; 1199 tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
1150 1200
1151 spin_lock_irqsave(&tx->sta->lock, flags); 1201 tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]);
1152 /* 1202 if (tid_tx) {
1153 * XXX: This spinlock could be fairly expensive, but see the 1203 bool queued;
1154 * comment in agg-tx.c:ieee80211_agg_tx_operational().
1155 * One way to solve this would be to do something RCU-like
1156 * for managing the tid_tx struct and using atomic bitops
1157 * for the actual state -- by introducing an actual
1158 * 'operational' bit that would be possible. It would
1159 * require changing ieee80211_agg_tx_operational() to
1160 * set that bit, and changing the way tid_tx is managed
1161 * everywhere, including races between that bit and
1162 * tid_tx going away (tid_tx being added can be easily
1163 * committed to memory before the 'operational' bit).
1164 */
1165 tid_tx = tx->sta->ampdu_mlme.tid_tx[tid];
1166 state = &tx->sta->ampdu_mlme.tid_state_tx[tid];
1167 if (*state == HT_AGG_STATE_OPERATIONAL) {
1168 info->flags |= IEEE80211_TX_CTL_AMPDU;
1169 } else if (*state != HT_AGG_STATE_IDLE) {
1170 /* in progress */
1171 queued = true;
1172 info->control.vif = &sdata->vif;
1173 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
1174 __skb_queue_tail(&tid_tx->pending, skb);
1175 }
1176 spin_unlock_irqrestore(&tx->sta->lock, flags);
1177 1204
1178 if (unlikely(queued)) 1205 queued = ieee80211_tx_prep_agg(tx, skb, info,
1179 return TX_QUEUED; 1206 tid_tx, tid);
1207
1208 if (unlikely(queued))
1209 return TX_QUEUED;
1210 }
1180 } 1211 }
1181 1212
1182 if (is_multicast_ether_addr(hdr->addr1)) { 1213 if (is_multicast_ether_addr(hdr->addr1)) {
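
In ieee80211_tx_prepare() the spinlock around the tid_tx lookup is gone: a single rcu_dereference() reads the session pointer, and NULL simply means no session. A userspace analogue using C11 acquire/release atomics (rcu_dereference itself is kernel-only, so this is only the shape of the access pattern):

    #include <stdatomic.h>
    #include <stdio.h>

    struct tid_ampdu_tx { int operational; };

    static _Atomic(struct tid_ampdu_tx *) tid_tx_slot;

    static int tx_prepare(void)
    {
            struct tid_ampdu_tx *tid_tx =
                    atomic_load_explicit(&tid_tx_slot, memory_order_acquire);

            if (!tid_tx)
                    return 0;       /* no session: nothing to do, no lock taken */
            return tid_tx->operational;
    }

    int main(void)
    {
            static struct tid_ampdu_tx session = { 1 };

            atomic_store_explicit(&tid_tx_slot, &session, memory_order_release);
            printf("operational=%d\n", tx_prepare());
            return 0;
    }
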
@@ -1265,6 +1296,11 @@ static int __ieee80211_tx(struct ieee80211_local *local,
1265 break; 1296 break;
1266 } 1297 }
1267 1298
1299 if (sta && sta->uploaded)
1300 info->control.sta = &sta->sta;
1301 else
1302 info->control.sta = NULL;
1303
1268 ret = drv_tx(local, skb); 1304 ret = drv_tx(local, skb);
1269 if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) { 1305 if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) {
1270 dev_kfree_skb(skb); 1306 dev_kfree_skb(skb);
@@ -1304,7 +1340,6 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1304 CALL_TXH(ieee80211_tx_h_check_assoc); 1340 CALL_TXH(ieee80211_tx_h_check_assoc);
1305 CALL_TXH(ieee80211_tx_h_ps_buf); 1341 CALL_TXH(ieee80211_tx_h_ps_buf);
1306 CALL_TXH(ieee80211_tx_h_select_key); 1342 CALL_TXH(ieee80211_tx_h_select_key);
1307 CALL_TXH(ieee80211_tx_h_sta);
1308 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) 1343 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
1309 CALL_TXH(ieee80211_tx_h_rate_ctrl); 1344 CALL_TXH(ieee80211_tx_h_rate_ctrl);
1310 1345
@@ -1900,11 +1935,13 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1900 h_pos += encaps_len; 1935 h_pos += encaps_len;
1901 } 1936 }
1902 1937
1938#ifdef CONFIG_MAC80211_MESH
1903 if (meshhdrlen > 0) { 1939 if (meshhdrlen > 0) {
1904 memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen); 1940 memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen);
1905 nh_pos += meshhdrlen; 1941 nh_pos += meshhdrlen;
1906 h_pos += meshhdrlen; 1942 h_pos += meshhdrlen;
1907 } 1943 }
1944#endif
1908 1945
1909 if (ieee80211_is_data_qos(fc)) { 1946 if (ieee80211_is_data_qos(fc)) {
1910 __le16 *qos_control; 1947 __le16 *qos_control;
@@ -2011,14 +2048,12 @@ void ieee80211_tx_pending(unsigned long data)
2011 while (!skb_queue_empty(&local->pending[i])) { 2048 while (!skb_queue_empty(&local->pending[i])) {
2012 struct sk_buff *skb = __skb_dequeue(&local->pending[i]); 2049 struct sk_buff *skb = __skb_dequeue(&local->pending[i]);
2013 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 2050 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
2014 struct ieee80211_sub_if_data *sdata;
2015 2051
2016 if (WARN_ON(!info->control.vif)) { 2052 if (WARN_ON(!info->control.vif)) {
2017 kfree_skb(skb); 2053 kfree_skb(skb);
2018 continue; 2054 continue;
2019 } 2055 }
2020 2056
2021 sdata = vif_to_sdata(info->control.vif);
2022 spin_unlock_irqrestore(&local->queue_stop_reason_lock, 2057 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
2023 flags); 2058 flags);
2024 2059
@@ -2244,8 +2279,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
2244 2279
2245 info->control.vif = vif; 2280 info->control.vif = vif;
2246 2281
2247 info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; 2282 info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT |
2248 info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; 2283 IEEE80211_TX_CTL_ASSIGN_SEQ |
2284 IEEE80211_TX_CTL_FIRST_FRAGMENT;
2249 out: 2285 out:
2250 rcu_read_unlock(); 2286 rcu_read_unlock();
2251 return skb; 2287 return skb;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 53af57047435..748387d45bc0 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -270,6 +270,8 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
270 struct ieee80211_local *local = hw_to_local(hw); 270 struct ieee80211_local *local = hw_to_local(hw);
271 struct ieee80211_sub_if_data *sdata; 271 struct ieee80211_sub_if_data *sdata;
272 272
273 trace_wake_queue(local, queue, reason);
274
273 if (WARN_ON(queue >= hw->queues)) 275 if (WARN_ON(queue >= hw->queues))
274 return; 276 return;
275 277
@@ -312,6 +314,8 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
312 struct ieee80211_local *local = hw_to_local(hw); 314 struct ieee80211_local *local = hw_to_local(hw);
313 struct ieee80211_sub_if_data *sdata; 315 struct ieee80211_sub_if_data *sdata;
314 316
317 trace_stop_queue(local, queue, reason);
318
315 if (WARN_ON(queue >= hw->queues)) 319 if (WARN_ON(queue >= hw->queues))
316 return; 320 return;
317 321
@@ -796,6 +800,15 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata)
796 800
797 drv_conf_tx(local, queue, &qparam); 801 drv_conf_tx(local, queue, &qparam);
798 } 802 }
803
804	 /* after reinitializing the QoS TX queue settings to their
805	 * defaults, disable QoS entirely */
806
807 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
808 sdata->vif.bss_conf.qos =
809 sdata->vif.type != NL80211_IFTYPE_STATION;
810 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_QOS);
811 }
799} 812}
800 813
801void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, 814void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
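
ieee80211_set_wmm_default() now ends by deriving the interface's QoS flag from its type and pushing a single BSS_CHANGED_QOS notification. A sketch of that derive-and-notify step; the rule that stations start with QoS off (the patch implies it gets enabled later) is taken from the diff, everything else is illustrative:

    #include <stdio.h>

    enum iftype { IF_STATION, IF_AP, IF_MONITOR };

    #define BSS_CHANGED_QOS (1u << 0)

    static void bss_info_change_notify(unsigned changed)
    {
            printf("notify driver, changed=0x%x\n", changed);
    }

    static void set_wmm_default(enum iftype type, int *qos)
    {
            /* ... queue parameters were just reset to defaults ... */
            if (type != IF_MONITOR) {
                    /* stations start with QoS disabled (assumption:
                     * re-enabled on association) */
                    *qos = (type != IF_STATION);
                    bss_info_change_notify(BSS_CHANGED_QOS);
            }
    }

    int main(void)
    {
            int qos = 0;

            set_wmm_default(IF_AP, &qos);
            printf("qos=%d\n", qos);
            return 0;
    }
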
@@ -1129,18 +1142,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1129 } 1142 }
1130 mutex_unlock(&local->sta_mtx); 1143 mutex_unlock(&local->sta_mtx);
1131 1144
1132 /* Clear Suspend state so that ADDBA requests can be processed */
1133
1134 rcu_read_lock();
1135
1136 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
1137 list_for_each_entry_rcu(sta, &local->sta_list, list) {
1138 clear_sta_flags(sta, WLAN_STA_SUSPEND);
1139 }
1140 }
1141
1142 rcu_read_unlock();
1143
1144 /* setup RTS threshold */ 1145 /* setup RTS threshold */
1145 drv_set_rts_threshold(local, hw->wiphy->rts_threshold); 1146 drv_set_rts_threshold(local, hw->wiphy->rts_threshold);
1146 1147
@@ -1151,18 +1152,34 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1151 1152
1152 /* Finally also reconfigure all the BSS information */ 1153 /* Finally also reconfigure all the BSS information */
1153 list_for_each_entry(sdata, &local->interfaces, list) { 1154 list_for_each_entry(sdata, &local->interfaces, list) {
1154 u32 changed = ~0; 1155 u32 changed;
1156
1155 if (!ieee80211_sdata_running(sdata)) 1157 if (!ieee80211_sdata_running(sdata))
1156 continue; 1158 continue;
1159
1160 /* common change flags for all interface types */
1161 changed = BSS_CHANGED_ERP_CTS_PROT |
1162 BSS_CHANGED_ERP_PREAMBLE |
1163 BSS_CHANGED_ERP_SLOT |
1164 BSS_CHANGED_HT |
1165 BSS_CHANGED_BASIC_RATES |
1166 BSS_CHANGED_BEACON_INT |
1167 BSS_CHANGED_BSSID |
1168 BSS_CHANGED_CQM |
1169 BSS_CHANGED_QOS;
1170
1157 switch (sdata->vif.type) { 1171 switch (sdata->vif.type) {
1158 case NL80211_IFTYPE_STATION: 1172 case NL80211_IFTYPE_STATION:
1159 /* disable beacon change bits */ 1173 changed |= BSS_CHANGED_ASSOC;
1160 changed &= ~(BSS_CHANGED_BEACON | 1174 ieee80211_bss_info_change_notify(sdata, changed);
1161 BSS_CHANGED_BEACON_ENABLED); 1175 break;
1162 /* fall through */
1163 case NL80211_IFTYPE_ADHOC: 1176 case NL80211_IFTYPE_ADHOC:
1177 changed |= BSS_CHANGED_IBSS;
1178 /* fall through */
1164 case NL80211_IFTYPE_AP: 1179 case NL80211_IFTYPE_AP:
1165 case NL80211_IFTYPE_MESH_POINT: 1180 case NL80211_IFTYPE_MESH_POINT:
1181 changed |= BSS_CHANGED_BEACON |
1182 BSS_CHANGED_BEACON_ENABLED;
1166 ieee80211_bss_info_change_notify(sdata, changed); 1183 ieee80211_bss_info_change_notify(sdata, changed);
1167 break; 1184 break;
1168 case NL80211_IFTYPE_WDS: 1185 case NL80211_IFTYPE_WDS:
@@ -1178,13 +1195,26 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1178 } 1195 }
1179 } 1196 }
1180 1197
1181 rcu_read_lock(); 1198 /*
1199 * Clear the WLAN_STA_BLOCK_BA flag so new aggregation
1200 * sessions can be established after a resume.
1201 *
1202 * Also tear down aggregation sessions since reconfiguring
1203 * them in a hardware restart scenario is not easily done
1204 * right now, and the hardware will have lost information
1205 * about the sessions, but we and the AP still think they
1206 * are active. This is really a workaround though.
1207 */
1182 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { 1208 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
1183 list_for_each_entry_rcu(sta, &local->sta_list, list) { 1209 mutex_lock(&local->sta_mtx);
1210
1211 list_for_each_entry(sta, &local->sta_list, list) {
1184 ieee80211_sta_tear_down_BA_sessions(sta); 1212 ieee80211_sta_tear_down_BA_sessions(sta);
1213 clear_sta_flags(sta, WLAN_STA_BLOCK_BA);
1185 } 1214 }
1215
1216 mutex_unlock(&local->sta_mtx);
1186 } 1217 }
1187 rcu_read_unlock();
1188 1218
1189 /* add back keys */ 1219 /* add back keys */
1190 list_for_each_entry(sdata, &local->interfaces, list) 1220 list_for_each_entry(sdata, &local->interfaces, list)
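
ieee80211_reconfig() stops starting from changed = ~0 and masking bits off; it now builds the set explicitly from common bits plus per-type extras, which is harder to get accidentally wrong when new BSS_CHANGED_* bits appear. A sketch of the construction, with invented flag values:

    #include <stdio.h>

    enum {
            BSS_CHANGED_ERP_CTS_PROT = 1u << 0,
            BSS_CHANGED_BASIC_RATES  = 1u << 1,
            BSS_CHANGED_ASSOC        = 1u << 2,
            BSS_CHANGED_IBSS         = 1u << 3,
            BSS_CHANGED_BEACON       = 1u << 4,
    };

    enum iftype { IF_STATION, IF_ADHOC, IF_AP };

    static unsigned reconfig_flags(enum iftype type)
    {
            /* common change flags for all interface types */
            unsigned changed = BSS_CHANGED_ERP_CTS_PROT |
                               BSS_CHANGED_BASIC_RATES;

            switch (type) {
            case IF_STATION:
                    return changed | BSS_CHANGED_ASSOC;
            case IF_ADHOC:
                    changed |= BSS_CHANGED_IBSS;
                    /* fall through: IBSS interfaces beacon too */
            case IF_AP:
                    return changed | BSS_CHANGED_BEACON;
            }
            return changed;
    }

    int main(void)
    {
            printf("adhoc flags: 0x%x\n", reconfig_flags(IF_ADHOC));
            return 0;
    }
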
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 5f3a4113bda1..9ebc8d8a1f5b 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -32,13 +32,16 @@ int ieee80211_wep_init(struct ieee80211_local *local)
32 32
33 local->wep_tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, 33 local->wep_tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0,
34 CRYPTO_ALG_ASYNC); 34 CRYPTO_ALG_ASYNC);
35 if (IS_ERR(local->wep_tx_tfm)) 35 if (IS_ERR(local->wep_tx_tfm)) {
36 local->wep_rx_tfm = ERR_PTR(-EINVAL);
36 return PTR_ERR(local->wep_tx_tfm); 37 return PTR_ERR(local->wep_tx_tfm);
38 }
37 39
38 local->wep_rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, 40 local->wep_rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0,
39 CRYPTO_ALG_ASYNC); 41 CRYPTO_ALG_ASYNC);
40 if (IS_ERR(local->wep_rx_tfm)) { 42 if (IS_ERR(local->wep_rx_tfm)) {
41 crypto_free_blkcipher(local->wep_tx_tfm); 43 crypto_free_blkcipher(local->wep_tx_tfm);
44 local->wep_tx_tfm = ERR_PTR(-EINVAL);
42 return PTR_ERR(local->wep_rx_tfm); 45 return PTR_ERR(local->wep_rx_tfm);
43 } 46 }
44 47
@@ -47,8 +50,10 @@ int ieee80211_wep_init(struct ieee80211_local *local)
47 50
48void ieee80211_wep_free(struct ieee80211_local *local) 51void ieee80211_wep_free(struct ieee80211_local *local)
49{ 52{
50 crypto_free_blkcipher(local->wep_tx_tfm); 53 if (!IS_ERR(local->wep_tx_tfm))
51 crypto_free_blkcipher(local->wep_rx_tfm); 54 crypto_free_blkcipher(local->wep_tx_tfm);
55 if (!IS_ERR(local->wep_rx_tfm))
56 crypto_free_blkcipher(local->wep_rx_tfm);
52} 57}
53 58
54static inline bool ieee80211_wep_weak_iv(u32 iv, int keylen) 59static inline bool ieee80211_wep_weak_iv(u32 iv, int keylen)
@@ -122,19 +127,24 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local,
122/* Perform WEP encryption using given key. data buffer must have tailroom 127/* Perform WEP encryption using given key. data buffer must have tailroom
123 * for 4-byte ICV. data_len must not include this ICV. Note: this function 128 * for 4-byte ICV. data_len must not include this ICV. Note: this function
124 * does _not_ add IV. data = RC4(data | CRC32(data)) */ 129 * does _not_ add IV. data = RC4(data | CRC32(data)) */
125void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, 130int ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key,
126 size_t klen, u8 *data, size_t data_len) 131 size_t klen, u8 *data, size_t data_len)
127{ 132{
128 struct blkcipher_desc desc = { .tfm = tfm }; 133 struct blkcipher_desc desc = { .tfm = tfm };
129 struct scatterlist sg; 134 struct scatterlist sg;
130 __le32 icv; 135 __le32 icv;
131 136
137 if (IS_ERR(tfm))
138 return -1;
139
132 icv = cpu_to_le32(~crc32_le(~0, data, data_len)); 140 icv = cpu_to_le32(~crc32_le(~0, data, data_len));
133 put_unaligned(icv, (__le32 *)(data + data_len)); 141 put_unaligned(icv, (__le32 *)(data + data_len));
134 142
135 crypto_blkcipher_setkey(tfm, rc4key, klen); 143 crypto_blkcipher_setkey(tfm, rc4key, klen);
136 sg_init_one(&sg, data, data_len + WEP_ICV_LEN); 144 sg_init_one(&sg, data, data_len + WEP_ICV_LEN);
137 crypto_blkcipher_encrypt(&desc, &sg, &sg, sg.length); 145 crypto_blkcipher_encrypt(&desc, &sg, &sg, sg.length);
146
147 return 0;
138} 148}
139 149
140 150
@@ -168,10 +178,8 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local,
168 /* Add room for ICV */ 178 /* Add room for ICV */
169 skb_put(skb, WEP_ICV_LEN); 179 skb_put(skb, WEP_ICV_LEN);
170 180
171 ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3, 181 return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3,
172 iv + WEP_IV_LEN, len); 182 iv + WEP_IV_LEN, len);
173
174 return 0;
175} 183}
176 184
177 185
@@ -185,6 +193,9 @@ int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key,
185 struct scatterlist sg; 193 struct scatterlist sg;
186 __le32 crc; 194 __le32 crc;
187 195
196 if (IS_ERR(tfm))
197 return -1;
198
188 crypto_blkcipher_setkey(tfm, rc4key, klen); 199 crypto_blkcipher_setkey(tfm, rc4key, klen);
189 sg_init_one(&sg, data, data_len + WEP_ICV_LEN); 200 sg_init_one(&sg, data, data_len + WEP_ICV_LEN);
190 crypto_blkcipher_decrypt(&desc, &sg, &sg, sg.length); 201 crypto_blkcipher_decrypt(&desc, &sg, &sg, sg.length);
diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h
index fe29d7e5759f..58654ee33518 100644
--- a/net/mac80211/wep.h
+++ b/net/mac80211/wep.h
@@ -18,7 +18,7 @@
18 18
19int ieee80211_wep_init(struct ieee80211_local *local); 19int ieee80211_wep_init(struct ieee80211_local *local);
20void ieee80211_wep_free(struct ieee80211_local *local); 20void ieee80211_wep_free(struct ieee80211_local *local);
21void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, 21int ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key,
22 size_t klen, u8 *data, size_t data_len); 22 size_t klen, u8 *data, size_t data_len);
23int ieee80211_wep_encrypt(struct ieee80211_local *local, 23int ieee80211_wep_encrypt(struct ieee80211_local *local,
24 struct sk_buff *skb, 24 struct sk_buff *skb,
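
The wep.c changes lean on the kernel's ERR_PTR convention: when allocating one tfm fails, the other is set to an error-encoding pointer as well, so ieee80211_wep_free() and the encrypt/decrypt paths can tell "never allocated" apart from a valid handle with IS_ERR(). A userspace re-implementation of just that convention, not the kernel's headers:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error)
    {
            return (void *)(intptr_t)error;   /* top page encodes -errno */
    }

    static inline int IS_ERR(const void *ptr)
    {
            return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
    }

    struct tfm { int dummy; };

    static struct tfm *alloc_tfm(int fail)
    {
            return fail ? ERR_PTR(-22 /* -EINVAL */)
                        : calloc(1, sizeof(struct tfm));
    }

    static void free_tfm(struct tfm *tfm)
    {
            if (!IS_ERR(tfm))       /* safe even after a failed allocation */
                    free(tfm);
    }

    int main(void)
    {
            struct tfm *tx = alloc_tfm(0), *rx = alloc_tfm(1);

            printf("tx err=%d rx err=%d\n", IS_ERR(tx), IS_ERR(rx));
            free_tfm(tx);
            free_tfm(rx);
            return 0;
    }
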
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 15e1ba931b87..81d4ad64184a 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -33,6 +33,7 @@
33#define IEEE80211_MAX_PROBE_TRIES 5 33#define IEEE80211_MAX_PROBE_TRIES 5
34 34
35enum work_action { 35enum work_action {
36 WORK_ACT_MISMATCH,
36 WORK_ACT_NONE, 37 WORK_ACT_NONE,
37 WORK_ACT_TIMEOUT, 38 WORK_ACT_TIMEOUT,
38 WORK_ACT_DONE, 39 WORK_ACT_DONE,
@@ -213,15 +214,25 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
213 214
214 sband = local->hw.wiphy->bands[wk->chan->band]; 215 sband = local->hw.wiphy->bands[wk->chan->band];
215 216
216 /* 217 if (wk->assoc.supp_rates_len) {
217 * Get all rates supported by the device and the AP as 218 /*
218 * some APs don't like getting a superset of their rates 219 * Get all rates supported by the device and the AP as
219 * in the association request (e.g. D-Link DAP 1353 in 220 * some APs don't like getting a superset of their rates
220 * b-only mode)... 221 * in the association request (e.g. D-Link DAP 1353 in
221 */ 222 * b-only mode)...
222 rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates, 223 */
223 wk->assoc.supp_rates_len, 224 rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates,
224 sband, &rates); 225 wk->assoc.supp_rates_len,
226 sband, &rates);
227 } else {
228 /*
 229	 * In case the AP does not provide any supported rates
 230	 * information before association, we send information
 231	 * element(s) with all rates that we support.
232 */
233 rates = ~0;
234 rates_len = sband->n_bitrates;
235 }
225 236
226 skb = alloc_skb(local->hw.extra_tx_headroom + 237 skb = alloc_skb(local->hw.extra_tx_headroom +
227 sizeof(*mgmt) + /* bit too much but doesn't matter */ 238 sizeof(*mgmt) + /* bit too much but doesn't matter */
@@ -549,6 +560,22 @@ ieee80211_remain_on_channel_timeout(struct ieee80211_work *wk)
549 return WORK_ACT_TIMEOUT; 560 return WORK_ACT_TIMEOUT;
550} 561}
551 562
563static enum work_action __must_check
564ieee80211_assoc_beacon_wait(struct ieee80211_work *wk)
565{
566 if (wk->started)
567 return WORK_ACT_TIMEOUT;
568
569 /*
570 * Wait up to one beacon interval ...
571 * should this be more if we miss one?
572 */
573 printk(KERN_DEBUG "%s: waiting for beacon from %pM\n",
574 wk->sdata->name, wk->filter_ta);
575 wk->timeout = TU_TO_EXP_TIME(wk->assoc.bss->beacon_interval);
576 return WORK_ACT_NONE;
577}
578
552static void ieee80211_auth_challenge(struct ieee80211_work *wk, 579static void ieee80211_auth_challenge(struct ieee80211_work *wk,
553 struct ieee80211_mgmt *mgmt, 580 struct ieee80211_mgmt *mgmt,
554 size_t len) 581 size_t len)
@@ -575,7 +602,7 @@ ieee80211_rx_mgmt_auth(struct ieee80211_work *wk,
575 u16 auth_alg, auth_transaction, status_code; 602 u16 auth_alg, auth_transaction, status_code;
576 603
577 if (wk->type != IEEE80211_WORK_AUTH) 604 if (wk->type != IEEE80211_WORK_AUTH)
578 return WORK_ACT_NONE; 605 return WORK_ACT_MISMATCH;
579 606
580 if (len < 24 + 6) 607 if (len < 24 + 6)
581 return WORK_ACT_NONE; 608 return WORK_ACT_NONE;
@@ -626,6 +653,9 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_work *wk,
626 struct ieee802_11_elems elems; 653 struct ieee802_11_elems elems;
627 u8 *pos; 654 u8 *pos;
628 655
656 if (wk->type != IEEE80211_WORK_ASSOC)
657 return WORK_ACT_MISMATCH;
658
629 /* 659 /*
630 * AssocResp and ReassocResp have identical structure, so process both 660 * AssocResp and ReassocResp have identical structure, so process both
631 * of them in this function. 661 * of them in this function.
@@ -681,6 +711,12 @@ ieee80211_rx_mgmt_probe_resp(struct ieee80211_work *wk,
681 711
682 ASSERT_WORK_MTX(local); 712 ASSERT_WORK_MTX(local);
683 713
714 if (wk->type != IEEE80211_WORK_DIRECT_PROBE)
715 return WORK_ACT_MISMATCH;
716
717 if (len < 24 + 12)
718 return WORK_ACT_NONE;
719
684 baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; 720 baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt;
685 if (baselen > len) 721 if (baselen > len)
686 return WORK_ACT_NONE; 722 return WORK_ACT_NONE;
@@ -689,6 +725,25 @@ ieee80211_rx_mgmt_probe_resp(struct ieee80211_work *wk,
689 return WORK_ACT_DONE; 725 return WORK_ACT_DONE;
690} 726}
691 727
728static enum work_action __must_check
729ieee80211_rx_mgmt_beacon(struct ieee80211_work *wk,
730 struct ieee80211_mgmt *mgmt, size_t len)
731{
732 struct ieee80211_sub_if_data *sdata = wk->sdata;
733 struct ieee80211_local *local = sdata->local;
734
735 ASSERT_WORK_MTX(local);
736
737 if (wk->type != IEEE80211_WORK_ASSOC_BEACON_WAIT)
738 return WORK_ACT_MISMATCH;
739
740 if (len < 24 + 12)
741 return WORK_ACT_NONE;
742
743 printk(KERN_DEBUG "%s: beacon received\n", sdata->name);
744 return WORK_ACT_DONE;
745}
746
692static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local, 747static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
693 struct sk_buff *skb) 748 struct sk_buff *skb)
694{ 749{
@@ -711,6 +766,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
711 case IEEE80211_WORK_DIRECT_PROBE: 766 case IEEE80211_WORK_DIRECT_PROBE:
712 case IEEE80211_WORK_AUTH: 767 case IEEE80211_WORK_AUTH:
713 case IEEE80211_WORK_ASSOC: 768 case IEEE80211_WORK_ASSOC:
769 case IEEE80211_WORK_ASSOC_BEACON_WAIT:
714 bssid = wk->filter_ta; 770 bssid = wk->filter_ta;
715 break; 771 break;
716 default: 772 default:
@@ -725,6 +781,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
725 continue; 781 continue;
726 782
727 switch (fc & IEEE80211_FCTL_STYPE) { 783 switch (fc & IEEE80211_FCTL_STYPE) {
784 case IEEE80211_STYPE_BEACON:
785 rma = ieee80211_rx_mgmt_beacon(wk, mgmt, skb->len);
786 break;
728 case IEEE80211_STYPE_PROBE_RESP: 787 case IEEE80211_STYPE_PROBE_RESP:
729 rma = ieee80211_rx_mgmt_probe_resp(wk, mgmt, skb->len, 788 rma = ieee80211_rx_mgmt_probe_resp(wk, mgmt, skb->len,
730 rx_status); 789 rx_status);
@@ -742,7 +801,17 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
742 break; 801 break;
743 default: 802 default:
744 WARN_ON(1); 803 WARN_ON(1);
804 rma = WORK_ACT_NONE;
745 } 805 }
806
807 /*
808 * We've either received an unexpected frame, or we have
809 * multiple work items and need to match the frame to the
810 * right one.
811 */
812 if (rma == WORK_ACT_MISMATCH)
813 continue;
814
746 /* 815 /*
747 * We've processed this frame for that work, so it can't 816 * We've processed this frame for that work, so it can't
748 * belong to another work struct. 817 * belong to another work struct.
@@ -752,6 +821,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
752 } 821 }
753 822
754 switch (rma) { 823 switch (rma) {
824 case WORK_ACT_MISMATCH:
825 /* ignore this unmatched frame */
826 break;
755 case WORK_ACT_NONE: 827 case WORK_ACT_NONE:
756 break; 828 break;
757 case WORK_ACT_DONE: 829 case WORK_ACT_DONE:
@@ -807,7 +879,7 @@ static void ieee80211_work_work(struct work_struct *work)
807 879
808 /* 880 /*
809 * ieee80211_queue_work() should have picked up most cases, 881 * ieee80211_queue_work() should have picked up most cases,
810 * here we'll pick the the rest. 882 * here we'll pick the rest.
811 */ 883 */
812 if (WARN(local->suspended, "work scheduled while going to suspend\n")) 884 if (WARN(local->suspended, "work scheduled while going to suspend\n"))
813 return; 885 return;
@@ -883,6 +955,9 @@ static void ieee80211_work_work(struct work_struct *work)
883 case IEEE80211_WORK_REMAIN_ON_CHANNEL: 955 case IEEE80211_WORK_REMAIN_ON_CHANNEL:
884 rma = ieee80211_remain_on_channel_timeout(wk); 956 rma = ieee80211_remain_on_channel_timeout(wk);
885 break; 957 break;
958 case IEEE80211_WORK_ASSOC_BEACON_WAIT:
959 rma = ieee80211_assoc_beacon_wait(wk);
960 break;
886 } 961 }
887 962
888 wk->started = started; 963 wk->started = started;
@@ -920,11 +995,16 @@ static void ieee80211_work_work(struct work_struct *work)
920 run_again(local, jiffies + HZ/2); 995 run_again(local, jiffies + HZ/2);
921 } 996 }
922 997
923 if (list_empty(&local->work_list) && local->scan_req) 998 mutex_lock(&local->scan_mtx);
999
1000 if (list_empty(&local->work_list) && local->scan_req &&
1001 !local->scanning)
924 ieee80211_queue_delayed_work(&local->hw, 1002 ieee80211_queue_delayed_work(&local->hw,
925 &local->scan_work, 1003 &local->scan_work,
926 round_jiffies_relative(0)); 1004 round_jiffies_relative(0));
927 1005
1006 mutex_unlock(&local->scan_mtx);
1007
928 mutex_unlock(&local->work_mtx); 1008 mutex_unlock(&local->work_mtx);
929 1009
930 ieee80211_recalc_idle(local); 1010 ieee80211_recalc_idle(local);
@@ -1027,6 +1107,7 @@ ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata,
1027 case IEEE80211_STYPE_PROBE_RESP: 1107 case IEEE80211_STYPE_PROBE_RESP:
1028 case IEEE80211_STYPE_ASSOC_RESP: 1108 case IEEE80211_STYPE_ASSOC_RESP:
1029 case IEEE80211_STYPE_REASSOC_RESP: 1109 case IEEE80211_STYPE_REASSOC_RESP:
1110 case IEEE80211_STYPE_BEACON:
1030 skb_queue_tail(&local->work_skb_queue, skb); 1111 skb_queue_tail(&local->work_skb_queue, skb);
1031 ieee80211_queue_work(&local->hw, &local->work_work); 1112 ieee80211_queue_work(&local->hw, &local->work_work);
1032 return RX_QUEUED; 1113 return RX_QUEUED;
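
The new WORK_ACT_MISMATCH return value lets one received frame be offered to several queued work items in turn: MISMATCH means "not addressed to this work item, keep looking", which is distinct from WORK_ACT_NONE ("mine, but nothing to do"). A dispatcher sketch with two hypothetical work items:

    #include <stdio.h>

    enum work_action { ACT_MISMATCH, ACT_NONE, ACT_DONE };
    enum work_type { WORK_AUTH, WORK_ASSOC };

    struct work { enum work_type type; const char *name; };

    static enum work_action rx_auth(const struct work *wk)
    {
            if (wk->type != WORK_AUTH)
                    return ACT_MISMATCH;    /* frame is not for this item */
            return ACT_DONE;
    }

    int main(void)
    {
            struct work items[] = {
                    { WORK_ASSOC, "assoc" }, { WORK_AUTH, "auth" }
            };
            size_t i;

            for (i = 0; i < 2; i++) {
                    enum work_action rma = rx_auth(&items[i]);

                    if (rma == ACT_MISMATCH)
                            continue;       /* try the next work item */
                    printf("frame consumed by %s work\n", items[i].name);
                    break;                  /* a frame belongs to one item */
            }
            return 0;
    }
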
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 0adbcc941ac9..8d59d27d887e 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -183,9 +183,8 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
183 skb_put(skb, TKIP_ICV_LEN); 183 skb_put(skb, TKIP_ICV_LEN);
184 184
185 hdr = (struct ieee80211_hdr *) skb->data; 185 hdr = (struct ieee80211_hdr *) skb->data;
186 ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, 186 return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm,
187 key, pos, len, hdr->addr2); 187 key, pos, len, hdr->addr2);
188 return 0;
189} 188}
190 189
191 190
@@ -436,6 +435,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
436 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 435 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
437 u8 pn[CCMP_PN_LEN]; 436 u8 pn[CCMP_PN_LEN];
438 int data_len; 437 int data_len;
438 int queue;
439 439
440 hdrlen = ieee80211_hdrlen(hdr->frame_control); 440 hdrlen = ieee80211_hdrlen(hdr->frame_control);
441 441
@@ -453,7 +453,10 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
453 453
454 ccmp_hdr2pn(pn, skb->data + hdrlen); 454 ccmp_hdr2pn(pn, skb->data + hdrlen);
455 455
456 if (memcmp(pn, key->u.ccmp.rx_pn[rx->queue], CCMP_PN_LEN) <= 0) { 456 queue = ieee80211_is_mgmt(hdr->frame_control) ?
457 NUM_RX_DATA_QUEUES : rx->queue;
458
459 if (memcmp(pn, key->u.ccmp.rx_pn[queue], CCMP_PN_LEN) <= 0) {
457 key->u.ccmp.replays++; 460 key->u.ccmp.replays++;
458 return RX_DROP_UNUSABLE; 461 return RX_DROP_UNUSABLE;
459 } 462 }
@@ -470,7 +473,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
470 return RX_DROP_UNUSABLE; 473 return RX_DROP_UNUSABLE;
471 } 474 }
472 475
473 memcpy(key->u.ccmp.rx_pn[rx->queue], pn, CCMP_PN_LEN); 476 memcpy(key->u.ccmp.rx_pn[queue], pn, CCMP_PN_LEN);
474 477
475 /* Remove CCMP header and MIC */ 478 /* Remove CCMP header and MIC */
476 skb_trim(skb, skb->len - CCMP_MIC_LEN); 479 skb_trim(skb, skb->len - CCMP_MIC_LEN);
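
ieee80211_crypto_ccmp_decrypt() now keeps a dedicated packet-number slot for robust management frames (queue index NUM_RX_DATA_QUEUES, one past the data queues), so a replayed management frame cannot be laundered through a data queue's counter. A sketch of the replay check; the PN compares correctly with memcmp() because it is stored most-significant byte first, and the queue count here is illustrative:

    #include <stdio.h>
    #include <string.h>

    #define CCMP_PN_LEN        6
    #define NUM_RX_DATA_QUEUES 17   /* illustrative value */

    static unsigned char rx_pn[NUM_RX_DATA_QUEUES + 1][CCMP_PN_LEN];

    static int ccmp_rx(const unsigned char *pn, int rx_queue, int is_mgmt)
    {
            int queue = is_mgmt ? NUM_RX_DATA_QUEUES : rx_queue;

            if (memcmp(pn, rx_pn[queue], CCMP_PN_LEN) <= 0)
                    return -1;      /* replayed or stale PN: drop */
            memcpy(rx_pn[queue], pn, CCMP_PN_LEN);  /* accept, advance */
            return 0;
    }

    int main(void)
    {
            unsigned char pn1[CCMP_PN_LEN] = { 0, 0, 0, 0, 0, 1 };

            printf("first=%d replay=%d mgmt=%d\n",
                   ccmp_rx(pn1, 0, 0), ccmp_rx(pn1, 0, 0),
                   ccmp_rx(pn1, 0, 1));
            return 0;
    }
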
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 18d77b5c351a..43288259f4a1 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -40,27 +40,6 @@ config NF_CONNTRACK
40 40
41if NF_CONNTRACK 41if NF_CONNTRACK
42 42
43config NF_CT_ACCT
44 bool "Connection tracking flow accounting"
45 depends on NETFILTER_ADVANCED
46 help
47 If this option is enabled, the connection tracking code will
48 keep per-flow packet and byte counters.
49
50 Those counters can be used for flow-based accounting or the
51 `connbytes' match.
52
53 Please note that currently this option only sets a default state.
54 You may change it at boot time with nf_conntrack.acct=0/1 kernel
55 parameter or by loading the nf_conntrack module with acct=0/1.
56
57 You may also disable/enable it on a running system with:
58 sysctl net.netfilter.nf_conntrack_acct=0/1
59
60 This option will be removed in 2.6.29.
61
62 If unsure, say `N'.
63
64config NF_CONNTRACK_MARK 43config NF_CONNTRACK_MARK
65 bool 'Connection mark tracking support' 44 bool 'Connection mark tracking support'
66 depends on NETFILTER_ADVANCED 45 depends on NETFILTER_ADVANCED
@@ -314,8 +293,55 @@ config NETFILTER_XTABLES
314 293
315if NETFILTER_XTABLES 294if NETFILTER_XTABLES
316 295
296comment "Xtables combined modules"
297
298config NETFILTER_XT_MARK
299 tristate 'nfmark target and match support'
300 default m if NETFILTER_ADVANCED=n
301 ---help---
302 This option adds the "MARK" target and "mark" match.
303
304 Netfilter mark matching allows you to match packets based on the
305 "nfmark" value in the packet.
306 The target allows you to create rules in the "mangle" table which alter
307 the netfilter mark (nfmark) field associated with the packet.
308
309 Prior to routing, the nfmark can influence the routing method (see
310 "Use netfilter MARK value as routing key") and can also be used by
311 other subsystems to change their behavior.
312
313config NETFILTER_XT_CONNMARK
314 tristate 'ctmark target and match support'
315 depends on NF_CONNTRACK
316 depends on NETFILTER_ADVANCED
317 select NF_CONNTRACK_MARK
318 ---help---
319 This option adds the "CONNMARK" target and "connmark" match.
320
321 Netfilter allows you to store a mark value per connection (a.k.a.
322 ctmark), similarly to the packet mark (nfmark). Using this
323 target and match, you can set and match on this mark.
324
317# alphabetically ordered list of targets 325# alphabetically ordered list of targets
318 326
327comment "Xtables targets"
328
329config NETFILTER_XT_TARGET_CHECKSUM
330 tristate "CHECKSUM target support"
331 depends on IP_NF_MANGLE || IP6_NF_MANGLE
332 depends on NETFILTER_ADVANCED
333 ---help---
334 This option adds a `CHECKSUM' target, which can be used in the iptables mangle
335 table.
336
337 You can use this target to compute and fill in the checksum in
 338	 a packet that lacks a checksum. This is particularly useful
 339	 if you need to work around old applications such as DHCP clients
 340	 that do not work well with checksum offloads, but you don't want
 341	 to disable checksum offload in your device.
342
343 To compile it as a module, choose M here. If unsure, say N.
344
319config NETFILTER_XT_TARGET_CLASSIFY 345config NETFILTER_XT_TARGET_CLASSIFY
320 tristate '"CLASSIFY" target support' 346 tristate '"CLASSIFY" target support'
321 depends on NETFILTER_ADVANCED 347 depends on NETFILTER_ADVANCED
@@ -332,15 +358,11 @@ config NETFILTER_XT_TARGET_CONNMARK
332 tristate '"CONNMARK" target support' 358 tristate '"CONNMARK" target support'
333 depends on NF_CONNTRACK 359 depends on NF_CONNTRACK
334 depends on NETFILTER_ADVANCED 360 depends on NETFILTER_ADVANCED
335 select NF_CONNTRACK_MARK 361 select NETFILTER_XT_CONNMARK
336 help 362 ---help---
337 This option adds a `CONNMARK' target, which allows one to manipulate 363 This is a backwards-compat option for the user's convenience
338 the connection mark value. Similar to the MARK target, but 364 (e.g. when running oldconfig). It selects
339 affects the connection mark value rather than the packet mark value. 365 CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
340
341 If you want to compile it as a module, say M here and read
342 <file:Documentation/kbuild/modules.txt>. The module will be called
343 ipt_CONNMARK. If unsure, say `N'.
344 366
345config NETFILTER_XT_TARGET_CONNSECMARK 367config NETFILTER_XT_TARGET_CONNSECMARK
346 tristate '"CONNSECMARK" target support' 368 tristate '"CONNSECMARK" target support'
@@ -397,6 +419,18 @@ config NETFILTER_XT_TARGET_HL
397 since you can easily create immortal packets that loop 419 since you can easily create immortal packets that loop
398 forever on the network. 420 forever on the network.
399 421
422config NETFILTER_XT_TARGET_IDLETIMER
423 tristate "IDLETIMER target support"
424 depends on NETFILTER_ADVANCED
425 help
426
427 This option adds the `IDLETIMER' target. Each matching packet
 428	 resets the timer associated with the label specified when the rule is
429 added. When the timer expires, it triggers a sysfs notification.
430 The remaining time for expiration can be read via sysfs.
431
432 To compile it as a module, choose M here. If unsure, say N.
433
400config NETFILTER_XT_TARGET_LED 434config NETFILTER_XT_TARGET_LED
401 tristate '"LED" target support' 435 tristate '"LED" target support'
402 depends on LEDS_CLASS && LEDS_TRIGGERS 436 depends on LEDS_CLASS && LEDS_TRIGGERS
@@ -423,16 +457,12 @@ config NETFILTER_XT_TARGET_LED
423 457
424config NETFILTER_XT_TARGET_MARK 458config NETFILTER_XT_TARGET_MARK
425 tristate '"MARK" target support' 459 tristate '"MARK" target support'
426 default m if NETFILTER_ADVANCED=n 460 depends on NETFILTER_ADVANCED
427 help 461 select NETFILTER_XT_MARK
428 This option adds a `MARK' target, which allows you to create rules 462 ---help---
429 in the `mangle' table which alter the netfilter mark (nfmark) field 463 This is a backwards-compat option for the user's convenience
430 associated with the packet prior to routing. This can change 464 (e.g. when running oldconfig). It selects
431 the routing method (see `Use netfilter MARK value as routing 465 CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
432 key') and can also be used by other subsystems to change their
433 behavior.
434
435 To compile it as a module, choose M here. If unsure, say N.
436 466
437config NETFILTER_XT_TARGET_NFLOG 467config NETFILTER_XT_TARGET_NFLOG
438 tristate '"NFLOG" target support' 468 tristate '"NFLOG" target support'
@@ -479,6 +509,15 @@ config NETFILTER_XT_TARGET_RATEEST
479 509
480 To compile it as a module, choose M here. If unsure, say N. 510 To compile it as a module, choose M here. If unsure, say N.
481 511
512config NETFILTER_XT_TARGET_TEE
513 tristate '"TEE" - packet cloning to alternate destination'
514 depends on NETFILTER_ADVANCED
515 depends on (IPV6 || IPV6=n)
516 depends on !NF_CONNTRACK || NF_CONNTRACK
517 ---help---
518 This option adds a "TEE" target with which a packet can be cloned and
 519	 this clone can be rerouted to another nexthop.
520
482config NETFILTER_XT_TARGET_TPROXY 521config NETFILTER_XT_TARGET_TPROXY
483 tristate '"TPROXY" target support (EXPERIMENTAL)' 522 tristate '"TPROXY" target support (EXPERIMENTAL)'
484 depends on EXPERIMENTAL 523 depends on EXPERIMENTAL
@@ -552,6 +591,10 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
552 This option adds a "TCPOPTSTRIP" target, which allows you to strip 591 This option adds a "TCPOPTSTRIP" target, which allows you to strip
553 TCP options from TCP packets. 592 TCP options from TCP packets.
554 593
594# alphabetically ordered list of matches
595
596comment "Xtables matches"
597
555config NETFILTER_XT_MATCH_CLUSTER 598config NETFILTER_XT_MATCH_CLUSTER
556 tristate '"cluster" match support' 599 tristate '"cluster" match support'
557 depends on NF_CONNTRACK 600 depends on NF_CONNTRACK
@@ -582,7 +625,6 @@ config NETFILTER_XT_MATCH_CONNBYTES
582 tristate '"connbytes" per-connection counter match support' 625 tristate '"connbytes" per-connection counter match support'
583 depends on NF_CONNTRACK 626 depends on NF_CONNTRACK
584 depends on NETFILTER_ADVANCED 627 depends on NETFILTER_ADVANCED
585 select NF_CT_ACCT
586 help 628 help
587 This option adds a `connbytes' match, which allows you to match the 629 This option adds a `connbytes' match, which allows you to match the
588 number of bytes and/or packets for each direction within a connection. 630 number of bytes and/or packets for each direction within a connection.
@@ -602,14 +644,11 @@ config NETFILTER_XT_MATCH_CONNMARK
602 tristate '"connmark" connection mark match support' 644 tristate '"connmark" connection mark match support'
603 depends on NF_CONNTRACK 645 depends on NF_CONNTRACK
604 depends on NETFILTER_ADVANCED 646 depends on NETFILTER_ADVANCED
605 select NF_CONNTRACK_MARK 647 select NETFILTER_XT_CONNMARK
606 help 648 ---help---
607 This option adds a `connmark' match, which allows you to match the 649 This is a backwards-compat option for the user's convenience
608 connection mark value previously set for the session by `CONNMARK'. 650 (e.g. when running oldconfig). It selects
609 651 CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
610 If you want to compile it as a module, say M here and read
611 <file:Documentation/kbuild/modules.txt>. The module will be called
612 ipt_connmark. If unsure, say `N'.
613 652
614config NETFILTER_XT_MATCH_CONNTRACK 653config NETFILTER_XT_MATCH_CONNTRACK
615 tristate '"conntrack" connection tracking match support' 654 tristate '"conntrack" connection tracking match support'
@@ -624,6 +663,15 @@ config NETFILTER_XT_MATCH_CONNTRACK
624 663
625 To compile it as a module, choose M here. If unsure, say N. 664 To compile it as a module, choose M here. If unsure, say N.
626 665
666config NETFILTER_XT_MATCH_CPU
667 tristate '"cpu" match support'
668 depends on NETFILTER_ADVANCED
669 help
670 CPU matching allows you to match packets based on the CPU
671 currently handling the packet.
672
673 To compile it as a module, choose M here. If unsure, say N.
674
627config NETFILTER_XT_MATCH_DCCP 675config NETFILTER_XT_MATCH_DCCP
628 tristate '"dccp" protocol match support' 676 tristate '"dccp" protocol match support'
629 depends on NETFILTER_ADVANCED 677 depends on NETFILTER_ADVANCED
@@ -703,6 +751,16 @@ config NETFILTER_XT_MATCH_IPRANGE
703 751
704 If unsure, say M. 752 If unsure, say M.
705 753
754config NETFILTER_XT_MATCH_IPVS
755 tristate '"ipvs" match support'
756 depends on IP_VS
757 depends on NETFILTER_ADVANCED
758 depends on NF_CONNTRACK
759 help
760 This option allows you to match against IPVS properties of a packet.
761
762 If unsure, say N.
763
706config NETFILTER_XT_MATCH_LENGTH 764config NETFILTER_XT_MATCH_LENGTH
707 tristate '"length" match support' 765 tristate '"length" match support'
708 depends on NETFILTER_ADVANCED 766 depends on NETFILTER_ADVANCED
@@ -733,13 +791,12 @@ config NETFILTER_XT_MATCH_MAC
733 791
734config NETFILTER_XT_MATCH_MARK 792config NETFILTER_XT_MATCH_MARK
735 tristate '"mark" match support' 793 tristate '"mark" match support'
736 default m if NETFILTER_ADVANCED=n 794 depends on NETFILTER_ADVANCED
737 help 795 select NETFILTER_XT_MARK
738 Netfilter mark matching allows you to match packets based on the 796 ---help---
739 `nfmark' value in the packet. This can be set by the MARK target 797 This is a backwards-compat option for the user's convenience
740 (see below). 798 (e.g. when running oldconfig). It selects
741 799 CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
742 To compile it as a module, choose M here. If unsure, say N.
743 800
744config NETFILTER_XT_MATCH_MULTIPORT 801config NETFILTER_XT_MATCH_MULTIPORT
745 tristate '"multiport" Multiple port match support' 802 tristate '"multiport" Multiple port match support'
@@ -751,6 +808,19 @@ config NETFILTER_XT_MATCH_MULTIPORT
751 808
752 To compile it as a module, choose M here. If unsure, say N. 809 To compile it as a module, choose M here. If unsure, say N.
753 810
811config NETFILTER_XT_MATCH_OSF
812 tristate '"osf" Passive OS fingerprint match'
813 depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
814 help
815 This option selects the Passive OS Fingerprinting match module
 816	 that allows you to passively match the remote operating system by
817 analyzing incoming TCP SYN packets.
818
819 Rules and loading software can be downloaded from
820 http://www.ioremap.net/projects/osf
821
822 To compile it as a module, choose M here. If unsure, say N.
823
754config NETFILTER_XT_MATCH_OWNER 824config NETFILTER_XT_MATCH_OWNER
755 tristate '"owner" match support' 825 tristate '"owner" match support'
756 depends on NETFILTER_ADVANCED 826 depends on NETFILTER_ADVANCED
@@ -836,13 +906,6 @@ config NETFILTER_XT_MATCH_RECENT
836 Short options are available by using 'iptables -m recent -h' 906 Short options are available by using 'iptables -m recent -h'
837 Official Website: <http://snowman.net/projects/ipt_recent/> 907 Official Website: <http://snowman.net/projects/ipt_recent/>
838 908
839config NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
840 bool 'Enable obsolete /proc/net/ipt_recent'
841 depends on NETFILTER_XT_MATCH_RECENT && PROC_FS
842 ---help---
843 This option enables the old /proc/net/ipt_recent interface,
844 which has been obsoleted by /proc/net/xt_recent.
845
846config NETFILTER_XT_MATCH_SCTP 909config NETFILTER_XT_MATCH_SCTP
847 tristate '"sctp" protocol match support (EXPERIMENTAL)' 910 tristate '"sctp" protocol match support (EXPERIMENTAL)'
848 depends on EXPERIMENTAL 911 depends on EXPERIMENTAL
@@ -942,19 +1005,6 @@ config NETFILTER_XT_MATCH_U32
942 1005
943 Details and examples are in the kernel module source. 1006 Details and examples are in the kernel module source.
944 1007
945config NETFILTER_XT_MATCH_OSF
946 tristate '"osf" Passive OS fingerprint match'
947 depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
948 help
949 This option selects the Passive OS Fingerprinting match module
950 that allows to passively match the remote operating system by
951 analyzing incoming TCP SYN packets.
952
953 Rules and loading software can be downloaded from
954 http://www.ioremap.net/projects/osf
955
956 To compile it as a module, choose M here. If unsure, say N.
957
958endif # NETFILTER_XTABLES 1008endif # NETFILTER_XTABLES
959 1009
960endmenu 1010endmenu
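
The new NETFILTER_XT_MARK / NETFILTER_XT_CONNMARK options build combined modules in which one object file registers both the target and the match, while the old per-half options become compat shims that merely select them. The sketch below shows roughly what such a combined xt_mark module looks like; it follows the x_tables API of this period, but treat structure fields, parameter types, and revisions as approximate rather than a verbatim xt_mark.c:

    #include <linux/module.h>
    #include <linux/skbuff.h>
    #include <linux/netfilter/x_tables.h>
    #include <linux/netfilter/xt_mark.h>

    static unsigned int
    mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
    {
            const struct xt_mark_tginfo2 *info = par->targinfo;

            skb->mark = (skb->mark & ~info->mask) ^ info->mark;
            return XT_CONTINUE;
    }

    static bool
    mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
    {
            const struct xt_mark_mtinfo1 *info = par->matchinfo;

            return ((skb->mark & info->mask) == info->mark) ^ info->invert;
    }

    static struct xt_target mark_tg_reg __read_mostly = {
            .name       = "MARK",
            .revision   = 2,
            .family     = NFPROTO_UNSPEC,
            .target     = mark_tg,
            .targetsize = sizeof(struct xt_mark_tginfo2),
            .me         = THIS_MODULE,
    };

    static struct xt_match mark_mt_reg __read_mostly = {
            .name      = "mark",
            .revision  = 1,
            .family    = NFPROTO_UNSPEC,
            .match     = mark_mt,
            .matchsize = sizeof(struct xt_mark_mtinfo1),
            .me        = THIS_MODULE,
    };

    static int __init mark_mt_init(void)
    {
            int ret = xt_register_target(&mark_tg_reg);

            if (ret < 0)
                    return ret;
            ret = xt_register_match(&mark_mt_reg);
            if (ret < 0)
                    xt_unregister_target(&mark_tg_reg);
            return ret;
    }

    static void __exit mark_mt_exit(void)
    {
            xt_unregister_match(&mark_mt_reg);
            xt_unregister_target(&mark_tg_reg);
    }

    module_init(mark_mt_init);
    module_exit(mark_mt_exit);
    MODULE_LICENSE("GPL");
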
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f873644f02f6..441050f31111 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -40,15 +40,18 @@ obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
 # generic X tables
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o

+# combos
+obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
+obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
+
 # targets
+obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
-obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
-obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
@@ -57,15 +60,17 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o

 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
-obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
@@ -73,10 +78,10 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
-obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
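
The new "combos" section exists because the mark and connmark extensions were consolidated: the lowercase xt_mark.o/xt_connmark.o objects now carry both the match and the target revisions, which is why the separate xt_MARK.o and xt_CONNMARK.o targets and the xt_connmark match disappear from the lists below. A minimal sketch of that single-module pattern, assuming the 2.6.35-era x_tables registration API (the "demo" names are hypothetical and the callback bodies are omitted):

	#include <linux/module.h>
	#include <linux/netfilter/x_tables.h>

	/* one module registers both a target and a match */
	static struct xt_target demo_tg_reg = {
		.name     = "DEMO",
		.revision = 2,
		.family   = NFPROTO_UNSPEC,
		/* .target, .targetsize, ... as usual */
	};

	static struct xt_match demo_mt_reg = {
		.name     = "demo",
		.revision = 1,
		.family   = NFPROTO_UNSPEC,
		/* .match, .matchsize, ... as usual */
	};

	static int __init demo_init(void)
	{
		int ret = xt_register_target(&demo_tg_reg);

		if (ret < 0)
			return ret;
		ret = xt_register_match(&demo_mt_reg);
		if (ret < 0)
			xt_unregister_target(&demo_tg_reg);
		return ret;
	}

	static void __exit demo_exit(void)
	{
		xt_unregister_match(&demo_mt_reg);
		xt_unregister_target(&demo_tg_reg);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
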
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 712ccad13344..46a77d5c3887 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -3,7 +3,7 @@
 #
 menuconfig IP_VS
 	tristate "IP virtual server support"
-	depends on NET && INET && NETFILTER
+	depends on NET && INET && NETFILTER && NF_CONNTRACK
 	---help---
 	  IP Virtual Server support will let you build a high-performance
 	  virtual server based on cluster of two or more real servers. This
@@ -26,7 +26,7 @@ if IP_VS

 config IP_VS_IPV6
 	bool "IPv6 support for IPVS"
-	depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6)
+	depends on IPV6 = y || IP_VS = IPV6
 	---help---
 	  Add IPv6 support to IPVS. This is incomplete and might be dangerous.

@@ -87,19 +87,16 @@ config IP_VS_PROTO_UDP
 	  protocol. Say Y if unsure.

 config IP_VS_PROTO_AH_ESP
-	bool
-	depends on UNDEFINED
+	def_bool IP_VS_PROTO_ESP || IP_VS_PROTO_AH

 config IP_VS_PROTO_ESP
 	bool "ESP load balancing support"
-	select IP_VS_PROTO_AH_ESP
 	---help---
 	  This option enables support for load balancing ESP (Encapsulation
 	  Security Payload) transport protocol. Say Y if unsure.

 config IP_VS_PROTO_AH
 	bool "AH load balancing support"
-	select IP_VS_PROTO_AH_ESP
 	---help---
 	  This option enables support for load balancing AH (Authentication
 	  Header) transport protocol. Say Y if unsure.
@@ -238,7 +235,7 @@ comment 'IPVS application helper'

 config IP_VS_FTP
 	tristate "FTP protocol helper"
-	depends on IP_VS_PROTO_TCP
+	depends on IP_VS_PROTO_TCP && NF_NAT
 	---help---
 	  FTP is a protocol that transfers IP address and/or port number in
 	  the payload. In the virtual server via Network Address Translation,
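
The IP_VS_PROTO_AH_ESP hunk above replaces the old hidden-symbol idiom (a bool that "depends on UNDEFINED" and is pulled in via select) with a computed default. A rough C-preprocessor view of what the new def_bool line produces, purely illustrative and not code from the patch:

	/* illustrative only: the hidden AH/ESP symbol is defined exactly
	 * when at least one of the two visible options is enabled.
	 */
	#if defined(CONFIG_IP_VS_PROTO_ESP) || defined(CONFIG_IP_VS_PROTO_AH)
	#define CONFIG_IP_VS_PROTO_AH_ESP 1
	#endif
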
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 1cb0e834f8ff..e76f87f4aca8 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -569,49 +569,6 @@ static const struct file_operations ip_vs_app_fops = {
 };
 #endif

-
-/*
- *	Replace a segment of data with a new segment
- */
-int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
-		      char *o_buf, int o_len, char *n_buf, int n_len)
-{
-	int diff;
-	int o_offset;
-	int o_left;
-
-	EnterFunction(9);
-
-	diff = n_len - o_len;
-	o_offset = o_buf - (char *)skb->data;
-	/* The length of left data after o_buf+o_len in the skb data */
-	o_left = skb->len - (o_offset + o_len);
-
-	if (diff <= 0) {
-		memmove(o_buf + n_len, o_buf + o_len, o_left);
-		memcpy(o_buf, n_buf, n_len);
-		skb_trim(skb, skb->len + diff);
-	} else if (diff <= skb_tailroom(skb)) {
-		skb_put(skb, diff);
-		memmove(o_buf + n_len, o_buf + o_len, o_left);
-		memcpy(o_buf, n_buf, n_len);
-	} else {
-		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
-			return -ENOMEM;
-		skb_put(skb, diff);
-		memmove(skb->data + o_offset + n_len,
-			skb->data + o_offset + o_len, o_left);
-		skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
-	}
-
-	/* must update the iph total length here */
-	ip_hdr(skb)->tot_len = htons(skb->len);
-
-	LeaveFunction(9);
-	return 0;
-}
-
-
 int __init ip_vs_app_init(void)
 {
 	/* we will replace it with proc_net_ipvs_create() soon */
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d8f7e8ef67b4..b71c69a2db13 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -158,10 +158,14 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	unsigned hash;
 	int ret;

+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		return 0;
+
 	/* Hash by protocol, client address and port */
 	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);

 	ct_write_lock(hash);
+	spin_lock(&cp->lock);

 	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
 		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
@@ -174,6 +178,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 		ret = 0;
 	}

+	spin_unlock(&cp->lock);
 	ct_write_unlock(hash);

 	return ret;
@@ -193,6 +198,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);

 	ct_write_lock(hash);
+	spin_lock(&cp->lock);

 	if (cp->flags & IP_VS_CONN_F_HASHED) {
 		list_del(&cp->c_list);
@@ -202,6 +208,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 	} else
 		ret = 0;

+	spin_unlock(&cp->lock);
 	ct_write_unlock(hash);

 	return ret;
@@ -264,6 +271,29 @@ struct ip_vs_conn *ip_vs_conn_in_get
 	return cp;
 }

+struct ip_vs_conn *
+ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
+			struct ip_vs_protocol *pp,
+			const struct ip_vs_iphdr *iph,
+			unsigned int proto_off, int inverse)
+{
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse))
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->saddr, pptr[0],
+					 &iph->daddr, pptr[1]);
+	else
+		return ip_vs_conn_in_get(af, iph->protocol,
+					 &iph->daddr, pptr[1],
+					 &iph->saddr, pptr[0]);
+}
+EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto);
+
 /* Get reference to connection template */
 struct ip_vs_conn *ip_vs_ct_in_get
 (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
@@ -349,14 +379,37 @@ struct ip_vs_conn *ip_vs_conn_out_get
 	return ret;
 }

+struct ip_vs_conn *
+ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
+			 struct ip_vs_protocol *pp,
+			 const struct ip_vs_iphdr *iph,
+			 unsigned int proto_off, int inverse)
+{
+	__be16 _ports[2], *pptr;
+
+	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
+	if (pptr == NULL)
+		return NULL;
+
+	if (likely(!inverse))
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->saddr, pptr[0],
+					  &iph->daddr, pptr[1]);
+	else
+		return ip_vs_conn_out_get(af, iph->protocol,
+					  &iph->daddr, pptr[1],
+					  &iph->saddr, pptr[0]);
+}
+EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);

 /*
  *	Put back the conn and restart its timer with its timeout
  */
 void ip_vs_conn_put(struct ip_vs_conn *cp)
 {
-	/* reset it expire in its timeout */
-	mod_timer(&cp->timer, jiffies+cp->timeout);
+	unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
+		0 : cp->timeout;
+	mod_timer(&cp->timer, jiffies+t);

 	__ip_vs_conn_put(cp);
 }
@@ -649,7 +702,7 @@ static void ip_vs_conn_expire(unsigned long data)
 	/*
 	 *	unhash it if it is hashed in the conn table
 	 */
-	if (!ip_vs_conn_unhash(cp))
+	if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
 		goto expire_later;

 	/*
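
Two things to note in the ip_vs_conn.c changes. First, connections carrying IP_VS_CONN_F_ONE_PACKET are never hashed, and ip_vs_conn_put() arms their timer with a zero delta so they expire on the next timer tick; the extra flag test in ip_vs_conn_expire() keeps such unhashed entries from looping through expire_later forever. Second, the newly exported ip_vs_conn_in_get_proto()/ip_vs_conn_out_get_proto() helpers let every transport protocol drop its private copy of the same port-extraction code. A minimal sketch of the resulting wiring (the "foo" protocol is hypothetical; the real instances appear in the ip_vs_proto_*.c hunks further down):

	/* sketch: a transport protocol now points at the shared lookups */
	static struct ip_vs_protocol ip_vs_protocol_foo = {
		.name		= "FOO",
		.conn_in_get	= ip_vs_conn_in_get_proto,
		.conn_out_get	= ip_vs_conn_out_get_proto,
		/* remaining handlers unchanged */
	};
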
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1cd6e3fd058b..4f8ddba48011 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -54,7 +54,6 @@

 EXPORT_SYMBOL(register_ip_vs_scheduler);
 EXPORT_SYMBOL(unregister_ip_vs_scheduler);
-EXPORT_SYMBOL(ip_vs_skb_replace);
 EXPORT_SYMBOL(ip_vs_proto_name);
 EXPORT_SYMBOL(ip_vs_conn_new);
 EXPORT_SYMBOL(ip_vs_conn_in_get);
@@ -194,6 +193,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	struct ip_vs_dest *dest;
 	struct ip_vs_conn *ct;
 	__be16 dport;			/* destination port to forward */
+	__be16 flags;
 	union nf_inet_addr snet;	/* source network of the client,
 					   after masking */

@@ -340,6 +340,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		dport = ports[1];
 	}

+	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
+		 && iph.protocol == IPPROTO_UDP)?
+		IP_VS_CONN_F_ONE_PACKET : 0;
+
 	/*
 	 *    Create a new connection according to the template
 	 */
@@ -347,7 +351,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 			    &iph.saddr, ports[0],
 			    &iph.daddr, ports[1],
 			    &dest->addr, dport,
-			    0,
+			    flags,
 			    dest);
 	if (cp == NULL) {
 		ip_vs_conn_put(ct);
@@ -377,7 +381,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
-	__be16 _ports[2], *pptr;
+	__be16 _ports[2], *pptr, flags;

 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -407,6 +411,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		return NULL;
 	}

+	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
+		 && iph.protocol == IPPROTO_UDP)?
+		IP_VS_CONN_F_ONE_PACKET : 0;
+
 	/*
 	 *    Create a connection entry.
 	 */
@@ -414,7 +422,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 			    &iph.saddr, pptr[0],
 			    &iph.daddr, pptr[1],
 			    &dest->addr, dest->port ? dest->port : pptr[1],
-			    0,
+			    flags,
 			    dest);
 	if (cp == NULL)
 		return NULL;
@@ -464,6 +472,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
+		__u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
+			       iph.protocol == IPPROTO_UDP)?
+			      IP_VS_CONN_F_ONE_PACKET : 0;
 		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };

 		ip_vs_service_put(svc);
@@ -474,7 +485,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 				    &iph.saddr, pptr[0],
 				    &iph.daddr, pptr[1],
 				    &daddr, 0,
-				    IP_VS_CONN_F_BYPASS,
+				    IP_VS_CONN_F_BYPASS | flags,
 				    NULL);
 		if (cp == NULL)
 			return NF_DROP;
@@ -524,26 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	return NF_DROP;
 }

-
-/*
- *	It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
- *	chain, and is used for VS/NAT.
- *	It detects packets for VS/NAT connections and sends the packets
- *	immediately. This can avoid that iptable_nat mangles the packets
- *	for VS/NAT.
- */
-static unsigned int ip_vs_post_routing(unsigned int hooknum,
-				       struct sk_buff *skb,
-				       const struct net_device *in,
-				       const struct net_device *out,
-				       int (*okfn)(struct sk_buff *))
-{
-	if (!skb->ipvs_property)
-		return NF_ACCEPT;
-	/* The packet was sent from IPVS, exit this chain */
-	return NF_STOP;
-}
-
 __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
 {
 	return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
@@ -1487,14 +1478,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 		.hooknum = NF_INET_FORWARD,
 		.priority = 99,
 	},
-	/* Before the netfilter connection tracking, exit from POST_ROUTING */
-	{
-		.hook = ip_vs_post_routing,
-		.owner = THIS_MODULE,
-		.pf = PF_INET,
-		.hooknum = NF_INET_POST_ROUTING,
-		.priority = NF_IP_PRI_NAT_SRC-1,
-	},
 #ifdef CONFIG_IP_VS_IPV6
 	/* After packet filtering, forward packet through VS/DR, VS/TUN,
 	 * or VS/NAT(change destination), so that filtering rules can be
@@ -1523,14 +1506,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
 		.hooknum = NF_INET_FORWARD,
 		.priority = 99,
 	},
-	/* Before the netfilter connection tracking, exit from POST_ROUTING */
-	{
-		.hook = ip_vs_post_routing,
-		.owner = THIS_MODULE,
-		.pf = PF_INET6,
-		.hooknum = NF_INET_POST_ROUTING,
-		.priority = NF_IP6_PRI_NAT_SRC-1,
-	},
 #endif
 };

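
The same ONE_PACKET test is now open-coded three times in ip_vs_core.c (twice into a __be16, once into a __u16). It could equally live in a small helper; a hypothetical refactoring sketch, not part of this patch:

	/* hypothetical helper: connection flag for a one-packet UDP service */
	static inline __u16 ip_vs_onepacket_enabled(const struct ip_vs_service *svc,
						    const struct ip_vs_iphdr *iph)
	{
		return (svc->flags & IP_VS_SVC_F_ONEPACKET &&
			iph->protocol == IPPROTO_UDP) ? IP_VS_CONN_F_ONE_PACKET : 0;
	}
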
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 36dc1d88c2fa..0f0c079c422a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1864,14 +1864,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 				   svc->scheduler->name);
 		else
 #endif
-			seq_printf(seq, "%s %08X:%04X %s ",
+			seq_printf(seq, "%s %08X:%04X %s %s ",
 				   ip_vs_proto_name(svc->protocol),
 				   ntohl(svc->addr.ip),
 				   ntohs(svc->port),
-				   svc->scheduler->name);
+				   svc->scheduler->name,
+				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
 	} else {
-		seq_printf(seq, "FWM %08X %s ",
-			   svc->fwmark, svc->scheduler->name);
+		seq_printf(seq, "FWM %08X %s %s",
+			   svc->fwmark, svc->scheduler->name,
+			   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
 	}

 	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 2c7f185dfae4..f228a17ec649 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -20,6 +20,17 @@
  *
  * Author:	Wouter Gadeyne
  *
+ *
+ * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from
+ * http://www.ssi.bg/~ja/nfct/:
+ *
+ * ip_vs_nfct.c:	Netfilter connection tracking support for IPVS
+ *
+ * Portions Copyright (C) 2001-2002
+ * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
+ *
+ * Portions Copyright (C) 2003-2008
+ * Julian Anastasov
  */

 #define KMSG_COMPONENT "IPVS"
@@ -32,6 +43,9 @@
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_nat_helper.h>
 #include <linux/gfp.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
@@ -43,6 +57,16 @@
 #define SERVER_STRING "227 Entering Passive Mode ("
 #define CLIENT_STRING "PORT "

+#define FMT_TUPLE	"%pI4:%u->%pI4:%u/%u"
+#define ARG_TUPLE(T)	&(T)->src.u3.ip, ntohs((T)->src.u.all), \
+			&(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
+			(T)->dst.protonum
+
+#define FMT_CONN	"%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
+#define ARG_CONN(C)	&((C)->caddr.ip), ntohs((C)->cport), \
+			&((C)->vaddr.ip), ntohs((C)->vport), \
+			&((C)->daddr.ip), ntohs((C)->dport), \
+			(C)->protocol, (C)->state

 /*
  * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
@@ -123,6 +147,119 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
 	return 1;
 }

+/*
+ * Called from init_conntrack() as expectfn handler.
+ */
+static void
+ip_vs_expect_callback(struct nf_conn *ct,
+		      struct nf_conntrack_expect *exp)
+{
+	struct nf_conntrack_tuple *orig, new_reply;
+	struct ip_vs_conn *cp;
+
+	if (exp->tuple.src.l3num != PF_INET)
+		return;
+
+	/*
+	 * We assume that no NF locks are held before this callback.
+	 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
+	 * expectations even if they use wildcard values, now we provide the
+	 * actual values from the newly created original conntrack direction.
+	 * The conntrack is confirmed when packet reaches IPVS hooks.
+	 */
+
+	/* RS->CLIENT */
+	orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+	cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
+				&orig->src.u3, orig->src.u.tcp.port,
+				&orig->dst.u3, orig->dst.u.tcp.port);
+	if (cp) {
+		/* Change reply CLIENT->RS to CLIENT->VS */
+		new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+		IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
+			  FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
+			  __func__, ct, ct->status,
+			  ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
+			  ARG_CONN(cp));
+		new_reply.dst.u3 = cp->vaddr;
+		new_reply.dst.u.tcp.port = cp->vport;
+		IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
+			  ", inout cp=" FMT_CONN "\n",
+			  __func__, ct,
+			  ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
+			  ARG_CONN(cp));
+		goto alter;
+	}
+
+	/* CLIENT->VS */
+	cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
+			       &orig->src.u3, orig->src.u.tcp.port,
+			       &orig->dst.u3, orig->dst.u.tcp.port);
+	if (cp) {
+		/* Change reply VS->CLIENT to RS->CLIENT */
+		new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+		IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
+			  FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
+			  __func__, ct, ct->status,
+			  ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
+			  ARG_CONN(cp));
+		new_reply.src.u3 = cp->daddr;
+		new_reply.src.u.tcp.port = cp->dport;
+		IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", "
+			  FMT_TUPLE ", outin cp=" FMT_CONN "\n",
+			  __func__, ct,
+			  ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
+			  ARG_CONN(cp));
+		goto alter;
+	}
+
+	IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE
+		  " - unknown expect\n",
+		  __func__, ct, ct->status, ARG_TUPLE(orig));
+	return;
+
+alter:
+	/* Never alter conntrack for non-NAT conns */
+	if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
+		nf_conntrack_alter_reply(ct, &new_reply);
+	ip_vs_conn_put(cp);
+	return;
+}
+
+/*
+ * Create NF conntrack expectation with wildcard (optional) source port.
+ * Then the default callback function will alter the reply and will confirm
+ * the conntrack entry when the first packet comes.
+ */
+static void
+ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct,
+		     struct ip_vs_conn *cp, u_int8_t proto,
+		     const __be16 *port, int from_rs)
+{
+	struct nf_conntrack_expect *exp;
+
+	BUG_ON(!ct || ct == &nf_conntrack_untracked);
+
+	exp = nf_ct_expect_alloc(ct);
+	if (!exp)
+		return;
+
+	if (from_rs)
+		nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
+				  nf_ct_l3num(ct), &cp->daddr, &cp->caddr,
+				  proto, port, &cp->cport);
+	else
+		nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
+				  nf_ct_l3num(ct), &cp->caddr, &cp->vaddr,
+				  proto, port, &cp->vport);
+
+	exp->expectfn = ip_vs_expect_callback;
+
+	IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n",
+		  __func__, ct, ARG_TUPLE(&exp->tuple));
+	nf_ct_expect_related(exp);
+	nf_ct_expect_put(exp);
+}

 /*
  * Look at outgoing ftp packets to catch the response to a PASV command
@@ -149,7 +286,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	struct ip_vs_conn *n_cp;
 	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
 	unsigned buf_len;
-	int ret;
+	int ret = 0;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;

 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
@@ -209,23 +348,36 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	 */
 	from.ip = n_cp->vaddr.ip;
 	port = n_cp->vport;
-	sprintf(buf, "%u,%u,%u,%u,%u,%u", NIPQUAD(from.ip),
-		(ntohs(port)>>8)&255, ntohs(port)&255);
+	snprintf(buf, sizeof(buf), "%u,%u,%u,%u,%u,%u",
+		 ((unsigned char *)&from.ip)[0],
+		 ((unsigned char *)&from.ip)[1],
+		 ((unsigned char *)&from.ip)[2],
+		 ((unsigned char *)&from.ip)[3],
+		 ntohs(port) >> 8,
+		 ntohs(port) & 0xFF);
+
 	buf_len = strlen(buf);

+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct && !nf_ct_is_untracked(ct)) {
+		/* If mangling fails this function will return 0
+		 * which will cause the packet to be dropped.
+		 * Mangling can only fail under memory pressure,
+		 * hopefully it will succeed on the retransmitted
+		 * packet.
+		 */
+		ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+					       start-data, end-start,
+					       buf, buf_len);
+		if (ret)
+			ip_vs_expect_related(skb, ct, n_cp,
+					     IPPROTO_TCP, NULL, 0);
+	}
+
 	/*
-	 * Calculate required delta-offset to keep TCP happy
+	 * Not setting 'diff' is intentional, otherwise the sequence
+	 * would be adjusted twice.
 	 */
-	*diff = buf_len - (end-start);
-
-	if (*diff == 0) {
-		/* simply replace it with new passive address */
-		memcpy(start, buf, buf_len);
-		ret = 1;
-	} else {
-		ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
-					 end-start, buf, buf_len);
-	}

 	cp->app_data = NULL;
 	ip_vs_tcp_conn_listen(n_cp);
@@ -257,6 +409,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 	union nf_inet_addr to;
 	__be16 port;
 	struct ip_vs_conn *n_cp;
+	struct nf_conn *ct;

 #ifdef CONFIG_IP_VS_IPV6
 	/* This application helper doesn't work with IPv6 yet,
@@ -343,6 +496,11 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		ip_vs_control_add(n_cp, cp);
 	}

+	ct = (struct nf_conn *)skb->nfct;
+	if (ct && ct != &nf_conntrack_untracked)
+		ip_vs_expect_related(skb, ct, n_cp,
+				     IPPROTO_TCP, &n_cp->dport, 1);
+
 	/*
 	 *	Move tunnel to listen state
 	 */
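
The PASV rewrite in ip_vs_ftp_out() drops the removed NIPQUAD() macro, bounds the write with snprintf(), and leaves *diff untouched so the TCP sequence numbers are not adjusted a second time on top of nf_nat_mangle_tcp_packet(). A standalone userspace sketch of the same "a,b,c,d,hi,lo" formatting (the address and port are made-up demo values):

	#include <stdio.h>
	#include <stdint.h>
	#include <arpa/inet.h>

	int main(void)
	{
		char buf[24];				/* xxx.xxx.xxx.xxx,ppp,ppp\0 */
		uint32_t ip = inet_addr("192.0.2.10");	/* network byte order */
		uint16_t port = htons(50000);
		const unsigned char *b = (const unsigned char *)&ip;

		snprintf(buf, sizeof(buf), "%u,%u,%u,%u,%u,%u",
			 b[0], b[1], b[2], b[3],
			 ntohs(port) >> 8, ntohs(port) & 0xFF);
		printf("227 Entering Passive Mode (%s)\n", buf);
		return 0;
	}
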
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 94a45213faa6..9323f8944199 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -11,7 +11,7 @@
  * Changes:
  *     Martin Hamilton         :    fixed the terrible locking bugs
  *                                   *lock(tbl->lock) ==> *lock(&tbl->lock)
- *     Wensong Zhang           :    fixed the uninitilized tbl->lock bug
+ *     Wensong Zhang           :    fixed the uninitialized tbl->lock bug
  *     Wensong Zhang           :    added doing full expiration check to
  *                                   collect stale entries of 24+ hours when
  *                                   no partial expire check in a half hour
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 535dc2b419d8..dbeed8ea421a 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -386,7 +386,7 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
 	ip_vs_addr_copy(dest->af, &en->addr, daddr);
 	en->lastuse = jiffies;

-	/* initilize its dest set */
+	/* initialize its dest set */
 	atomic_set(&(en->set.size), 0);
 	INIT_LIST_HEAD(&en->set.list);
 	rwlock_init(&en->set.lock);
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 7fc49f4cf5ad..027f654799fe 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -98,6 +98,7 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto)

 	return NULL;
 }
+EXPORT_SYMBOL(ip_vs_proto_get);


 /*
@@ -167,26 +168,24 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,

 	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
 	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
+		sprintf(buf, "TRUNCATED");
 	else if (ih->frag_off & htons(IP_OFFSET))
-		sprintf(buf, "%s %pI4->%pI4 frag",
-			pp->name, &ih->saddr, &ih->daddr);
+		sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr);
 	else {
 		__be16 _ports[2], *pptr
 ;
 		pptr = skb_header_pointer(skb, offset + ih->ihl*4,
 					  sizeof(_ports), _ports);
 		if (pptr == NULL)
-			sprintf(buf, "%s TRUNCATED %pI4->%pI4",
-				pp->name, &ih->saddr, &ih->daddr);
+			sprintf(buf, "TRUNCATED %pI4->%pI4",
+				&ih->saddr, &ih->daddr);
 		else
-			sprintf(buf, "%s %pI4:%u->%pI4:%u",
-				pp->name,
+			sprintf(buf, "%pI4:%u->%pI4:%u",
 				&ih->saddr, ntohs(pptr[0]),
 				&ih->daddr, ntohs(pptr[1]));
 	}

-	pr_debug("%s: %s\n", msg, buf);
+	pr_debug("%s: %s %s\n", msg, pp->name, buf);
 }

 #ifdef CONFIG_IP_VS_IPV6
@@ -201,26 +200,24 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,

 	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
 	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
+		sprintf(buf, "TRUNCATED");
 	else if (ih->nexthdr == IPPROTO_FRAGMENT)
-		sprintf(buf, "%s %pI6->%pI6 frag",
-			pp->name, &ih->saddr, &ih->daddr);
+		sprintf(buf, "%pI6->%pI6 frag", &ih->saddr, &ih->daddr);
 	else {
 		__be16 _ports[2], *pptr;

 		pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
 					  sizeof(_ports), _ports);
 		if (pptr == NULL)
-			sprintf(buf, "%s TRUNCATED %pI6->%pI6",
-				pp->name, &ih->saddr, &ih->daddr);
+			sprintf(buf, "TRUNCATED %pI6->%pI6",
+				&ih->saddr, &ih->daddr);
 		else
-			sprintf(buf, "%s %pI6:%u->%pI6:%u",
-				pp->name,
+			sprintf(buf, "%pI6:%u->%pI6:%u",
 				&ih->saddr, ntohs(pptr[0]),
 				&ih->daddr, ntohs(pptr[1]));
 	}

-	pr_debug("%s: %s\n", msg, buf);
+	pr_debug("%s: %s %s\n", msg, pp->name, buf);
 }
 #endif

diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index c30b43c36cd7..1892dfc12fdd 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -136,12 +136,11 @@ ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,

 	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
 	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
+		sprintf(buf, "TRUNCATED");
 	else
-		sprintf(buf, "%s %pI4->%pI4",
-			pp->name, &ih->saddr, &ih->daddr);
+		sprintf(buf, "%pI4->%pI4", &ih->saddr, &ih->daddr);

-	pr_debug("%s: %s\n", msg, buf);
+	pr_debug("%s: %s %s\n", msg, pp->name, buf);
 }

 #ifdef CONFIG_IP_VS_IPV6
@@ -154,12 +153,11 @@ ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,

 	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
 	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
+		sprintf(buf, "TRUNCATED");
 	else
-		sprintf(buf, "%s %pI6->%pI6",
-			pp->name, &ih->saddr, &ih->daddr);
+		sprintf(buf, "%pI6->%pI6", &ih->saddr, &ih->daddr);

-	pr_debug("%s: %s\n", msg, buf);
+	pr_debug("%s: %s %s\n", msg, pp->name, buf);
 }
 #endif

diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index c9a3f7a21d53..4c0855cb006e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -8,55 +8,6 @@
 #include <net/sctp/checksum.h>
 #include <net/ip_vs.h>

-
-static struct ip_vs_conn *
-sctp_conn_in_get(int af,
-		 const struct sk_buff *skb,
-		 struct ip_vs_protocol *pp,
-		 const struct ip_vs_iphdr *iph,
-		 unsigned int proto_off,
-		 int inverse)
-{
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse))
-		return ip_vs_conn_in_get(af, iph->protocol,
-					 &iph->saddr, pptr[0],
-					 &iph->daddr, pptr[1]);
-	else
-		return ip_vs_conn_in_get(af, iph->protocol,
-					 &iph->daddr, pptr[1],
-					 &iph->saddr, pptr[0]);
-}
-
-static struct ip_vs_conn *
-sctp_conn_out_get(int af,
-		  const struct sk_buff *skb,
-		  struct ip_vs_protocol *pp,
-		  const struct ip_vs_iphdr *iph,
-		  unsigned int proto_off,
-		  int inverse)
-{
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse))
-		return ip_vs_conn_out_get(af, iph->protocol,
-					  &iph->saddr, pptr[0],
-					  &iph->daddr, pptr[1]);
-	else
-		return ip_vs_conn_out_get(af, iph->protocol,
-					  &iph->daddr, pptr[1],
-					  &iph->saddr, pptr[0]);
-}
-
 static int
 sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		   int *verdict, struct ip_vs_conn **cpp)
@@ -173,7 +124,7 @@ sctp_dnat_handler(struct sk_buff *skb,
 			return 0;

 		/* Call application helper if needed */
-		if (!ip_vs_app_pkt_out(cp, skb))
+		if (!ip_vs_app_pkt_in(cp, skb))
 			return 0;
 	}

@@ -1169,8 +1120,8 @@ struct ip_vs_protocol ip_vs_protocol_sctp = {
 	.register_app =		sctp_register_app,
 	.unregister_app =	sctp_unregister_app,
 	.conn_schedule =	sctp_conn_schedule,
-	.conn_in_get =		sctp_conn_in_get,
-	.conn_out_get =		sctp_conn_out_get,
+	.conn_in_get =		ip_vs_conn_in_get_proto,
+	.conn_out_get =		ip_vs_conn_out_get_proto,
 	.snat_handler =		sctp_snat_handler,
 	.dnat_handler =		sctp_dnat_handler,
 	.csum_check =		sctp_csum_check,
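
Note the quiet bugfix folded into the SCTP hunks above: sctp_dnat_handler() used to call the application helper for the wrong direction. The convention assumed by the fix, stated as a comment:

	/*
	 * snat_handler mangles server-to-client (reply) payloads and calls
	 * ip_vs_app_pkt_out(); dnat_handler mangles client-to-server
	 * (original) payloads and must call ip_vs_app_pkt_in().  The old
	 * SCTP code called the "out" helper from the DNAT path.
	 */
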
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 91d28e073742..282d24de8592 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -27,52 +27,6 @@

 #include <net/ip_vs.h>

-
-static struct ip_vs_conn *
-tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		int inverse)
-{
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		return ip_vs_conn_in_get(af, iph->protocol,
-					 &iph->saddr, pptr[0],
-					 &iph->daddr, pptr[1]);
-	} else {
-		return ip_vs_conn_in_get(af, iph->protocol,
-					 &iph->daddr, pptr[1],
-					 &iph->saddr, pptr[0]);
-	}
-}
-
-static struct ip_vs_conn *
-tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		 int inverse)
-{
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		return ip_vs_conn_out_get(af, iph->protocol,
-					  &iph->saddr, pptr[0],
-					  &iph->daddr, pptr[1]);
-	} else {
-		return ip_vs_conn_out_get(af, iph->protocol,
-					  &iph->daddr, pptr[1],
-					  &iph->saddr, pptr[0]);
-	}
-}
-
-
 static int
 tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  int *verdict, struct ip_vs_conn **cpp)
@@ -721,8 +675,8 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
 	.register_app =		tcp_register_app,
 	.unregister_app =	tcp_unregister_app,
 	.conn_schedule =	tcp_conn_schedule,
-	.conn_in_get =		tcp_conn_in_get,
-	.conn_out_get =		tcp_conn_out_get,
+	.conn_in_get =		ip_vs_conn_in_get_proto,
+	.conn_out_get =		ip_vs_conn_out_get_proto,
 	.snat_handler =		tcp_snat_handler,
 	.dnat_handler =		tcp_dnat_handler,
 	.csum_check =		tcp_csum_check,
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index e7a6885e0167..8553231b5d41 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -27,58 +27,6 @@
 #include <net/ip.h>
 #include <net/ip6_checksum.h>

-static struct ip_vs_conn *
-udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		int inverse)
-{
-	struct ip_vs_conn *cp;
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_in_get(af, iph->protocol,
-				       &iph->saddr, pptr[0],
-				       &iph->daddr, pptr[1]);
-	} else {
-		cp = ip_vs_conn_in_get(af, iph->protocol,
-				       &iph->daddr, pptr[1],
-				       &iph->saddr, pptr[0]);
-	}
-
-	return cp;
-}
-
-
-static struct ip_vs_conn *
-udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
-		 const struct ip_vs_iphdr *iph, unsigned int proto_off,
-		 int inverse)
-{
-	struct ip_vs_conn *cp;
-	__be16 _ports[2], *pptr;
-
-	pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
-	if (pptr == NULL)
-		return NULL;
-
-	if (likely(!inverse)) {
-		cp = ip_vs_conn_out_get(af, iph->protocol,
-					&iph->saddr, pptr[0],
-					&iph->daddr, pptr[1]);
-	} else {
-		cp = ip_vs_conn_out_get(af, iph->protocol,
-					&iph->daddr, pptr[1],
-					&iph->saddr, pptr[0]);
-	}
-
-	return cp;
-}
-
-
 static int
 udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		  int *verdict, struct ip_vs_conn **cpp)
@@ -520,8 +468,8 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
 	.init =			udp_init,
 	.exit =			udp_exit,
 	.conn_schedule =	udp_conn_schedule,
-	.conn_in_get =		udp_conn_in_get,
-	.conn_out_get =		udp_conn_out_get,
+	.conn_in_get =		ip_vs_conn_in_get_proto,
+	.conn_out_get =		ip_vs_conn_out_get_proto,
 	.snat_handler =		udp_snat_handler,
 	.dnat_handler =		udp_dnat_handler,
 	.csum_check =		udp_csum_check,
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 8fb0ae616761..7ba06939829f 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -802,7 +802,7 @@ static int sync_thread_backup(void *data)
 		ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);

 	while (!kthread_should_stop()) {
-		wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
+		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
 			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
 			 || kthread_should_stop());

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index e450cd6f4eb5..21e1a5e9b9d3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -28,6 +28,7 @@
 #include <net/ip6_route.h>
 #include <linux/icmpv6.h>
 #include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack.h>
 #include <linux/netfilter_ipv4.h>

 #include <net/ip_vs.h>
@@ -90,10 +91,10 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 				     &dest->addr.ip);
 			return NULL;
 		}
-		__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
+		__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst));
 		IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
 			  &dest->addr.ip,
-			  atomic_read(&rt->u.dst.__refcnt), rtos);
+			  atomic_read(&rt->dst.__refcnt), rtos);
 	}
 	spin_unlock(&dest->dst_lock);
 } else {
@@ -148,10 +149,10 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
 				     &dest->addr.in6);
 			return NULL;
 		}
-		__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
+		__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst));
 		IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
 			  &dest->addr.in6,
-			  atomic_read(&rt->u.dst.__refcnt));
+			  atomic_read(&rt->dst.__refcnt));
 	}
 	spin_unlock(&dest->dst_lock);
 } else {
@@ -198,7 +199,7 @@ do {							\
 	(skb)->ipvs_property = 1;			\
 	skb_forward_csum(skb);				\
 	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
-		(rt)->u.dst.dev, dst_output);		\
+		(rt)->dst.dev, dst_output);		\
 } while (0)


@@ -245,7 +246,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}

 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -265,12 +266,12 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,

 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;

-	IP_VS_XMIT(PF_INET, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);

 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -309,9 +310,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}

 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -323,18 +324,18 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 */
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(skb == NULL)) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return NF_STOLEN;
 	}

 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;

-	IP_VS_XMIT(PF_INET6, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -348,6 +349,30 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 }
 #endif

+static void
+ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
+{
+	struct nf_conn *ct = (struct nf_conn *)skb->nfct;
+	struct nf_conntrack_tuple new_tuple;
+
+	if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct))
+		return;
+
+	/*
+	 * The connection is not yet in the hashtable, so we update it.
+	 * CIP->VIP will remain the same, so leave the tuple in
+	 * IP_CT_DIR_ORIGINAL untouched.  When the reply comes back from the
+	 * real-server we will see RIP->DIP.
+	 */
+	new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+	new_tuple.src.u3 = cp->daddr;
+	/*
+	 * This will also take care of UDP and other protocols.
+	 */
+	new_tuple.src.u.tcp.port = cp->dport;
+	nf_conntrack_alter_reply(ct, &new_tuple);
+}
+
 /*
  *      NAT transmitter (only for outside-to-inside nat forwarding)
  *      Not used for related ICMP
@@ -376,7 +401,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;

 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -388,12 +413,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, sizeof(struct iphdr)))
 		goto tx_error_put;

-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;

 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -403,6 +428,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,

 	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

+	ip_vs_update_conntrack(skb, cp);
+
 	/* FIXME: when application helper enlarges the packet and the length
 	   is larger than the MTU of outgoing device, there will be still
 	   MTU problem. */
@@ -410,7 +437,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;

-	IP_VS_XMIT(PF_INET, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);

 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -452,9 +479,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;

 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 				 "ip_vs_nat_xmit_v6(): frag needed for");
@@ -465,12 +492,12 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
 		goto tx_error_put;

-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;

 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -479,6 +506,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,

 	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

+	ip_vs_update_conntrack(skb, cp);
+
 	/* FIXME: when application helper enlarges the packet and the length
 	   is larger than the MTU of outgoing device, there will be still
 	   MTU problem. */
@@ -486,7 +515,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;

-	IP_VS_XMIT(PF_INET6, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -498,7 +527,7 @@ tx_error:
 	kfree_skb(skb);
 	return NF_STOLEN;
 tx_error_put:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto tx_error;
 }
 #endif
@@ -549,9 +578,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
 		goto tx_error_icmp;

-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;

-	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 	if (mtu < 68) {
 		ip_rt_put(rt);
 		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
@@ -601,7 +630,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,

 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	/*
 	 *	Push down and install the IPIP header.
@@ -615,7 +644,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->daddr = rt->rt_dst;
 	iph->saddr = rt->rt_src;
 	iph->ttl = old_iph->ttl;
-	ip_select_ident(iph, &rt->u.dst, NULL);
+	ip_select_ident(iph, &rt->dst, NULL);

 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -660,12 +689,12 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!rt)
 		goto tx_error_icmp;

-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;

-	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
+	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
 	/* TODO IPv6: do we need this check in IPv6? */
 	if (mtu < 1280) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__);
 		goto tx_error;
 	}
@@ -674,7 +703,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,

 	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -689,7 +718,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		struct sk_buff *new_skb =
 			skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb) {
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			kfree_skb(skb);
 			IP_VS_ERR_RL("%s(): no memory\n", __func__);
 			return NF_STOLEN;
@@ -707,7 +736,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,

 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	/*
 	 *	Push down and install the IPIP header.
@@ -760,7 +789,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;

 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
@@ -780,12 +809,12 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,

 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;

-	IP_VS_XMIT(PF_INET, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);

 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -813,10 +842,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;

 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -827,18 +856,18 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 */
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(skb == NULL)) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return NF_STOLEN;
 	}

 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;

-	IP_VS_XMIT(PF_INET6, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -888,7 +917,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;

 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
@@ -900,19 +929,19 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, offset))
 		goto tx_error_put;

-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;

 	/* drop the old route when skb is not shared */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);

 	ip_vs_nat_icmp(skb, pp, cp, 0);

 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;

-	IP_VS_XMIT(PF_INET, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);

 	rc = NF_STOLEN;
 	goto out;
@@ -963,9 +992,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;

 	/* MTU checking */
966 mtu = dst_mtu(&rt->u.dst); 995 mtu = dst_mtu(&rt->dst);
967 if (skb->len > mtu) { 996 if (skb->len > mtu) {
968 dst_release(&rt->u.dst); 997 dst_release(&rt->dst);
969 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 998 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
970 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 999 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
971 goto tx_error; 1000 goto tx_error;
@@ -975,19 +1004,19 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
975 if (!skb_make_writable(skb, offset)) 1004 if (!skb_make_writable(skb, offset))
976 goto tx_error_put; 1005 goto tx_error_put;
977 1006
978 if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) 1007 if (skb_cow(skb, rt->dst.dev->hard_header_len))
979 goto tx_error_put; 1008 goto tx_error_put;
980 1009
981 /* drop the old route when skb is not shared */ 1010 /* drop the old route when skb is not shared */
982 skb_dst_drop(skb); 1011 skb_dst_drop(skb);
983 skb_dst_set(skb, &rt->u.dst); 1012 skb_dst_set(skb, &rt->dst);
984 1013
985 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1014 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
986 1015
987 /* Another hack: avoid icmp_send in ip_fragment */ 1016 /* Another hack: avoid icmp_send in ip_fragment */
988 skb->local_df = 1; 1017 skb->local_df = 1;
989 1018
990 IP_VS_XMIT(PF_INET6, skb, rt); 1019 IP_VS_XMIT(NFPROTO_IPV6, skb, rt);
991 1020
992 rc = NF_STOLEN; 1021 rc = NF_STOLEN;
993 goto out; 1022 goto out;
@@ -1001,7 +1030,7 @@ out:
1001 LeaveFunction(10); 1030 LeaveFunction(10);
1002 return rc; 1031 return rc;
1003tx_error_put: 1032tx_error_put:
1004 dst_release(&rt->u.dst); 1033 dst_release(&rt->dst);
1005 goto tx_error; 1034 goto tx_error;
1006} 1035}
1007#endif 1036#endif
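
The rt->u.dst to rt->dst rename that dominates the hunks above (and recurs across the rest of this patch) comes from flattening the union at the head of struct rtable so the dst_entry is embedded directly. A minimal sketch of the before/after layout; field lists are abbreviated and the _old/_new suffixes are illustrative, not kernel names:

/* Before: the dst lived inside a one-member union, forcing rt->u.dst
 * at every call site. */
struct rtable_old {
	union {
		struct dst_entry	dst;
	} u;
	/* ... routing-specific fields ... */
};

/* After: dst_entry is the first member, so &rt->dst and a cast of rt
 * are interchangeable, and call sites shorten to rt->dst. */
struct rtable_new {
	struct dst_entry	dst;
	/* ... routing-specific fields ... */
};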
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index ab81b380eae6..5178c691ecbf 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -17,13 +17,7 @@
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 
-#ifdef CONFIG_NF_CT_ACCT
-#define NF_CT_ACCT_DEFAULT 1
-#else
-#define NF_CT_ACCT_DEFAULT 0
-#endif
-
-static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
+static int nf_ct_acct __read_mostly;
 
 module_param_named(acct, nf_ct_acct, bool, 0644);
 MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
@@ -114,12 +108,6 @@ int nf_conntrack_acct_init(struct net *net)
 	net->ct.sysctl_acct = nf_ct_acct;
 
 	if (net_eq(net, &init_net)) {
-#ifdef CONFIG_NF_CT_ACCT
-		printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n");
-		printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n");
-		printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
-#endif
-
 		ret = nf_ct_extend_register(&acct_extend);
 		if (ret < 0) {
 			printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
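
With the CONFIG_NF_CT_ACCT compile-time default gone, accounting defaults to off and is controlled entirely at runtime through the bool parameter declared above (mode 0644 makes it writable via /sys/module). A sketch of the three equivalent switches, taken from the deprecation text the old code printed:

/* built in:   nf_conntrack.acct=1 on the kernel command line       */
/* as module:  modprobe nf_conntrack acct=1                         */
/* at runtime: sysctl net.netfilter.nf_conntrack_acct=1 (per netns) */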
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 372e80f07a81..13fd2c55e329 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -108,7 +108,7 @@ static int amanda_help(struct sk_buff *skb,
 	dataoff = protoff + sizeof(struct udphdr);
 	if (dataoff >= skb->len) {
 		if (net_ratelimit())
-			printk("amanda_help: skblen = %u\n", skb->len);
+			printk(KERN_ERR "amanda_help: skblen = %u\n", skb->len);
 		return NF_ACCEPT;
 	}
 
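
A printk() without a KERN_* marker is emitted at the default console loglevel, which is why the logging cleanups throughout this patch pin each message to an explicit level. The marker is simply a prefix constant pasted onto the format string, and the pr_*() helpers used in later files are shorthand for the same thing:

	printk(KERN_ERR "amanda_help: skblen = %u\n", skb->len); /* explicit level */
	pr_err("amanda_help: skblen = %u\n", skb->len);          /* expands to the line above */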
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0c9bbe93cc16..df3eedb142ff 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -62,8 +62,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct nf_conn nf_conntrack_untracked __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
+DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
@@ -319,8 +319,10 @@ begin:
 	 * not the expected one, we must restart lookup.
 	 * We probably met an item that was moved to another chain.
 	 */
-	if (get_nulls_value(n) != hash)
+	if (get_nulls_value(n) != hash) {
+		NF_CT_STAT_INC(net, search_restart);
 		goto begin;
+	}
 	local_bh_enable();
 
 	return NULL;
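
The restart that now bumps the search_restart counter is inherent to lockless lookups over an hlist_nulls chain: under SLAB_DESTROY_BY_RCU an entry can be freed and reused on another chain while a reader is mid-walk, so the reader only trusts a traversal that terminates on the nulls value belonging to the bucket it started from. A sketch of the pattern, with table and match() as illustrative names:

begin:
	hlist_nulls_for_each_entry_rcu(obj, n, &table[hash], node) {
		if (match(obj, key))
			return obj;
	}
	/* The terminating nulls encodes its home bucket; if it is not
	 * ours, an entry we crossed was recycled onto another chain and
	 * the walk may have skipped valid entries: start over. */
	if (get_nulls_value(n) != hash)
		goto begin;
	return NULL;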
@@ -422,6 +424,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 
 	spin_lock_bh(&nf_conntrack_lock);
 
+	/* We have to check the DYING flag inside the lock to prevent
+	   a race against nf_ct_get_next_corpse() possibly called from
+	   user context, else we insert an already 'dead' hash, blocking
+	   further use of that particular connection -JM */
+
+	if (unlikely(nf_ct_is_dying(ct))) {
+		spin_unlock_bh(&nf_conntrack_lock);
+		return NF_ACCEPT;
+	}
+
 	/* See if there's one in the list already, including reverse:
 	   NAT could have grabbed it without realizing, since we're
 	   not in the hash.  If there is, we lost race. */
@@ -607,9 +619,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 	ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
 	/* Don't set timer yet: wait for confirmation */
 	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
-#ifdef CONFIG_NET_NS
-	ct->ct_net = net;
-#endif
+	write_pnet(&ct->ct_net, net);
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	if (zone) {
 		struct nf_conntrack_zone *nf_ct_zone;
@@ -956,8 +966,7 @@ acct:
 		if (acct) {
 			spin_lock_bh(&ct->lock);
 			acct[CTINFO2DIR(ctinfo)].packets++;
-			acct[CTINFO2DIR(ctinfo)].bytes +=
-				skb->len - skb_network_offset(skb);
+			acct[CTINFO2DIR(ctinfo)].bytes += skb->len;
 			spin_unlock_bh(&ct->lock);
 		}
 	}
@@ -1171,10 +1180,21 @@ static void nf_ct_release_dying_list(struct net *net)
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 
+static int untrack_refs(void)
+{
+	int cnt = 0, cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
+
+		cnt += atomic_read(&ct->ct_general.use) - 1;
+	}
+	return cnt;
+}
+
 static void nf_conntrack_cleanup_init_net(void)
 {
-	/* wait until all references to nf_conntrack_untracked are dropped */
-	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+	while (untrack_refs() > 0)
 		schedule();
 
 	nf_conntrack_helper_fini();
@@ -1309,10 +1329,19 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 		  &nf_conntrack_htable_size, 0600);
 
+void nf_ct_untracked_status_or(unsigned long bits)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(nf_conntrack_untracked, cpu).status |= bits;
+}
+EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
+
 static int nf_conntrack_init_init_net(void)
 {
 	int max_factor = 8;
-	int ret;
+	int ret, cpu;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
 	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
@@ -1333,7 +1362,7 @@ static int nf_conntrack_init_init_net(void)
 	}
 	nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
-	printk("nf_conntrack version %s (%u buckets, %d max)\n",
+	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
 	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
 	       nf_conntrack_max);
 
@@ -1351,13 +1380,13 @@ static int nf_conntrack_init_init_net(void)
 		goto err_extend;
 #endif
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
-#ifdef CONFIG_NET_NS
-	nf_conntrack_untracked.ct_net = &init_net;
-#endif
-	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
+	for_each_possible_cpu(cpu) {
+		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
+		write_pnet(&ct->ct_net, &init_net);
+		atomic_set(&ct->ct_general.use, 1);
+	}
 	/* - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
-
+	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 	return 0;
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
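
Turning the single global untracked conntrack into one instance per CPU removes a heavily contended atomic: every NOTRACKed packet used to bump the same ct_general.use counter from all CPUs at once, bouncing one cache line system-wide. With per-cpu copies the refcount a packet takes stays CPU-local, and only slow paths (the cleanup wait and status updates above) walk all instances. A sketch of the fast-path accessor side of this scheme; the helper name is illustrative of the per-cpu idiom, not a verbatim quote:

/* Fast path: grab this CPU's untracked conntrack; no cross-CPU
 * traffic is generated when its refcount is taken. */
static inline struct nf_conn *untracked_get(void)
{
	return &__get_cpu_var(nf_conntrack_untracked);
}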
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index f516961a83b4..cdcc7649476b 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -85,7 +85,8 @@ int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
 	struct nf_ct_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference(nf_conntrack_event_cb);
+	notify = rcu_dereference_protected(nf_conntrack_event_cb,
+					   lockdep_is_held(&nf_ct_ecache_mutex));
 	if (notify != NULL) {
 		ret = -EBUSY;
 		goto out_unlock;
@@ -105,7 +106,8 @@ void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
 	struct nf_ct_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference(nf_conntrack_event_cb);
+	notify = rcu_dereference_protected(nf_conntrack_event_cb,
+					   lockdep_is_held(&nf_ct_ecache_mutex));
 	BUG_ON(notify != new);
 	rcu_assign_pointer(nf_conntrack_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
@@ -118,7 +120,8 @@ int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
 	struct nf_exp_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference(nf_expect_event_cb);
+	notify = rcu_dereference_protected(nf_expect_event_cb,
+					   lockdep_is_held(&nf_ct_ecache_mutex));
 	if (notify != NULL) {
 		ret = -EBUSY;
 		goto out_unlock;
@@ -138,7 +141,8 @@ void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
 	struct nf_exp_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference(nf_expect_event_cb);
+	notify = rcu_dereference_protected(nf_expect_event_cb,
+					   lockdep_is_held(&nf_ct_ecache_mutex));
 	BUG_ON(notify != new);
 	rcu_assign_pointer(nf_expect_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
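
rcu_dereference_protected() is the update-side counterpart of rcu_dereference(): when the writer's mutex is held, the pointer cannot change underneath us, so instead of entering an RCU read-side section the access is annotated with the lock condition and lockdep verifies the claim at runtime. A self-contained sketch of the pattern, with cb_ptr and struct callback as illustrative names:

static DEFINE_MUTEX(cb_mutex);
static struct callback *cb_ptr;

static int register_cb(struct callback *new)
{
	struct callback *old;
	int ret = 0;

	mutex_lock(&cb_mutex);
	/* safe plain read: cb_mutex serializes all writers */
	old = rcu_dereference_protected(cb_ptr,
					lockdep_is_held(&cb_mutex));
	if (old)
		ret = -EBUSY;
	else
		rcu_assign_pointer(cb_ptr, new);	/* publish to readers */
	mutex_unlock(&cb_mutex);
	return ret;
}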
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index fdc8fb4ae10f..7dcf7a404190 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -23,9 +23,10 @@ void __nf_ct_ext_destroy(struct nf_conn *ct)
 {
 	unsigned int i;
 	struct nf_ct_ext_type *t;
+	struct nf_ct_ext *ext = ct->ext;
 
 	for (i = 0; i < NF_CT_EXT_NUM; i++) {
-		if (!nf_ct_ext_exist(ct, i))
+		if (!__nf_ct_ext_exist(ext, i))
 			continue;
 
 		rcu_read_lock();
@@ -73,44 +74,45 @@ static void __nf_ct_ext_free_rcu(struct rcu_head *head)
 
 void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
 {
-	struct nf_ct_ext *new;
+	struct nf_ct_ext *old, *new;
 	int i, newlen, newoff;
 	struct nf_ct_ext_type *t;
 
 	/* Conntrack must not be confirmed to avoid races on reallocation. */
 	NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
 
-	if (!ct->ext)
+	old = ct->ext;
+	if (!old)
 		return nf_ct_ext_create(&ct->ext, id, gfp);
 
-	if (nf_ct_ext_exist(ct, id))
+	if (__nf_ct_ext_exist(old, id))
 		return NULL;
 
 	rcu_read_lock();
 	t = rcu_dereference(nf_ct_ext_types[id]);
 	BUG_ON(t == NULL);
 
-	newoff = ALIGN(ct->ext->len, t->align);
+	newoff = ALIGN(old->len, t->align);
 	newlen = newoff + t->len;
 	rcu_read_unlock();
 
-	new = __krealloc(ct->ext, newlen, gfp);
+	new = __krealloc(old, newlen, gfp);
 	if (!new)
 		return NULL;
 
-	if (new != ct->ext) {
+	if (new != old) {
 		for (i = 0; i < NF_CT_EXT_NUM; i++) {
-			if (!nf_ct_ext_exist(ct, i))
+			if (!__nf_ct_ext_exist(old, i))
 				continue;
 
 			rcu_read_lock();
 			t = rcu_dereference(nf_ct_ext_types[i]);
 			if (t && t->move)
 				t->move((void *)new + new->offset[i],
-					(void *)ct->ext + ct->ext->offset[i]);
+					(void *)old + old->offset[i]);
 			rcu_read_unlock();
 		}
-		call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu);
+		call_rcu(&old->rcu, __nf_ct_ext_free_rcu);
 		ct->ext = new;
 	}
 
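
Snapshotting ct->ext into old before the __krealloc() matters: once the new block is published in ct->ext, every later reference must keep using the snapshot, both to copy the extension payloads out of the right memory and to hand the correct pointer to call_rcu(). The deferred free itself follows the standard embedded-rcu_head idiom; a sketch under the assumption that struct nf_ct_ext embeds its rcu_head as the call above implies:

static void __nf_ct_ext_free_rcu(struct rcu_head *head)
{
	/* recover the enclosing extension block from its embedded
	 * rcu_head and free it only after all pre-existing RCU
	 * readers have left their read-side sections */
	kfree(container_of(head, struct nf_ct_ext, rcu));
}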
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 2ae3169e7633..e17cb7c7dd8f 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -573,8 +573,8 @@ static int __init nf_conntrack_ftp_init(void)
 					     ftp[i][j].tuple.src.l3num, ports[i]);
 			ret = nf_conntrack_helper_register(&ftp[i][j]);
 			if (ret) {
-				printk("nf_ct_ftp: failed to register helper "
-				       " for pf: %d port: %d\n",
+				printk(KERN_ERR "nf_ct_ftp: failed to register"
+				       " helper for pf: %d port: %d\n",
 				       ftp[i][j].tuple.src.l3num, ports[i]);
 				nf_conntrack_ftp_fini();
 				return ret;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a487c8038044..b969025cf82f 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -194,8 +194,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
 			return 0;
 		}
 
-		if (net_ratelimit())
-			printk("nf_ct_h323: incomplete TPKT (fragmented?)\n");
+		pr_debug("nf_ct_h323: incomplete TPKT (fragmented?)\n");
 		goto clear_out;
 	}
 
@@ -608,7 +607,7 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff,
       drop:
 	spin_unlock_bh(&nf_h323_lock);
 	if (net_ratelimit())
-		printk("nf_ct_h245: packet dropped\n");
+		pr_info("nf_ct_h245: packet dropped\n");
 	return NF_DROP;
 }
 
@@ -735,11 +734,11 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 		if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
 			if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
 				if (rt1->rt_gateway == rt2->rt_gateway &&
-				    rt1->u.dst.dev == rt2->u.dst.dev)
+				    rt1->dst.dev == rt2->dst.dev)
 					ret = 1;
-				dst_release(&rt2->u.dst);
+				dst_release(&rt2->dst);
 			}
-			dst_release(&rt1->u.dst);
+			dst_release(&rt1->dst);
 		}
 		break;
 	}
@@ -754,11 +753,11 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 			if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
 				if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
 					    sizeof(rt1->rt6i_gateway)) &&
-				    rt1->u.dst.dev == rt2->u.dst.dev)
+				    rt1->dst.dev == rt2->dst.dev)
 					ret = 1;
-				dst_release(&rt2->u.dst);
+				dst_release(&rt2->dst);
 			}
-			dst_release(&rt1->u.dst);
+			dst_release(&rt1->dst);
 		}
 		break;
 	}
@@ -1153,7 +1152,7 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff,
       drop:
 	spin_unlock_bh(&nf_h323_lock);
 	if (net_ratelimit())
-		printk("nf_ct_q931: packet dropped\n");
+		pr_info("nf_ct_q931: packet dropped\n");
 	return NF_DROP;
 }
 
@@ -1728,7 +1727,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff,
       drop:
 	spin_unlock_bh(&nf_h323_lock);
 	if (net_ratelimit())
-		printk("nf_ct_ras: packet dropped\n");
+		pr_info("nf_ct_ras: packet dropped\n");
 	return NF_DROP;
 }
 
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 7673930ca342..b394aa318776 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -235,7 +235,7 @@ static int __init nf_conntrack_irc_init(void)
 	char *tmpname;
 
 	if (max_dcc_channels < 1) {
-		printk("nf_ct_irc: max_dcc_channels must not be zero\n");
+		printk(KERN_ERR "nf_ct_irc: max_dcc_channels must not be zero\n");
 		return -EINVAL;
 	}
 
@@ -267,7 +267,7 @@ static int __init nf_conntrack_irc_init(void)
 
 		ret = nf_conntrack_helper_register(&irc[i]);
 		if (ret) {
-			printk("nf_ct_irc: failed to register helper "
+			printk(KERN_ERR "nf_ct_irc: failed to register helper "
 			       "for pf: %u port: %u\n",
 			       irc[i].tuple.src.l3num, ports[i]);
 			nf_conntrack_irc_fini();
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 497b2224536f..aadde018a072 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -61,7 +61,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 		goto out;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	in_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (in_dev != NULL) {
 		for_primary_ifa(in_dev) {
 			if (ifa->ifa_broadcast == iph->daddr) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index afc52f2ee4ac..5bae1cd15eea 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -427,6 +427,17 @@ ctnetlink_proto_size(const struct nf_conn *ct)
 }
 
 static inline size_t
+ctnetlink_counters_size(const struct nf_conn *ct)
+{
+	if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT))
+		return 0;
+	return 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
+	       + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
+	       + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
+	       ;
+}
+
+static inline size_t
 ctnetlink_nlmsg_size(const struct nf_conn *ct)
 {
 	return NLMSG_ALIGN(sizeof(struct nfgenmsg))
@@ -436,11 +447,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
 	       + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
-#ifdef CONFIG_NF_CT_ACCT
-	       + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
-	       + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
-	       + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
-#endif
+	       + ctnetlink_counters_size(ct)
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
 	       + nla_total_size(0) /* CTA_PROTOINFO */
 	       + nla_total_size(0) /* CTA_HELP */
@@ -473,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	int err;
 
 	/* ignore our fake conntrack entry */
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return 0;
 
 	if (events & (1 << IPCT_DESTROY)) {
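
With one untracked conntrack per CPU there is no longer a single address to compare against, so identity tests throughout the tree become a status-bit check; IPS_UNTRACKED is set on every per-cpu instance at init time via nf_ct_untracked_status_or(). A sketch of the accessor this line relies on:

static inline int nf_ct_is_untracked(const struct nf_conn *ct)
{
	/* true for any of the per-cpu fake entries, on any CPU */
	return test_bit(IPS_UNTRACKED_BIT, &ct->status);
}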
@@ -2050,29 +2057,29 @@ static int __init ctnetlink_init(void)
 {
 	int ret;
 
-	printk("ctnetlink v%s: registering with nfnetlink.\n", version);
+	pr_info("ctnetlink v%s: registering with nfnetlink.\n", version);
 	ret = nfnetlink_subsys_register(&ctnl_subsys);
 	if (ret < 0) {
-		printk("ctnetlink_init: cannot register with nfnetlink.\n");
+		pr_err("ctnetlink_init: cannot register with nfnetlink.\n");
 		goto err_out;
 	}
 
 	ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
 	if (ret < 0) {
-		printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
+		pr_err("ctnetlink_init: cannot register exp with nfnetlink.\n");
 		goto err_unreg_subsys;
 	}
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	ret = nf_conntrack_register_notifier(&ctnl_notifier);
 	if (ret < 0) {
-		printk("ctnetlink_init: cannot register notifier.\n");
+		pr_err("ctnetlink_init: cannot register notifier.\n");
 		goto err_unreg_exp_subsys;
 	}
 
 	ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp);
 	if (ret < 0) {
-		printk("ctnetlink_init: cannot expect register notifier.\n");
+		pr_err("ctnetlink_init: cannot expect register notifier.\n");
 		goto err_unreg_notifier;
 	}
 #endif
@@ -2093,7 +2100,7 @@ err_out:
 
 static void __exit ctnetlink_exit(void)
 {
-	printk("ctnetlink: unregistering from nfnetlink.\n");
+	pr_info("ctnetlink: unregistering from nfnetlink.\n");
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	nf_ct_expect_unregister_notifier(&ctnl_notifier_exp);
@@ -2102,7 +2109,6 @@ static void __exit ctnetlink_exit(void)
 
 	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
 	nfnetlink_subsys_unregister(&ctnl_subsys);
-	return;
 }
 
 module_init(ctnetlink_init);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index a44fa75b5178..5886ba1d52a0 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -14,12 +14,10 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
-#include <linux/skbuff.h>
 #include <linux/vmalloc.h>
 #include <linux/stddef.h>
 #include <linux/err.h>
 #include <linux/percpu.h>
-#include <linux/moduleparam.h>
 #include <linux/notifier.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
@@ -119,9 +117,13 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
 {
 	struct nf_conntrack_l3proto *p;
 
-	/* rcu_read_lock not necessary since the caller holds a reference */
+	/* rcu_read_lock not necessary since the caller holds a reference, but
+	 * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find()
+	 */
+	rcu_read_lock();
 	p = __nf_ct_l3proto_find(l3proto);
 	module_put(p->me);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
 
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b68ff15ed979..c6049c2d5ea8 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -717,12 +717,12 @@ static int __init nf_conntrack_proto_sctp_init(void)
 
 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp4);
 	if (ret) {
-		printk("nf_conntrack_l4proto_sctp4: protocol register failed\n");
+		pr_err("nf_conntrack_l4proto_sctp4: protocol register failed\n");
 		goto out;
 	}
 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp6);
 	if (ret) {
-		printk("nf_conntrack_l4proto_sctp6: protocol register failed\n");
+		pr_err("nf_conntrack_l4proto_sctp6: protocol register failed\n");
 		goto cleanup_sctp4;
 	}
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9dd8cd4fb6e6..c4c885dca3bd 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -585,8 +585,16 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			 * Let's try to use the data from the packet.
 			 */
 			sender->td_end = end;
+			win <<= sender->td_scale;
 			sender->td_maxwin = (win == 0 ? 1 : win);
 			sender->td_maxend = end + sender->td_maxwin;
+			/*
+			 * We haven't seen traffic in the other direction yet
+			 * but we have to tweak window tracking to pass III
+			 * and IV until that happens.
+			 */
+			if (receiver->td_maxwin == 0)
+				receiver->td_end = receiver->td_maxend = sack;
 		}
 	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
 		     && dir == IP_CT_DIR_ORIGINAL)
@@ -680,7 +688,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 		/*
 		 * Update receiver data.
 		 */
-		if (after(end, sender->td_maxend))
+		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
 			receiver->td_maxwin += end - sender->td_maxend;
 		if (after(sack + win, receiver->td_maxend - 1)) {
 			receiver->td_maxend = sack + win;
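
The added shift matters when conntrack picks up a connection mid-stream: the 16-bit window field on the wire carries the scaled value, so seeding td_maxwin from it without applying td_scale can wildly understate the real window and make tracking conditions III and IV reject legitimate packets. A sketch of the correction, with illustrative names:

/* raw_win comes straight from the TCP header; td_scale was learned
 * from the window-scale option on the SYN, or assumed when the
 * handshake was not observed */
static u32 effective_window(u16 raw_win, u8 td_scale)
{
	return (u32)raw_win << td_scale;
}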
@@ -736,27 +744,19 @@ static bool tcp_in_window(const struct nf_conn *ct,
 	return res;
 }
 
-#define	TH_FIN	0x01
-#define	TH_SYN	0x02
-#define	TH_RST	0x04
-#define	TH_PUSH	0x08
-#define	TH_ACK	0x10
-#define	TH_URG	0x20
-#define	TH_ECE	0x40
-#define	TH_CWR	0x80
-
 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
-static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
+static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
+				 TCPHDR_URG) + 1] =
 {
-	[TH_SYN]			= 1,
-	[TH_SYN|TH_URG]			= 1,
-	[TH_SYN|TH_ACK]			= 1,
-	[TH_RST]			= 1,
-	[TH_RST|TH_ACK]			= 1,
-	[TH_FIN|TH_ACK]			= 1,
-	[TH_FIN|TH_ACK|TH_URG]		= 1,
-	[TH_ACK]			= 1,
-	[TH_ACK|TH_URG]			= 1,
+	[TCPHDR_SYN]				= 1,
+	[TCPHDR_SYN|TCPHDR_URG]			= 1,
+	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
+	[TCPHDR_RST]				= 1,
+	[TCPHDR_RST|TCPHDR_ACK]			= 1,
+	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
+	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
+	[TCPHDR_ACK]				= 1,
+	[TCPHDR_ACK|TCPHDR_URG]			= 1,
 };
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
@@ -803,7 +803,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	}
 
 	/* Check TCP flags. */
-	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
+	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
 	if (!tcp_valid_flags[tcpflags]) {
 		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
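
The private TH_* set gives way to the shared TCPHDR_* constants and the tcp_flag_byte() accessor, so every user of the flag octet agrees on a single definition. The values match the removed defines above, and judging by the old open-coded expression the accessor reads byte 13 of the header, where the eight flag bits live. A sketch of the equivalence:

#define TCPHDR_FIN 0x01
#define TCPHDR_SYN 0x02
#define TCPHDR_RST 0x04
#define TCPHDR_PSH 0x08
#define TCPHDR_ACK 0x10
#define TCPHDR_URG 0x20
#define TCPHDR_ECE 0x40
#define TCPHDR_CWR 0x80

/* the flag octet is the 14th byte of the TCP header */
#define tcp_flag_byte(th) (((u_int8_t *)(th))[13])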
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index c6cd1b84eddd..53d892210a04 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1393,10 +1393,8 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff,
 
 	nf_ct_refresh(ct, skb, sip_timeout * HZ);
 
-	if (skb_is_nonlinear(skb)) {
-		pr_debug("Copy of skbuff not supported yet.\n");
-		return NF_ACCEPT;
-	}
+	if (unlikely(skb_linearize(skb)))
+		return NF_DROP;
 
 	dptr = skb->data + dataoff;
 	datalen = skb->len - dataoff;
@@ -1455,10 +1453,8 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
 
 	nf_ct_refresh(ct, skb, sip_timeout * HZ);
 
-	if (skb_is_nonlinear(skb)) {
-		pr_debug("Copy of skbuff not supported yet.\n");
-		return NF_ACCEPT;
-	}
+	if (unlikely(skb_linearize(skb)))
+		return NF_DROP;
 
 	dptr = skb->data + dataoff;
 	datalen = skb->len - dataoff;
@@ -1549,8 +1545,8 @@ static int __init nf_conntrack_sip_init(void)
 
 			ret = nf_conntrack_helper_register(&sip[i][j]);
 			if (ret) {
-				printk("nf_ct_sip: failed to register helper "
-				       "for pf: %u port: %u\n",
+				printk(KERN_ERR "nf_ct_sip: failed to register"
+				       " helper for pf: %u port: %u\n",
 				       sip[i][j].tuple.src.l3num, ports[i]);
 				nf_conntrack_sip_fini();
 				return ret;
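
Previously a SIP message spread across paged skb fragments was simply waved through untracked. skb_linearize() pulls all fragment data into the head buffer so the text parser can walk it with plain pointer arithmetic, and the packet is dropped only when that pull fails (an allocation failure). A sketch of the contract the helper now relies on:

	if (unlikely(skb_linearize(skb)))
		return NF_DROP;		/* could not linearize: refuse to parse blind */

	/* after a successful linearize the payload is contiguous, so
	 * skb->data + dataoff really addresses the whole SIP message */
	dptr = skb->data + dataoff;
	datalen = skb->len - dataoff;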
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index faa8eb3722b9..eb973fcd67ab 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -252,12 +252,12 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	const struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
+		seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
 		return 0;
 	}
 
 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
-			"%08x %08x %08x %08x %08x %08x %08x %08x \n",
+			"%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
 		   nr_conntracks,
 		   st->searched,
 		   st->found,
@@ -274,7 +274,8 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 
 		   st->expect_new,
 		   st->expect_create,
-		   st->expect_delete
+		   st->expect_delete,
+		   st->search_restart
 		);
 	return 0;
 }
@@ -445,7 +446,7 @@ out_kmemdup:
 	if (net_eq(net, &init_net))
 		unregister_sysctl_table(nf_ct_netfilter_header);
 out:
-	printk("nf_conntrack: can't register to sysctl.\n");
+	printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n");
 	return -ENOMEM;
 }
 
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index 46e646b2e9b9..75466fd72f4f 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -138,8 +138,8 @@ static int __init nf_conntrack_tftp_init(void)
 
 			ret = nf_conntrack_helper_register(&tftp[i][j]);
 			if (ret) {
-				printk("nf_ct_tftp: failed to register helper "
-				       "for pf: %u port: %u\n",
+				printk(KERN_ERR "nf_ct_tftp: failed to register"
+				       " helper for pf: %u port: %u\n",
 				       tftp[i][j].tuple.src.l3num, ports[i]);
 				nf_conntrack_tftp_fini();
 				return ret;
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index bf6609978af7..770f76432ad0 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -6,7 +6,7 @@
 #include <linux/netdevice.h>
 
 #ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...)  printk(format , ## args)
+#define NFDEBUG(format, args...)  printk(KERN_DEBUG format , ## args)
 #else
 #define NFDEBUG(format, args...)
 #endif
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 015725a5cd50..7df37fd786bc 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -52,7 +52,8 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
 	} else {
 		/* register at end of list to honor first register win */
 		list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
-		llog = rcu_dereference(nf_loggers[pf]);
+		llog = rcu_dereference_protected(nf_loggers[pf],
+						 lockdep_is_held(&nf_log_mutex));
 		if (llog == NULL)
 			rcu_assign_pointer(nf_loggers[pf], logger);
 	}
@@ -70,7 +71,8 @@ void nf_log_unregister(struct nf_logger *logger)
 
 	mutex_lock(&nf_log_mutex);
 	for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
-		c_logger = rcu_dereference(nf_loggers[i]);
+		c_logger = rcu_dereference_protected(nf_loggers[i],
+						     lockdep_is_held(&nf_log_mutex));
 		if (c_logger == logger)
 			rcu_assign_pointer(nf_loggers[i], NULL);
 		list_del(&logger->list[i]);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index c49ef219899e..78b3cf9c519c 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -9,6 +9,7 @@
 #include <linux/rcupdate.h>
 #include <net/protocol.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/dst.h>
 
 #include "nf_internals.h"
 
@@ -170,6 +171,7 @@ static int __nf_queue(struct sk_buff *skb,
 			dev_hold(physoutdev);
 	}
 #endif
+	skb_dst_force(skb);
 	afinfo->saveroute(skb, entry);
 	status = qh->outfn(entry, queuenum);
 
@@ -279,7 +281,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	}
 	rcu_read_unlock();
 	kfree(entry);
-	return;
 }
 EXPORT_SYMBOL(nf_reinject);
 
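
Packets handed to a userspace queue can outlive the softirq that delivered them, while their route may be a noref dst borrowed under rcu_read_lock. skb_dst_force() upgrades that borrow to a real reference before the packet parks in the queue; a sketch of the ordering this enforces:

	skb_dst_force(skb);			/* take a refcount on the dst now */
	afinfo->saveroute(skb, entry);		/* stash routing info with the entry */
	status = qh->outfn(entry, queuenum);	/* the packet may now wait
						 * indefinitely for a verdict */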
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 6afa3d52ea5f..b4a4532823e8 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -18,12 +18,9 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/kernel.h>
-#include <linux/major.h>
-#include <linux/timer.h>
 #include <linux/string.h>
 #include <linux/sockios.h>
 #include <linux/net.h>
-#include <linux/fcntl.h>
 #include <linux/skbuff.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -215,13 +212,13 @@ static struct pernet_operations nfnetlink_net_ops = {
 
 static int __init nfnetlink_init(void)
 {
-	printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+	pr_info("Netfilter messages via NETLINK v%s.\n", nfversion);
 	return register_pernet_subsys(&nfnetlink_net_ops);
 }
 
 static void __exit nfnetlink_exit(void)
 {
-	printk("Removing netfilter NETLINK layer.\n");
+	pr_info("Removing netfilter NETLINK layer.\n");
 	unregister_pernet_subsys(&nfnetlink_net_ops);
 }
 module_init(nfnetlink_init);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 203643fb2c52..6a1572b0ab41 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -66,9 +66,10 @@ struct nfulnl_instance {
 	u_int16_t group_num;		/* number of this queue */
 	u_int16_t flags;
 	u_int8_t copy_mode;
+	struct rcu_head rcu;
 };
 
-static DEFINE_RWLOCK(instances_lock);
+static DEFINE_SPINLOCK(instances_lock);
 static atomic_t global_seq;
 
 #define INSTANCE_BUCKETS	16
@@ -88,7 +89,7 @@ __instance_lookup(u_int16_t group_num)
 	struct nfulnl_instance *inst;
 
 	head = &instance_table[instance_hashfn(group_num)];
-	hlist_for_each_entry(inst, pos, head, hlist) {
+	hlist_for_each_entry_rcu(inst, pos, head, hlist) {
 		if (inst->group_num == group_num)
 			return inst;
 	}
@@ -106,22 +107,26 @@ instance_lookup_get(u_int16_t group_num)
 {
 	struct nfulnl_instance *inst;
 
-	read_lock_bh(&instances_lock);
+	rcu_read_lock_bh();
 	inst = __instance_lookup(group_num);
-	if (inst)
-		instance_get(inst);
-	read_unlock_bh(&instances_lock);
+	if (inst && !atomic_inc_not_zero(&inst->use))
+		inst = NULL;
+	rcu_read_unlock_bh();
 
 	return inst;
 }
 
+static void nfulnl_instance_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct nfulnl_instance, rcu));
+	module_put(THIS_MODULE);
+}
+
 static void
 instance_put(struct nfulnl_instance *inst)
 {
-	if (inst && atomic_dec_and_test(&inst->use)) {
-		kfree(inst);
-		module_put(THIS_MODULE);
-	}
+	if (inst && atomic_dec_and_test(&inst->use))
+		call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
 }
 
 static void nfulnl_timer(unsigned long data);
@@ -132,7 +137,7 @@ instance_create(u_int16_t group_num, int pid)
 	struct nfulnl_instance *inst;
 	int err;
 
-	write_lock_bh(&instances_lock);
+	spin_lock_bh(&instances_lock);
 	if (__instance_lookup(group_num)) {
 		err = -EEXIST;
 		goto out_unlock;
@@ -166,32 +171,37 @@ instance_create(u_int16_t group_num, int pid)
 	inst->copy_mode = NFULNL_COPY_PACKET;
 	inst->copy_range = NFULNL_COPY_RANGE_MAX;
 
-	hlist_add_head(&inst->hlist,
+	hlist_add_head_rcu(&inst->hlist,
 		       &instance_table[instance_hashfn(group_num)]);
 
-	write_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances_lock);
 
 	return inst;
 
 out_unlock:
-	write_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances_lock);
 	return ERR_PTR(err);
 }
 
 static void __nfulnl_flush(struct nfulnl_instance *inst);
 
+/* called with BH disabled */
 static void
 __instance_destroy(struct nfulnl_instance *inst)
 {
 	/* first pull it out of the global list */
-	hlist_del(&inst->hlist);
+	hlist_del_rcu(&inst->hlist);
 
 	/* then flush all pending packets from skb */
 
-	spin_lock_bh(&inst->lock);
+	spin_lock(&inst->lock);
+
+	/* lockless readers wont be able to use us */
+	inst->copy_mode = NFULNL_COPY_DISABLED;
+
 	if (inst->skb)
 		__nfulnl_flush(inst);
-	spin_unlock_bh(&inst->lock);
+	spin_unlock(&inst->lock);
 
 	/* and finally put the refcount */
 	instance_put(inst);
@@ -200,9 +210,9 @@ __instance_destroy(struct nfulnl_instance *inst)
 static inline void
 instance_destroy(struct nfulnl_instance *inst)
 {
-	write_lock_bh(&instances_lock);
+	spin_lock_bh(&instances_lock);
 	__instance_destroy(inst);
-	write_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances_lock);
 }
 
 static int
@@ -297,7 +307,7 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
 	n = max(inst_size, pkt_size);
 	skb = alloc_skb(n, GFP_ATOMIC);
 	if (!skb) {
-		PRINTR("nfnetlink_log: can't alloc whole buffer (%u bytes)\n",
+		pr_notice("nfnetlink_log: can't alloc whole buffer (%u bytes)\n",
 			inst_size);
 
 		if (n > pkt_size) {
@@ -306,7 +316,7 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
 
 			skb = alloc_skb(pkt_size, GFP_ATOMIC);
 			if (!skb)
-				PRINTR("nfnetlink_log: can't even alloc %u "
+				pr_err("nfnetlink_log: can't even alloc %u "
 				       "bytes\n", pkt_size);
 		}
 	}
@@ -403,8 +413,9 @@ __build_packet_message(struct nfulnl_instance *inst,
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
 				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
+			/* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
-				     htonl(indev->br_port->br->dev->ifindex));
+				     htonl(br_port_get_rcu(indev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
@@ -430,8 +441,9 @@ __build_packet_message(struct nfulnl_instance *inst,
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
 				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
+			/* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
-				     htonl(outdev->br_port->br->dev->ifindex));
+				     htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is a bridge group, we need to look
 			 * for physical device (when called from ipv4) */
@@ -619,6 +631,7 @@ nfulnl_log_packet(u_int8_t pf,
 		size += nla_total_size(data_len);
 		break;
 
+	case NFULNL_COPY_DISABLED:
 	default:
 		goto unlock_and_release;
 	}
@@ -672,7 +685,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
 		int i;
 
 		/* destroy all instances for this pid */
-		write_lock_bh(&instances_lock);
+		spin_lock_bh(&instances_lock);
 		for (i = 0; i < INSTANCE_BUCKETS; i++) {
 			struct hlist_node *tmp, *t2;
 			struct nfulnl_instance *inst;
@@ -684,7 +697,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
 					__instance_destroy(inst);
 			}
 		}
-		write_unlock_bh(&instances_lock);
+		spin_unlock_bh(&instances_lock);
 	}
 	return NOTIFY_DONE;
 }
@@ -861,19 +874,19 @@ static struct hlist_node *get_first(struct iter_state *st)
 
 	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
 		if (!hlist_empty(&instance_table[st->bucket]))
-			return instance_table[st->bucket].first;
+			return rcu_dereference_bh(instance_table[st->bucket].first);
 	}
 	return NULL;
 }
 
 static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
 {
-	h = h->next;
+	h = rcu_dereference_bh(h->next);
 	while (!h) {
 		if (++st->bucket >= INSTANCE_BUCKETS)
 			return NULL;
 
-		h = instance_table[st->bucket].first;
+		h = rcu_dereference_bh(instance_table[st->bucket].first);
 	}
 	return h;
 }
@@ -890,9 +903,9 @@ static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
 }
 
 static void *seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(instances_lock)
+	__acquires(rcu_bh)
 {
-	read_lock_bh(&instances_lock);
+	rcu_read_lock_bh();
 	return get_idx(seq->private, *pos);
 }
 
@@ -903,9 +916,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 }
 
 static void seq_stop(struct seq_file *s, void *v)
-	__releases(instances_lock)
+	__releases(rcu_bh)
 {
-	read_unlock_bh(&instances_lock);
+	rcu_read_unlock_bh();
 }
 
 static int seq_show(struct seq_file *s, void *v)
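
The rwlock-to-RCU conversion above hinges on one idiom: readers walk the hash under rcu_read_lock_bh() and take a reference only if the object is still live, while the final put defers the kfree through call_rcu so a concurrent reader never touches freed memory. A condensed sketch of the lookup half, mirroring instance_lookup_get():

static struct nfulnl_instance *lookup_get(u_int16_t group)
{
	struct nfulnl_instance *inst;

	rcu_read_lock_bh();
	inst = __instance_lookup(group);	/* hlist_for_each_entry_rcu walk */
	if (inst && !atomic_inc_not_zero(&inst->use))
		inst = NULL;			/* lost the race with the last put */
	rcu_read_unlock_bh();
	return inst;
}

atomic_inc_not_zero() is what makes this safe: once the refcount hits zero the instance is on its way to the RCU-deferred free, and a reader that still sees it on the list must refuse to resurrect it.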
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index e70a6ef1f4f2..68e67d19724d 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -46,17 +46,19 @@ struct nfqnl_instance {
46 int peer_pid; 46 int peer_pid;
47 unsigned int queue_maxlen; 47 unsigned int queue_maxlen;
48 unsigned int copy_range; 48 unsigned int copy_range;
49 unsigned int queue_total;
50 unsigned int queue_dropped; 49 unsigned int queue_dropped;
51 unsigned int queue_user_dropped; 50 unsigned int queue_user_dropped;
52 51
53 unsigned int id_sequence; /* 'sequence' of pkt ids */
54 52
55 u_int16_t queue_num; /* number of this queue */ 53 u_int16_t queue_num; /* number of this queue */
56 u_int8_t copy_mode; 54 u_int8_t copy_mode;
57 55/*
58 spinlock_t lock; 56 * Following fields are dirtied for each queued packet,
59 57 * keep them in same cache line if possible.
58 */
59 spinlock_t lock;
60 unsigned int queue_total;
61 atomic_t id_sequence; /* 'sequence' of pkt ids */
60 struct list_head queue_list; /* packets in queue */ 62 struct list_head queue_list; /* packets in queue */
61}; 63};
62 64
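
This reordering is purely about cache behaviour: queue_total and id_sequence are dirtied for every queued packet, so they move next to the lock taken on the same path, while the read-mostly configuration fields stay grouped above. A sketch of the general layout idea (field names illustrative; the patch itself relies on declaration order rather than explicit alignment annotations):

    struct queue_like {
            /* read-mostly configuration */
            unsigned int     copy_range;
            u_int16_t        queue_num;

            /* dirtied per packet: keep together, ideally one cache line */
            spinlock_t       lock ____cacheline_aligned_in_smp;
            unsigned int     total;
            atomic_t         seq;
            struct list_head queue_list;
    };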
@@ -238,33 +240,24 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
238 240
239 outdev = entry->outdev; 241 outdev = entry->outdev;
240 242
241 spin_lock_bh(&queue->lock); 243 switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
242
243 switch ((enum nfqnl_config_mode)queue->copy_mode) {
244 case NFQNL_COPY_META: 244 case NFQNL_COPY_META:
245 case NFQNL_COPY_NONE: 245 case NFQNL_COPY_NONE:
246 break; 246 break;
247 247
248 case NFQNL_COPY_PACKET: 248 case NFQNL_COPY_PACKET:
249 if ((entskb->ip_summed == CHECKSUM_PARTIAL || 249 if (entskb->ip_summed == CHECKSUM_PARTIAL &&
250 entskb->ip_summed == CHECKSUM_COMPLETE) && 250 skb_checksum_help(entskb))
251 skb_checksum_help(entskb)) {
252 spin_unlock_bh(&queue->lock);
253 return NULL; 251 return NULL;
254 } 252
255 if (queue->copy_range == 0 253 data_len = ACCESS_ONCE(queue->copy_range);
256 || queue->copy_range > entskb->len) 254 if (data_len == 0 || data_len > entskb->len)
257 data_len = entskb->len; 255 data_len = entskb->len;
258 else
259 data_len = queue->copy_range;
260 256
261 size += nla_total_size(data_len); 257 size += nla_total_size(data_len);
262 break; 258 break;
263 } 259 }
264 260
265 entry->id = queue->id_sequence++;
266
267 spin_unlock_bh(&queue->lock);
268 261
269 skb = alloc_skb(size, GFP_ATOMIC); 262 skb = alloc_skb(size, GFP_ATOMIC);
270 if (!skb) 263 if (!skb)
@@ -279,6 +272,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
279 nfmsg->version = NFNETLINK_V0; 272 nfmsg->version = NFNETLINK_V0;
280 nfmsg->res_id = htons(queue->queue_num); 273 nfmsg->res_id = htons(queue->queue_num);
281 274
275 entry->id = atomic_inc_return(&queue->id_sequence);
282 pmsg.packet_id = htonl(entry->id); 276 pmsg.packet_id = htonl(entry->id);
283 pmsg.hw_protocol = entskb->protocol; 277 pmsg.hw_protocol = entskb->protocol;
284 pmsg.hook = entry->hook; 278 pmsg.hook = entry->hook;
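
With id_sequence now atomic and copy_mode/copy_range read as single racy-but-atomic loads, nfqnl_build_packet_message no longer needs queue->lock on this path at all; the lock is left guarding only the queue list itself. The pattern, restated as a fragment with the reasoning spelled out in comments:

    /* one volatile load: a concurrent config change may race, but we
     * act on a single consistent snapshot of the value */
    data_len = ACCESS_ONCE(queue->copy_range);
    if (data_len == 0 || data_len > entskb->len)
            data_len = entskb->len;

    /* packet ids no longer require the queue lock either */
    entry->id = atomic_inc_return(&queue->id_sequence);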
@@ -297,8 +291,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
297 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV, 291 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
298 htonl(indev->ifindex)); 292 htonl(indev->ifindex));
299 /* this is the bridge group "brX" */ 293 /* this is the bridge group "brX" */
294 /* rcu_read_lock()ed by __nf_queue */
300 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, 295 NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
301 htonl(indev->br_port->br->dev->ifindex)); 296 htonl(br_port_get_rcu(indev)->br->dev->ifindex));
302 } else { 297 } else {
303 /* Case 2: indev is bridge group, we need to look for 298 /* Case 2: indev is bridge group, we need to look for
304 * physical device (when called from ipv4) */ 299 * physical device (when called from ipv4) */
@@ -322,8 +317,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
322 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV, 317 NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
323 htonl(outdev->ifindex)); 318 htonl(outdev->ifindex));
324 /* this is the bridge group "brX" */ 319 /* this is the bridge group "brX" */
320 /* rcu_read_lock()ed by __nf_queue */
325 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, 321 NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
326 htonl(outdev->br_port->br->dev->ifindex)); 322 htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
327 } else { 323 } else {
328 /* Case 2: outdev is bridge group, we need to look for 324 /* Case 2: outdev is bridge group, we need to look for
329 * physical output device (when called from ipv4) */ 325 * physical output device (when called from ipv4) */
@@ -867,7 +863,7 @@ static int seq_show(struct seq_file *s, void *v)
867 inst->peer_pid, inst->queue_total, 863 inst->peer_pid, inst->queue_total,
868 inst->copy_mode, inst->copy_range, 864 inst->copy_mode, inst->copy_range,
869 inst->queue_dropped, inst->queue_user_dropped, 865 inst->queue_dropped, inst->queue_user_dropped,
870 inst->id_sequence, 1); 866 atomic_read(&inst->id_sequence), 1);
871} 867}
872 868
873static const struct seq_operations nfqnl_seq_ops = { 869static const struct seq_operations nfqnl_seq_ops = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 665f5beef6ad..e34622fa0003 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -12,7 +12,7 @@
12 * published by the Free Software Foundation. 12 * published by the Free Software Foundation.
13 * 13 *
14 */ 14 */
15 15#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/socket.h> 17#include <linux/socket.h>
18#include <linux/net.h> 18#include <linux/net.h>
@@ -55,12 +55,6 @@ struct xt_af {
55 55
56static struct xt_af *xt; 56static struct xt_af *xt;
57 57
58#ifdef DEBUG_IP_FIREWALL_USER
59#define duprintf(format, args...) printk(format , ## args)
60#else
61#define duprintf(format, args...)
62#endif
63
64static const char *const xt_prefix[NFPROTO_NUMPROTO] = { 58static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
65 [NFPROTO_UNSPEC] = "x", 59 [NFPROTO_UNSPEC] = "x",
66 [NFPROTO_IPV4] = "ip", 60 [NFPROTO_IPV4] = "ip",
@@ -69,6 +63,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
69 [NFPROTO_IPV6] = "ip6", 63 [NFPROTO_IPV6] = "ip6",
70}; 64};
71 65
66/* Allow this many total (re)entries. */
67static const unsigned int xt_jumpstack_multiplier = 2;
68
72/* Registration hooks for targets. */ 69/* Registration hooks for targets. */
73int 70int
74xt_register_target(struct xt_target *target) 71xt_register_target(struct xt_target *target)
@@ -221,6 +218,17 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
221} 218}
222EXPORT_SYMBOL(xt_find_match); 219EXPORT_SYMBOL(xt_find_match);
223 220
221struct xt_match *
222xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
223{
224 struct xt_match *match;
225
226 match = try_then_request_module(xt_find_match(nfproto, name, revision),
227 "%st_%s", xt_prefix[nfproto], name);
228 return (match != NULL) ? match : ERR_PTR(-ENOENT);
229}
230EXPORT_SYMBOL_GPL(xt_request_find_match);
231
224/* Find target, grabs ref. Returns ERR_PTR() on error. */ 232/* Find target, grabs ref. Returns ERR_PTR() on error. */
225struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) 233struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
226{ 234{
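
Unlike the old xt_request_find_target behaviour of returning NULL on failure, both request helpers now return ERR_PTR(-ENOENT) when the extension cannot be found or loaded, so callers switch from NULL checks to the IS_ERR convention. A sketch of the intended caller side:

    struct xt_match *m;

    m = xt_request_find_match(NFPROTO_IPV4, name, revision);
    if (IS_ERR(m))
            return PTR_ERR(m);      /* e.g. -ENOENT: no such match */
    /* ... use m; drop the reference with module_put(m->me) when done */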
@@ -257,9 +265,7 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
257 265
258 target = try_then_request_module(xt_find_target(af, name, revision), 266 target = try_then_request_module(xt_find_target(af, name, revision),
259 "%st_%s", xt_prefix[af], name); 267 "%st_%s", xt_prefix[af], name);
260 if (IS_ERR(target) || !target) 268 return (target != NULL) ? target : ERR_PTR(-ENOENT);
261 return NULL;
262 return target;
263} 269}
264EXPORT_SYMBOL_GPL(xt_request_find_target); 270EXPORT_SYMBOL_GPL(xt_request_find_target);
265 271
@@ -361,6 +367,8 @@ static char *textify_hooks(char *buf, size_t size, unsigned int mask)
361int xt_check_match(struct xt_mtchk_param *par, 367int xt_check_match(struct xt_mtchk_param *par,
362 unsigned int size, u_int8_t proto, bool inv_proto) 368 unsigned int size, u_int8_t proto, bool inv_proto)
363{ 369{
370 int ret;
371
364 if (XT_ALIGN(par->match->matchsize) != size && 372 if (XT_ALIGN(par->match->matchsize) != size &&
365 par->match->matchsize != -1) { 373 par->match->matchsize != -1) {
366 /* 374 /*
@@ -397,8 +405,14 @@ int xt_check_match(struct xt_mtchk_param *par,
397 par->match->proto); 405 par->match->proto);
398 return -EINVAL; 406 return -EINVAL;
399 } 407 }
400 if (par->match->checkentry != NULL && !par->match->checkentry(par)) 408 if (par->match->checkentry != NULL) {
401 return -EINVAL; 409 ret = par->match->checkentry(par);
410 if (ret < 0)
411 return ret;
412 else if (ret > 0)
413 /* Flag up potential errors. */
414 return -EIO;
415 }
402 return 0; 416 return 0;
403} 417}
404EXPORT_SYMBOL_GPL(xt_check_match); 418EXPORT_SYMBOL_GPL(xt_check_match);
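
xt_check_match (and xt_check_target below) now treat ->checkentry as returning an errno: negative values are propagated to userspace, 0 means accepted, and a leftover "true"-style positive return is flagged as -EIO to catch extensions that were not converted. A converted check routine therefore looks like this (struct and flag names illustrative):

    static int foo_mt_check(const struct xt_mtchk_param *par)
    {
            const struct xt_foo_info *info = par->matchinfo; /* illustrative */

            if (info->flags & ~XT_FOO_VALID_FLAGS)
                    return -EINVAL;         /* bad ruleset data */
            if (nf_ct_l3proto_try_module_get(par->family) < 0)
                    return -ENOENT;         /* missing dependency */
            return 0;                       /* accepted */
    }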
@@ -518,6 +532,8 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
518int xt_check_target(struct xt_tgchk_param *par, 532int xt_check_target(struct xt_tgchk_param *par,
519 unsigned int size, u_int8_t proto, bool inv_proto) 533 unsigned int size, u_int8_t proto, bool inv_proto)
520{ 534{
535 int ret;
536
521 if (XT_ALIGN(par->target->targetsize) != size) { 537 if (XT_ALIGN(par->target->targetsize) != size) {
522 pr_err("%s_tables: %s.%u target: invalid size " 538 pr_err("%s_tables: %s.%u target: invalid size "
523 "%u (kernel) != (user) %u\n", 539 "%u (kernel) != (user) %u\n",
@@ -549,8 +565,14 @@ int xt_check_target(struct xt_tgchk_param *par,
549 par->target->proto); 565 par->target->proto);
550 return -EINVAL; 566 return -EINVAL;
551 } 567 }
552 if (par->target->checkentry != NULL && !par->target->checkentry(par)) 568 if (par->target->checkentry != NULL) {
553 return -EINVAL; 569 ret = par->target->checkentry(par);
570 if (ret < 0)
571 return ret;
572 else if (ret > 0)
573 /* Flag up potential errors. */
574 return -EIO;
575 }
554 return 0; 576 return 0;
555} 577}
556EXPORT_SYMBOL_GPL(xt_check_target); 578EXPORT_SYMBOL_GPL(xt_check_target);
@@ -662,6 +684,24 @@ void xt_free_table_info(struct xt_table_info *info)
662 else 684 else
663 vfree(info->entries[cpu]); 685 vfree(info->entries[cpu]);
664 } 686 }
687
688 if (info->jumpstack != NULL) {
689 if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
690 for_each_possible_cpu(cpu)
691 vfree(info->jumpstack[cpu]);
692 } else {
693 for_each_possible_cpu(cpu)
694 kfree(info->jumpstack[cpu]);
695 }
696 }
697
698 if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
699 vfree(info->jumpstack);
700 else
701 kfree(info->jumpstack);
702
703 free_percpu(info->stackptr);
704
665 kfree(info); 705 kfree(info);
666} 706}
667EXPORT_SYMBOL(xt_free_table_info); 707EXPORT_SYMBOL(xt_free_table_info);
@@ -706,6 +746,44 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
706DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); 746DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
707EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); 747EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
708 748
749static int xt_jumpstack_alloc(struct xt_table_info *i)
750{
751 unsigned int size;
752 int cpu;
753
754 i->stackptr = alloc_percpu(unsigned int);
755 if (i->stackptr == NULL)
756 return -ENOMEM;
757
758 size = sizeof(void **) * nr_cpu_ids;
759 if (size > PAGE_SIZE)
760 i->jumpstack = vmalloc(size);
761 else
762 i->jumpstack = kmalloc(size, GFP_KERNEL);
763 if (i->jumpstack == NULL)
764 return -ENOMEM;
765 memset(i->jumpstack, 0, size);
766
767 i->stacksize *= xt_jumpstack_multiplier;
768 size = sizeof(void *) * i->stacksize;
769 for_each_possible_cpu(cpu) {
770 if (size > PAGE_SIZE)
771 i->jumpstack[cpu] = vmalloc_node(size,
772 cpu_to_node(cpu));
773 else
774 i->jumpstack[cpu] = kmalloc_node(size,
775 GFP_KERNEL, cpu_to_node(cpu));
776 if (i->jumpstack[cpu] == NULL)
777 /*
778 * Freeing will be done later on by the callers. The
779 * chain is: xt_replace_table -> __do_replace ->
780 * do_replace -> xt_free_table_info.
781 */
782 return -ENOMEM;
783 }
784
785 return 0;
786}
709 787
710struct xt_table_info * 788struct xt_table_info *
711xt_replace_table(struct xt_table *table, 789xt_replace_table(struct xt_table *table,
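
xt_jumpstack_alloc and the matching code added to xt_free_table_info apply the same PAGE_SIZE threshold, so every buffer is released by the allocator that produced it. The recurring idiom, sketched as a helper pair (the helpers are illustrative; the patch open-codes the size test at each site):

    static void *xt_alloc_sized(size_t size)
    {
            if (size > PAGE_SIZE)
                    return vmalloc(size);        /* large: virtually contiguous */
            return kmalloc(size, GFP_KERNEL);    /* small: physically contiguous */
    }

    static void xt_free_sized(void *p, size_t size)
    {
            if (size > PAGE_SIZE)
                    vfree(p);
            else
                    kfree(p);
    }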
@@ -714,6 +792,13 @@ xt_replace_table(struct xt_table *table,
714 int *error) 792 int *error)
715{ 793{
716 struct xt_table_info *private; 794 struct xt_table_info *private;
795 int ret;
796
797 ret = xt_jumpstack_alloc(newinfo);
798 if (ret < 0) {
799 *error = ret;
800 return NULL;
801 }
717 802
718 /* Do the substitution. */ 803 /* Do the substitution. */
719 local_bh_disable(); 804 local_bh_disable();
@@ -721,7 +806,7 @@ xt_replace_table(struct xt_table *table,
721 806
722 /* Check inside lock: is the old number correct? */ 807 /* Check inside lock: is the old number correct? */
723 if (num_counters != private->number) { 808 if (num_counters != private->number) {
724 duprintf("num_counters != table->private->number (%u/%u)\n", 809 pr_debug("num_counters != table->private->number (%u/%u)\n",
725 num_counters, private->number); 810 num_counters, private->number);
726 local_bh_enable(); 811 local_bh_enable();
727 *error = -EAGAIN; 812 *error = -EAGAIN;
@@ -778,7 +863,7 @@ struct xt_table *xt_register_table(struct net *net,
778 goto unlock; 863 goto unlock;
779 864
780 private = table->private; 865 private = table->private;
781 duprintf("table->private->number = %u\n", private->number); 866 pr_debug("table->private->number = %u\n", private->number);
782 867
783 /* save number of initial entries */ 868 /* save number of initial entries */
784 private->initial_entries = private->number; 869 private->initial_entries = private->number;
diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c
new file mode 100644
index 000000000000..0f642ef8cd26
--- /dev/null
+++ b/net/netfilter/xt_CHECKSUM.c
@@ -0,0 +1,70 @@
1/* iptables module for the packet checksum mangling
2 *
3 * (C) 2002 by Harald Welte <laforge@netfilter.org>
4 * (C) 2010 Red Hat, Inc.
5 *
6 * Author: Michael S. Tsirkin <mst@redhat.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11*/
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h>
14#include <linux/skbuff.h>
15
16#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter/xt_CHECKSUM.h>
18
19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>");
21MODULE_DESCRIPTION("Xtables: checksum modification");
22MODULE_ALIAS("ipt_CHECKSUM");
23MODULE_ALIAS("ip6t_CHECKSUM");
24
25static unsigned int
26checksum_tg(struct sk_buff *skb, const struct xt_action_param *par)
27{
28 if (skb->ip_summed == CHECKSUM_PARTIAL)
29 skb_checksum_help(skb);
30
31 return XT_CONTINUE;
32}
33
34static int checksum_tg_check(const struct xt_tgchk_param *par)
35{
36 const struct xt_CHECKSUM_info *einfo = par->targinfo;
37
38 if (einfo->operation & ~XT_CHECKSUM_OP_FILL) {
39 pr_info("unsupported CHECKSUM operation %x\n", einfo->operation);
40 return -EINVAL;
41 }
42 if (!einfo->operation) {
43 pr_info("no CHECKSUM operation enabled\n");
44 return -EINVAL;
45 }
46 return 0;
47}
48
49static struct xt_target checksum_tg_reg __read_mostly = {
50 .name = "CHECKSUM",
51 .family = NFPROTO_UNSPEC,
52 .target = checksum_tg,
53 .targetsize = sizeof(struct xt_CHECKSUM_info),
54 .table = "mangle",
55 .checkentry = checksum_tg_check,
56 .me = THIS_MODULE,
57};
58
59static int __init checksum_tg_init(void)
60{
61 return xt_register_target(&checksum_tg_reg);
62}
63
64static void __exit checksum_tg_exit(void)
65{
66 xt_unregister_target(&checksum_tg_reg);
67}
68
69module_init(checksum_tg_init);
70module_exit(checksum_tg_exit);
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 011bc80dd2a1..c2c0e4abeb99 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -27,7 +27,7 @@ MODULE_ALIAS("ipt_CLASSIFY");
27MODULE_ALIAS("ip6t_CLASSIFY"); 27MODULE_ALIAS("ip6t_CLASSIFY");
28 28
29static unsigned int 29static unsigned int
30classify_tg(struct sk_buff *skb, const struct xt_target_param *par) 30classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
31{ 31{
32 const struct xt_classify_target_info *clinfo = par->targinfo; 32 const struct xt_classify_target_info *clinfo = par->targinfo;
33 33
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
deleted file mode 100644
index 593457068ae1..000000000000
--- a/net/netfilter/xt_CONNMARK.c
+++ /dev/null
@@ -1,113 +0,0 @@
1/*
2 * xt_CONNMARK - Netfilter module to modify the connection mark values
3 *
4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
5 * by Henrik Nordstrom <hno@marasystems.com>
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * Jan Engelhardt <jengelh@computergmbh.de>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23#include <linux/module.h>
24#include <linux/skbuff.h>
25#include <linux/ip.h>
26#include <net/checksum.h>
27
28MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
29MODULE_DESCRIPTION("Xtables: connection mark modification");
30MODULE_LICENSE("GPL");
31MODULE_ALIAS("ipt_CONNMARK");
32MODULE_ALIAS("ip6t_CONNMARK");
33
34#include <linux/netfilter/x_tables.h>
35#include <linux/netfilter/xt_CONNMARK.h>
36#include <net/netfilter/nf_conntrack_ecache.h>
37
38static unsigned int
39connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
40{
41 const struct xt_connmark_tginfo1 *info = par->targinfo;
42 enum ip_conntrack_info ctinfo;
43 struct nf_conn *ct;
44 u_int32_t newmark;
45
46 ct = nf_ct_get(skb, &ctinfo);
47 if (ct == NULL)
48 return XT_CONTINUE;
49
50 switch (info->mode) {
51 case XT_CONNMARK_SET:
52 newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
53 if (ct->mark != newmark) {
54 ct->mark = newmark;
55 nf_conntrack_event_cache(IPCT_MARK, ct);
56 }
57 break;
58 case XT_CONNMARK_SAVE:
59 newmark = (ct->mark & ~info->ctmask) ^
60 (skb->mark & info->nfmask);
61 if (ct->mark != newmark) {
62 ct->mark = newmark;
63 nf_conntrack_event_cache(IPCT_MARK, ct);
64 }
65 break;
66 case XT_CONNMARK_RESTORE:
67 newmark = (skb->mark & ~info->nfmask) ^
68 (ct->mark & info->ctmask);
69 skb->mark = newmark;
70 break;
71 }
72
73 return XT_CONTINUE;
74}
75
76static bool connmark_tg_check(const struct xt_tgchk_param *par)
77{
78 if (nf_ct_l3proto_try_module_get(par->family) < 0) {
79 printk(KERN_WARNING "cannot load conntrack support for "
80 "proto=%u\n", par->family);
81 return false;
82 }
83 return true;
84}
85
86static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
87{
88 nf_ct_l3proto_module_put(par->family);
89}
90
91static struct xt_target connmark_tg_reg __read_mostly = {
92 .name = "CONNMARK",
93 .revision = 1,
94 .family = NFPROTO_UNSPEC,
95 .checkentry = connmark_tg_check,
96 .target = connmark_tg,
97 .targetsize = sizeof(struct xt_connmark_tginfo1),
98 .destroy = connmark_tg_destroy,
99 .me = THIS_MODULE,
100};
101
102static int __init connmark_tg_init(void)
103{
104 return xt_register_target(&connmark_tg_reg);
105}
106
107static void __exit connmark_tg_exit(void)
108{
109 xt_unregister_target(&connmark_tg_reg);
110}
111
112module_init(connmark_tg_init);
113module_exit(connmark_tg_exit);
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index b54c3756fdc3..e04dc282e3bb 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -15,6 +15,7 @@
15 * published by the Free Software Foundation. 15 * published by the Free Software Foundation.
16 * 16 *
17 */ 17 */
18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
18#include <linux/module.h> 19#include <linux/module.h>
19#include <linux/skbuff.h> 20#include <linux/skbuff.h>
20#include <linux/netfilter/x_tables.h> 21#include <linux/netfilter/x_tables.h>
@@ -22,8 +23,6 @@
22#include <net/netfilter/nf_conntrack.h> 23#include <net/netfilter/nf_conntrack.h>
23#include <net/netfilter/nf_conntrack_ecache.h> 24#include <net/netfilter/nf_conntrack_ecache.h>
24 25
25#define PFX "CONNSECMARK: "
26
27MODULE_LICENSE("GPL"); 26MODULE_LICENSE("GPL");
28MODULE_AUTHOR("James Morris <jmorris@redhat.com>"); 27MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
29MODULE_DESCRIPTION("Xtables: target for copying between connection and security mark"); 28MODULE_DESCRIPTION("Xtables: target for copying between connection and security mark");
@@ -65,7 +64,7 @@ static void secmark_restore(struct sk_buff *skb)
65} 64}
66 65
67static unsigned int 66static unsigned int
68connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par) 67connsecmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
69{ 68{
70 const struct xt_connsecmark_target_info *info = par->targinfo; 69 const struct xt_connsecmark_target_info *info = par->targinfo;
71 70
@@ -85,15 +84,16 @@ connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
85 return XT_CONTINUE; 84 return XT_CONTINUE;
86} 85}
87 86
88static bool connsecmark_tg_check(const struct xt_tgchk_param *par) 87static int connsecmark_tg_check(const struct xt_tgchk_param *par)
89{ 88{
90 const struct xt_connsecmark_target_info *info = par->targinfo; 89 const struct xt_connsecmark_target_info *info = par->targinfo;
90 int ret;
91 91
92 if (strcmp(par->table, "mangle") != 0 && 92 if (strcmp(par->table, "mangle") != 0 &&
93 strcmp(par->table, "security") != 0) { 93 strcmp(par->table, "security") != 0) {
94 printk(KERN_INFO PFX "target only valid in the \'mangle\' " 94 pr_info("target only valid in the \'mangle\' "
95 "or \'security\' tables, not \'%s\'.\n", par->table); 95 "or \'security\' tables, not \'%s\'.\n", par->table);
96 return false; 96 return -EINVAL;
97 } 97 }
98 98
99 switch (info->mode) { 99 switch (info->mode) {
@@ -102,16 +102,15 @@ static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
102 break; 102 break;
103 103
104 default: 104 default:
105 printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode); 105 pr_info("invalid mode: %hu\n", info->mode);
106 return false; 106 return -EINVAL;
107 } 107 }
108 108
109 if (nf_ct_l3proto_try_module_get(par->family) < 0) { 109 ret = nf_ct_l3proto_try_module_get(par->family);
110 printk(KERN_WARNING "can't load conntrack support for " 110 if (ret < 0)
111 "proto=%u\n", par->family); 111 pr_info("cannot load conntrack support for proto=%u\n",
112 return false; 112 par->family);
113 } 113 return ret;
114 return true;
115} 114}
116 115
117static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par) 116static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index ee18b231b950..0cb6053f02fd 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -20,7 +20,7 @@
20#include <net/netfilter/nf_conntrack_zones.h> 20#include <net/netfilter/nf_conntrack_zones.h>
21 21
22static unsigned int xt_ct_target(struct sk_buff *skb, 22static unsigned int xt_ct_target(struct sk_buff *skb,
23 const struct xt_target_param *par) 23 const struct xt_action_param *par)
24{ 24{
25 const struct xt_ct_target_info *info = par->targinfo; 25 const struct xt_ct_target_info *info = par->targinfo;
26 struct nf_conn *ct = info->ct; 26 struct nf_conn *ct = info->ct;
@@ -38,13 +38,13 @@ static unsigned int xt_ct_target(struct sk_buff *skb,
38 38
39static u8 xt_ct_find_proto(const struct xt_tgchk_param *par) 39static u8 xt_ct_find_proto(const struct xt_tgchk_param *par)
40{ 40{
41 if (par->family == AF_INET) { 41 if (par->family == NFPROTO_IPV4) {
42 const struct ipt_entry *e = par->entryinfo; 42 const struct ipt_entry *e = par->entryinfo;
43 43
44 if (e->ip.invflags & IPT_INV_PROTO) 44 if (e->ip.invflags & IPT_INV_PROTO)
45 return 0; 45 return 0;
46 return e->ip.proto; 46 return e->ip.proto;
47 } else if (par->family == AF_INET6) { 47 } else if (par->family == NFPROTO_IPV6) {
48 const struct ip6t_entry *e = par->entryinfo; 48 const struct ip6t_entry *e = par->entryinfo;
49 49
50 if (e->ipv6.invflags & IP6T_INV_PROTO) 50 if (e->ipv6.invflags & IP6T_INV_PROTO)
@@ -54,19 +54,20 @@ static u8 xt_ct_find_proto(const struct xt_tgchk_param *par)
54 return 0; 54 return 0;
55} 55}
56 56
57static bool xt_ct_tg_check(const struct xt_tgchk_param *par) 57static int xt_ct_tg_check(const struct xt_tgchk_param *par)
58{ 58{
59 struct xt_ct_target_info *info = par->targinfo; 59 struct xt_ct_target_info *info = par->targinfo;
60 struct nf_conntrack_tuple t; 60 struct nf_conntrack_tuple t;
61 struct nf_conn_help *help; 61 struct nf_conn_help *help;
62 struct nf_conn *ct; 62 struct nf_conn *ct;
63 int ret = 0;
63 u8 proto; 64 u8 proto;
64 65
65 if (info->flags & ~XT_CT_NOTRACK) 66 if (info->flags & ~XT_CT_NOTRACK)
66 return false; 67 return -EINVAL;
67 68
68 if (info->flags & XT_CT_NOTRACK) { 69 if (info->flags & XT_CT_NOTRACK) {
69 ct = &nf_conntrack_untracked; 70 ct = nf_ct_untracked_get();
70 atomic_inc(&ct->ct_general.use); 71 atomic_inc(&ct->ct_general.use);
71 goto out; 72 goto out;
72 } 73 }
@@ -76,28 +77,34 @@ static bool xt_ct_tg_check(const struct xt_tgchk_param *par)
76 goto err1; 77 goto err1;
77#endif 78#endif
78 79
79 if (nf_ct_l3proto_try_module_get(par->family) < 0) 80 ret = nf_ct_l3proto_try_module_get(par->family);
81 if (ret < 0)
80 goto err1; 82 goto err1;
81 83
82 memset(&t, 0, sizeof(t)); 84 memset(&t, 0, sizeof(t));
83 ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); 85 ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
86 ret = PTR_ERR(ct);
84 if (IS_ERR(ct)) 87 if (IS_ERR(ct))
85 goto err2; 88 goto err2;
86 89
90 ret = 0;
87 if ((info->ct_events || info->exp_events) && 91 if ((info->ct_events || info->exp_events) &&
88 !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events, 92 !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
89 GFP_KERNEL)) 93 GFP_KERNEL))
90 goto err3; 94 goto err3;
91 95
92 if (info->helper[0]) { 96 if (info->helper[0]) {
97 ret = -ENOENT;
93 proto = xt_ct_find_proto(par); 98 proto = xt_ct_find_proto(par);
94 if (!proto) 99 if (!proto)
95 goto err3; 100 goto err3;
96 101
102 ret = -ENOMEM;
97 help = nf_ct_helper_ext_add(ct, GFP_KERNEL); 103 help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
98 if (help == NULL) 104 if (help == NULL)
99 goto err3; 105 goto err3;
100 106
107 ret = -ENOENT;
101 help->helper = nf_conntrack_helper_try_module_get(info->helper, 108 help->helper = nf_conntrack_helper_try_module_get(info->helper,
102 par->family, 109 par->family,
103 proto); 110 proto);
@@ -109,14 +116,14 @@ static bool xt_ct_tg_check(const struct xt_tgchk_param *par)
109 __set_bit(IPS_CONFIRMED_BIT, &ct->status); 116 __set_bit(IPS_CONFIRMED_BIT, &ct->status);
110out: 117out:
111 info->ct = ct; 118 info->ct = ct;
112 return true; 119 return 0;
113 120
114err3: 121err3:
115 nf_conntrack_free(ct); 122 nf_conntrack_free(ct);
116err2: 123err2:
117 nf_ct_l3proto_module_put(par->family); 124 nf_ct_l3proto_module_put(par->family);
118err1: 125err1:
119 return false; 126 return ret;
120} 127}
121 128
122static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) 129static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
@@ -125,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
125 struct nf_conn *ct = info->ct; 132 struct nf_conn *ct = info->ct;
126 struct nf_conn_help *help; 133 struct nf_conn_help *help;
127 134
128 if (ct != &nf_conntrack_untracked) { 135 if (!nf_ct_is_untracked(ct)) {
129 help = nfct_help(ct); 136 help = nfct_help(ct);
130 if (help) 137 if (help)
131 module_put(help->helper->me); 138 module_put(help->helper->me);
@@ -138,7 +145,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
138static struct xt_target xt_ct_tg __read_mostly = { 145static struct xt_target xt_ct_tg __read_mostly = {
139 .name = "CT", 146 .name = "CT",
140 .family = NFPROTO_UNSPEC, 147 .family = NFPROTO_UNSPEC,
141 .targetsize = XT_ALIGN(sizeof(struct xt_ct_target_info)), 148 .targetsize = sizeof(struct xt_ct_target_info),
142 .checkentry = xt_ct_tg_check, 149 .checkentry = xt_ct_tg_check,
143 .destroy = xt_ct_tg_destroy, 150 .destroy = xt_ct_tg_destroy,
144 .target = xt_ct_target, 151 .target = xt_ct_target,
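
Besides the bool-to-errno conversion, xt_ct_tg_check shows the usual kernel unwinding style: ret is preset to the errno of the step about to run, so every failure jumps to a label that frees exactly what was already allocated and returns the right code. In outline (all names illustrative):

    static int setup_example(void)
    {
            void *a, *b;
            int ret;

            ret = -ENOMEM;
            a = alloc_a();          /* illustrative step 1 */
            if (a == NULL)
                    goto err1;

            ret = -ENOENT;
            b = find_b();           /* illustrative step 2 */
            if (b == NULL)
                    goto err2;

            return 0;

    err2:
            free_a(a);
    err1:
            return ret;
    }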
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 74ce89260056..0a229191e55b 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -9,7 +9,7 @@
9 * 9 *
10 * See RFC2474 for a description of the DSCP field within the IP Header. 10 * See RFC2474 for a description of the DSCP field within the IP Header.
11*/ 11*/
12 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/ip.h> 15#include <linux/ip.h>
@@ -28,7 +28,7 @@ MODULE_ALIAS("ipt_TOS");
28MODULE_ALIAS("ip6t_TOS"); 28MODULE_ALIAS("ip6t_TOS");
29 29
30static unsigned int 30static unsigned int
31dscp_tg(struct sk_buff *skb, const struct xt_target_param *par) 31dscp_tg(struct sk_buff *skb, const struct xt_action_param *par)
32{ 32{
33 const struct xt_DSCP_info *dinfo = par->targinfo; 33 const struct xt_DSCP_info *dinfo = par->targinfo;
34 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; 34 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -45,7 +45,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_target_param *par)
45} 45}
46 46
47static unsigned int 47static unsigned int
48dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par) 48dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par)
49{ 49{
50 const struct xt_DSCP_info *dinfo = par->targinfo; 50 const struct xt_DSCP_info *dinfo = par->targinfo;
51 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; 51 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -60,19 +60,19 @@ dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par)
60 return XT_CONTINUE; 60 return XT_CONTINUE;
61} 61}
62 62
63static bool dscp_tg_check(const struct xt_tgchk_param *par) 63static int dscp_tg_check(const struct xt_tgchk_param *par)
64{ 64{
65 const struct xt_DSCP_info *info = par->targinfo; 65 const struct xt_DSCP_info *info = par->targinfo;
66 66
67 if (info->dscp > XT_DSCP_MAX) { 67 if (info->dscp > XT_DSCP_MAX) {
68 printk(KERN_WARNING "DSCP: dscp %x out of range\n", info->dscp); 68 pr_info("dscp %x out of range\n", info->dscp);
69 return false; 69 return -EDOM;
70 } 70 }
71 return true; 71 return 0;
72} 72}
73 73
74static unsigned int 74static unsigned int
75tos_tg(struct sk_buff *skb, const struct xt_target_param *par) 75tos_tg(struct sk_buff *skb, const struct xt_action_param *par)
76{ 76{
77 const struct xt_tos_target_info *info = par->targinfo; 77 const struct xt_tos_target_info *info = par->targinfo;
78 struct iphdr *iph = ip_hdr(skb); 78 struct iphdr *iph = ip_hdr(skb);
@@ -92,7 +92,7 @@ tos_tg(struct sk_buff *skb, const struct xt_target_param *par)
92} 92}
93 93
94static unsigned int 94static unsigned int
95tos_tg6(struct sk_buff *skb, const struct xt_target_param *par) 95tos_tg6(struct sk_buff *skb, const struct xt_action_param *par)
96{ 96{
97 const struct xt_tos_target_info *info = par->targinfo; 97 const struct xt_tos_target_info *info = par->targinfo;
98 struct ipv6hdr *iph = ipv6_hdr(skb); 98 struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 10e789e2d12a..95b084800fcc 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -9,7 +9,7 @@
9 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
11 */ 11 */
12 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/ip.h> 15#include <linux/ip.h>
@@ -26,7 +26,7 @@ MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target");
26MODULE_LICENSE("GPL"); 26MODULE_LICENSE("GPL");
27 27
28static unsigned int 28static unsigned int
29ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) 29ttl_tg(struct sk_buff *skb, const struct xt_action_param *par)
30{ 30{
31 struct iphdr *iph; 31 struct iphdr *iph;
32 const struct ipt_TTL_info *info = par->targinfo; 32 const struct ipt_TTL_info *info = par->targinfo;
@@ -66,7 +66,7 @@ ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
66} 66}
67 67
68static unsigned int 68static unsigned int
69hl_tg6(struct sk_buff *skb, const struct xt_target_param *par) 69hl_tg6(struct sk_buff *skb, const struct xt_action_param *par)
70{ 70{
71 struct ipv6hdr *ip6h; 71 struct ipv6hdr *ip6h;
72 const struct ip6t_HL_info *info = par->targinfo; 72 const struct ip6t_HL_info *info = par->targinfo;
@@ -101,35 +101,33 @@ hl_tg6(struct sk_buff *skb, const struct xt_target_param *par)
101 return XT_CONTINUE; 101 return XT_CONTINUE;
102} 102}
103 103
104static bool ttl_tg_check(const struct xt_tgchk_param *par) 104static int ttl_tg_check(const struct xt_tgchk_param *par)
105{ 105{
106 const struct ipt_TTL_info *info = par->targinfo; 106 const struct ipt_TTL_info *info = par->targinfo;
107 107
108 if (info->mode > IPT_TTL_MAXMODE) { 108 if (info->mode > IPT_TTL_MAXMODE) {
109 printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", 109 pr_info("TTL: invalid or unknown mode %u\n", info->mode);
110 info->mode); 110 return -EINVAL;
111 return false;
112 } 111 }
113 if (info->mode != IPT_TTL_SET && info->ttl == 0) 112 if (info->mode != IPT_TTL_SET && info->ttl == 0)
114 return false; 113 return -EINVAL;
115 return true; 114 return 0;
116} 115}
117 116
118static bool hl_tg6_check(const struct xt_tgchk_param *par) 117static int hl_tg6_check(const struct xt_tgchk_param *par)
119{ 118{
120 const struct ip6t_HL_info *info = par->targinfo; 119 const struct ip6t_HL_info *info = par->targinfo;
121 120
122 if (info->mode > IP6T_HL_MAXMODE) { 121 if (info->mode > IP6T_HL_MAXMODE) {
123 printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", 122 pr_info("invalid or unknown mode %u\n", info->mode);
124 info->mode); 123 return -EINVAL;
125 return false;
126 } 124 }
127 if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { 125 if (info->mode != IP6T_HL_SET && info->hop_limit == 0) {
128 printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " 126 pr_info("increment/decrement does not "
129 "make sense with value 0\n"); 127 "make sense with value 0\n");
130 return false; 128 return -EINVAL;
131 } 129 }
132 return true; 130 return 0;
133} 131}
134 132
135static struct xt_target hl_tg_reg[] __read_mostly = { 133static struct xt_target hl_tg_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
new file mode 100644
index 000000000000..be1f22e13545
--- /dev/null
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -0,0 +1,315 @@
1/*
2 * linux/net/netfilter/xt_IDLETIMER.c
3 *
4 * Netfilter module to trigger a timer when packet matches.
5 * After timer expires a kevent will be sent.
6 *
7 * Copyright (C) 2004, 2010 Nokia Corporation
8 * Written by Timo Teras <ext-timo.teras@nokia.com>
9 *
10 * Converted to x_tables and reworked for upstream inclusion
11 * by Luciano Coelho <luciano.coelho@nokia.com>
12 *
13 * Contact: Luciano Coelho <luciano.coelho@nokia.com>
14 *
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * version 2 as published by the Free Software Foundation.
18 *
19 * This program is distributed in the hope that it will be useful, but
20 * WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 * General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with this program; if not, write to the Free Software
26 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 * 02110-1301 USA
28 */
29
30#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
31
32#include <linux/module.h>
33#include <linux/timer.h>
34#include <linux/list.h>
35#include <linux/mutex.h>
36#include <linux/netfilter.h>
37#include <linux/netfilter/x_tables.h>
38#include <linux/netfilter/xt_IDLETIMER.h>
39#include <linux/kdev_t.h>
40#include <linux/kobject.h>
41#include <linux/workqueue.h>
42#include <linux/sysfs.h>
43
44struct idletimer_tg_attr {
45 struct attribute attr;
46 ssize_t (*show)(struct kobject *kobj,
47 struct attribute *attr, char *buf);
48};
49
50struct idletimer_tg {
51 struct list_head entry;
52 struct timer_list timer;
53 struct work_struct work;
54
55 struct kobject *kobj;
56 struct idletimer_tg_attr attr;
57
58 unsigned int refcnt;
59};
60
61static LIST_HEAD(idletimer_tg_list);
62static DEFINE_MUTEX(list_mutex);
63
64static struct kobject *idletimer_tg_kobj;
65
66static
67struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
68{
69 struct idletimer_tg *entry;
70
71 BUG_ON(!label);
72
73 list_for_each_entry(entry, &idletimer_tg_list, entry) {
74 if (!strcmp(label, entry->attr.attr.name))
75 return entry;
76 }
77
78 return NULL;
79}
80
81static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
82 char *buf)
83{
84 struct idletimer_tg *timer;
85 unsigned long expires = 0;
86
87 mutex_lock(&list_mutex);
88
89 timer = __idletimer_tg_find_by_label(attr->name);
90 if (timer)
91 expires = timer->timer.expires;
92
93 mutex_unlock(&list_mutex);
94
95 if (time_after(expires, jiffies))
96 return sprintf(buf, "%u\n",
97 jiffies_to_msecs(expires - jiffies) / 1000);
98
99 return sprintf(buf, "0\n");
100}
101
102static void idletimer_tg_work(struct work_struct *work)
103{
104 struct idletimer_tg *timer = container_of(work, struct idletimer_tg,
105 work);
106
107 sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
108}
109
110static void idletimer_tg_expired(unsigned long data)
111{
112 struct idletimer_tg *timer = (struct idletimer_tg *) data;
113
114 pr_debug("timer %s expired\n", timer->attr.attr.name);
115
116 schedule_work(&timer->work);
117}
118
119static int idletimer_tg_create(struct idletimer_tg_info *info)
120{
121 int ret;
122
123 info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
124 if (!info->timer) {
125 pr_debug("couldn't alloc timer\n");
126 ret = -ENOMEM;
127 goto out;
128 }
129
130 info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
131 if (!info->timer->attr.attr.name) {
132 pr_debug("couldn't alloc attribute name\n");
133 ret = -ENOMEM;
134 goto out_free_timer;
135 }
136 info->timer->attr.attr.mode = S_IRUGO;
137 info->timer->attr.show = idletimer_tg_show;
138
139 ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
140 if (ret < 0) {
141 pr_debug("couldn't add file to sysfs");
142 goto out_free_attr;
143 }
144
145 list_add(&info->timer->entry, &idletimer_tg_list);
146
147 setup_timer(&info->timer->timer, idletimer_tg_expired,
148 (unsigned long) info->timer);
149 info->timer->refcnt = 1;
150
151 mod_timer(&info->timer->timer,
152 msecs_to_jiffies(info->timeout * 1000) + jiffies);
153
154 INIT_WORK(&info->timer->work, idletimer_tg_work);
155
156 return 0;
157
158out_free_attr:
159 kfree(info->timer->attr.attr.name);
160out_free_timer:
161 kfree(info->timer);
162out:
163 return ret;
164}
165
166/*
167 * The actual xt_tables plugin.
168 */
169static unsigned int idletimer_tg_target(struct sk_buff *skb,
170 const struct xt_action_param *par)
171{
172 const struct idletimer_tg_info *info = par->targinfo;
173
174 pr_debug("resetting timer %s, timeout period %u\n",
175 info->label, info->timeout);
176
177 BUG_ON(!info->timer);
178
179 mod_timer(&info->timer->timer,
180 msecs_to_jiffies(info->timeout * 1000) + jiffies);
181
182 return XT_CONTINUE;
183}
184
185static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
186{
187 struct idletimer_tg_info *info = par->targinfo;
188 int ret;
189
190 pr_debug("checkentry targinfo%s\n", info->label);
191
192 if (info->timeout == 0) {
193 pr_debug("timeout value is zero\n");
194 return -EINVAL;
195 }
196
197 if (info->label[0] == '\0' ||
198 strnlen(info->label,
199 MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) {
200 pr_debug("label is empty or not nul-terminated\n");
201 return -EINVAL;
202 }
203
204 mutex_lock(&list_mutex);
205
206 info->timer = __idletimer_tg_find_by_label(info->label);
207 if (info->timer) {
208 info->timer->refcnt++;
209 mod_timer(&info->timer->timer,
210 msecs_to_jiffies(info->timeout * 1000) + jiffies);
211
212 pr_debug("increased refcnt of timer %s to %u\n",
213 info->label, info->timer->refcnt);
214 } else {
215 ret = idletimer_tg_create(info);
216 if (ret < 0) {
217 pr_debug("failed to create timer\n");
218 mutex_unlock(&list_mutex);
219 return ret;
220 }
221 }
222
223 mutex_unlock(&list_mutex);
224 return 0;
225}
226
227static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
228{
229 const struct idletimer_tg_info *info = par->targinfo;
230
231 pr_debug("destroy targinfo %s\n", info->label);
232
233 mutex_lock(&list_mutex);
234
235 if (--info->timer->refcnt == 0) {
236 pr_debug("deleting timer %s\n", info->label);
237
238 list_del(&info->timer->entry);
239 del_timer_sync(&info->timer->timer);
240 sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
241 kfree(info->timer->attr.attr.name);
242 kfree(info->timer);
243 } else {
244 pr_debug("decreased refcnt of timer %s to %u\n",
245 info->label, info->timer->refcnt);
246 }
247
248 mutex_unlock(&list_mutex);
249}
250
251static struct xt_target idletimer_tg __read_mostly = {
252 .name = "IDLETIMER",
253 .family = NFPROTO_UNSPEC,
254 .target = idletimer_tg_target,
255 .targetsize = sizeof(struct idletimer_tg_info),
256 .checkentry = idletimer_tg_checkentry,
257 .destroy = idletimer_tg_destroy,
258 .me = THIS_MODULE,
259};
260
261static struct class *idletimer_tg_class;
262
263static struct device *idletimer_tg_device;
264
265static int __init idletimer_tg_init(void)
266{
267 int err;
268
269 idletimer_tg_class = class_create(THIS_MODULE, "xt_idletimer");
270 err = PTR_ERR(idletimer_tg_class);
271 if (IS_ERR(idletimer_tg_class)) {
272 pr_debug("couldn't register device class\n");
273 goto out;
274 }
275
276 idletimer_tg_device = device_create(idletimer_tg_class, NULL,
277 MKDEV(0, 0), NULL, "timers");
278 err = PTR_ERR(idletimer_tg_device);
279 if (IS_ERR(idletimer_tg_device)) {
280 pr_debug("couldn't register system device\n");
281 goto out_class;
282 }
283
284 idletimer_tg_kobj = &idletimer_tg_device->kobj;
285
286 err = xt_register_target(&idletimer_tg);
287 if (err < 0) {
288 pr_debug("couldn't register xt target\n");
289 goto out_dev;
290 }
291
292 return 0;
293out_dev:
294 device_destroy(idletimer_tg_class, MKDEV(0, 0));
295out_class:
296 class_destroy(idletimer_tg_class);
297out:
298 return err;
299}
300
301static void __exit idletimer_tg_exit(void)
302{
303 xt_unregister_target(&idletimer_tg);
304
305 device_destroy(idletimer_tg_class, MKDEV(0, 0));
306 class_destroy(idletimer_tg_class);
307}
308
309module_init(idletimer_tg_init);
310module_exit(idletimer_tg_exit);
311
312MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
313MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
314MODULE_DESCRIPTION("Xtables: idle time monitor");
315MODULE_LICENSE("GPL v2");
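
Each rule label becomes a sysfs attribute under the "timers" device, and reading it yields the seconds remaining until expiry (0 once expired); sysfs_notify() additionally wakes anyone polling the file when the timer fires. A small userspace sketch, assuming a rule was installed with --label wlan0 (the path follows from the class "xt_idletimer" and device "timers" created above):

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/sys/class/xt_idletimer/timers/wlan0", "r");
            unsigned int secs;

            if (f != NULL && fscanf(f, "%u", &secs) == 1)
                    printf("interface goes idle in %u seconds\n", secs);
            if (f != NULL)
                    fclose(f);
            return 0;
    }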
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 3271c8e52153..a4140509eea1 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -18,7 +18,7 @@
18 * 02110-1301 USA. 18 * 02110-1301 USA.
19 * 19 *
20 */ 20 */
21 21#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22#include <linux/module.h> 22#include <linux/module.h>
23#include <linux/skbuff.h> 23#include <linux/skbuff.h>
24#include <linux/netfilter/x_tables.h> 24#include <linux/netfilter/x_tables.h>
@@ -32,18 +32,24 @@ MODULE_LICENSE("GPL");
32MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>"); 32MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
33MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); 33MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
34 34
35static LIST_HEAD(xt_led_triggers);
36static DEFINE_MUTEX(xt_led_mutex);
37
35/* 38/*
36 * This is declared in here (the kernel module) only, to avoid having these 39 * This is declared in here (the kernel module) only, to avoid having these
37 * dependencies in userspace code. This is what xt_led_info.internal_data 40 * dependencies in userspace code. This is what xt_led_info.internal_data
38 * points to. 41 * points to.
39 */ 42 */
40struct xt_led_info_internal { 43struct xt_led_info_internal {
44 struct list_head list;
45 int refcnt;
46 char *trigger_id;
41 struct led_trigger netfilter_led_trigger; 47 struct led_trigger netfilter_led_trigger;
42 struct timer_list timer; 48 struct timer_list timer;
43}; 49};
44 50
45static unsigned int 51static unsigned int
46led_tg(struct sk_buff *skb, const struct xt_target_param *par) 52led_tg(struct sk_buff *skb, const struct xt_action_param *par)
47{ 53{
48 const struct xt_led_info *ledinfo = par->targinfo; 54 const struct xt_led_info *ledinfo = par->targinfo;
49 struct xt_led_info_internal *ledinternal = ledinfo->internal_data; 55 struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
@@ -54,7 +60,7 @@ led_tg(struct sk_buff *skb, const struct xt_target_param *par)
54 */ 60 */
55 if ((ledinfo->delay > 0) && ledinfo->always_blink && 61 if ((ledinfo->delay > 0) && ledinfo->always_blink &&
56 timer_pending(&ledinternal->timer)) 62 timer_pending(&ledinternal->timer))
57 led_trigger_event(&ledinternal->netfilter_led_trigger,LED_OFF); 63 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
58 64
59 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL); 65 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
60 66
@@ -75,54 +81,86 @@ led_tg(struct sk_buff *skb, const struct xt_target_param *par)
75 81
76static void led_timeout_callback(unsigned long data) 82static void led_timeout_callback(unsigned long data)
77{ 83{
78 struct xt_led_info *ledinfo = (struct xt_led_info *)data; 84 struct xt_led_info_internal *ledinternal = (struct xt_led_info_internal *)data;
79 struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
80 85
81 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); 86 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
82} 87}
83 88
84static bool led_tg_check(const struct xt_tgchk_param *par) 89static struct xt_led_info_internal *led_trigger_lookup(const char *name)
90{
91 struct xt_led_info_internal *ledinternal;
92
93 list_for_each_entry(ledinternal, &xt_led_triggers, list) {
94 if (!strcmp(name, ledinternal->netfilter_led_trigger.name)) {
95 return ledinternal;
96 }
97 }
98 return NULL;
99}
100
101static int led_tg_check(const struct xt_tgchk_param *par)
85{ 102{
86 struct xt_led_info *ledinfo = par->targinfo; 103 struct xt_led_info *ledinfo = par->targinfo;
87 struct xt_led_info_internal *ledinternal; 104 struct xt_led_info_internal *ledinternal;
88 int err; 105 int err;
89 106
90 if (ledinfo->id[0] == '\0') { 107 if (ledinfo->id[0] == '\0') {
91 printk(KERN_ERR KBUILD_MODNAME ": No 'id' parameter given.\n"); 108 pr_info("No 'id' parameter given.\n");
92 return false; 109 return -EINVAL;
93 } 110 }
94 111
95 ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL); 112 mutex_lock(&xt_led_mutex);
96 if (!ledinternal) { 113
97 printk(KERN_CRIT KBUILD_MODNAME ": out of memory\n"); 114 ledinternal = led_trigger_lookup(ledinfo->id);
98 return false; 115 if (ledinternal) {
116 ledinternal->refcnt++;
117 goto out;
99 } 118 }
100 119
101 ledinternal->netfilter_led_trigger.name = ledinfo->id; 120 err = -ENOMEM;
121 ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL);
122 if (!ledinternal)
123 goto exit_mutex_only;
124
125 ledinternal->trigger_id = kstrdup(ledinfo->id, GFP_KERNEL);
126 if (!ledinternal->trigger_id)
127 goto exit_internal_alloc;
128
129 ledinternal->refcnt = 1;
130 ledinternal->netfilter_led_trigger.name = ledinternal->trigger_id;
102 131
103 err = led_trigger_register(&ledinternal->netfilter_led_trigger); 132 err = led_trigger_register(&ledinternal->netfilter_led_trigger);
104 if (err) { 133 if (err) {
105 printk(KERN_CRIT KBUILD_MODNAME 134 pr_warning("led_trigger_register() failed\n");
106 ": led_trigger_register() failed\n");
107 if (err == -EEXIST) 135 if (err == -EEXIST)
108 printk(KERN_ERR KBUILD_MODNAME 136 pr_warning("Trigger name is already in use.\n");
109 ": Trigger name is already in use.\n");
110 goto exit_alloc; 137 goto exit_alloc;
111 } 138 }
112 139
113 /* See if we need to set up a timer */ 140 /* See if we need to set up a timer */
114 if (ledinfo->delay > 0) 141 if (ledinfo->delay > 0)
115 setup_timer(&ledinternal->timer, led_timeout_callback, 142 setup_timer(&ledinternal->timer, led_timeout_callback,
116 (unsigned long)ledinfo); 143 (unsigned long)ledinternal);
144
145 list_add_tail(&ledinternal->list, &xt_led_triggers);
146
147out:
148 mutex_unlock(&xt_led_mutex);
117 149
118 ledinfo->internal_data = ledinternal; 150 ledinfo->internal_data = ledinternal;
119 151
120 return true; 152 return 0;
121 153
122exit_alloc: 154exit_alloc:
155 kfree(ledinternal->trigger_id);
156
157exit_internal_alloc:
123 kfree(ledinternal); 158 kfree(ledinternal);
124 159
125 return false; 160exit_mutex_only:
161 mutex_unlock(&xt_led_mutex);
162
163 return err;
126} 164}
127 165
128static void led_tg_destroy(const struct xt_tgdtor_param *par) 166static void led_tg_destroy(const struct xt_tgdtor_param *par)
@@ -130,10 +168,23 @@ static void led_tg_destroy(const struct xt_tgdtor_param *par)
130 const struct xt_led_info *ledinfo = par->targinfo; 168 const struct xt_led_info *ledinfo = par->targinfo;
131 struct xt_led_info_internal *ledinternal = ledinfo->internal_data; 169 struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
132 170
171 mutex_lock(&xt_led_mutex);
172
173 if (--ledinternal->refcnt) {
174 mutex_unlock(&xt_led_mutex);
175 return;
176 }
177
178 list_del(&ledinternal->list);
179
133 if (ledinfo->delay > 0) 180 if (ledinfo->delay > 0)
134 del_timer_sync(&ledinternal->timer); 181 del_timer_sync(&ledinternal->timer);
135 182
136 led_trigger_unregister(&ledinternal->netfilter_led_trigger); 183 led_trigger_unregister(&ledinternal->netfilter_led_trigger);
184
185 mutex_unlock(&xt_led_mutex);
186
187 kfree(ledinternal->trigger_id);
137 kfree(ledinternal); 188 kfree(ledinternal);
138} 189}
139 190
@@ -142,7 +193,7 @@ static struct xt_target led_tg_reg __read_mostly = {
142 .revision = 0, 193 .revision = 0,
143 .family = NFPROTO_UNSPEC, 194 .family = NFPROTO_UNSPEC,
144 .target = led_tg, 195 .target = led_tg,
145 .targetsize = XT_ALIGN(sizeof(struct xt_led_info)), 196 .targetsize = sizeof(struct xt_led_info),
146 .checkentry = led_tg_check, 197 .checkentry = led_tg_check,
147 .destroy = led_tg_destroy, 198 .destroy = led_tg_destroy,
148 .me = THIS_MODULE, 199 .me = THIS_MODULE,
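
The xt_LED rework replaces one LED trigger per rule with a refcounted trigger shared by every rule naming the same id, looked up or created under a mutex. The lookup-or-create skeleton it follows, reduced to its essentials (types and names illustrative; assumes <linux/mutex.h>, <linux/list.h>, <linux/slab.h>, <linux/string.h>):

    static DEFINE_MUTEX(obj_mutex);
    static LIST_HEAD(obj_list);

    struct shared_obj {                     /* illustrative */
            struct list_head list;
            int refcnt;
            char *id;
    };

    static struct shared_obj *obj_get(const char *id)
    {
            struct shared_obj *o;

            mutex_lock(&obj_mutex);
            list_for_each_entry(o, &obj_list, list) {
                    if (strcmp(o->id, id) == 0) {
                            o->refcnt++;    /* reuse the existing object */
                            goto out;
                    }
            }
            o = kzalloc(sizeof(*o), GFP_KERNEL);
            if (o == NULL)
                    goto out;
            o->id = kstrdup(id, GFP_KERNEL);
            if (o->id == NULL) {
                    kfree(o);
                    o = NULL;
                    goto out;
            }
            o->refcnt = 1;
            list_add_tail(&o->list, &obj_list);
    out:
            mutex_unlock(&obj_mutex);
            return o;
    }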
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
deleted file mode 100644
index 225f8d11e173..000000000000
--- a/net/netfilter/xt_MARK.c
+++ /dev/null
@@ -1,56 +0,0 @@
1/*
2 * xt_MARK - Netfilter module to modify the NFMARK field of an skb
3 *
4 * (C) 1999-2001 Marc Boucher <marc@mbsi.ca>
5 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
6 * Jan Engelhardt <jengelh@computergmbh.de>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/ip.h>
16#include <net/checksum.h>
17
18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter/xt_MARK.h>
20
21MODULE_LICENSE("GPL");
22MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
23MODULE_DESCRIPTION("Xtables: packet mark modification");
24MODULE_ALIAS("ipt_MARK");
25MODULE_ALIAS("ip6t_MARK");
26
27static unsigned int
28mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
29{
30 const struct xt_mark_tginfo2 *info = par->targinfo;
31
32 skb->mark = (skb->mark & ~info->mask) ^ info->mark;
33 return XT_CONTINUE;
34}
35
36static struct xt_target mark_tg_reg __read_mostly = {
37 .name = "MARK",
38 .revision = 2,
39 .family = NFPROTO_UNSPEC,
40 .target = mark_tg,
41 .targetsize = sizeof(struct xt_mark_tginfo2),
42 .me = THIS_MODULE,
43};
44
45static int __init mark_tg_init(void)
46{
47 return xt_register_target(&mark_tg_reg);
48}
49
50static void __exit mark_tg_exit(void)
51{
52 xt_unregister_target(&mark_tg_reg);
53}
54
55module_init(mark_tg_init);
56module_exit(mark_tg_exit);
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index a57c5cf018ec..a17dd0f589b2 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_NFLOG");
22MODULE_ALIAS("ip6t_NFLOG"); 22MODULE_ALIAS("ip6t_NFLOG");
23 23
24static unsigned int 24static unsigned int
25nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) 25nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
26{ 26{
27 const struct xt_nflog_info *info = par->targinfo; 27 const struct xt_nflog_info *info = par->targinfo;
28 struct nf_loginfo li; 28 struct nf_loginfo li;
@@ -37,15 +37,15 @@ nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
37 return XT_CONTINUE; 37 return XT_CONTINUE;
38} 38}
39 39
40static bool nflog_tg_check(const struct xt_tgchk_param *par) 40static int nflog_tg_check(const struct xt_tgchk_param *par)
41{ 41{
42 const struct xt_nflog_info *info = par->targinfo; 42 const struct xt_nflog_info *info = par->targinfo;
43 43
44 if (info->flags & ~XT_NFLOG_MASK) 44 if (info->flags & ~XT_NFLOG_MASK)
45 return false; 45 return -EINVAL;
46 if (info->prefix[sizeof(info->prefix) - 1] != '\0') 46 if (info->prefix[sizeof(info->prefix) - 1] != '\0')
47 return false; 47 return -EINVAL;
48 return true; 48 return 0;
49} 49}
50 50
51static struct xt_target nflog_tg_reg __read_mostly = { 51static struct xt_target nflog_tg_reg __read_mostly = {
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 12dcd7007c3e..039cce1bde3d 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -31,7 +31,7 @@ static u32 jhash_initval __read_mostly;
31static bool rnd_inited __read_mostly; 31static bool rnd_inited __read_mostly;
32 32
33static unsigned int 33static unsigned int
34nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) 34nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par)
35{ 35{
36 const struct xt_NFQ_info *tinfo = par->targinfo; 36 const struct xt_NFQ_info *tinfo = par->targinfo;
37 37
@@ -49,17 +49,6 @@ static u32 hash_v4(const struct sk_buff *skb)
49 return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval); 49 return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval);
50} 50}
51 51
52static unsigned int
53nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par)
54{
55 const struct xt_NFQ_info_v1 *info = par->targinfo;
56 u32 queue = info->queuenum;
57
58 if (info->queues_total > 1)
59 queue = hash_v4(skb) % info->queues_total + queue;
60 return NF_QUEUE_NR(queue);
61}
62
63#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 52#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
64static u32 hash_v6(const struct sk_buff *skb) 53static u32 hash_v6(const struct sk_buff *skb)
65{ 54{
@@ -73,20 +62,26 @@ static u32 hash_v6(const struct sk_buff *skb)
73 62
74 return jhash2((__force u32 *)addr, ARRAY_SIZE(addr), jhash_initval); 63 return jhash2((__force u32 *)addr, ARRAY_SIZE(addr), jhash_initval);
75} 64}
65#endif
76 66
77static unsigned int 67static unsigned int
78nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par) 68nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
79{ 69{
80 const struct xt_NFQ_info_v1 *info = par->targinfo; 70 const struct xt_NFQ_info_v1 *info = par->targinfo;
81 u32 queue = info->queuenum; 71 u32 queue = info->queuenum;
82 72
83 if (info->queues_total > 1) 73 if (info->queues_total > 1) {
84 queue = hash_v6(skb) % info->queues_total + queue; 74 if (par->family == NFPROTO_IPV4)
75 queue = hash_v4(skb) % info->queues_total + queue;
76#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
77 else if (par->family == NFPROTO_IPV6)
78 queue = hash_v6(skb) % info->queues_total + queue;
79#endif
80 }
85 return NF_QUEUE_NR(queue); 81 return NF_QUEUE_NR(queue);
86} 82}
87#endif
88 83
89static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par) 84static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
90{ 85{
91 const struct xt_NFQ_info_v1 *info = par->targinfo; 86 const struct xt_NFQ_info_v1 *info = par->targinfo;
92 u32 maxid; 87 u32 maxid;
@@ -97,15 +92,15 @@ static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
97 } 92 }
98 if (info->queues_total == 0) { 93 if (info->queues_total == 0) {
99 pr_err("NFQUEUE: number of total queues is 0\n"); 94 pr_err("NFQUEUE: number of total queues is 0\n");
100 return false; 95 return -EINVAL;
101 } 96 }
102 maxid = info->queues_total - 1 + info->queuenum; 97 maxid = info->queues_total - 1 + info->queuenum;
103 if (maxid > 0xffff) { 98 if (maxid > 0xffff) {
104 pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n", 99 pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
105 info->queues_total, maxid); 100 info->queues_total, maxid);
106 return false; 101 return -ERANGE;
107 } 102 }
108 return true; 103 return 0;
109} 104}
110 105
111static struct xt_target nfqueue_tg_reg[] __read_mostly = { 106static struct xt_target nfqueue_tg_reg[] __read_mostly = {
@@ -119,23 +114,12 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
119 { 114 {
120 .name = "NFQUEUE", 115 .name = "NFQUEUE",
121 .revision = 1, 116 .revision = 1,
122 .family = NFPROTO_IPV4, 117 .family = NFPROTO_UNSPEC,
123 .checkentry = nfqueue_tg_v1_check,
124 .target = nfqueue_tg4_v1,
125 .targetsize = sizeof(struct xt_NFQ_info_v1),
126 .me = THIS_MODULE,
127 },
128#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
129 {
130 .name = "NFQUEUE",
131 .revision = 1,
132 .family = NFPROTO_IPV6,
133 .checkentry = nfqueue_tg_v1_check, 118 .checkentry = nfqueue_tg_v1_check,
134 .target = nfqueue_tg6_v1, 119 .target = nfqueue_tg_v1,
135 .targetsize = sizeof(struct xt_NFQ_info_v1), 120 .targetsize = sizeof(struct xt_NFQ_info_v1),
136 .me = THIS_MODULE, 121 .me = THIS_MODULE,
137 }, 122 },
138#endif
139}; 123};
140 124
141static int __init nfqueue_tg_init(void) 125static int __init nfqueue_tg_init(void)
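
Note: with the per-family v1 targets collapsed into one NFPROTO_UNSPEC target above, queue balancing still folds a per-flow hash into the configured window [queuenum, queuenum + queues_total - 1]. A stand-alone sketch of that arithmetic, with a toy hash standing in for the kernel's jhash_2words()/jhash2() (all names below are illustrative, not kernel API):

#include <stdint.h>
#include <stdio.h>

static uint32_t toy_flow_hash(uint32_t saddr, uint32_t daddr, uint8_t proto)
{
	/* stand-in for hash_v4()/hash_v6(); any uniform hash will do */
	return (saddr ^ daddr) * 2654435761u + proto;
}

int main(void)
{
	const unsigned int queuenum = 8, queues_total = 4; /* queues 8..11 */
	uint32_t hash = toy_flow_hash(0xc0a80001, 0x08080808, 6);
	unsigned int queue = queuenum;

	if (queues_total > 1)
		queue = hash % queues_total + queuenum;
	printf("packet steered to NFQUEUE %u\n", queue); /* always 8..11 */
	return 0;
}

The v1 checkentry above already guarantees queuenum + queues_total - 1 <= 0xffff, so the result always fits the 16-bit queue number encoded by NF_QUEUE_NR().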
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index e7a0a54fd4ea..9d782181b6c8 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -13,7 +13,7 @@ MODULE_ALIAS("ipt_NOTRACK");
13MODULE_ALIAS("ip6t_NOTRACK"); 13MODULE_ALIAS("ip6t_NOTRACK");
14 14
15static unsigned int 15static unsigned int
16notrack_tg(struct sk_buff *skb, const struct xt_target_param *par) 16notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
17{ 17{
18 /* Previously seen (loopback)? Ignore. */ 18 /* Previously seen (loopback)? Ignore. */
19 if (skb->nfct != NULL) 19 if (skb->nfct != NULL)
@@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_target_param *par)
 23 If there is a real ct entry corresponding to this packet, 23 If there is a real ct entry corresponding to this packet,
 24 it'll hang around till timing out. We don't deal with it 24 it'll hang around till timing out. We don't deal with it
25 for performance reasons. JK */ 25 for performance reasons. JK */
26 skb->nfct = &nf_conntrack_untracked.ct_general; 26 skb->nfct = &nf_ct_untracked_get()->ct_general;
27 skb->nfctinfo = IP_CT_NEW; 27 skb->nfctinfo = IP_CT_NEW;
28 nf_conntrack_get(skb->nfct); 28 nf_conntrack_get(skb->nfct);
29 29
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index d16d55df4f61..de079abd5bc8 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -60,20 +60,29 @@ struct xt_rateest *xt_rateest_lookup(const char *name)
60} 60}
61EXPORT_SYMBOL_GPL(xt_rateest_lookup); 61EXPORT_SYMBOL_GPL(xt_rateest_lookup);
62 62
63static void xt_rateest_free_rcu(struct rcu_head *head)
64{
65 kfree(container_of(head, struct xt_rateest, rcu));
66}
67
63void xt_rateest_put(struct xt_rateest *est) 68void xt_rateest_put(struct xt_rateest *est)
64{ 69{
65 mutex_lock(&xt_rateest_mutex); 70 mutex_lock(&xt_rateest_mutex);
66 if (--est->refcnt == 0) { 71 if (--est->refcnt == 0) {
67 hlist_del(&est->list); 72 hlist_del(&est->list);
68 gen_kill_estimator(&est->bstats, &est->rstats); 73 gen_kill_estimator(&est->bstats, &est->rstats);
69 kfree(est); 74 /*
75 * gen_estimator est_timer() might access est->lock or bstats,
 76 * wait an RCU grace period before freeing 'est'
77 */
78 call_rcu(&est->rcu, xt_rateest_free_rcu);
70 } 79 }
71 mutex_unlock(&xt_rateest_mutex); 80 mutex_unlock(&xt_rateest_mutex);
72} 81}
73EXPORT_SYMBOL_GPL(xt_rateest_put); 82EXPORT_SYMBOL_GPL(xt_rateest_put);
74 83
75static unsigned int 84static unsigned int
76xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par) 85xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
77{ 86{
78 const struct xt_rateest_target_info *info = par->targinfo; 87 const struct xt_rateest_target_info *info = par->targinfo;
79 struct gnet_stats_basic_packed *stats = &info->est->bstats; 88 struct gnet_stats_basic_packed *stats = &info->est->bstats;
@@ -86,7 +95,7 @@ xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par)
86 return XT_CONTINUE; 95 return XT_CONTINUE;
87} 96}
88 97
89static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) 98static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
90{ 99{
91 struct xt_rateest_target_info *info = par->targinfo; 100 struct xt_rateest_target_info *info = par->targinfo;
92 struct xt_rateest *est; 101 struct xt_rateest *est;
@@ -94,6 +103,7 @@ static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
94 struct nlattr opt; 103 struct nlattr opt;
95 struct gnet_estimator est; 104 struct gnet_estimator est;
96 } cfg; 105 } cfg;
106 int ret;
97 107
98 if (unlikely(!rnd_inited)) { 108 if (unlikely(!rnd_inited)) {
99 get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); 109 get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
@@ -110,12 +120,13 @@ static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
110 (info->interval != est->params.interval || 120 (info->interval != est->params.interval ||
111 info->ewma_log != est->params.ewma_log)) { 121 info->ewma_log != est->params.ewma_log)) {
112 xt_rateest_put(est); 122 xt_rateest_put(est);
113 return false; 123 return -EINVAL;
114 } 124 }
115 info->est = est; 125 info->est = est;
116 return true; 126 return 0;
117 } 127 }
118 128
129 ret = -ENOMEM;
119 est = kzalloc(sizeof(*est), GFP_KERNEL); 130 est = kzalloc(sizeof(*est), GFP_KERNEL);
120 if (!est) 131 if (!est)
121 goto err1; 132 goto err1;
@@ -131,19 +142,19 @@ static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
131 cfg.est.interval = info->interval; 142 cfg.est.interval = info->interval;
132 cfg.est.ewma_log = info->ewma_log; 143 cfg.est.ewma_log = info->ewma_log;
133 144
134 if (gen_new_estimator(&est->bstats, &est->rstats, &est->lock, 145 ret = gen_new_estimator(&est->bstats, &est->rstats,
135 &cfg.opt) < 0) 146 &est->lock, &cfg.opt);
147 if (ret < 0)
136 goto err2; 148 goto err2;
137 149
138 info->est = est; 150 info->est = est;
139 xt_rateest_hash_insert(est); 151 xt_rateest_hash_insert(est);
140 152 return 0;
141 return true;
142 153
143err2: 154err2:
144 kfree(est); 155 kfree(est);
145err1: 156err1:
146 return false; 157 return ret;
147} 158}
148 159
149static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par) 160static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
@@ -177,6 +188,7 @@ static int __init xt_rateest_tg_init(void)
177static void __exit xt_rateest_tg_fini(void) 188static void __exit xt_rateest_tg_fini(void)
178{ 189{
179 xt_unregister_target(&xt_rateest_tg_reg); 190 xt_unregister_target(&xt_rateest_tg_reg);
 191 rcu_barrier(); /* Wait for completion of call_rcu() callbacks (xt_rateest_free_rcu) */
180} 192}
181 193
182 194
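
Note: the callback added above receives only the embedded rcu_head, so it must recover the enclosing estimator with container_of() before freeing it. A user-space sketch of that recovery pattern; struct rcu_head is mocked, no real grace period elapses, and all names are illustrative:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct rcu_head { void (*func)(struct rcu_head *); };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct est_like {
	unsigned int refcnt;
	struct rcu_head rcu;		/* embedded, as in the patch above */
};

static void est_free_rcu(struct rcu_head *head)
{
	/* recover the enclosing object from its embedded member */
	free(container_of(head, struct est_like, rcu));
}

int main(void)
{
	struct est_like *est = calloc(1, sizeof(*est));

	est->rcu.func = est_free_rcu;
	/* a real call_rcu() would invoke this only after a grace period */
	est->rcu.func(&est->rcu);
	puts("object reclaimed via container_of() callback");
	return 0;
}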
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 7a6f9e6f5dfa..23b2d6c486b5 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -12,6 +12,7 @@
12 * published by the Free Software Foundation. 12 * published by the Free Software Foundation.
13 * 13 *
14 */ 14 */
15#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15#include <linux/module.h> 16#include <linux/module.h>
16#include <linux/skbuff.h> 17#include <linux/skbuff.h>
17#include <linux/selinux.h> 18#include <linux/selinux.h>
@@ -29,7 +30,7 @@ MODULE_ALIAS("ip6t_SECMARK");
29static u8 mode; 30static u8 mode;
30 31
31static unsigned int 32static unsigned int
32secmark_tg(struct sk_buff *skb, const struct xt_target_param *par) 33secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
33{ 34{
34 u32 secmark = 0; 35 u32 secmark = 0;
35 const struct xt_secmark_target_info *info = par->targinfo; 36 const struct xt_secmark_target_info *info = par->targinfo;
@@ -49,7 +50,7 @@ secmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
49 return XT_CONTINUE; 50 return XT_CONTINUE;
50} 51}
51 52
52static bool checkentry_selinux(struct xt_secmark_target_info *info) 53static int checkentry_selinux(struct xt_secmark_target_info *info)
53{ 54{
54 int err; 55 int err;
55 struct xt_secmark_target_selinux_info *sel = &info->u.sel; 56 struct xt_secmark_target_selinux_info *sel = &info->u.sel;
@@ -59,58 +60,59 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info)
59 err = selinux_string_to_sid(sel->selctx, &sel->selsid); 60 err = selinux_string_to_sid(sel->selctx, &sel->selsid);
60 if (err) { 61 if (err) {
61 if (err == -EINVAL) 62 if (err == -EINVAL)
62 printk(KERN_INFO PFX "invalid SELinux context \'%s\'\n", 63 pr_info("invalid SELinux context \'%s\'\n",
63 sel->selctx); 64 sel->selctx);
64 return false; 65 return err;
65 } 66 }
66 67
67 if (!sel->selsid) { 68 if (!sel->selsid) {
68 printk(KERN_INFO PFX "unable to map SELinux context \'%s\'\n", 69 pr_info("unable to map SELinux context \'%s\'\n", sel->selctx);
69 sel->selctx); 70 return -ENOENT;
70 return false;
71 } 71 }
72 72
73 err = selinux_secmark_relabel_packet_permission(sel->selsid); 73 err = selinux_secmark_relabel_packet_permission(sel->selsid);
74 if (err) { 74 if (err) {
75 printk(KERN_INFO PFX "unable to obtain relabeling permission\n"); 75 pr_info("unable to obtain relabeling permission\n");
76 return false; 76 return err;
77 } 77 }
78 78
79 selinux_secmark_refcount_inc(); 79 selinux_secmark_refcount_inc();
80 return true; 80 return 0;
81} 81}
82 82
83static bool secmark_tg_check(const struct xt_tgchk_param *par) 83static int secmark_tg_check(const struct xt_tgchk_param *par)
84{ 84{
85 struct xt_secmark_target_info *info = par->targinfo; 85 struct xt_secmark_target_info *info = par->targinfo;
86 int err;
86 87
87 if (strcmp(par->table, "mangle") != 0 && 88 if (strcmp(par->table, "mangle") != 0 &&
88 strcmp(par->table, "security") != 0) { 89 strcmp(par->table, "security") != 0) {
89 printk(KERN_INFO PFX "target only valid in the \'mangle\' " 90 pr_info("target only valid in the \'mangle\' "
90 "or \'security\' tables, not \'%s\'.\n", par->table); 91 "or \'security\' tables, not \'%s\'.\n", par->table);
91 return false; 92 return -EINVAL;
92 } 93 }
93 94
94 if (mode && mode != info->mode) { 95 if (mode && mode != info->mode) {
95 printk(KERN_INFO PFX "mode already set to %hu cannot mix with " 96 pr_info("mode already set to %hu cannot mix with "
96 "rules for mode %hu\n", mode, info->mode); 97 "rules for mode %hu\n", mode, info->mode);
97 return false; 98 return -EINVAL;
98 } 99 }
99 100
100 switch (info->mode) { 101 switch (info->mode) {
101 case SECMARK_MODE_SEL: 102 case SECMARK_MODE_SEL:
102 if (!checkentry_selinux(info)) 103 err = checkentry_selinux(info);
 103 return false; 104 if (err < 0)
105 return err;
104 break; 106 break;
105 107
106 default: 108 default:
107 printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode); 109 pr_info("invalid mode: %hu\n", info->mode);
108 return false; 110 return -EINVAL;
109 } 111 }
110 112
111 if (!mode) 113 if (!mode)
112 mode = info->mode; 114 mode = info->mode;
113 return true; 115 return 0;
114} 116}
115 117
116static void secmark_tg_destroy(const struct xt_tgdtor_param *par) 118static void secmark_tg_destroy(const struct xt_tgdtor_param *par)
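
Note: several of these conversions define pr_fmt() before the first #include so that every pr_info()/pr_err() call is prefixed with the module name instead of a hand-written "xt_FOO: " string. A user-space mock of the expansion; printf stands in for printk, and KBUILD_MODNAME is normally supplied by Kbuild:

#include <stdio.h>

#define KBUILD_MODNAME "xt_SECMARK"		/* set by the build system */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#define pr_info(fmt, ...) printf(pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	pr_info("invalid mode: %hu\n", (unsigned short)3);
	/* prints: xt_SECMARK: invalid mode: 3 */
	return 0;
}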
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index c5f4b9919e9a..eb81c380da1b 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -7,7 +7,7 @@
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10 10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13#include <linux/ip.h> 13#include <linux/ip.h>
@@ -68,15 +68,14 @@ tcpmss_mangle_packet(struct sk_buff *skb,
68 if (info->mss == XT_TCPMSS_CLAMP_PMTU) { 68 if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
69 if (dst_mtu(skb_dst(skb)) <= minlen) { 69 if (dst_mtu(skb_dst(skb)) <= minlen) {
70 if (net_ratelimit()) 70 if (net_ratelimit())
71 printk(KERN_ERR "xt_TCPMSS: " 71 pr_err("unknown or invalid path-MTU (%u)\n",
72 "unknown or invalid path-MTU (%u)\n",
73 dst_mtu(skb_dst(skb))); 72 dst_mtu(skb_dst(skb)));
74 return -1; 73 return -1;
75 } 74 }
76 if (in_mtu <= minlen) { 75 if (in_mtu <= minlen) {
77 if (net_ratelimit()) 76 if (net_ratelimit())
78 printk(KERN_ERR "xt_TCPMSS: unknown or " 77 pr_err("unknown or invalid path-MTU (%u)\n",
79 "invalid path-MTU (%u)\n", in_mtu); 78 in_mtu);
80 return -1; 79 return -1;
81 } 80 }
82 newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen; 81 newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
@@ -166,14 +165,14 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
166 rcu_read_unlock(); 165 rcu_read_unlock();
167 166
168 if (rt != NULL) { 167 if (rt != NULL) {
169 mtu = dst_mtu(&rt->u.dst); 168 mtu = dst_mtu(&rt->dst);
170 dst_release(&rt->u.dst); 169 dst_release(&rt->dst);
171 } 170 }
172 return mtu; 171 return mtu;
173} 172}
174 173
175static unsigned int 174static unsigned int
176tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par) 175tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
177{ 176{
178 struct iphdr *iph = ip_hdr(skb); 177 struct iphdr *iph = ip_hdr(skb);
179 __be16 newlen; 178 __be16 newlen;
@@ -196,7 +195,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par)
196 195
197#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 196#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
198static unsigned int 197static unsigned int
199tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par) 198tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
200{ 199{
201 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 200 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
202 u8 nexthdr; 201 u8 nexthdr;
@@ -221,22 +220,20 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par)
221} 220}
222#endif 221#endif
223 222
224#define TH_SYN 0x02
225
226/* Must specify -p tcp --syn */ 223/* Must specify -p tcp --syn */
227static inline bool find_syn_match(const struct xt_entry_match *m) 224static inline bool find_syn_match(const struct xt_entry_match *m)
228{ 225{
229 const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; 226 const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
230 227
231 if (strcmp(m->u.kernel.match->name, "tcp") == 0 && 228 if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
232 tcpinfo->flg_cmp & TH_SYN && 229 tcpinfo->flg_cmp & TCPHDR_SYN &&
233 !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) 230 !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
234 return true; 231 return true;
235 232
236 return false; 233 return false;
237} 234}
238 235
239static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) 236static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
240{ 237{
241 const struct xt_tcpmss_info *info = par->targinfo; 238 const struct xt_tcpmss_info *info = par->targinfo;
242 const struct ipt_entry *e = par->entryinfo; 239 const struct ipt_entry *e = par->entryinfo;
@@ -246,19 +243,19 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
246 (par->hook_mask & ~((1 << NF_INET_FORWARD) | 243 (par->hook_mask & ~((1 << NF_INET_FORWARD) |
247 (1 << NF_INET_LOCAL_OUT) | 244 (1 << NF_INET_LOCAL_OUT) |
248 (1 << NF_INET_POST_ROUTING))) != 0) { 245 (1 << NF_INET_POST_ROUTING))) != 0) {
249 printk("xt_TCPMSS: path-MTU clamping only supported in " 246 pr_info("path-MTU clamping only supported in "
250 "FORWARD, OUTPUT and POSTROUTING hooks\n"); 247 "FORWARD, OUTPUT and POSTROUTING hooks\n");
251 return false; 248 return -EINVAL;
252 } 249 }
253 xt_ematch_foreach(ematch, e) 250 xt_ematch_foreach(ematch, e)
254 if (find_syn_match(ematch)) 251 if (find_syn_match(ematch))
255 return true; 252 return 0;
256 printk("xt_TCPMSS: Only works on TCP SYN packets\n"); 253 pr_info("Only works on TCP SYN packets\n");
257 return false; 254 return -EINVAL;
258} 255}
259 256
260#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 257#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
261static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) 258static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
262{ 259{
263 const struct xt_tcpmss_info *info = par->targinfo; 260 const struct xt_tcpmss_info *info = par->targinfo;
264 const struct ip6t_entry *e = par->entryinfo; 261 const struct ip6t_entry *e = par->entryinfo;
@@ -268,15 +265,15 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
268 (par->hook_mask & ~((1 << NF_INET_FORWARD) | 265 (par->hook_mask & ~((1 << NF_INET_FORWARD) |
269 (1 << NF_INET_LOCAL_OUT) | 266 (1 << NF_INET_LOCAL_OUT) |
270 (1 << NF_INET_POST_ROUTING))) != 0) { 267 (1 << NF_INET_POST_ROUTING))) != 0) {
271 printk("xt_TCPMSS: path-MTU clamping only supported in " 268 pr_info("path-MTU clamping only supported in "
272 "FORWARD, OUTPUT and POSTROUTING hooks\n"); 269 "FORWARD, OUTPUT and POSTROUTING hooks\n");
273 return false; 270 return -EINVAL;
274 } 271 }
275 xt_ematch_foreach(ematch, e) 272 xt_ematch_foreach(ematch, e)
276 if (find_syn_match(ematch)) 273 if (find_syn_match(ematch))
277 return true; 274 return 0;
278 printk("xt_TCPMSS: Only works on TCP SYN packets\n"); 275 pr_info("Only works on TCP SYN packets\n");
279 return false; 276 return -EINVAL;
280} 277}
281#endif 278#endif
282 279
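
Note: with --clamp-mss-to-pmtu, tcpmss_mangle_packet() (partially shown above) takes the smaller of the forward and reverse path MTUs and subtracts the minimal header length. A worked stand-alone example of just that arithmetic, with invented values:

#include <stdio.h>

static unsigned int clamp_mss(unsigned int dst_mtu, unsigned int in_mtu,
			      unsigned int minlen)
{
	unsigned int mtu = dst_mtu < in_mtu ? dst_mtu : in_mtu;

	return mtu - minlen;	/* callers reject mtu <= minlen beforehand */
}

int main(void)
{
	/* IPv4: minlen = sizeof(struct iphdr) + sizeof(struct tcphdr) = 40 */
	printf("clamped MSS: %u\n", clamp_mss(1492, 1500, 40));	/* 1452 */
	return 0;
}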
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 9dd8c8ef63eb..9dc9ecfdd546 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -3,7 +3,6 @@
3 * 3 *
4 * Copyright (C) 2007 Sven Schnelle <svens@bitebene.org> 4 * Copyright (C) 2007 Sven Schnelle <svens@bitebene.org>
5 * Copyright © CC Computer Consultants GmbH, 2007 5 * Copyright © CC Computer Consultants GmbH, 2007
6 * Contact: Jan Engelhardt <jengelh@computergmbh.de>
7 * 6 *
8 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
@@ -75,7 +74,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
75} 74}
76 75
77static unsigned int 76static unsigned int
78tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par) 77tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
79{ 78{
80 return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb), 79 return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb),
81 sizeof(struct iphdr) + sizeof(struct tcphdr)); 80 sizeof(struct iphdr) + sizeof(struct tcphdr));
@@ -83,7 +82,7 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par)
83 82
84#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) 83#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
85static unsigned int 84static unsigned int
86tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_target_param *par) 85tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
87{ 86{
88 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 87 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
89 int tcphoff; 88 int tcphoff;
@@ -136,7 +135,7 @@ static void __exit tcpoptstrip_tg_exit(void)
136 135
137module_init(tcpoptstrip_tg_init); 136module_init(tcpoptstrip_tg_init);
138module_exit(tcpoptstrip_tg_exit); 137module_exit(tcpoptstrip_tg_exit);
139MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@computergmbh.de>"); 138MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@medozas.de>");
140MODULE_DESCRIPTION("Xtables: TCP option stripping"); 139MODULE_DESCRIPTION("Xtables: TCP option stripping");
141MODULE_LICENSE("GPL"); 140MODULE_LICENSE("GPL");
142MODULE_ALIAS("ipt_TCPOPTSTRIP"); 141MODULE_ALIAS("ipt_TCPOPTSTRIP");
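
Note: the mangle helper referenced above (not fully shown) removes an option by overwriting its kind, length and data bytes with TCPOPT_NOP while walking the option list. A stand-alone sketch of that walk, assuming a well-formed options buffer; the helper name and sample bytes are illustrative:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TCPOPT_EOL 0
#define TCPOPT_NOP 1

static void strip_option(uint8_t *opt, size_t len, uint8_t kind)
{
	size_t i = 0;

	while (i < len) {
		if (opt[i] == TCPOPT_EOL)
			break;
		if (opt[i] == TCPOPT_NOP) {
			i++;
			continue;
		}
		if (i + 1 >= len || opt[i + 1] < 2 || opt[i + 1] > len - i)
			break;			/* malformed option */
		uint8_t optlen = opt[i + 1];	/* save before overwriting */
		if (opt[i] == kind)
			memset(opt + i, TCPOPT_NOP, optlen);
		i += optlen;
	}
}

int main(void)
{
	/* MSS (kind 2, len 4), SACK-permitted (kind 4, len 2), two NOPs */
	uint8_t opts[] = { 2, 4, 0x05, 0xb4, 4, 2, 1, 1 };

	strip_option(opts, sizeof(opts), 4);	/* strip SACK-permitted */
	for (size_t i = 0; i < sizeof(opts); i++)
		printf("%02x ", opts[i]);
	printf("\n");			/* 02 04 05 b4 01 01 01 01 */
	return 0;
}

In the kernel the TCP checksum is then fixed up for every overwritten byte; that step is omitted here.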
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
new file mode 100644
index 000000000000..22a2d421e7eb
--- /dev/null
+++ b/net/netfilter/xt_TEE.c
@@ -0,0 +1,309 @@
1/*
2 * "TEE" target extension for Xtables
3 * Copyright © Sebastian Claßen, 2007
4 * Jan Engelhardt, 2007-2010
5 *
6 * based on ipt_ROUTE.c from Cédric de Launois
7 * <delaunois@info.ucl.be>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * version 2 or later, as published by the Free Software Foundation.
12 */
13#include <linux/ip.h>
14#include <linux/module.h>
15#include <linux/percpu.h>
16#include <linux/route.h>
17#include <linux/skbuff.h>
18#include <linux/notifier.h>
19#include <net/checksum.h>
20#include <net/icmp.h>
21#include <net/ip.h>
22#include <net/ipv6.h>
23#include <net/ip6_route.h>
24#include <net/route.h>
25#include <linux/netfilter/x_tables.h>
26#include <linux/netfilter/xt_TEE.h>
27
28#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
29# define WITH_CONNTRACK 1
30# include <net/netfilter/nf_conntrack.h>
31#endif
32#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
33# define WITH_IPV6 1
34#endif
35
36struct xt_tee_priv {
37 struct notifier_block notifier;
38 struct xt_tee_tginfo *tginfo;
39 int oif;
40};
41
42static const union nf_inet_addr tee_zero_address;
43static DEFINE_PER_CPU(bool, tee_active);
44
45static struct net *pick_net(struct sk_buff *skb)
46{
47#ifdef CONFIG_NET_NS
48 const struct dst_entry *dst;
49
50 if (skb->dev != NULL)
51 return dev_net(skb->dev);
52 dst = skb_dst(skb);
53 if (dst != NULL && dst->dev != NULL)
54 return dev_net(dst->dev);
55#endif
56 return &init_net;
57}
58
59static bool
60tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
61{
62 const struct iphdr *iph = ip_hdr(skb);
63 struct net *net = pick_net(skb);
64 struct rtable *rt;
65 struct flowi fl;
66
67 memset(&fl, 0, sizeof(fl));
68 if (info->priv) {
69 if (info->priv->oif == -1)
70 return false;
71 fl.oif = info->priv->oif;
72 }
73 fl.nl_u.ip4_u.daddr = info->gw.ip;
74 fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
75 fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
76 if (ip_route_output_key(net, &rt, &fl) != 0)
77 return false;
78
79 skb_dst_drop(skb);
80 skb_dst_set(skb, &rt->dst);
81 skb->dev = rt->dst.dev;
82 skb->protocol = htons(ETH_P_IP);
83 return true;
84}
85
86static unsigned int
87tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
88{
89 const struct xt_tee_tginfo *info = par->targinfo;
90 struct iphdr *iph;
91
92 if (percpu_read(tee_active))
93 return XT_CONTINUE;
94 /*
95 * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
96 * the original skb, which should continue on its way as if nothing has
97 * happened. The copy should be independently delivered to the TEE
98 * --gateway.
99 */
100 skb = pskb_copy(skb, GFP_ATOMIC);
101 if (skb == NULL)
102 return XT_CONTINUE;
103
104#ifdef WITH_CONNTRACK
105 /* Avoid counting cloned packets towards the original connection. */
106 nf_conntrack_put(skb->nfct);
107 skb->nfct = &nf_ct_untracked_get()->ct_general;
108 skb->nfctinfo = IP_CT_NEW;
109 nf_conntrack_get(skb->nfct);
110#endif
111 /*
112 * If we are in PREROUTING/INPUT, the checksum must be recalculated
113 * since the length could have changed as a result of defragmentation.
114 *
115 * We also decrease the TTL to mitigate potential TEE loops
116 * between two hosts.
117 *
118 * Set %IP_DF so that the original source is notified of a potentially
119 * decreased MTU on the clone route. IPv6 does this too.
120 */
121 iph = ip_hdr(skb);
122 iph->frag_off |= htons(IP_DF);
123 if (par->hooknum == NF_INET_PRE_ROUTING ||
124 par->hooknum == NF_INET_LOCAL_IN)
125 --iph->ttl;
126 ip_send_check(iph);
127
128 if (tee_tg_route4(skb, info)) {
129 percpu_write(tee_active, true);
130 ip_local_out(skb);
131 percpu_write(tee_active, false);
132 } else {
133 kfree_skb(skb);
134 }
135 return XT_CONTINUE;
136}
137
138#ifdef WITH_IPV6
139static bool
140tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
141{
142 const struct ipv6hdr *iph = ipv6_hdr(skb);
143 struct net *net = pick_net(skb);
144 struct dst_entry *dst;
145 struct flowi fl;
146
147 memset(&fl, 0, sizeof(fl));
148 if (info->priv) {
149 if (info->priv->oif == -1)
150 return false;
151 fl.oif = info->priv->oif;
152 }
153 fl.nl_u.ip6_u.daddr = info->gw.in6;
154 fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
155 (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
156 dst = ip6_route_output(net, NULL, &fl);
157 if (dst == NULL)
158 return false;
159
160 skb_dst_drop(skb);
161 skb_dst_set(skb, dst);
162 skb->dev = dst->dev;
163 skb->protocol = htons(ETH_P_IPV6);
164 return true;
165}
166
167static unsigned int
168tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
169{
170 const struct xt_tee_tginfo *info = par->targinfo;
171
172 if (percpu_read(tee_active))
173 return XT_CONTINUE;
174 skb = pskb_copy(skb, GFP_ATOMIC);
175 if (skb == NULL)
176 return XT_CONTINUE;
177
178#ifdef WITH_CONNTRACK
179 nf_conntrack_put(skb->nfct);
180 skb->nfct = &nf_ct_untracked_get()->ct_general;
181 skb->nfctinfo = IP_CT_NEW;
182 nf_conntrack_get(skb->nfct);
183#endif
184 if (par->hooknum == NF_INET_PRE_ROUTING ||
185 par->hooknum == NF_INET_LOCAL_IN) {
186 struct ipv6hdr *iph = ipv6_hdr(skb);
187 --iph->hop_limit;
188 }
189 if (tee_tg_route6(skb, info)) {
190 percpu_write(tee_active, true);
191 ip6_local_out(skb);
192 percpu_write(tee_active, false);
193 } else {
194 kfree_skb(skb);
195 }
196 return XT_CONTINUE;
197}
198#endif /* WITH_IPV6 */
199
200static int tee_netdev_event(struct notifier_block *this, unsigned long event,
201 void *ptr)
202{
203 struct net_device *dev = ptr;
204 struct xt_tee_priv *priv;
205
206 priv = container_of(this, struct xt_tee_priv, notifier);
207 switch (event) {
208 case NETDEV_REGISTER:
209 if (!strcmp(dev->name, priv->tginfo->oif))
210 priv->oif = dev->ifindex;
211 break;
212 case NETDEV_UNREGISTER:
213 if (dev->ifindex == priv->oif)
214 priv->oif = -1;
215 break;
216 case NETDEV_CHANGENAME:
217 if (!strcmp(dev->name, priv->tginfo->oif))
218 priv->oif = dev->ifindex;
219 else if (dev->ifindex == priv->oif)
220 priv->oif = -1;
221 break;
222 }
223
224 return NOTIFY_DONE;
225}
226
227static int tee_tg_check(const struct xt_tgchk_param *par)
228{
229 struct xt_tee_tginfo *info = par->targinfo;
230 struct xt_tee_priv *priv;
231
232 /* 0.0.0.0 and :: not allowed */
233 if (memcmp(&info->gw, &tee_zero_address,
234 sizeof(tee_zero_address)) == 0)
235 return -EINVAL;
236
237 if (info->oif[0]) {
238 if (info->oif[sizeof(info->oif)-1] != '\0')
239 return -EINVAL;
240
241 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
242 if (priv == NULL)
243 return -ENOMEM;
244
245 priv->tginfo = info;
246 priv->oif = -1;
247 priv->notifier.notifier_call = tee_netdev_event;
248 info->priv = priv;
249
250 register_netdevice_notifier(&priv->notifier);
251 } else
252 info->priv = NULL;
253
254 return 0;
255}
256
257static void tee_tg_destroy(const struct xt_tgdtor_param *par)
258{
259 struct xt_tee_tginfo *info = par->targinfo;
260
261 if (info->priv) {
262 unregister_netdevice_notifier(&info->priv->notifier);
263 kfree(info->priv);
264 }
265}
266
267static struct xt_target tee_tg_reg[] __read_mostly = {
268 {
269 .name = "TEE",
270 .revision = 1,
271 .family = NFPROTO_IPV4,
272 .target = tee_tg4,
273 .targetsize = sizeof(struct xt_tee_tginfo),
274 .checkentry = tee_tg_check,
275 .destroy = tee_tg_destroy,
276 .me = THIS_MODULE,
277 },
278#ifdef WITH_IPV6
279 {
280 .name = "TEE",
281 .revision = 1,
282 .family = NFPROTO_IPV6,
283 .target = tee_tg6,
284 .targetsize = sizeof(struct xt_tee_tginfo),
285 .checkentry = tee_tg_check,
286 .destroy = tee_tg_destroy,
287 .me = THIS_MODULE,
288 },
289#endif
290};
291
292static int __init tee_tg_init(void)
293{
294 return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
295}
296
297static void __exit tee_tg_exit(void)
298{
299 xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
300}
301
302module_init(tee_tg_init);
303module_exit(tee_tg_exit);
304MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
305MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
306MODULE_DESCRIPTION("Xtables: Reroute packet copy");
307MODULE_LICENSE("GPL");
308MODULE_ALIAS("ipt_TEE");
309MODULE_ALIAS("ip6t_TEE");
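
Note: the tee_active per-CPU flag above is what stops a cloned packet from being cloned again when ip_local_out()/ip6_local_out() re-enters the same netfilter hook on the same CPU. A user-space sketch of the guard, with a thread-local flag standing in for DEFINE_PER_CPU and the packet path reduced to a recursive call:

#include <stdbool.h>
#include <stdio.h>

static _Thread_local bool tee_active;	/* stand-in for DEFINE_PER_CPU */

static void reinject(void);

static void tee_target(int depth)
{
	if (tee_active) {
		printf("clone re-entered hook, passing through\n");
		return;			/* XT_CONTINUE, no second clone */
	}
	printf("cloning packet (depth %d)\n", depth);
	tee_active = true;
	reinject();			/* clone traverses the hook again */
	tee_active = false;
}

static void reinject(void)
{
	tee_target(1);			/* guarded by tee_active */
}

int main(void)
{
	tee_target(0);
	return 0;
}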
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 1340c2fa3621..c61294d85fda 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -9,7 +9,7 @@
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 * 10 *
11 */ 11 */
12 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/ip.h> 15#include <linux/ip.h>
@@ -25,7 +25,7 @@
25#include <net/netfilter/nf_tproxy_core.h> 25#include <net/netfilter/nf_tproxy_core.h>
26 26
27static unsigned int 27static unsigned int
28tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par) 28tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
29{ 29{
30 const struct iphdr *iph = ip_hdr(skb); 30 const struct iphdr *iph = ip_hdr(skb);
31 const struct xt_tproxy_target_info *tgi = par->targinfo; 31 const struct xt_tproxy_target_info *tgi = par->targinfo;
@@ -37,8 +37,10 @@ tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par)
37 return NF_DROP; 37 return NF_DROP;
38 38
39 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, 39 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
40 iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, 40 iph->saddr,
41 hp->source, tgi->lport ? tgi->lport : hp->dest, 41 tgi->laddr ? tgi->laddr : iph->daddr,
42 hp->source,
43 tgi->lport ? tgi->lport : hp->dest,
42 par->in, true); 44 par->in, true);
43 45
44 /* NOTE: assign_sock consumes our sk reference */ 46 /* NOTE: assign_sock consumes our sk reference */
@@ -59,17 +61,17 @@ tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par)
59 return NF_DROP; 61 return NF_DROP;
60} 62}
61 63
62static bool tproxy_tg_check(const struct xt_tgchk_param *par) 64static int tproxy_tg_check(const struct xt_tgchk_param *par)
63{ 65{
64 const struct ipt_ip *i = par->entryinfo; 66 const struct ipt_ip *i = par->entryinfo;
65 67
66 if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) 68 if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
67 && !(i->invflags & IPT_INV_PROTO)) 69 && !(i->invflags & IPT_INV_PROTO))
68 return true; 70 return 0;
69 71
70 pr_info("xt_TPROXY: Can be used only in combination with " 72 pr_info("Can be used only in combination with "
71 "either -p tcp or -p udp\n"); 73 "either -p tcp or -p udp\n");
72 return false; 74 return -EINVAL;
73} 75}
74 76
75static struct xt_target tproxy_tg_reg __read_mostly = { 77static struct xt_target tproxy_tg_reg __read_mostly = {
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index fbb04b86c46b..df48967af382 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -11,7 +11,7 @@ MODULE_ALIAS("ipt_TRACE");
11MODULE_ALIAS("ip6t_TRACE"); 11MODULE_ALIAS("ip6t_TRACE");
12 12
13static unsigned int 13static unsigned int
14trace_tg(struct sk_buff *skb, const struct xt_target_param *par) 14trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
15{ 15{
16 skb->nf_trace = 1; 16 skb->nf_trace = 1;
17 return XT_CONTINUE; 17 return XT_CONTINUE;
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 225ee3ecd69d..f4af1bfafb1c 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -5,6 +5,7 @@
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 */ 7 */
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8#include <linux/module.h> 9#include <linux/module.h>
9#include <linux/skbuff.h> 10#include <linux/skbuff.h>
10#include <linux/jhash.h> 11#include <linux/jhash.h>
@@ -85,7 +86,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
85} 86}
86 87
87static bool 88static bool
88xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par) 89xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
89{ 90{
90 struct sk_buff *pskb = (struct sk_buff *)skb; 91 struct sk_buff *pskb = (struct sk_buff *)skb;
91 const struct xt_cluster_match_info *info = par->matchinfo; 92 const struct xt_cluster_match_info *info = par->matchinfo;
@@ -119,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par)
119 if (ct == NULL) 120 if (ct == NULL)
120 return false; 121 return false;
121 122
122 if (ct == &nf_conntrack_untracked) 123 if (nf_ct_is_untracked(ct))
123 return false; 124 return false;
124 125
125 if (ct->master) 126 if (ct->master)
@@ -131,22 +132,22 @@ xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par)
131 !!(info->flags & XT_CLUSTER_F_INV); 132 !!(info->flags & XT_CLUSTER_F_INV);
132} 133}
133 134
134static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) 135static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
135{ 136{
136 struct xt_cluster_match_info *info = par->matchinfo; 137 struct xt_cluster_match_info *info = par->matchinfo;
137 138
138 if (info->total_nodes > XT_CLUSTER_NODES_MAX) { 139 if (info->total_nodes > XT_CLUSTER_NODES_MAX) {
139 printk(KERN_ERR "xt_cluster: you have exceeded the maximum " 140 pr_info("you have exceeded the maximum "
140 "number of cluster nodes (%u > %u)\n", 141 "number of cluster nodes (%u > %u)\n",
141 info->total_nodes, XT_CLUSTER_NODES_MAX); 142 info->total_nodes, XT_CLUSTER_NODES_MAX);
142 return false; 143 return -EINVAL;
143 } 144 }
144 if (info->node_mask >= (1ULL << info->total_nodes)) { 145 if (info->node_mask >= (1ULL << info->total_nodes)) {
145 printk(KERN_ERR "xt_cluster: this node mask cannot be " 146 pr_info("this node mask cannot be "
146 "higher than the total number of nodes\n"); 147 "higher than the total number of nodes\n");
147 return false; 148 return -EDOM;
148 } 149 }
149 return true; 150 return 0;
150} 151}
151 152
152static struct xt_match xt_cluster_match __read_mostly = { 153static struct xt_match xt_cluster_match __read_mostly = {
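
Note: the -EDOM case above enforces that only the low total_nodes bits of node_mask may be set, i.e. node_mask < (1ULL << total_nodes). A stand-alone demonstration of the check, with invented values:

#include <stdint.h>
#include <stdio.h>

static int check_node_mask(uint64_t node_mask, unsigned int total_nodes)
{
	if (node_mask >= (1ULL << total_nodes))
		return -1;		/* -EDOM in the kernel check */
	return 0;
}

int main(void)
{
	printf("%d\n", check_node_mask(0x3, 2));	/* ok: nodes 0 and 1 */
	printf("%d\n", check_node_mask(0x4, 2));	/* rejected: bit 2 */
	return 0;
}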
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index e82179832acd..5c861d2f21ca 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -16,7 +16,7 @@ MODULE_ALIAS("ipt_comment");
16MODULE_ALIAS("ip6t_comment"); 16MODULE_ALIAS("ip6t_comment");
17 17
18static bool 18static bool
19comment_mt(const struct sk_buff *skb, const struct xt_match_param *par) 19comment_mt(const struct sk_buff *skb, struct xt_action_param *par)
20{ 20{
21 /* We always match */ 21 /* We always match */
22 return true; 22 return true;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 955e6598a7f0..5b138506690e 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -1,6 +1,7 @@
1/* Kernel module to match connection tracking byte counter. 1/* Kernel module to match connection tracking byte counter.
2 * GPL (C) 2002 Martin Devera (devik@cdi.cz). 2 * GPL (C) 2002 Martin Devera (devik@cdi.cz).
3 */ 3 */
4#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
4#include <linux/module.h> 5#include <linux/module.h>
5#include <linux/bitops.h> 6#include <linux/bitops.h>
6#include <linux/skbuff.h> 7#include <linux/skbuff.h>
@@ -17,7 +18,7 @@ MODULE_ALIAS("ipt_connbytes");
17MODULE_ALIAS("ip6t_connbytes"); 18MODULE_ALIAS("ip6t_connbytes");
18 19
19static bool 20static bool
20connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par) 21connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
21{ 22{
22 const struct xt_connbytes_info *sinfo = par->matchinfo; 23 const struct xt_connbytes_info *sinfo = par->matchinfo;
23 const struct nf_conn *ct; 24 const struct nf_conn *ct;
@@ -92,27 +93,36 @@ connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par)
92 return what >= sinfo->count.from; 93 return what >= sinfo->count.from;
93} 94}
94 95
95static bool connbytes_mt_check(const struct xt_mtchk_param *par) 96static int connbytes_mt_check(const struct xt_mtchk_param *par)
96{ 97{
97 const struct xt_connbytes_info *sinfo = par->matchinfo; 98 const struct xt_connbytes_info *sinfo = par->matchinfo;
99 int ret;
98 100
99 if (sinfo->what != XT_CONNBYTES_PKTS && 101 if (sinfo->what != XT_CONNBYTES_PKTS &&
100 sinfo->what != XT_CONNBYTES_BYTES && 102 sinfo->what != XT_CONNBYTES_BYTES &&
101 sinfo->what != XT_CONNBYTES_AVGPKT) 103 sinfo->what != XT_CONNBYTES_AVGPKT)
102 return false; 104 return -EINVAL;
103 105
104 if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL && 106 if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL &&
105 sinfo->direction != XT_CONNBYTES_DIR_REPLY && 107 sinfo->direction != XT_CONNBYTES_DIR_REPLY &&
106 sinfo->direction != XT_CONNBYTES_DIR_BOTH) 108 sinfo->direction != XT_CONNBYTES_DIR_BOTH)
107 return false; 109 return -EINVAL;
108 110
109 if (nf_ct_l3proto_try_module_get(par->family) < 0) { 111 ret = nf_ct_l3proto_try_module_get(par->family);
110 printk(KERN_WARNING "can't load conntrack support for " 112 if (ret < 0)
111 "proto=%u\n", par->family); 113 pr_info("cannot load conntrack support for proto=%u\n",
112 return false; 114 par->family);
115
116 /*
117 * This filter cannot function correctly unless connection tracking
118 * accounting is enabled, so complain in the hope that someone notices.
119 */
120 if (!nf_ct_acct_enabled(par->net)) {
121 pr_warning("Forcing CT accounting to be enabled\n");
122 nf_ct_set_acct(par->net, true);
113 } 123 }
114 124
115 return true; 125 return ret;
116} 126}
117 127
118static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) 128static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 388ca4596098..5c5b6b921b84 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -5,13 +5,13 @@
5 * Nov 2002: Martin Bene <martin.bene@icomedias.com>: 5 * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
6 * only ignore TIME_WAIT or gone connections 6 * only ignore TIME_WAIT or gone connections
7 * (C) CC Computer Consultants GmbH, 2007 7 * (C) CC Computer Consultants GmbH, 2007
8 * Contact: <jengelh@computergmbh.de>
9 * 8 *
10 * based on ... 9 * based on ...
11 * 10 *
12 * Kernel module to match connection tracking information. 11 * Kernel module to match connection tracking information.
13 * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). 12 * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au).
14 */ 13 */
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15#include <linux/in.h> 15#include <linux/in.h>
16#include <linux/in6.h> 16#include <linux/in6.h>
17#include <linux/ip.h> 17#include <linux/ip.h>
@@ -173,7 +173,7 @@ static int count_them(struct net *net,
173} 173}
174 174
175static bool 175static bool
176connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) 176connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
177{ 177{
178 struct net *net = dev_net(par->in ? par->in : par->out); 178 struct net *net = dev_net(par->in ? par->in : par->out);
179 const struct xt_connlimit_info *info = par->matchinfo; 179 const struct xt_connlimit_info *info = par->matchinfo;
@@ -206,44 +206,46 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
206 206
207 if (connections < 0) { 207 if (connections < 0) {
208 /* kmalloc failed, drop it entirely */ 208 /* kmalloc failed, drop it entirely */
209 *par->hotdrop = true; 209 par->hotdrop = true;
210 return false; 210 return false;
211 } 211 }
212 212
213 return (connections > info->limit) ^ info->inverse; 213 return (connections > info->limit) ^ info->inverse;
214 214
215 hotdrop: 215 hotdrop:
216 *par->hotdrop = true; 216 par->hotdrop = true;
217 return false; 217 return false;
218} 218}
219 219
220static bool connlimit_mt_check(const struct xt_mtchk_param *par) 220static int connlimit_mt_check(const struct xt_mtchk_param *par)
221{ 221{
222 struct xt_connlimit_info *info = par->matchinfo; 222 struct xt_connlimit_info *info = par->matchinfo;
223 unsigned int i; 223 unsigned int i;
224 int ret;
224 225
225 if (unlikely(!connlimit_rnd_inited)) { 226 if (unlikely(!connlimit_rnd_inited)) {
226 get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd)); 227 get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
227 connlimit_rnd_inited = true; 228 connlimit_rnd_inited = true;
228 } 229 }
229 if (nf_ct_l3proto_try_module_get(par->family) < 0) { 230 ret = nf_ct_l3proto_try_module_get(par->family);
230 printk(KERN_WARNING "cannot load conntrack support for " 231 if (ret < 0) {
231 "address family %u\n", par->family); 232 pr_info("cannot load conntrack support for "
232 return false; 233 "address family %u\n", par->family);
234 return ret;
233 } 235 }
234 236
235 /* init private data */ 237 /* init private data */
236 info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); 238 info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
237 if (info->data == NULL) { 239 if (info->data == NULL) {
238 nf_ct_l3proto_module_put(par->family); 240 nf_ct_l3proto_module_put(par->family);
239 return false; 241 return -ENOMEM;
240 } 242 }
241 243
242 spin_lock_init(&info->data->lock); 244 spin_lock_init(&info->data->lock);
243 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) 245 for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
244 INIT_LIST_HEAD(&info->data->iphash[i]); 246 INIT_LIST_HEAD(&info->data->iphash[i]);
245 247
246 return true; 248 return 0;
247} 249}
248 250
249static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) 251static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 122aa8b0147b..7278145e6a68 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -1,10 +1,10 @@
1/* 1/*
2 * xt_connmark - Netfilter module to match connection mark values 2 * xt_connmark - Netfilter module to operate on connection marks
3 * 3 *
4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> 4 * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
5 * by Henrik Nordstrom <hno@marasystems.com> 5 * by Henrik Nordstrom <hno@marasystems.com>
6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008 6 * Copyright © CC Computer Consultants GmbH, 2007 - 2008
7 * Jan Engelhardt <jengelh@computergmbh.de> 7 * Jan Engelhardt <jengelh@medozas.de>
8 * 8 *
9 * This program is free software; you can redistribute it and/or modify 9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by 10 * it under the terms of the GNU General Public License as published by
@@ -24,17 +24,74 @@
24#include <linux/module.h> 24#include <linux/module.h>
25#include <linux/skbuff.h> 25#include <linux/skbuff.h>
26#include <net/netfilter/nf_conntrack.h> 26#include <net/netfilter/nf_conntrack.h>
27#include <net/netfilter/nf_conntrack_ecache.h>
27#include <linux/netfilter/x_tables.h> 28#include <linux/netfilter/x_tables.h>
28#include <linux/netfilter/xt_connmark.h> 29#include <linux/netfilter/xt_connmark.h>
29 30
30MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>"); 31MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
31MODULE_DESCRIPTION("Xtables: connection mark match"); 32MODULE_DESCRIPTION("Xtables: connection mark operations");
32MODULE_LICENSE("GPL"); 33MODULE_LICENSE("GPL");
34MODULE_ALIAS("ipt_CONNMARK");
35MODULE_ALIAS("ip6t_CONNMARK");
33MODULE_ALIAS("ipt_connmark"); 36MODULE_ALIAS("ipt_connmark");
34MODULE_ALIAS("ip6t_connmark"); 37MODULE_ALIAS("ip6t_connmark");
35 38
39static unsigned int
40connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
41{
42 const struct xt_connmark_tginfo1 *info = par->targinfo;
43 enum ip_conntrack_info ctinfo;
44 struct nf_conn *ct;
45 u_int32_t newmark;
46
47 ct = nf_ct_get(skb, &ctinfo);
48 if (ct == NULL)
49 return XT_CONTINUE;
50
51 switch (info->mode) {
52 case XT_CONNMARK_SET:
53 newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
54 if (ct->mark != newmark) {
55 ct->mark = newmark;
56 nf_conntrack_event_cache(IPCT_MARK, ct);
57 }
58 break;
59 case XT_CONNMARK_SAVE:
60 newmark = (ct->mark & ~info->ctmask) ^
61 (skb->mark & info->nfmask);
62 if (ct->mark != newmark) {
63 ct->mark = newmark;
64 nf_conntrack_event_cache(IPCT_MARK, ct);
65 }
66 break;
67 case XT_CONNMARK_RESTORE:
68 newmark = (skb->mark & ~info->nfmask) ^
69 (ct->mark & info->ctmask);
70 skb->mark = newmark;
71 break;
72 }
73
74 return XT_CONTINUE;
75}
76
77static int connmark_tg_check(const struct xt_tgchk_param *par)
78{
79 int ret;
80
81 ret = nf_ct_l3proto_try_module_get(par->family);
82 if (ret < 0)
83 pr_info("cannot load conntrack support for proto=%u\n",
84 par->family);
85 return ret;
86}
87
88static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
89{
90 nf_ct_l3proto_module_put(par->family);
91}
92
36static bool 93static bool
37connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) 94connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
38{ 95{
39 const struct xt_connmark_mtinfo1 *info = par->matchinfo; 96 const struct xt_connmark_mtinfo1 *info = par->matchinfo;
40 enum ip_conntrack_info ctinfo; 97 enum ip_conntrack_info ctinfo;
@@ -47,14 +104,15 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
47 return ((ct->mark & info->mask) == info->mark) ^ info->invert; 104 return ((ct->mark & info->mask) == info->mark) ^ info->invert;
48} 105}
49 106
50static bool connmark_mt_check(const struct xt_mtchk_param *par) 107static int connmark_mt_check(const struct xt_mtchk_param *par)
51{ 108{
52 if (nf_ct_l3proto_try_module_get(par->family) < 0) { 109 int ret;
53 printk(KERN_WARNING "cannot load conntrack support for " 110
54 "proto=%u\n", par->family); 111 ret = nf_ct_l3proto_try_module_get(par->family);
55 return false; 112 if (ret < 0)
56 } 113 pr_info("cannot load conntrack support for proto=%u\n",
57 return true; 114 par->family);
115 return ret;
58} 116}
59 117
60static void connmark_mt_destroy(const struct xt_mtdtor_param *par) 118static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
@@ -62,6 +120,17 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
62 nf_ct_l3proto_module_put(par->family); 120 nf_ct_l3proto_module_put(par->family);
63} 121}
64 122
123static struct xt_target connmark_tg_reg __read_mostly = {
124 .name = "CONNMARK",
125 .revision = 1,
126 .family = NFPROTO_UNSPEC,
127 .checkentry = connmark_tg_check,
128 .target = connmark_tg,
129 .targetsize = sizeof(struct xt_connmark_tginfo1),
130 .destroy = connmark_tg_destroy,
131 .me = THIS_MODULE,
132};
133
65static struct xt_match connmark_mt_reg __read_mostly = { 134static struct xt_match connmark_mt_reg __read_mostly = {
66 .name = "connmark", 135 .name = "connmark",
67 .revision = 1, 136 .revision = 1,
@@ -75,12 +144,23 @@ static struct xt_match connmark_mt_reg __read_mostly = {
75 144
76static int __init connmark_mt_init(void) 145static int __init connmark_mt_init(void)
77{ 146{
78 return xt_register_match(&connmark_mt_reg); 147 int ret;
148
149 ret = xt_register_target(&connmark_tg_reg);
150 if (ret < 0)
151 return ret;
152 ret = xt_register_match(&connmark_mt_reg);
153 if (ret < 0) {
154 xt_unregister_target(&connmark_tg_reg);
155 return ret;
156 }
157 return 0;
79} 158}
80 159
81static void __exit connmark_mt_exit(void) 160static void __exit connmark_mt_exit(void)
82{ 161{
83 xt_unregister_match(&connmark_mt_reg); 162 xt_unregister_match(&connmark_mt_reg);
163 xt_unregister_target(&connmark_tg_reg);
84} 164}
85 165
86module_init(connmark_mt_init); 166module_init(connmark_mt_init);
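
Note: the revision-1 CONNMARK target merged above implements all three modes with the same mask-then-xor pattern. A worked example of the arithmetic, with plain variables replacing ct->mark and skb->mark and the marks and masks invented:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ctmark = 0x00000012, skbmark = 0x00003400;
	const uint32_t ctmask = 0x000000ff, nfmask = 0x0000ff00;

	/* XT_CONNMARK_SET: clear the ctmask bits, xor in the new value */
	uint32_t set = (ctmark & ~ctmask) ^ 0x00000077;

	/* XT_CONNMARK_SAVE: clear the ctmask bits of ct->mark, xor in the
	 * nfmask bits of skb->mark */
	uint32_t save = (ctmark & ~ctmask) ^ (skbmark & nfmask);

	/* XT_CONNMARK_RESTORE: clear the nfmask bits of skb->mark, xor in
	 * the ctmask bits of ct->mark */
	uint32_t restore = (skbmark & ~nfmask) ^ (ctmark & ctmask);

	printf("set=%#x save=%#x restore=%#x\n", set, save, restore);
	/* set=0x77 save=0x3400 restore=0x12 */
	return 0;
}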
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index ae66305f0fe5..e536710ad916 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -9,7 +9,7 @@
9 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
11 */ 11 */
12 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <net/ipv6.h> 15#include <net/ipv6.h>
@@ -113,7 +113,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
113} 113}
114 114
115static bool 115static bool
116conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par, 116conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
117 u16 state_mask, u16 status_mask) 117 u16 state_mask, u16 status_mask)
118{ 118{
119 const struct xt_conntrack_mtinfo2 *info = par->matchinfo; 119 const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
@@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par,
123 123
124 ct = nf_ct_get(skb, &ctinfo); 124 ct = nf_ct_get(skb, &ctinfo);
125 125
126 if (ct == &nf_conntrack_untracked) 126 if (ct) {
127 statebit = XT_CONNTRACK_STATE_UNTRACKED; 127 if (nf_ct_is_untracked(ct))
128 else if (ct != NULL) 128 statebit = XT_CONNTRACK_STATE_UNTRACKED;
129 statebit = XT_CONNTRACK_STATE_BIT(ctinfo); 129 else
130 else 130 statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
131 } else
131 statebit = XT_CONNTRACK_STATE_INVALID; 132 statebit = XT_CONNTRACK_STATE_INVALID;
132 133
133 if (info->match_flags & XT_CONNTRACK_STATE) { 134 if (info->match_flags & XT_CONNTRACK_STATE) {
@@ -191,7 +192,7 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par,
191} 192}
192 193
193static bool 194static bool
194conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) 195conntrack_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
195{ 196{
196 const struct xt_conntrack_mtinfo1 *info = par->matchinfo; 197 const struct xt_conntrack_mtinfo1 *info = par->matchinfo;
197 198
@@ -199,21 +200,22 @@ conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
199} 200}
200 201
201static bool 202static bool
202conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par) 203conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
203{ 204{
204 const struct xt_conntrack_mtinfo2 *info = par->matchinfo; 205 const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
205 206
206 return conntrack_mt(skb, par, info->state_mask, info->status_mask); 207 return conntrack_mt(skb, par, info->state_mask, info->status_mask);
207} 208}
208 209
209static bool conntrack_mt_check(const struct xt_mtchk_param *par) 210static int conntrack_mt_check(const struct xt_mtchk_param *par)
210{ 211{
211 if (nf_ct_l3proto_try_module_get(par->family) < 0) { 212 int ret;
212 printk(KERN_WARNING "can't load conntrack support for " 213
213 "proto=%u\n", par->family); 214 ret = nf_ct_l3proto_try_module_get(par->family);
214 return false; 215 if (ret < 0)
215 } 216 pr_info("cannot load conntrack support for proto=%u\n",
216 return true; 217 par->family);
218 return ret;
217} 219}
218 220
219static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) 221static void conntrack_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
new file mode 100644
index 000000000000..b39db8a5cbae
--- /dev/null
+++ b/net/netfilter/xt_cpu.c
@@ -0,0 +1,63 @@
1/* Kernel module to match running CPU */
2
3/*
 4 * Might be used to distribute connections across several daemons, if
 5 * RPS (Receive Packet Steering) is enabled or the NIC is multiqueue capable,
 6 * with each RX queue IRQ affined to one CPU (1:1 mapping)
7 *
8 */
9
10/* (C) 2010 Eric Dumazet
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2 as
14 * published by the Free Software Foundation.
15 */
16
17#include <linux/module.h>
18#include <linux/skbuff.h>
19#include <linux/netfilter/xt_cpu.h>
20#include <linux/netfilter/x_tables.h>
21
22MODULE_LICENSE("GPL");
23MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
24MODULE_DESCRIPTION("Xtables: CPU match");
25
26static int cpu_mt_check(const struct xt_mtchk_param *par)
27{
28 const struct xt_cpu_info *info = par->matchinfo;
29
30 if (info->invert & ~1)
31 return -EINVAL;
32 return 0;
33}
34
35static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par)
36{
37 const struct xt_cpu_info *info = par->matchinfo;
38
39 return (info->cpu == smp_processor_id()) ^ info->invert;
40}
41
42static struct xt_match cpu_mt_reg __read_mostly = {
43 .name = "cpu",
44 .revision = 0,
45 .family = NFPROTO_UNSPEC,
46 .checkentry = cpu_mt_check,
47 .match = cpu_mt,
48 .matchsize = sizeof(struct xt_cpu_info),
49 .me = THIS_MODULE,
50};
51
52static int __init cpu_mt_init(void)
53{
54 return xt_register_match(&cpu_mt_reg);
55}
56
57static void __exit cpu_mt_exit(void)
58{
59 xt_unregister_match(&cpu_mt_reg);
60}
61
62module_init(cpu_mt_init);
63module_exit(cpu_mt_exit);
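
Note: the (info->cpu == smp_processor_id()) ^ info->invert idiom above, shared by many xt matches, only behaves as logical negation if invert is exactly 0 or 1, which is what cpu_mt_check() enforces by rejecting info->invert & ~1. A stand-alone demonstration:

#include <stdbool.h>
#include <stdio.h>

static bool cpu_match(unsigned int cpu, unsigned int cur, unsigned int invert)
{
	return (cpu == cur) ^ invert;	/* invert must be 0 or 1 */
}

int main(void)
{
	printf("%d %d\n", cpu_match(0, 0, 0), cpu_match(0, 0, 1)); /* 1 0 */
	printf("%d %d\n", cpu_match(0, 3, 0), cpu_match(0, 3, 1)); /* 0 1 */
	return 0;
}

With invert == 2, for example, the xor would yield 2 or 3 and the "match" would be true whether or not the CPUs were equal; hence the -EINVAL above.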
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 395af5943ffd..b63d2a3d80ba 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -96,7 +96,7 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
96} 96}
97 97
98static bool 98static bool
99dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par) 99dccp_mt(const struct sk_buff *skb, struct xt_action_param *par)
100{ 100{
101 const struct xt_dccp_info *info = par->matchinfo; 101 const struct xt_dccp_info *info = par->matchinfo;
102 const struct dccp_hdr *dh; 102 const struct dccp_hdr *dh;
@@ -107,7 +107,7 @@ dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
107 107
108 dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh); 108 dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh);
109 if (dh == NULL) { 109 if (dh == NULL) {
110 *par->hotdrop = true; 110 par->hotdrop = true;
111 return false; 111 return false;
112 } 112 }
113 113
@@ -120,17 +120,21 @@ dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
120 && DCCHECK(match_types(dh, info->typemask), 120 && DCCHECK(match_types(dh, info->typemask),
121 XT_DCCP_TYPE, info->flags, info->invflags) 121 XT_DCCP_TYPE, info->flags, info->invflags)
122 && DCCHECK(match_option(info->option, skb, par->thoff, dh, 122 && DCCHECK(match_option(info->option, skb, par->thoff, dh,
123 par->hotdrop), 123 &par->hotdrop),
124 XT_DCCP_OPTION, info->flags, info->invflags); 124 XT_DCCP_OPTION, info->flags, info->invflags);
125} 125}
126 126
127static bool dccp_mt_check(const struct xt_mtchk_param *par) 127static int dccp_mt_check(const struct xt_mtchk_param *par)
128{ 128{
129 const struct xt_dccp_info *info = par->matchinfo; 129 const struct xt_dccp_info *info = par->matchinfo;
130 130
131 return !(info->flags & ~XT_DCCP_VALID_FLAGS) 131 if (info->flags & ~XT_DCCP_VALID_FLAGS)
132 && !(info->invflags & ~XT_DCCP_VALID_FLAGS) 132 return -EINVAL;
133 && !(info->invflags & ~info->flags); 133 if (info->invflags & ~XT_DCCP_VALID_FLAGS)
134 return -EINVAL;
135 if (info->invflags & ~info->flags)
136 return -EINVAL;
137 return 0;
134} 138}
135 139
136static struct xt_match dccp_mt_reg[] __read_mostly = { 140static struct xt_match dccp_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 0280d3a8c161..64670fc5d0e1 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -6,7 +6,7 @@
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/ip.h> 12#include <linux/ip.h>
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tos");
25MODULE_ALIAS("ip6t_tos"); 25MODULE_ALIAS("ip6t_tos");
26 26
27static bool 27static bool
28dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par) 28dscp_mt(const struct sk_buff *skb, struct xt_action_param *par)
29{ 29{
30 const struct xt_dscp_info *info = par->matchinfo; 30 const struct xt_dscp_info *info = par->matchinfo;
31 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; 31 u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -34,7 +34,7 @@ dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
34} 34}
35 35
36static bool 36static bool
37dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 37dscp_mt6(const struct sk_buff *skb, struct xt_action_param *par)
38{ 38{
39 const struct xt_dscp_info *info = par->matchinfo; 39 const struct xt_dscp_info *info = par->matchinfo;
40 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; 40 u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -42,23 +42,23 @@ dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
42 return (dscp == info->dscp) ^ !!info->invert; 42 return (dscp == info->dscp) ^ !!info->invert;
43} 43}
44 44
45static bool dscp_mt_check(const struct xt_mtchk_param *par) 45static int dscp_mt_check(const struct xt_mtchk_param *par)
46{ 46{
47 const struct xt_dscp_info *info = par->matchinfo; 47 const struct xt_dscp_info *info = par->matchinfo;
48 48
49 if (info->dscp > XT_DSCP_MAX) { 49 if (info->dscp > XT_DSCP_MAX) {
50 printk(KERN_ERR "xt_dscp: dscp %x out of range\n", info->dscp); 50 pr_info("dscp %x out of range\n", info->dscp);
51 return false; 51 return -EDOM;
52 } 52 }
53 53
54 return true; 54 return 0;
55} 55}
56 56
57static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par) 57static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par)
58{ 58{
59 const struct xt_tos_match_info *info = par->matchinfo; 59 const struct xt_tos_match_info *info = par->matchinfo;
60 60
61 if (par->match->family == NFPROTO_IPV4) 61 if (par->family == NFPROTO_IPV4)
62 return ((ip_hdr(skb)->tos & info->tos_mask) == 62 return ((ip_hdr(skb)->tos & info->tos_mask) ==
63 info->tos_value) ^ !!info->invert; 63 info->tos_value) ^ !!info->invert;
64 else 64 else
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 609439967c2c..171ba82b5902 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -6,7 +6,7 @@
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/in.h> 12#include <linux/in.h>
@@ -24,25 +24,19 @@ MODULE_DESCRIPTION("Xtables: IPsec-ESP packet match");
24MODULE_ALIAS("ipt_esp"); 24MODULE_ALIAS("ipt_esp");
25MODULE_ALIAS("ip6t_esp"); 25MODULE_ALIAS("ip6t_esp");
26 26
27#if 0
28#define duprintf(format, args...) printk(format , ## args)
29#else
30#define duprintf(format, args...)
31#endif
32
33/* Returns 1 if the spi is matched by the range, 0 otherwise */ 27/* Returns 1 if the spi is matched by the range, 0 otherwise */
34static inline bool 28static inline bool
35spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) 29spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
36{ 30{
37 bool r; 31 bool r;
38 duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ', 32 pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
39 min, spi, max); 33 invert ? '!' : ' ', min, spi, max);
40 r = (spi >= min && spi <= max) ^ invert; 34 r = (spi >= min && spi <= max) ^ invert;
41 duprintf(" result %s\n", r ? "PASS" : "FAILED"); 35 pr_debug(" result %s\n", r ? "PASS" : "FAILED");
42 return r; 36 return r;
43} 37}
44 38
45static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par) 39static bool esp_mt(const struct sk_buff *skb, struct xt_action_param *par)
46{ 40{
47 const struct ip_esp_hdr *eh; 41 const struct ip_esp_hdr *eh;
48 struct ip_esp_hdr _esp; 42 struct ip_esp_hdr _esp;
@@ -57,8 +51,8 @@ static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
57 /* We've been asked to examine this packet, and we 51 /* We've been asked to examine this packet, and we
58 * can't. Hence, no choice but to drop. 52 * can't. Hence, no choice but to drop.
59 */ 53 */
60 duprintf("Dropping evil ESP tinygram.\n"); 54 pr_debug("Dropping evil ESP tinygram.\n");
61 *par->hotdrop = true; 55 par->hotdrop = true;
62 return false; 56 return false;
63 } 57 }
64 58
@@ -66,16 +60,16 @@ static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
66 !!(espinfo->invflags & XT_ESP_INV_SPI)); 60 !!(espinfo->invflags & XT_ESP_INV_SPI));
67} 61}
68 62
69static bool esp_mt_check(const struct xt_mtchk_param *par) 63static int esp_mt_check(const struct xt_mtchk_param *par)
70{ 64{
71 const struct xt_esp *espinfo = par->matchinfo; 65 const struct xt_esp *espinfo = par->matchinfo;
72 66
73 if (espinfo->invflags & ~XT_ESP_INV_MASK) { 67 if (espinfo->invflags & ~XT_ESP_INV_MASK) {
74 duprintf("xt_esp: unknown flags %X\n", espinfo->invflags); 68 pr_debug("unknown flags %X\n", espinfo->invflags);
75 return false; 69 return -EINVAL;
76 } 70 }
77 71
78 return true; 72 return 0;
79} 73}
80 74
81static struct xt_match esp_mt_reg[] __read_mostly = { 75static struct xt_match esp_mt_reg[] __read_mostly = {
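
Alongside the errno conversion, hotdrop moves from a bool pointer in xt_match_param to a plain member of xt_action_param, which is why the match callbacks now take a non-const par. A small userspace sketch of that shape, with a stub header_pointer() standing in for skb_header_pointer():

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* simplified stand-in for struct xt_action_param */
struct action_param {
	const void *matchinfo;
	bool hotdrop;	/* was a bool *hotdrop in xt_match_param */
};

/* header fetch stub: returns NULL to model a truncated packet */
static const void *header_pointer(const void *pkt, size_t len, size_t need)
{
	return len >= need ? pkt : NULL;
}

static bool esp_like_match(const void *pkt, size_t len, struct action_param *par)
{
	const void *hdr = header_pointer(pkt, len, 8);	/* ESP header is 8 bytes */

	if (hdr == NULL) {
		/* cannot examine the packet, so drop it outright */
		par->hotdrop = true;
		return false;
	}
	return true;	/* real code would compare the SPI range here */
}

int main(void)
{
	struct action_param par = { .hotdrop = false };
	char tiny[4];

	esp_like_match(tiny, sizeof(tiny), &par);
	printf("hotdrop: %d\n", par.hotdrop);	/* 1 */
	return 0;
}
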
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 215a64835de8..b46a8390896d 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -7,6 +7,7 @@
7 * 7 *
8 * Development of this code was funded by Astaro AG, http://www.astaro.com/ 8 * Development of this code was funded by Astaro AG, http://www.astaro.com/
9 */ 9 */
10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/module.h> 11#include <linux/module.h>
11#include <linux/spinlock.h> 12#include <linux/spinlock.h>
12#include <linux/random.h> 13#include <linux/random.h>
@@ -36,7 +37,7 @@
36 37
37MODULE_LICENSE("GPL"); 38MODULE_LICENSE("GPL");
38MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 39MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
39MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); 40MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
40MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match"); 41MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match");
41MODULE_ALIAS("ipt_hashlimit"); 42MODULE_ALIAS("ipt_hashlimit");
42MODULE_ALIAS("ip6t_hashlimit"); 43MODULE_ALIAS("ip6t_hashlimit");
@@ -80,12 +81,14 @@ struct dsthash_ent {
80 struct dsthash_dst dst; 81 struct dsthash_dst dst;
81 82
82 /* modified structure members in the end */ 83 /* modified structure members in the end */
84 spinlock_t lock;
83 unsigned long expires; /* precalculated expiry time */ 85 unsigned long expires; /* precalculated expiry time */
84 struct { 86 struct {
85 unsigned long prev; /* last modification */ 87 unsigned long prev; /* last modification */
86 u_int32_t credit; 88 u_int32_t credit;
87 u_int32_t credit_cap, cost; 89 u_int32_t credit_cap, cost;
88 } rateinfo; 90 } rateinfo;
91 struct rcu_head rcu;
89}; 92};
90 93
91struct xt_hashlimit_htable { 94struct xt_hashlimit_htable {
@@ -142,9 +145,11 @@ dsthash_find(const struct xt_hashlimit_htable *ht,
142 u_int32_t hash = hash_dst(ht, dst); 145 u_int32_t hash = hash_dst(ht, dst);
143 146
144 if (!hlist_empty(&ht->hash[hash])) { 147 if (!hlist_empty(&ht->hash[hash])) {
145 hlist_for_each_entry(ent, pos, &ht->hash[hash], node) 148 hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node)
146 if (dst_cmp(ent, dst)) 149 if (dst_cmp(ent, dst)) {
150 spin_lock(&ent->lock);
147 return ent; 151 return ent;
152 }
148 } 153 }
149 return NULL; 154 return NULL;
150} 155}
@@ -156,9 +161,10 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
156{ 161{
157 struct dsthash_ent *ent; 162 struct dsthash_ent *ent;
158 163
164 spin_lock(&ht->lock);
159 /* initialize hash with random val at the time we allocate 165 /* initialize hash with random val at the time we allocate
160 * the first hashtable entry */ 166 * the first hashtable entry */
161 if (!ht->rnd_initialized) { 167 if (unlikely(!ht->rnd_initialized)) {
162 get_random_bytes(&ht->rnd, sizeof(ht->rnd)); 168 get_random_bytes(&ht->rnd, sizeof(ht->rnd));
163 ht->rnd_initialized = true; 169 ht->rnd_initialized = true;
164 } 170 }
@@ -166,106 +172,40 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
166 if (ht->cfg.max && ht->count >= ht->cfg.max) { 172 if (ht->cfg.max && ht->count >= ht->cfg.max) {
167 /* FIXME: do something. question is what.. */ 173 /* FIXME: do something. question is what.. */
168 if (net_ratelimit()) 174 if (net_ratelimit())
169 printk(KERN_WARNING 175 pr_err("max count of %u reached\n", ht->cfg.max);
170 "xt_hashlimit: max count of %u reached\n", 176 ent = NULL;
171 ht->cfg.max); 177 } else
172 return NULL; 178 ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
173 }
174
175 ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
176 if (!ent) { 179 if (!ent) {
177 if (net_ratelimit()) 180 if (net_ratelimit())
178 printk(KERN_ERR 181 pr_err("cannot allocate dsthash_ent\n");
179 "xt_hashlimit: can't allocate dsthash_ent\n"); 182 } else {
180 return NULL; 183 memcpy(&ent->dst, dst, sizeof(ent->dst));
181 } 184 spin_lock_init(&ent->lock);
182 memcpy(&ent->dst, dst, sizeof(ent->dst));
183 185
184 hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]); 186 spin_lock(&ent->lock);
185 ht->count++; 187 hlist_add_head_rcu(&ent->node, &ht->hash[hash_dst(ht, dst)]);
188 ht->count++;
189 }
190 spin_unlock(&ht->lock);
186 return ent; 191 return ent;
187} 192}
188 193
189static inline void 194static void dsthash_free_rcu(struct rcu_head *head)
190dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
191{ 195{
192 hlist_del(&ent->node); 196 struct dsthash_ent *ent = container_of(head, struct dsthash_ent, rcu);
197
193 kmem_cache_free(hashlimit_cachep, ent); 198 kmem_cache_free(hashlimit_cachep, ent);
194 ht->count--;
195} 199}
196static void htable_gc(unsigned long htlong);
197 200
198static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_int8_t family) 201static inline void
202dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
199{ 203{
200 struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); 204 hlist_del_rcu(&ent->node);
201 struct xt_hashlimit_htable *hinfo; 205 call_rcu_bh(&ent->rcu, dsthash_free_rcu);
202 unsigned int size; 206 ht->count--;
203 unsigned int i;
204
205 if (minfo->cfg.size)
206 size = minfo->cfg.size;
207 else {
208 size = ((totalram_pages << PAGE_SHIFT) / 16384) /
209 sizeof(struct list_head);
210 if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
211 size = 8192;
212 if (size < 16)
213 size = 16;
214 }
215 /* FIXME: don't use vmalloc() here or anywhere else -HW */
216 hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
217 sizeof(struct list_head) * size);
218 if (!hinfo) {
219 printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n");
220 return -1;
221 }
222 minfo->hinfo = hinfo;
223
224 /* copy match config into hashtable config */
225 hinfo->cfg.mode = minfo->cfg.mode;
226 hinfo->cfg.avg = minfo->cfg.avg;
227 hinfo->cfg.burst = minfo->cfg.burst;
228 hinfo->cfg.max = minfo->cfg.max;
229 hinfo->cfg.gc_interval = minfo->cfg.gc_interval;
230 hinfo->cfg.expire = minfo->cfg.expire;
231
232 if (family == NFPROTO_IPV4)
233 hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32;
234 else
235 hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128;
236
237 hinfo->cfg.size = size;
238 if (!hinfo->cfg.max)
239 hinfo->cfg.max = 8 * hinfo->cfg.size;
240 else if (hinfo->cfg.max < hinfo->cfg.size)
241 hinfo->cfg.max = hinfo->cfg.size;
242
243 for (i = 0; i < hinfo->cfg.size; i++)
244 INIT_HLIST_HEAD(&hinfo->hash[i]);
245
246 hinfo->use = 1;
247 hinfo->count = 0;
248 hinfo->family = family;
249 hinfo->rnd_initialized = false;
250 spin_lock_init(&hinfo->lock);
251 hinfo->pde = proc_create_data(minfo->name, 0,
252 (family == NFPROTO_IPV4) ?
253 hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
254 &dl_file_ops, hinfo);
255 if (!hinfo->pde) {
256 vfree(hinfo);
257 return -1;
258 }
259 hinfo->net = net;
260
261 setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
262 hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
263 add_timer(&hinfo->timer);
264
265 hlist_add_head(&hinfo->node, &hashlimit_net->htables);
266
267 return 0;
268} 207}
208static void htable_gc(unsigned long htlong);
269 209
270static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, 210static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
271 u_int8_t family) 211 u_int8_t family)
@@ -288,10 +228,8 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
288 /* FIXME: don't use vmalloc() here or anywhere else -HW */ 228 /* FIXME: don't use vmalloc() here or anywhere else -HW */
289 hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + 229 hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
290 sizeof(struct list_head) * size); 230 sizeof(struct list_head) * size);
291 if (hinfo == NULL) { 231 if (hinfo == NULL)
292 printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n"); 232 return -ENOMEM;
293 return -1;
294 }
295 minfo->hinfo = hinfo; 233 minfo->hinfo = hinfo;
296 234
297 /* copy match config into hashtable config */ 235 /* copy match config into hashtable config */
@@ -317,7 +255,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
317 &dl_file_ops, hinfo); 255 &dl_file_ops, hinfo);
318 if (hinfo->pde == NULL) { 256 if (hinfo->pde == NULL) {
319 vfree(hinfo); 257 vfree(hinfo);
320 return -1; 258 return -ENOMEM;
321 } 259 }
322 hinfo->net = net; 260 hinfo->net = net;
323 261
@@ -578,58 +516,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
578} 516}
579 517
580static bool 518static bool
581hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) 519hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
582{
583 const struct xt_hashlimit_info *r = par->matchinfo;
584 struct xt_hashlimit_htable *hinfo = r->hinfo;
585 unsigned long now = jiffies;
586 struct dsthash_ent *dh;
587 struct dsthash_dst dst;
588
589 if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
590 goto hotdrop;
591
592 spin_lock_bh(&hinfo->lock);
593 dh = dsthash_find(hinfo, &dst);
594 if (!dh) {
595 dh = dsthash_alloc_init(hinfo, &dst);
596 if (!dh) {
597 spin_unlock_bh(&hinfo->lock);
598 goto hotdrop;
599 }
600
601 dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
602 dh->rateinfo.prev = jiffies;
603 dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
604 hinfo->cfg.burst);
605 dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
606 hinfo->cfg.burst);
607 dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
608 } else {
609 /* update expiration timeout */
610 dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
611 rateinfo_recalc(dh, now);
612 }
613
614 if (dh->rateinfo.credit >= dh->rateinfo.cost) {
615 /* We're underlimit. */
616 dh->rateinfo.credit -= dh->rateinfo.cost;
617 spin_unlock_bh(&hinfo->lock);
618 return true;
619 }
620
621 spin_unlock_bh(&hinfo->lock);
622
623 /* default case: we're overlimit, thus don't match */
624 return false;
625
626hotdrop:
627 *par->hotdrop = true;
628 return false;
629}
630
631static bool
632hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
633{ 520{
634 const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; 521 const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
635 struct xt_hashlimit_htable *hinfo = info->hinfo; 522 struct xt_hashlimit_htable *hinfo = info->hinfo;
@@ -640,15 +527,14 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
640 if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) 527 if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
641 goto hotdrop; 528 goto hotdrop;
642 529
643 spin_lock_bh(&hinfo->lock); 530 rcu_read_lock_bh();
644 dh = dsthash_find(hinfo, &dst); 531 dh = dsthash_find(hinfo, &dst);
645 if (dh == NULL) { 532 if (dh == NULL) {
646 dh = dsthash_alloc_init(hinfo, &dst); 533 dh = dsthash_alloc_init(hinfo, &dst);
647 if (dh == NULL) { 534 if (dh == NULL) {
648 spin_unlock_bh(&hinfo->lock); 535 rcu_read_unlock_bh();
649 goto hotdrop; 536 goto hotdrop;
650 } 537 }
651
652 dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); 538 dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
653 dh->rateinfo.prev = jiffies; 539 dh->rateinfo.prev = jiffies;
654 dh->rateinfo.credit = user2credits(hinfo->cfg.avg * 540 dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
@@ -665,96 +551,58 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
665 if (dh->rateinfo.credit >= dh->rateinfo.cost) { 551 if (dh->rateinfo.credit >= dh->rateinfo.cost) {
666 /* below the limit */ 552 /* below the limit */
667 dh->rateinfo.credit -= dh->rateinfo.cost; 553 dh->rateinfo.credit -= dh->rateinfo.cost;
668 spin_unlock_bh(&hinfo->lock); 554 spin_unlock(&dh->lock);
555 rcu_read_unlock_bh();
669 return !(info->cfg.mode & XT_HASHLIMIT_INVERT); 556 return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
670 } 557 }
671 558
672 spin_unlock_bh(&hinfo->lock); 559 spin_unlock(&dh->lock);
560 rcu_read_unlock_bh();
673 /* default match is underlimit - so over the limit, we need to invert */ 561 /* default match is underlimit - so over the limit, we need to invert */
674 return info->cfg.mode & XT_HASHLIMIT_INVERT; 562 return info->cfg.mode & XT_HASHLIMIT_INVERT;
675 563
676 hotdrop: 564 hotdrop:
677 *par->hotdrop = true; 565 par->hotdrop = true;
678 return false; 566 return false;
679} 567}
680 568
681static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) 569static int hashlimit_mt_check(const struct xt_mtchk_param *par)
682{
683 struct net *net = par->net;
684 struct xt_hashlimit_info *r = par->matchinfo;
685
686 /* Check for overflow. */
687 if (r->cfg.burst == 0 ||
688 user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) {
689 printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
690 r->cfg.avg, r->cfg.burst);
691 return false;
692 }
693 if (r->cfg.mode == 0 ||
694 r->cfg.mode > (XT_HASHLIMIT_HASH_DPT |
695 XT_HASHLIMIT_HASH_DIP |
696 XT_HASHLIMIT_HASH_SIP |
697 XT_HASHLIMIT_HASH_SPT))
698 return false;
699 if (!r->cfg.gc_interval)
700 return false;
701 if (!r->cfg.expire)
702 return false;
703 if (r->name[sizeof(r->name) - 1] != '\0')
704 return false;
705
706 mutex_lock(&hashlimit_mutex);
707 r->hinfo = htable_find_get(net, r->name, par->match->family);
708 if (!r->hinfo && htable_create_v0(net, r, par->match->family) != 0) {
709 mutex_unlock(&hashlimit_mutex);
710 return false;
711 }
712 mutex_unlock(&hashlimit_mutex);
713
714 return true;
715}
716
717static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
718{ 570{
719 struct net *net = par->net; 571 struct net *net = par->net;
720 struct xt_hashlimit_mtinfo1 *info = par->matchinfo; 572 struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
573 int ret;
721 574
722 /* Check for overflow. */ 575 /* Check for overflow. */
723 if (info->cfg.burst == 0 || 576 if (info->cfg.burst == 0 ||
724 user2credits(info->cfg.avg * info->cfg.burst) < 577 user2credits(info->cfg.avg * info->cfg.burst) <
725 user2credits(info->cfg.avg)) { 578 user2credits(info->cfg.avg)) {
726 printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n", 579 pr_info("overflow, try lower: %u/%u\n",
727 info->cfg.avg, info->cfg.burst); 580 info->cfg.avg, info->cfg.burst);
728 return false; 581 return -ERANGE;
729 } 582 }
730 if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) 583 if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
731 return false; 584 return -EINVAL;
732 if (info->name[sizeof(info->name)-1] != '\0') 585 if (info->name[sizeof(info->name)-1] != '\0')
733 return false; 586 return -EINVAL;
734 if (par->match->family == NFPROTO_IPV4) { 587 if (par->family == NFPROTO_IPV4) {
735 if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32) 588 if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
736 return false; 589 return -EINVAL;
737 } else { 590 } else {
738 if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128) 591 if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128)
739 return false; 592 return -EINVAL;
740 } 593 }
741 594
742 mutex_lock(&hashlimit_mutex); 595 mutex_lock(&hashlimit_mutex);
743 info->hinfo = htable_find_get(net, info->name, par->match->family); 596 info->hinfo = htable_find_get(net, info->name, par->family);
744 if (!info->hinfo && htable_create(net, info, par->match->family) != 0) { 597 if (info->hinfo == NULL) {
745 mutex_unlock(&hashlimit_mutex); 598 ret = htable_create(net, info, par->family);
746 return false; 599 if (ret < 0) {
600 mutex_unlock(&hashlimit_mutex);
601 return ret;
602 }
747 } 603 }
748 mutex_unlock(&hashlimit_mutex); 604 mutex_unlock(&hashlimit_mutex);
749 return true; 605 return 0;
750}
751
752static void
753hashlimit_mt_destroy_v0(const struct xt_mtdtor_param *par)
754{
755 const struct xt_hashlimit_info *r = par->matchinfo;
756
757 htable_put(r->hinfo);
758} 606}
759 607
760static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) 608static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
@@ -764,47 +612,8 @@ static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
764 htable_put(info->hinfo); 612 htable_put(info->hinfo);
765} 613}
766 614
767#ifdef CONFIG_COMPAT
768struct compat_xt_hashlimit_info {
769 char name[IFNAMSIZ];
770 struct hashlimit_cfg cfg;
771 compat_uptr_t hinfo;
772 compat_uptr_t master;
773};
774
775static void hashlimit_mt_compat_from_user(void *dst, const void *src)
776{
777 int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
778
779 memcpy(dst, src, off);
780 memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
781}
782
783static int hashlimit_mt_compat_to_user(void __user *dst, const void *src)
784{
785 int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
786
787 return copy_to_user(dst, src, off) ? -EFAULT : 0;
788}
789#endif
790
791static struct xt_match hashlimit_mt_reg[] __read_mostly = { 615static struct xt_match hashlimit_mt_reg[] __read_mostly = {
792 { 616 {
793 .name = "hashlimit",
794 .revision = 0,
795 .family = NFPROTO_IPV4,
796 .match = hashlimit_mt_v0,
797 .matchsize = sizeof(struct xt_hashlimit_info),
798#ifdef CONFIG_COMPAT
799 .compatsize = sizeof(struct compat_xt_hashlimit_info),
800 .compat_from_user = hashlimit_mt_compat_from_user,
801 .compat_to_user = hashlimit_mt_compat_to_user,
802#endif
803 .checkentry = hashlimit_mt_check_v0,
804 .destroy = hashlimit_mt_destroy_v0,
805 .me = THIS_MODULE
806 },
807 {
808 .name = "hashlimit", 617 .name = "hashlimit",
809 .revision = 1, 618 .revision = 1,
810 .family = NFPROTO_IPV4, 619 .family = NFPROTO_IPV4,
@@ -816,20 +625,6 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
816 }, 625 },
817#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 626#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
818 { 627 {
819 .name = "hashlimit",
820 .family = NFPROTO_IPV6,
821 .match = hashlimit_mt_v0,
822 .matchsize = sizeof(struct xt_hashlimit_info),
823#ifdef CONFIG_COMPAT
824 .compatsize = sizeof(struct compat_xt_hashlimit_info),
825 .compat_from_user = hashlimit_mt_compat_from_user,
826 .compat_to_user = hashlimit_mt_compat_to_user,
827#endif
828 .checkentry = hashlimit_mt_check_v0,
829 .destroy = hashlimit_mt_destroy_v0,
830 .me = THIS_MODULE
831 },
832 {
833 .name = "hashlimit", 628 .name = "hashlimit",
834 .revision = 1, 629 .revision = 1,
835 .family = NFPROTO_IPV6, 630 .family = NFPROTO_IPV6,
@@ -888,12 +683,15 @@ static void dl_seq_stop(struct seq_file *s, void *v)
888static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, 683static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
889 struct seq_file *s) 684 struct seq_file *s)
890{ 685{
686 int res;
687
688 spin_lock(&ent->lock);
891 /* recalculate to show accurate numbers */ 689 /* recalculate to show accurate numbers */
892 rateinfo_recalc(ent, jiffies); 690 rateinfo_recalc(ent, jiffies);
893 691
894 switch (family) { 692 switch (family) {
895 case NFPROTO_IPV4: 693 case NFPROTO_IPV4:
896 return seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", 694 res = seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n",
897 (long)(ent->expires - jiffies)/HZ, 695 (long)(ent->expires - jiffies)/HZ,
898 &ent->dst.ip.src, 696 &ent->dst.ip.src,
899 ntohs(ent->dst.src_port), 697 ntohs(ent->dst.src_port),
@@ -901,9 +699,10 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
901 ntohs(ent->dst.dst_port), 699 ntohs(ent->dst.dst_port),
902 ent->rateinfo.credit, ent->rateinfo.credit_cap, 700 ent->rateinfo.credit, ent->rateinfo.credit_cap,
903 ent->rateinfo.cost); 701 ent->rateinfo.cost);
702 break;
904#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 703#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
905 case NFPROTO_IPV6: 704 case NFPROTO_IPV6:
906 return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", 705 res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
907 (long)(ent->expires - jiffies)/HZ, 706 (long)(ent->expires - jiffies)/HZ,
908 &ent->dst.ip6.src, 707 &ent->dst.ip6.src,
909 ntohs(ent->dst.src_port), 708 ntohs(ent->dst.src_port),
@@ -911,11 +710,14 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
911 ntohs(ent->dst.dst_port), 710 ntohs(ent->dst.dst_port),
912 ent->rateinfo.credit, ent->rateinfo.credit_cap, 711 ent->rateinfo.credit, ent->rateinfo.credit_cap,
913 ent->rateinfo.cost); 712 ent->rateinfo.cost);
713 break;
914#endif 714#endif
915 default: 715 default:
916 BUG(); 716 BUG();
917 return 0; 717 res = 0;
918 } 718 }
719 spin_unlock(&ent->lock);
720 return res;
919} 721}
920 722
921static int dl_seq_show(struct seq_file *s, void *v) 723static int dl_seq_show(struct seq_file *s, void *v)
@@ -1024,7 +826,7 @@ static int __init hashlimit_mt_init(void)
1024 sizeof(struct dsthash_ent), 0, 0, 826 sizeof(struct dsthash_ent), 0, 0,
1025 NULL); 827 NULL);
1026 if (!hashlimit_cachep) { 828 if (!hashlimit_cachep) {
1027 printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); 829 pr_warning("unable to create slab cache\n");
1028 goto err2; 830 goto err2;
1029 } 831 }
1030 return 0; 832 return 0;
@@ -1039,9 +841,11 @@ err1:
1039 841
1040static void __exit hashlimit_mt_exit(void) 842static void __exit hashlimit_mt_exit(void)
1041{ 843{
1042 kmem_cache_destroy(hashlimit_cachep);
1043 xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); 844 xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
1044 unregister_pernet_subsys(&hashlimit_net_ops); 845 unregister_pernet_subsys(&hashlimit_net_ops);
846
847 rcu_barrier_bh();
848 kmem_cache_destroy(hashlimit_cachep);
1045} 849}
1046 850
1047module_init(hashlimit_mt_init); 851module_init(hashlimit_mt_init);
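
The hashlimit rework narrows the locking: the table-wide spin_lock_bh() around every lookup becomes rcu_read_lock_bh() plus a per-entry spinlock that dsthash_find() returns already held, entries are freed via call_rcu_bh(), and rcu_barrier_bh() at exit lets pending callbacks drain before the slab cache is destroyed. A rough pthread-based sketch of the lock-handoff convention only (real RCU is not modeled here):

#include <pthread.h>
#include <stdio.h>

/* toy entry: the per-entry lock protects only the rate counters */
struct entry {
	pthread_mutex_t lock;
	unsigned int credit;
};

/* lookup returns with the entry lock held, mirroring dsthash_find() */
static struct entry *find_locked(struct entry *table, int n, int key)
{
	if (key < 0 || key >= n)
		return NULL;
	pthread_mutex_lock(&table[key].lock);
	return &table[key];
}

int main(void)
{
	struct entry tab[2] = {
		{ PTHREAD_MUTEX_INITIALIZER, 5 },
		{ PTHREAD_MUTEX_INITIALIZER, 3 },
	};
	struct entry *e = find_locked(tab, 2, 1);

	if (e != NULL) {
		e->credit--;			/* counters touched under the lock */
		pthread_mutex_unlock(&e->lock);	/* caller unlocks, as hashlimit_mt() does */
	}
	printf("credit: %u\n", tab[1].credit);	/* 2 */
	return 0;
}
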
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 64fc7f277221..9f4ab00c8050 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -6,7 +6,7 @@
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/skbuff.h> 11#include <linux/skbuff.h>
12#include <linux/netfilter.h> 12#include <linux/netfilter.h>
@@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_helper");
24 24
25 25
26static bool 26static bool
27helper_mt(const struct sk_buff *skb, const struct xt_match_param *par) 27helper_mt(const struct sk_buff *skb, struct xt_action_param *par)
28{ 28{
29 const struct xt_helper_info *info = par->matchinfo; 29 const struct xt_helper_info *info = par->matchinfo;
30 const struct nf_conn *ct; 30 const struct nf_conn *ct;
@@ -54,17 +54,19 @@ helper_mt(const struct sk_buff *skb, const struct xt_match_param *par)
54 return ret; 54 return ret;
55} 55}
56 56
57static bool helper_mt_check(const struct xt_mtchk_param *par) 57static int helper_mt_check(const struct xt_mtchk_param *par)
58{ 58{
59 struct xt_helper_info *info = par->matchinfo; 59 struct xt_helper_info *info = par->matchinfo;
60 int ret;
60 61
61 if (nf_ct_l3proto_try_module_get(par->family) < 0) { 62 ret = nf_ct_l3proto_try_module_get(par->family);
62 printk(KERN_WARNING "can't load conntrack support for " 63 if (ret < 0) {
63 "proto=%u\n", par->family); 64 pr_info("cannot load conntrack support for proto=%u\n",
64 return false; 65 par->family);
66 return ret;
65 } 67 }
66 info->name[29] = '\0'; 68 info->name[29] = '\0';
67 return true; 69 return 0;
68} 70}
69 71
70static void helper_mt_destroy(const struct xt_mtdtor_param *par) 72static void helper_mt_destroy(const struct xt_mtdtor_param *par)
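
helper_mt_check() now forwards the errno from nf_ct_l3proto_try_module_get() instead of flattening it to false, so the caller sees the provider's own failure code rather than a generic rejection. A minimal sketch of that propagation pattern; try_module_get_l3() is an assumed stand-in that only recognizes NFPROTO_IPV4 (2) and NFPROTO_IPV6 (10):

#include <errno.h>
#include <stdio.h>

/* stand-in for nf_ct_l3proto_try_module_get(): fails for unknown families */
static int try_module_get_l3(unsigned int family)
{
	return (family == 2 || family == 10) ? 0 : -ENOENT;
}

/* propagate the provider's errno instead of remapping it */
static int helper_like_check(unsigned int family)
{
	int ret = try_module_get_l3(family);

	if (ret < 0) {
		fprintf(stderr, "cannot load conntrack support for proto=%u\n",
			family);
		return ret;
	}
	return 0;
}

int main(void)
{
	printf("ipv4: %d\n", helper_like_check(2));	/* 0 */
	printf("bogus: %d\n", helper_like_check(7));	/* -2 (-ENOENT) */
	return 0;
}
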
diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c
index 7726154c87b2..7d12221ead89 100644
--- a/net/netfilter/xt_hl.c
+++ b/net/netfilter/xt_hl.c
@@ -25,7 +25,7 @@ MODULE_LICENSE("GPL");
25MODULE_ALIAS("ipt_ttl"); 25MODULE_ALIAS("ipt_ttl");
26MODULE_ALIAS("ip6t_hl"); 26MODULE_ALIAS("ip6t_hl");
27 27
28static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) 28static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par)
29{ 29{
30 const struct ipt_ttl_info *info = par->matchinfo; 30 const struct ipt_ttl_info *info = par->matchinfo;
31 const u8 ttl = ip_hdr(skb)->ttl; 31 const u8 ttl = ip_hdr(skb)->ttl;
@@ -39,16 +39,12 @@ static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
39 return ttl < info->ttl; 39 return ttl < info->ttl;
40 case IPT_TTL_GT: 40 case IPT_TTL_GT:
41 return ttl > info->ttl; 41 return ttl > info->ttl;
42 default:
43 printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
44 info->mode);
45 return false;
46 } 42 }
47 43
48 return false; 44 return false;
49} 45}
50 46
51static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 47static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par)
52{ 48{
53 const struct ip6t_hl_info *info = par->matchinfo; 49 const struct ip6t_hl_info *info = par->matchinfo;
54 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 50 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -56,20 +52,12 @@ static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
56 switch (info->mode) { 52 switch (info->mode) {
57 case IP6T_HL_EQ: 53 case IP6T_HL_EQ:
58 return ip6h->hop_limit == info->hop_limit; 54 return ip6h->hop_limit == info->hop_limit;
59 break;
60 case IP6T_HL_NE: 55 case IP6T_HL_NE:
61 return ip6h->hop_limit != info->hop_limit; 56 return ip6h->hop_limit != info->hop_limit;
62 break;
63 case IP6T_HL_LT: 57 case IP6T_HL_LT:
64 return ip6h->hop_limit < info->hop_limit; 58 return ip6h->hop_limit < info->hop_limit;
65 break;
66 case IP6T_HL_GT: 59 case IP6T_HL_GT:
67 return ip6h->hop_limit > info->hop_limit; 60 return ip6h->hop_limit > info->hop_limit;
68 break;
69 default:
70 printk(KERN_WARNING "ip6t_hl: unknown mode %d\n",
71 info->mode);
72 return false;
73 } 61 }
74 62
75 return false; 63 return false;
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index ffc96387d556..88f7c3511c72 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -8,6 +8,7 @@
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 */ 10 */
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11#include <linux/module.h> 12#include <linux/module.h>
12#include <linux/skbuff.h> 13#include <linux/skbuff.h>
13#include <linux/ip.h> 14#include <linux/ip.h>
@@ -16,7 +17,7 @@
16#include <linux/netfilter/xt_iprange.h> 17#include <linux/netfilter/xt_iprange.h>
17 18
18static bool 19static bool
19iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) 20iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par)
20{ 21{
21 const struct xt_iprange_mtinfo *info = par->matchinfo; 22 const struct xt_iprange_mtinfo *info = par->matchinfo;
22 const struct iphdr *iph = ip_hdr(skb); 23 const struct iphdr *iph = ip_hdr(skb);
@@ -67,7 +68,7 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
67} 68}
68 69
69static bool 70static bool
70iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 71iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par)
71{ 72{
72 const struct xt_iprange_mtinfo *info = par->matchinfo; 73 const struct xt_iprange_mtinfo *info = par->matchinfo;
73 const struct ipv6hdr *iph = ipv6_hdr(skb); 74 const struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
new file mode 100644
index 000000000000..7a4d66db95ae
--- /dev/null
+++ b/net/netfilter/xt_ipvs.c
@@ -0,0 +1,189 @@
1/*
2 * xt_ipvs - kernel module to match IPVS connection properties
3 *
4 * Author: Hannes Eder <heder@google.com>
5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9#include <linux/module.h>
10#include <linux/moduleparam.h>
11#include <linux/spinlock.h>
12#include <linux/skbuff.h>
13#ifdef CONFIG_IP_VS_IPV6
14#include <net/ipv6.h>
15#endif
16#include <linux/ip_vs.h>
17#include <linux/types.h>
18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter/x_tables.h>
20#include <linux/netfilter/xt_ipvs.h>
21#include <net/netfilter/nf_conntrack.h>
22
23#include <net/ip_vs.h>
24
25MODULE_AUTHOR("Hannes Eder <heder@google.com>");
26MODULE_DESCRIPTION("Xtables: match IPVS connection properties");
27MODULE_LICENSE("GPL");
28MODULE_ALIAS("ipt_ipvs");
29MODULE_ALIAS("ip6t_ipvs");
30
31/* borrowed from xt_conntrack */
32static bool ipvs_mt_addrcmp(const union nf_inet_addr *kaddr,
33 const union nf_inet_addr *uaddr,
34 const union nf_inet_addr *umask,
35 unsigned int l3proto)
36{
37 if (l3proto == NFPROTO_IPV4)
38 return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0;
39#ifdef CONFIG_IP_VS_IPV6
40 else if (l3proto == NFPROTO_IPV6)
41 return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6,
42 &uaddr->in6) == 0;
43#endif
44 else
45 return false;
46}
47
48static bool
49ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
50{
51 const struct xt_ipvs_mtinfo *data = par->matchinfo;
52 /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
53 const u_int8_t family = par->family;
54 struct ip_vs_iphdr iph;
55 struct ip_vs_protocol *pp;
56 struct ip_vs_conn *cp;
57 bool match = true;
58
59 if (data->bitmask == XT_IPVS_IPVS_PROPERTY) {
60 match = skb->ipvs_property ^
61 !!(data->invert & XT_IPVS_IPVS_PROPERTY);
62 goto out;
63 }
64
65 /* other flags than XT_IPVS_IPVS_PROPERTY are set */
66 if (!skb->ipvs_property) {
67 match = false;
68 goto out;
69 }
70
71 ip_vs_fill_iphdr(family, skb_network_header(skb), &iph);
72
73 if (data->bitmask & XT_IPVS_PROTO)
74 if ((iph.protocol == data->l4proto) ^
75 !(data->invert & XT_IPVS_PROTO)) {
76 match = false;
77 goto out;
78 }
79
80 pp = ip_vs_proto_get(iph.protocol);
81 if (unlikely(!pp)) {
82 match = false;
83 goto out;
84 }
85
86 /*
87 * Check if the packet belongs to an existing entry
88 */
89 cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
90 if (unlikely(cp == NULL)) {
91 match = false;
92 goto out;
93 }
94
95 /*
96 * We found a connection, i.e. cp != NULL, make sure to call
97 * __ip_vs_conn_put before returning. In our case jump to out_put_cp.
98 */
99
100 if (data->bitmask & XT_IPVS_VPORT)
101 if ((cp->vport == data->vport) ^
102 !(data->invert & XT_IPVS_VPORT)) {
103 match = false;
104 goto out_put_cp;
105 }
106
107 if (data->bitmask & XT_IPVS_VPORTCTL)
108 if ((cp->control != NULL &&
109 cp->control->vport == data->vportctl) ^
110 !(data->invert & XT_IPVS_VPORTCTL)) {
111 match = false;
112 goto out_put_cp;
113 }
114
115 if (data->bitmask & XT_IPVS_DIR) {
116 enum ip_conntrack_info ctinfo;
117 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
118
119 if (ct == NULL || nf_ct_is_untracked(ct)) {
120 match = false;
121 goto out_put_cp;
122 }
123
124 if ((ctinfo >= IP_CT_IS_REPLY) ^
125 !!(data->invert & XT_IPVS_DIR)) {
126 match = false;
127 goto out_put_cp;
128 }
129 }
130
131 if (data->bitmask & XT_IPVS_METHOD)
132 if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^
133 !(data->invert & XT_IPVS_METHOD)) {
134 match = false;
135 goto out_put_cp;
136 }
137
138 if (data->bitmask & XT_IPVS_VADDR) {
139 if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr,
140 &data->vmask, family) ^
141 !(data->invert & XT_IPVS_VADDR)) {
142 match = false;
143 goto out_put_cp;
144 }
145 }
146
147out_put_cp:
148 __ip_vs_conn_put(cp);
149out:
150 pr_debug("match=%d\n", match);
151 return match;
152}
153
154static int ipvs_mt_check(const struct xt_mtchk_param *par)
155{
156 if (par->family != NFPROTO_IPV4
157#ifdef CONFIG_IP_VS_IPV6
158 && par->family != NFPROTO_IPV6
159#endif
160 ) {
161 pr_info("protocol family %u not supported\n", par->family);
162 return -EINVAL;
163 }
164
165 return 0;
166}
167
168static struct xt_match xt_ipvs_mt_reg __read_mostly = {
169 .name = "ipvs",
170 .revision = 0,
171 .family = NFPROTO_UNSPEC,
172 .match = ipvs_mt,
173 .checkentry = ipvs_mt_check,
174 .matchsize = XT_ALIGN(sizeof(struct xt_ipvs_mtinfo)),
175 .me = THIS_MODULE,
176};
177
178static int __init ipvs_mt_init(void)
179{
180 return xt_register_match(&xt_ipvs_mt_reg);
181}
182
183static void __exit ipvs_mt_exit(void)
184{
185 xt_unregister_match(&xt_ipvs_mt_reg);
186}
187
188module_init(ipvs_mt_init);
189module_exit(ipvs_mt_exit);
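
Nearly every guard in ipvs_mt() has the shape (comparison) ^ !(invert & FLAG): the rule fails as soon as the raw test disagrees with the possibly-inverted expectation. A tiny truth-table sketch of that idiom, with FLAG_PROTO standing in for XT_IPVS_PROTO:

#include <stdbool.h>
#include <stdio.h>

#define FLAG_PROTO 0x01	/* illustrative, standing in for XT_IPVS_PROTO */

/*
 * Same shape as the guards in ipvs_mt(): the rule fails when the raw
 * comparison disagrees with the (possibly inverted) expectation.
 */
static bool fails(bool cond, unsigned int invert, unsigned int flag)
{
	return cond ^ !(invert & flag);
}

int main(void)
{
	/* plain rule: proto matches -> keep going; mismatch -> fail */
	printf("%d %d\n", fails(true, 0, FLAG_PROTO),
			  fails(false, 0, FLAG_PROTO));		/* 0 1 */
	/* inverted rule: proto match -> fail; mismatch -> keep going */
	printf("%d %d\n", fails(true, FLAG_PROTO, FLAG_PROTO),
			  fails(false, FLAG_PROTO, FLAG_PROTO));	/* 1 0 */
	return 0;
}
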
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index c4871ca6c86d..176e5570a999 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_length");
21MODULE_ALIAS("ip6t_length"); 21MODULE_ALIAS("ip6t_length");
22 22
23static bool 23static bool
24length_mt(const struct sk_buff *skb, const struct xt_match_param *par) 24length_mt(const struct sk_buff *skb, struct xt_action_param *par)
25{ 25{
26 const struct xt_length_info *info = par->matchinfo; 26 const struct xt_length_info *info = par->matchinfo;
27 u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len); 27 u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
@@ -30,7 +30,7 @@ length_mt(const struct sk_buff *skb, const struct xt_match_param *par)
30} 30}
31 31
32static bool 32static bool
33length_mt6(const struct sk_buff *skb, const struct xt_match_param *par) 33length_mt6(const struct sk_buff *skb, struct xt_action_param *par)
34{ 34{
35 const struct xt_length_info *info = par->matchinfo; 35 const struct xt_length_info *info = par->matchinfo;
36 const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + 36 const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
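
The hunk above ends mid-expression at the diff's context boundary: for IPv4 the match reads the full datagram size straight from tot_len, while IPv6's payload_len excludes the fixed 40-byte header, so length_mt6() adds sizeof(struct ipv6hdr) back. A sketch of the two computations:

#include <stdint.h>
#include <stdio.h>

#define IPV6_HDR_LEN 40	/* fixed IPv6 header, excluded from payload_len */

/* IPv4: tot_len already covers the whole datagram */
static uint16_t ipv4_pktlen(uint16_t tot_len)
{
	return tot_len;
}

/* IPv6: payload_len plus the fixed header, as in length_mt6() */
static uint16_t ipv6_pktlen(uint16_t payload_len)
{
	return payload_len + IPV6_HDR_LEN;
}

int main(void)
{
	printf("v4: %u\n", ipv4_pktlen(1500));	/* 1500 */
	printf("v6: %u\n", ipv6_pktlen(1460));	/* 1500 */
	return 0;
}
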
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index e5d7e1ffb1a4..32b7a579a032 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -5,6 +5,7 @@
5 * it under the terms of the GNU General Public License version 2 as 5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation. 6 * published by the Free Software Foundation.
7 */ 7 */
8#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8 9
9#include <linux/slab.h> 10#include <linux/slab.h>
10#include <linux/module.h> 11#include <linux/module.h>
@@ -64,7 +65,7 @@ static DEFINE_SPINLOCK(limit_lock);
64#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) 65#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
65 66
66static bool 67static bool
67limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) 68limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
68{ 69{
69 const struct xt_rateinfo *r = par->matchinfo; 70 const struct xt_rateinfo *r = par->matchinfo;
70 struct xt_limit_priv *priv = r->master; 71 struct xt_limit_priv *priv = r->master;
@@ -98,7 +99,7 @@ user2credits(u_int32_t user)
98 return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE; 99 return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE;
99} 100}
100 101
101static bool limit_mt_check(const struct xt_mtchk_param *par) 102static int limit_mt_check(const struct xt_mtchk_param *par)
102{ 103{
103 struct xt_rateinfo *r = par->matchinfo; 104 struct xt_rateinfo *r = par->matchinfo;
104 struct xt_limit_priv *priv; 105 struct xt_limit_priv *priv;
@@ -106,14 +107,14 @@ static bool limit_mt_check(const struct xt_mtchk_param *par)
106 /* Check for overflow. */ 107 /* Check for overflow. */
107 if (r->burst == 0 108 if (r->burst == 0
108 || user2credits(r->avg * r->burst) < user2credits(r->avg)) { 109 || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
109 printk("Overflow in xt_limit, try lower: %u/%u\n", 110 pr_info("Overflow, try lower: %u/%u\n",
110 r->avg, r->burst); 111 r->avg, r->burst);
111 return false; 112 return -ERANGE;
112 } 113 }
113 114
114 priv = kmalloc(sizeof(*priv), GFP_KERNEL); 115 priv = kmalloc(sizeof(*priv), GFP_KERNEL);
115 if (priv == NULL) 116 if (priv == NULL)
116 return false; 117 return -ENOMEM;
117 118
118 /* For SMP, we only want to use one set of state. */ 119 /* For SMP, we only want to use one set of state. */
119 r->master = priv; 120 r->master = priv;
@@ -125,7 +126,7 @@ static bool limit_mt_check(const struct xt_mtchk_param *par)
125 r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ 126 r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
126 r->cost = user2credits(r->avg); 127 r->cost = user2credits(r->avg);
127 } 128 }
128 return true; 129 return 0;
129} 130}
130 131
131static void limit_mt_destroy(const struct xt_mtdtor_param *par) 132static void limit_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index c2007116ce5b..8160f6b1435d 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -10,6 +10,7 @@
10 10
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13#include <linux/if_arp.h>
13#include <linux/if_ether.h> 14#include <linux/if_ether.h>
14#include <linux/etherdevice.h> 15#include <linux/etherdevice.h>
15 16
@@ -24,16 +25,20 @@ MODULE_DESCRIPTION("Xtables: MAC address match");
24MODULE_ALIAS("ipt_mac"); 25MODULE_ALIAS("ipt_mac");
25MODULE_ALIAS("ip6t_mac"); 26MODULE_ALIAS("ip6t_mac");
26 27
27static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par) 28static bool mac_mt(const struct sk_buff *skb, struct xt_action_param *par)
28{ 29{
29 const struct xt_mac_info *info = par->matchinfo; 30 const struct xt_mac_info *info = par->matchinfo;
30 31 bool ret;
31 /* Is mac pointer valid? */ 32
32 return skb_mac_header(skb) >= skb->head && 33 if (skb->dev == NULL || skb->dev->type != ARPHRD_ETHER)
33 skb_mac_header(skb) + ETH_HLEN <= skb->data 34 return false;
34 /* If so, compare... */ 35 if (skb_mac_header(skb) < skb->head)
35 && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr)) 36 return false;
36 ^ info->invert); 37 if (skb_mac_header(skb) + ETH_HLEN > skb->data)
38 return false;
39 ret = compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr) == 0;
40 ret ^= info->invert;
41 return ret;
37} 42}
38 43
39static struct xt_match mac_mt_reg __read_mostly = { 44static struct xt_match mac_mt_reg __read_mostly = {
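
The rewritten mac_mt() unchains the old single expression into early exits and, new in this version, refuses non-Ethernet devices before ever touching the MAC header. A userspace sketch of the check order; struct frame is a simplified stand-in for the few sk_buff fields consulted:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

/* simplified frame: just the bits mac_mt() looks at */
struct frame {
	int is_ether;			/* dev->type == ARPHRD_ETHER */
	int has_mac_header;		/* MAC header present and complete */
	unsigned char src[ETH_ALEN];
};

static bool mac_match(const struct frame *f, const unsigned char *want,
		      bool invert)
{
	/* early exits mirror the rewritten mac_mt() */
	if (!f->is_ether)
		return false;
	if (!f->has_mac_header)
		return false;
	return (memcmp(f->src, want, ETH_ALEN) == 0) ^ invert;
}

int main(void)
{
	unsigned char want[ETH_ALEN] = { 0, 1, 2, 3, 4, 5 };
	struct frame f = { 1, 1, { 0, 1, 2, 3, 4, 5 } };

	printf("match: %d\n", mac_match(&f, want, false));	/* 1 */
	return 0;
}
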
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 1db07d8125f8..23345238711b 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -18,18 +18,38 @@
18 18
19MODULE_LICENSE("GPL"); 19MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); 20MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
21MODULE_DESCRIPTION("Xtables: packet mark match"); 21MODULE_DESCRIPTION("Xtables: packet mark operations");
22MODULE_ALIAS("ipt_mark"); 22MODULE_ALIAS("ipt_mark");
23MODULE_ALIAS("ip6t_mark"); 23MODULE_ALIAS("ip6t_mark");
24MODULE_ALIAS("ipt_MARK");
25MODULE_ALIAS("ip6t_MARK");
26
27static unsigned int
28mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
29{
30 const struct xt_mark_tginfo2 *info = par->targinfo;
31
32 skb->mark = (skb->mark & ~info->mask) ^ info->mark;
33 return XT_CONTINUE;
34}
24 35
25static bool 36static bool
26mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) 37mark_mt(const struct sk_buff *skb, struct xt_action_param *par)
27{ 38{
28 const struct xt_mark_mtinfo1 *info = par->matchinfo; 39 const struct xt_mark_mtinfo1 *info = par->matchinfo;
29 40
30 return ((skb->mark & info->mask) == info->mark) ^ info->invert; 41 return ((skb->mark & info->mask) == info->mark) ^ info->invert;
31} 42}
32 43
44static struct xt_target mark_tg_reg __read_mostly = {
45 .name = "MARK",
46 .revision = 2,
47 .family = NFPROTO_UNSPEC,
48 .target = mark_tg,
49 .targetsize = sizeof(struct xt_mark_tginfo2),
50 .me = THIS_MODULE,
51};
52
33static struct xt_match mark_mt_reg __read_mostly = { 53static struct xt_match mark_mt_reg __read_mostly = {
34 .name = "mark", 54 .name = "mark",
35 .revision = 1, 55 .revision = 1,
@@ -41,12 +61,23 @@ static struct xt_match mark_mt_reg __read_mostly = {
41 61
42static int __init mark_mt_init(void) 62static int __init mark_mt_init(void)
43{ 63{
44 return xt_register_match(&mark_mt_reg); 64 int ret;
65
66 ret = xt_register_target(&mark_tg_reg);
67 if (ret < 0)
68 return ret;
69 ret = xt_register_match(&mark_mt_reg);
70 if (ret < 0) {
71 xt_unregister_target(&mark_tg_reg);
72 return ret;
73 }
74 return 0;
45} 75}
46 76
47static void __exit mark_mt_exit(void) 77static void __exit mark_mt_exit(void)
48{ 78{
49 xt_unregister_match(&mark_mt_reg); 79 xt_unregister_match(&mark_mt_reg);
80 xt_unregister_target(&mark_tg_reg);
50} 81}
51 82
52module_init(mark_mt_init); 83module_init(mark_mt_init);
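
Folding the MARK target into xt_mark means one module now registers both a target and a match, and mark_mt_init() has to unwind: if match registration fails, the already-registered target is unregistered again so a half-initialized module leaves nothing behind. A sketch of that register-then-roll-back shape with stubbed registration calls:

#include <stdio.h>

static int target_registered;

static int register_target(void) { target_registered = 1; return 0; }
static void unregister_target(void) { target_registered = 0; }
static int register_match(void) { return -1; /* simulated failure */ }

/* same shape as mark_mt_init(): roll back the target on match failure */
static int init(void)
{
	int ret = register_target();

	if (ret < 0)
		return ret;
	ret = register_match();
	if (ret < 0) {
		unregister_target();	/* undo the earlier step */
		return ret;
	}
	return 0;
}

int main(void)
{
	printf("init: %d, target left registered: %d\n",
	       init(), target_registered);	/* init: -1, 0 */
	return 0;
}
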
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index d06bb2dd3900..ac1d3c3d09e7 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -8,7 +8,7 @@
8 * it under the terms of the GNU General Public License version 2 as 8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 */ 10 */
11 11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/udp.h> 14#include <linux/udp.h>
@@ -26,29 +26,6 @@ MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP
26MODULE_ALIAS("ipt_multiport"); 26MODULE_ALIAS("ipt_multiport");
27MODULE_ALIAS("ip6t_multiport"); 27MODULE_ALIAS("ip6t_multiport");
28 28
29#if 0
30#define duprintf(format, args...) printk(format , ## args)
31#else
32#define duprintf(format, args...)
33#endif
34
35/* Returns 1 if the port is matched by the test, 0 otherwise. */
36static inline bool
37ports_match_v0(const u_int16_t *portlist, enum xt_multiport_flags flags,
38 u_int8_t count, u_int16_t src, u_int16_t dst)
39{
40 unsigned int i;
41 for (i = 0; i < count; i++) {
42 if (flags != XT_MULTIPORT_DESTINATION && portlist[i] == src)
43 return true;
44
45 if (flags != XT_MULTIPORT_SOURCE && portlist[i] == dst)
46 return true;
47 }
48
49 return false;
50}
51
52/* Returns 1 if the port is matched by the test, 0 otherwise. */ 29/* Returns 1 if the port is matched by the test, 0 otherwise. */
53static inline bool 30static inline bool
54ports_match_v1(const struct xt_multiport_v1 *minfo, 31ports_match_v1(const struct xt_multiport_v1 *minfo,
@@ -63,7 +40,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
63 if (minfo->pflags[i]) { 40 if (minfo->pflags[i]) {
64 /* range port matching */ 41 /* range port matching */
65 e = minfo->ports[++i]; 42 e = minfo->ports[++i];
66 duprintf("src or dst matches with %d-%d?\n", s, e); 43 pr_debug("src or dst matches with %d-%d?\n", s, e);
67 44
68 if (minfo->flags == XT_MULTIPORT_SOURCE 45 if (minfo->flags == XT_MULTIPORT_SOURCE
69 && src >= s && src <= e) 46 && src >= s && src <= e)
@@ -77,7 +54,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
77 return true ^ minfo->invert; 54 return true ^ minfo->invert;
78 } else { 55 } else {
79 /* exact port matching */ 56 /* exact port matching */
80 duprintf("src or dst matches with %d?\n", s); 57 pr_debug("src or dst matches with %d?\n", s);
81 58
82 if (minfo->flags == XT_MULTIPORT_SOURCE 59 if (minfo->flags == XT_MULTIPORT_SOURCE
83 && src == s) 60 && src == s)
@@ -95,31 +72,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
95} 72}
96 73
97static bool 74static bool
98multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) 75multiport_mt(const struct sk_buff *skb, struct xt_action_param *par)
99{
100 const __be16 *pptr;
101 __be16 _ports[2];
102 const struct xt_multiport *multiinfo = par->matchinfo;
103
104 if (par->fragoff != 0)
105 return false;
106
107 pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports);
108 if (pptr == NULL) {
109 /* We've been asked to examine this packet, and we
110 * can't. Hence, no choice but to drop.
111 */
112 duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
113 *par->hotdrop = true;
114 return false;
115 }
116
117 return ports_match_v0(multiinfo->ports, multiinfo->flags,
118 multiinfo->count, ntohs(pptr[0]), ntohs(pptr[1]));
119}
120
121static bool
122multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par)
123{ 76{
124 const __be16 *pptr; 77 const __be16 *pptr;
125 __be16 _ports[2]; 78 __be16 _ports[2];
@@ -133,8 +86,8 @@ multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par)
133 /* We've been asked to examine this packet, and we 86 /* We've been asked to examine this packet, and we
134 * can't. Hence, no choice but to drop. 87 * can't. Hence, no choice but to drop.
135 */ 88 */
136 duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); 89 pr_debug("Dropping evil offset=0 tinygram.\n");
137 *par->hotdrop = true; 90 par->hotdrop = true;
138 return false; 91 return false;
139 } 92 }
140 93
@@ -158,55 +111,28 @@ check(u_int16_t proto,
158 && count <= XT_MULTI_PORTS; 111 && count <= XT_MULTI_PORTS;
159} 112}
160 113
161static bool multiport_mt_check_v0(const struct xt_mtchk_param *par) 114static int multiport_mt_check(const struct xt_mtchk_param *par)
162{
163 const struct ipt_ip *ip = par->entryinfo;
164 const struct xt_multiport *multiinfo = par->matchinfo;
165
166 return check(ip->proto, ip->invflags, multiinfo->flags,
167 multiinfo->count);
168}
169
170static bool multiport_mt_check(const struct xt_mtchk_param *par)
171{ 115{
172 const struct ipt_ip *ip = par->entryinfo; 116 const struct ipt_ip *ip = par->entryinfo;
173 const struct xt_multiport_v1 *multiinfo = par->matchinfo; 117 const struct xt_multiport_v1 *multiinfo = par->matchinfo;
174 118
175 return check(ip->proto, ip->invflags, multiinfo->flags, 119 return check(ip->proto, ip->invflags, multiinfo->flags,
176 multiinfo->count); 120 multiinfo->count) ? 0 : -EINVAL;
177} 121}
178 122
179static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par) 123static int multiport_mt6_check(const struct xt_mtchk_param *par)
180{
181 const struct ip6t_ip6 *ip = par->entryinfo;
182 const struct xt_multiport *multiinfo = par->matchinfo;
183
184 return check(ip->proto, ip->invflags, multiinfo->flags,
185 multiinfo->count);
186}
187
188static bool multiport_mt6_check(const struct xt_mtchk_param *par)
189{ 124{
190 const struct ip6t_ip6 *ip = par->entryinfo; 125 const struct ip6t_ip6 *ip = par->entryinfo;
191 const struct xt_multiport_v1 *multiinfo = par->matchinfo; 126 const struct xt_multiport_v1 *multiinfo = par->matchinfo;
192 127
193 return check(ip->proto, ip->invflags, multiinfo->flags, 128 return check(ip->proto, ip->invflags, multiinfo->flags,
194 multiinfo->count); 129 multiinfo->count) ? 0 : -EINVAL;
195} 130}
196 131
197static struct xt_match multiport_mt_reg[] __read_mostly = { 132static struct xt_match multiport_mt_reg[] __read_mostly = {
198 { 133 {
199 .name = "multiport", 134 .name = "multiport",
200 .family = NFPROTO_IPV4, 135 .family = NFPROTO_IPV4,
201 .revision = 0,
202 .checkentry = multiport_mt_check_v0,
203 .match = multiport_mt_v0,
204 .matchsize = sizeof(struct xt_multiport),
205 .me = THIS_MODULE,
206 },
207 {
208 .name = "multiport",
209 .family = NFPROTO_IPV4,
210 .revision = 1, 136 .revision = 1,
211 .checkentry = multiport_mt_check, 137 .checkentry = multiport_mt_check,
212 .match = multiport_mt, 138 .match = multiport_mt,
@@ -216,15 +142,6 @@ static struct xt_match multiport_mt_reg[] __read_mostly = {
216 { 142 {
217 .name = "multiport", 143 .name = "multiport",
218 .family = NFPROTO_IPV6, 144 .family = NFPROTO_IPV6,
219 .revision = 0,
220 .checkentry = multiport_mt6_check_v0,
221 .match = multiport_mt_v0,
222 .matchsize = sizeof(struct xt_multiport),
223 .me = THIS_MODULE,
224 },
225 {
226 .name = "multiport",
227 .family = NFPROTO_IPV6,
228 .revision = 1, 145 .revision = 1,
229 .checkentry = multiport_mt6_check, 146 .checkentry = multiport_mt6_check,
230 .match = multiport_mt, 147 .match = multiport_mt,
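
With revision 0 gone, ports_match_v1() is the only port walk left: a set pflags[i] bit makes ports[i] and ports[i+1] an inclusive range, an unset bit makes ports[i] an exact port, and the verdict is XORed with invert. A hedged sketch of that walk over simplified types, checking a single port value instead of the source/destination pair:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct multiport {
	uint8_t count;
	uint16_t ports[8];
	uint8_t pflags[8];	/* nonzero: ports[i]..ports[i+1] is a range */
	uint8_t invert;
};

/* same walk as ports_match_v1(), for one port value */
static bool port_match(const struct multiport *m, uint16_t port)
{
	for (unsigned int i = 0; i < m->count; i++) {
		if (m->pflags[i]) {
			/* range: this entry plus the next one */
			uint16_t s = m->ports[i], e = m->ports[++i];

			if (port >= s && port <= e)
				return true ^ m->invert;
		} else if (m->ports[i] == port) {
			return true ^ m->invert;
		}
	}
	return false ^ m->invert;
}

int main(void)
{
	/* port 80 plus the range 8000-8080 */
	struct multiport m = { .count = 3, .ports = { 80, 8000, 8080 },
			       .pflags = { 0, 1, 0 }, .invert = 0 };

	printf("%d %d %d\n", port_match(&m, 80), port_match(&m, 8042),
	       port_match(&m, 443));	/* 1 1 0 */
	return 0;
}
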
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 4169e200588d..4327e101c047 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -16,7 +16,7 @@
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */ 18 */
19 19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22 22
@@ -193,8 +193,8 @@ static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info
193 return ip->ttl == f_ttl; 193 return ip->ttl == f_ttl;
194} 194}
195 195
196static bool xt_osf_match_packet(const struct sk_buff *skb, 196static bool
197 const struct xt_match_param *p) 197xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
198{ 198{
199 const struct xt_osf_info *info = p->matchinfo; 199 const struct xt_osf_info *info = p->matchinfo;
200 const struct iphdr *ip = ip_hdr(skb); 200 const struct iphdr *ip = ip_hdr(skb);
@@ -382,14 +382,14 @@ static int __init xt_osf_init(void)
382 382
383 err = nfnetlink_subsys_register(&xt_osf_nfnetlink); 383 err = nfnetlink_subsys_register(&xt_osf_nfnetlink);
384 if (err < 0) { 384 if (err < 0) {
385 printk(KERN_ERR "Failed (%d) to register OSF nsfnetlink helper.\n", err); 385 pr_err("Failed to register OSF nsfnetlink helper (%d)\n", err);
386 goto err_out_exit; 386 goto err_out_exit;
387 } 387 }
388 388
389 err = xt_register_match(&xt_osf_match); 389 err = xt_register_match(&xt_osf_match);
390 if (err) { 390 if (err) {
391 printk(KERN_ERR "Failed (%d) to register OS fingerprint " 391 pr_err("Failed to register OS fingerprint "
392 "matching module.\n", err); 392 "matching module (%d)\n", err);
393 goto err_out_remove; 393 goto err_out_remove;
394 } 394 }
395 395
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index d24c76dffee2..772d7389b337 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -18,7 +18,7 @@
18#include <linux/netfilter/xt_owner.h> 18#include <linux/netfilter/xt_owner.h>
19 19
20static bool 20static bool
21owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) 21owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
22{ 22{
23 const struct xt_owner_match_info *info = par->matchinfo; 23 const struct xt_owner_match_info *info = par->matchinfo;
24 const struct file *filp; 24 const struct file *filp;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 8d28ca5848bc..d7ca16b8b8df 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -7,7 +7,7 @@
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10 10#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
13#include <linux/netfilter_bridge.h> 13#include <linux/netfilter_bridge.h>
@@ -22,7 +22,7 @@ MODULE_ALIAS("ip6t_physdev");
22 22
23 23
24static bool 24static bool
25physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) 25physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
26{ 26{
27 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 27 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
28 const struct xt_physdev_info *info = par->matchinfo; 28 const struct xt_physdev_info *info = par->matchinfo;
@@ -83,25 +83,25 @@ match_outdev:
83 return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); 83 return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
84} 84}
85 85
86static bool physdev_mt_check(const struct xt_mtchk_param *par) 86static int physdev_mt_check(const struct xt_mtchk_param *par)
87{ 87{
88 const struct xt_physdev_info *info = par->matchinfo; 88 const struct xt_physdev_info *info = par->matchinfo;
89 89
90 if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || 90 if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
91 info->bitmask & ~XT_PHYSDEV_OP_MASK) 91 info->bitmask & ~XT_PHYSDEV_OP_MASK)
92 return false; 92 return -EINVAL;
93 if (info->bitmask & XT_PHYSDEV_OP_OUT && 93 if (info->bitmask & XT_PHYSDEV_OP_OUT &&
94 (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || 94 (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
95 info->invert & XT_PHYSDEV_OP_BRIDGED) && 95 info->invert & XT_PHYSDEV_OP_BRIDGED) &&
96 par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | 96 par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
97 (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { 97 (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
98 printk(KERN_WARNING "physdev match: using --physdev-out in the " 98 pr_info("using --physdev-out in the OUTPUT, FORWARD and "
99 "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " 99 "POSTROUTING chains for non-bridged traffic is not "
100 "traffic is not supported anymore.\n"); 100 "supported anymore.\n");
101 if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) 101 if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
102 return false; 102 return -EINVAL;
103 } 103 }
104 return true; 104 return 0;
105} 105}
106 106
107static struct xt_match physdev_mt_reg __read_mostly = { 107static struct xt_match physdev_mt_reg __read_mostly = {
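
The physdev hunk shows the other tree-wide theme: ->checkentry switches from bool (true = accept the rule) to int, returning 0 on success or a specific negative errno that xtables can hand back to user space instead of a blanket EINVAL; the diagnostic also drops from KERN_WARNING to pr_info in passing. A sketch of the new convention, with a hypothetical match payload:

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/netfilter/x_tables.h>

struct demo_mtinfo {			/* hypothetical rule payload */
	__u32 bitmask;
};
#define DEMO_VALID_BITS	0x7

static int demo_mt_check(const struct xt_mtchk_param *par)
{
	const struct demo_mtinfo *info = par->matchinfo;

	if (info->bitmask & ~DEMO_VALID_BITS)
		return -EINVAL;		/* malformed rule: errno, not false */
	return 0;			/* rule accepted */
}
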
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 69da1d3a1d85..5b645cb598fc 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_pkttype");
23MODULE_ALIAS("ip6t_pkttype"); 23MODULE_ALIAS("ip6t_pkttype");
24 24
25static bool 25static bool
26pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) 26pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
27{ 27{
28 const struct xt_pkttype_info *info = par->matchinfo; 28 const struct xt_pkttype_info *info = par->matchinfo;
29 u_int8_t type; 29 u_int8_t type;
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 4cbfebda8fa1..f23e97bb42d7 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -6,7 +6,7 @@
6 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 */ 8 */
9 9#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/skbuff.h> 12#include <linux/skbuff.h>
@@ -110,15 +110,15 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
110} 110}
111 111
112static bool 112static bool
113policy_mt(const struct sk_buff *skb, const struct xt_match_param *par) 113policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
114{ 114{
115 const struct xt_policy_info *info = par->matchinfo; 115 const struct xt_policy_info *info = par->matchinfo;
116 int ret; 116 int ret;
117 117
118 if (info->flags & XT_POLICY_MATCH_IN) 118 if (info->flags & XT_POLICY_MATCH_IN)
119 ret = match_policy_in(skb, info, par->match->family); 119 ret = match_policy_in(skb, info, par->family);
120 else 120 else
121 ret = match_policy_out(skb, info, par->match->family); 121 ret = match_policy_out(skb, info, par->family);
122 122
123 if (ret < 0) 123 if (ret < 0)
124 ret = info->flags & XT_POLICY_MATCH_NONE ? true : false; 124 ret = info->flags & XT_POLICY_MATCH_NONE ? true : false;
@@ -128,32 +128,29 @@ policy_mt(const struct sk_buff *skb, const struct xt_match_param *par)
128 return ret; 128 return ret;
129} 129}
130 130
131static bool policy_mt_check(const struct xt_mtchk_param *par) 131static int policy_mt_check(const struct xt_mtchk_param *par)
132{ 132{
133 const struct xt_policy_info *info = par->matchinfo; 133 const struct xt_policy_info *info = par->matchinfo;
134 134
135 if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) { 135 if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) {
136 printk(KERN_ERR "xt_policy: neither incoming nor " 136 pr_info("neither incoming nor outgoing policy selected\n");
137 "outgoing policy selected\n"); 137 return -EINVAL;
138 return false;
139 } 138 }
140 if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | 139 if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
141 (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { 140 (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) {
142 printk(KERN_ERR "xt_policy: output policy not valid in " 141 pr_info("output policy not valid in PREROUTING and INPUT\n");
143 "PRE_ROUTING and INPUT\n"); 142 return -EINVAL;
144 return false;
145 } 143 }
146 if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | 144 if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
147 (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) { 145 (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) {
148 printk(KERN_ERR "xt_policy: input policy not valid in " 146 pr_info("input policy not valid in POSTROUTING and OUTPUT\n");
149 "POST_ROUTING and OUTPUT\n"); 147 return -EINVAL;
150 return false;
151 } 148 }
152 if (info->len > XT_POLICY_MAX_ELEM) { 149 if (info->len > XT_POLICY_MAX_ELEM) {
153 printk(KERN_ERR "xt_policy: too many policy elements\n"); 150 pr_info("too many policy elements\n");
154 return false; 151 return -EINVAL;
155 } 152 }
156 return true; 153 return 0;
157} 154}
158 155
159static struct xt_match policy_mt_reg[] __read_mostly = { 156static struct xt_match policy_mt_reg[] __read_mostly = {
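
Note that policy_mt() also stops reaching through par->match->family: xt_action_param (and xt_mtchk_param) now carry the address family directly, which is what a family-agnostic match needs at run time. A sketch, with hypothetical per-family helpers:

#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>

static bool demo_match_v4(const struct sk_buff *skb) { return true; }
static bool demo_match_v6(const struct sk_buff *skb) { return true; }

static bool demo_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	if (par->family == NFPROTO_IPV4)	/* was: par->match->family */
		return demo_match_v4(skb);
	return demo_match_v6(skb);
}
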
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 2d5562498c43..70eb2b4984dd 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -11,7 +11,8 @@
11#include <linux/netfilter/xt_quota.h> 11#include <linux/netfilter/xt_quota.h>
12 12
13struct xt_quota_priv { 13struct xt_quota_priv {
14 uint64_t quota; 14 spinlock_t lock;
15 uint64_t quota;
15}; 16};
16 17
17MODULE_LICENSE("GPL"); 18MODULE_LICENSE("GPL");
@@ -20,16 +21,14 @@ MODULE_DESCRIPTION("Xtables: countdown quota match");
20MODULE_ALIAS("ipt_quota"); 21MODULE_ALIAS("ipt_quota");
21MODULE_ALIAS("ip6t_quota"); 22MODULE_ALIAS("ip6t_quota");
22 23
23static DEFINE_SPINLOCK(quota_lock);
24
25static bool 24static bool
26quota_mt(const struct sk_buff *skb, const struct xt_match_param *par) 25quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
27{ 26{
28 struct xt_quota_info *q = (void *)par->matchinfo; 27 struct xt_quota_info *q = (void *)par->matchinfo;
29 struct xt_quota_priv *priv = q->master; 28 struct xt_quota_priv *priv = q->master;
30 bool ret = q->flags & XT_QUOTA_INVERT; 29 bool ret = q->flags & XT_QUOTA_INVERT;
31 30
32 spin_lock_bh(&quota_lock); 31 spin_lock_bh(&priv->lock);
33 if (priv->quota >= skb->len) { 32 if (priv->quota >= skb->len) {
34 priv->quota -= skb->len; 33 priv->quota -= skb->len;
35 ret = !ret; 34 ret = !ret;
@@ -37,26 +36,25 @@ quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
37 /* we do not allow even small packets from now on */ 36 /* we do not allow even small packets from now on */
38 priv->quota = 0; 37 priv->quota = 0;
39 } 38 }
40 /* Copy quota back to matchinfo so that iptables can display it */ 39 spin_unlock_bh(&priv->lock);
41 q->quota = priv->quota;
42 spin_unlock_bh(&quota_lock);
43 40
44 return ret; 41 return ret;
45} 42}
46 43
47static bool quota_mt_check(const struct xt_mtchk_param *par) 44static int quota_mt_check(const struct xt_mtchk_param *par)
48{ 45{
49 struct xt_quota_info *q = par->matchinfo; 46 struct xt_quota_info *q = par->matchinfo;
50 47
51 if (q->flags & ~XT_QUOTA_MASK) 48 if (q->flags & ~XT_QUOTA_MASK)
52 return false; 49 return -EINVAL;
53 50
54 q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); 51 q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
55 if (q->master == NULL) 52 if (q->master == NULL)
56 return false; 53 return -ENOMEM;
57 54
55 spin_lock_init(&q->master->lock);
58 q->master->quota = q->quota; 56 q->master->quota = q->quota;
59 return true; 57 return 0;
60} 58}
61 59
62static void quota_mt_destroy(const struct xt_mtdtor_param *par) 60static void quota_mt_destroy(const struct xt_mtdtor_param *par)
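
The quota rewrite replaces one global quota_lock, contended by every quota rule on every CPU, with a spinlock embedded in each rule's private state and initialized at checkentry time; it also quietly drops the copy-back of priv->quota into matchinfo, so iptables loses its live remaining-quota display. A standalone sketch of the per-rule-state pattern (names abbreviated from the diff):

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_quota_priv {
	spinlock_t lock;
	u64 quota;
};

static struct demo_quota_priv *demo_quota_alloc(u64 initial)
{
	struct demo_quota_priv *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return NULL;
	spin_lock_init(&p->lock);	/* one lock per rule, not global */
	p->quota = initial;
	return p;
}

static bool demo_quota_charge(struct demo_quota_priv *p, unsigned int len)
{
	bool ok = false;

	spin_lock_bh(&p->lock);
	if (p->quota >= len) {
		p->quota -= len;
		ok = true;
	} else {
		p->quota = 0;	/* exhausted: refuse even small packets */
	}
	spin_unlock_bh(&p->lock);
	return ok;
}
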
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 4fc6a917f6de..76a083184d8e 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -15,7 +15,7 @@
15 15
16 16
17static bool 17static bool
18xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par) 18xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
19{ 19{
20 const struct xt_rateest_match_info *info = par->matchinfo; 20 const struct xt_rateest_match_info *info = par->matchinfo;
21 struct gnet_stats_rate_est *r; 21 struct gnet_stats_rate_est *r;
@@ -74,10 +74,11 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
74 return ret; 74 return ret;
75} 75}
76 76
77static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) 77static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
78{ 78{
79 struct xt_rateest_match_info *info = par->matchinfo; 79 struct xt_rateest_match_info *info = par->matchinfo;
80 struct xt_rateest *est1, *est2; 80 struct xt_rateest *est1, *est2;
81 int ret = false;
81 82
82 if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | 83 if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
83 XT_RATEEST_MATCH_REL)) != 1) 84 XT_RATEEST_MATCH_REL)) != 1)
@@ -95,6 +96,7 @@ static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
95 goto err1; 96 goto err1;
96 } 97 }
97 98
99 ret = -ENOENT;
98 est1 = xt_rateest_lookup(info->name1); 100 est1 = xt_rateest_lookup(info->name1);
99 if (!est1) 101 if (!est1)
100 goto err1; 102 goto err1;
@@ -109,12 +111,12 @@ static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
109 111
110 info->est1 = est1; 112 info->est1 = est1;
111 info->est2 = est2; 113 info->est2 = est2;
112 return true; 114 return 0;
113 115
114err2: 116err2:
115 xt_rateest_put(est1); 117 xt_rateest_put(est1);
116err1: 118err1:
117 return false; 119 return -EINVAL;
118} 120}
119 121
120static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) 122static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
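
xt_rateest stages its error codes: ret starts at the code for the upcoming step and is bumped to -ENOENT once the estimator lookups begin, so all failures can share the unwind labels. Two details worth flagging: the new "int ret = false;" initializer is 0 spelled with a bool constant, and err1 still returns a hard-coded -EINVAL as the diff stands, leaving the -ENOENT staging dead until a later cleanup. A reduced sketch of the idiom carried through end to end, with stubbed-out lookup helpers:

#include <linux/errno.h>

struct demo_est { int refcnt; };	/* hypothetical estimator handle */

static struct demo_est *demo_lookup(const char *name)
{
	return NULL;		/* stub: pretend nothing is registered */
}

static void demo_put(struct demo_est *e)
{
}

static int demo_check(const char *name1, const char *name2)
{
	struct demo_est *e1, *e2;
	int ret = -ENOENT;	/* error code for the lookup stage */

	e1 = demo_lookup(name1);
	if (!e1)
		goto err1;
	e2 = demo_lookup(name2);
	if (!e2)
		goto err2;
	/* success: keep both references, mirroring the original */
	return 0;

err2:
	demo_put(e1);
err1:
	return ret;	/* caller sees -ENOENT, not a generic -EINVAL */
}
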
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 484d1689bfde..459a7b256eb2 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -22,7 +22,7 @@ MODULE_DESCRIPTION("Xtables: Routing realm match");
22MODULE_ALIAS("ipt_realm"); 22MODULE_ALIAS("ipt_realm");
23 23
24static bool 24static bool
25realm_mt(const struct sk_buff *skb, const struct xt_match_param *par) 25realm_mt(const struct sk_buff *skb, struct xt_action_param *par)
26{ 26{
27 const struct xt_realm_info *info = par->matchinfo; 27 const struct xt_realm_info *info = par->matchinfo;
28 const struct dst_entry *dst = skb_dst(skb); 28 const struct dst_entry *dst = skb_dst(skb);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 834b736857cb..76aec6a44762 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -12,6 +12,7 @@
12 * Author: Stephen Frost <sfrost@snowman.net> 12 * Author: Stephen Frost <sfrost@snowman.net>
13 * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org 13 * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org
14 */ 14 */
15#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15#include <linux/init.h> 16#include <linux/init.h>
16#include <linux/ip.h> 17#include <linux/ip.h>
17#include <linux/ipv6.h> 18#include <linux/ipv6.h>
@@ -35,8 +36,8 @@
35#include <linux/netfilter/xt_recent.h> 36#include <linux/netfilter/xt_recent.h>
36 37
37MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 38MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
38MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); 39MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
39MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4"); 40MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching");
40MODULE_LICENSE("GPL"); 41MODULE_LICENSE("GPL");
41MODULE_ALIAS("ipt_recent"); 42MODULE_ALIAS("ipt_recent");
42MODULE_ALIAS("ip6t_recent"); 43MODULE_ALIAS("ip6t_recent");
@@ -51,14 +52,14 @@ module_param(ip_list_tot, uint, 0400);
51module_param(ip_pkt_list_tot, uint, 0400); 52module_param(ip_pkt_list_tot, uint, 0400);
52module_param(ip_list_hash_size, uint, 0400); 53module_param(ip_list_hash_size, uint, 0400);
53module_param(ip_list_perms, uint, 0400); 54module_param(ip_list_perms, uint, 0400);
54module_param(ip_list_uid, uint, 0400); 55module_param(ip_list_uid, uint, S_IRUGO | S_IWUSR);
55module_param(ip_list_gid, uint, 0400); 56module_param(ip_list_gid, uint, S_IRUGO | S_IWUSR);
56MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); 57MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
57MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)"); 58MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)");
58MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); 59MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
59MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); 60MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
60MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files"); 61MODULE_PARM_DESC(ip_list_uid, "default owner of /proc/net/xt_recent/* files");
61MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/xt_recent/* files"); 62MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* files");
62 63
63struct recent_entry { 64struct recent_entry {
64 struct list_head list; 65 struct list_head list;
@@ -84,9 +85,6 @@ struct recent_net {
84 struct list_head tables; 85 struct list_head tables;
85#ifdef CONFIG_PROC_FS 86#ifdef CONFIG_PROC_FS
86 struct proc_dir_entry *xt_recent; 87 struct proc_dir_entry *xt_recent;
87#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
88 struct proc_dir_entry *ipt_recent;
89#endif
90#endif 88#endif
91}; 89};
92 90
@@ -147,6 +145,25 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
147 t->entries--; 145 t->entries--;
148} 146}
149 147
148/*
149 * Drop entries with timestamps older than 'time'.
150 */
151static void recent_entry_reap(struct recent_table *t, unsigned long time)
152{
153 struct recent_entry *e;
154
155 /*
156 * The head of the LRU list is always the oldest entry.
157 */
158 e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
159
160 /*
161 * The last time stamp is the most recent.
162 */
163 if (time_after(time, e->stamps[e->index-1]))
164 recent_entry_remove(t, e);
165}
166
150static struct recent_entry * 167static struct recent_entry *
151recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, 168recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
152 u_int16_t family, u_int8_t ttl) 169 u_int16_t family, u_int8_t ttl)
@@ -207,7 +224,7 @@ static void recent_table_flush(struct recent_table *t)
207} 224}
208 225
209static bool 226static bool
210recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) 227recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
211{ 228{
212 struct net *net = dev_net(par->in ? par->in : par->out); 229 struct net *net = dev_net(par->in ? par->in : par->out);
213 struct recent_net *recent_net = recent_pernet(net); 230 struct recent_net *recent_net = recent_pernet(net);
@@ -218,7 +235,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
218 u_int8_t ttl; 235 u_int8_t ttl;
219 bool ret = info->invert; 236 bool ret = info->invert;
220 237
221 if (par->match->family == NFPROTO_IPV4) { 238 if (par->family == NFPROTO_IPV4) {
222 const struct iphdr *iph = ip_hdr(skb); 239 const struct iphdr *iph = ip_hdr(skb);
223 240
224 if (info->side == XT_RECENT_DEST) 241 if (info->side == XT_RECENT_DEST)
@@ -244,14 +261,14 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
244 261
245 spin_lock_bh(&recent_lock); 262 spin_lock_bh(&recent_lock);
246 t = recent_table_lookup(recent_net, info->name); 263 t = recent_table_lookup(recent_net, info->name);
247 e = recent_entry_lookup(t, &addr, par->match->family, 264 e = recent_entry_lookup(t, &addr, par->family,
248 (info->check_set & XT_RECENT_TTL) ? ttl : 0); 265 (info->check_set & XT_RECENT_TTL) ? ttl : 0);
249 if (e == NULL) { 266 if (e == NULL) {
250 if (!(info->check_set & XT_RECENT_SET)) 267 if (!(info->check_set & XT_RECENT_SET))
251 goto out; 268 goto out;
252 e = recent_entry_init(t, &addr, par->match->family, ttl); 269 e = recent_entry_init(t, &addr, par->family, ttl);
253 if (e == NULL) 270 if (e == NULL)
254 *par->hotdrop = true; 271 par->hotdrop = true;
255 ret = !ret; 272 ret = !ret;
256 goto out; 273 goto out;
257 } 274 }
@@ -273,6 +290,10 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
273 break; 290 break;
274 } 291 }
275 } 292 }
293
294 /* info->seconds must be non-zero */
295 if (info->check_set & XT_RECENT_REAP)
296 recent_entry_reap(t, time);
276 } 297 }
277 298
278 if (info->check_set & XT_RECENT_SET || 299 if (info->check_set & XT_RECENT_SET ||
@@ -285,7 +306,7 @@ out:
285 return ret; 306 return ret;
286} 307}
287 308
288static bool recent_mt_check(const struct xt_mtchk_param *par) 309static int recent_mt_check(const struct xt_mtchk_param *par)
289{ 310{
290 struct recent_net *recent_net = recent_pernet(par->net); 311 struct recent_net *recent_net = recent_pernet(par->net);
291 const struct xt_recent_mtinfo *info = par->matchinfo; 312 const struct xt_recent_mtinfo *info = par->matchinfo;
@@ -294,41 +315,51 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
294 struct proc_dir_entry *pde; 315 struct proc_dir_entry *pde;
295#endif 316#endif
296 unsigned i; 317 unsigned i;
297 bool ret = false; 318 int ret = -EINVAL;
298 319
299 if (unlikely(!hash_rnd_inited)) { 320 if (unlikely(!hash_rnd_inited)) {
300 get_random_bytes(&hash_rnd, sizeof(hash_rnd)); 321 get_random_bytes(&hash_rnd, sizeof(hash_rnd));
301 hash_rnd_inited = true; 322 hash_rnd_inited = true;
302 } 323 }
324 if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
325 pr_info("Unsupported user space flags (%08x)\n",
326 info->check_set);
327 return -EINVAL;
328 }
303 if (hweight8(info->check_set & 329 if (hweight8(info->check_set &
304 (XT_RECENT_SET | XT_RECENT_REMOVE | 330 (XT_RECENT_SET | XT_RECENT_REMOVE |
305 XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) 331 XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
306 return false; 332 return -EINVAL;
307 if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && 333 if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
308 (info->seconds || info->hit_count)) 334 (info->seconds || info->hit_count ||
309 return false; 335 (info->check_set & XT_RECENT_MODIFIERS)))
336 return -EINVAL;
337 if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
338 return -EINVAL;
310 if (info->hit_count > ip_pkt_list_tot) { 339 if (info->hit_count > ip_pkt_list_tot) {
311 pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than " 340 pr_info("hitcount (%u) is larger than "
312 "packets to be remembered (%u)\n", 341 "packets to be remembered (%u)\n",
313 info->hit_count, ip_pkt_list_tot); 342 info->hit_count, ip_pkt_list_tot);
314 return false; 343 return -EINVAL;
315 } 344 }
316 if (info->name[0] == '\0' || 345 if (info->name[0] == '\0' ||
317 strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN) 346 strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
318 return false; 347 return -EINVAL;
319 348
320 mutex_lock(&recent_mutex); 349 mutex_lock(&recent_mutex);
321 t = recent_table_lookup(recent_net, info->name); 350 t = recent_table_lookup(recent_net, info->name);
322 if (t != NULL) { 351 if (t != NULL) {
323 t->refcnt++; 352 t->refcnt++;
324 ret = true; 353 ret = 0;
325 goto out; 354 goto out;
326 } 355 }
327 356
328 t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size, 357 t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size,
329 GFP_KERNEL); 358 GFP_KERNEL);
330 if (t == NULL) 359 if (t == NULL) {
360 ret = -ENOMEM;
331 goto out; 361 goto out;
362 }
332 t->refcnt = 1; 363 t->refcnt = 1;
333 strcpy(t->name, info->name); 364 strcpy(t->name, info->name);
334 INIT_LIST_HEAD(&t->lru_list); 365 INIT_LIST_HEAD(&t->lru_list);
@@ -339,26 +370,16 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
339 &recent_mt_fops, t); 370 &recent_mt_fops, t);
340 if (pde == NULL) { 371 if (pde == NULL) {
341 kfree(t); 372 kfree(t);
342 goto out; 373 ret = -ENOMEM;
343 }
344 pde->uid = ip_list_uid;
345 pde->gid = ip_list_gid;
346#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
347 pde = proc_create_data(t->name, ip_list_perms, recent_net->ipt_recent,
348 &recent_old_fops, t);
349 if (pde == NULL) {
350 remove_proc_entry(t->name, recent_net->xt_recent);
351 kfree(t);
352 goto out; 374 goto out;
353 } 375 }
354 pde->uid = ip_list_uid; 376 pde->uid = ip_list_uid;
355 pde->gid = ip_list_gid; 377 pde->gid = ip_list_gid;
356#endif 378#endif
357#endif
358 spin_lock_bh(&recent_lock); 379 spin_lock_bh(&recent_lock);
359 list_add_tail(&t->list, &recent_net->tables); 380 list_add_tail(&t->list, &recent_net->tables);
360 spin_unlock_bh(&recent_lock); 381 spin_unlock_bh(&recent_lock);
361 ret = true; 382 ret = 0;
362out: 383out:
363 mutex_unlock(&recent_mutex); 384 mutex_unlock(&recent_mutex);
364 return ret; 385 return ret;
@@ -377,9 +398,6 @@ static void recent_mt_destroy(const struct xt_mtdtor_param *par)
377 list_del(&t->list); 398 list_del(&t->list);
378 spin_unlock_bh(&recent_lock); 399 spin_unlock_bh(&recent_lock);
379#ifdef CONFIG_PROC_FS 400#ifdef CONFIG_PROC_FS
380#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
381 remove_proc_entry(t->name, recent_net->ipt_recent);
382#endif
383 remove_proc_entry(t->name, recent_net->xt_recent); 401 remove_proc_entry(t->name, recent_net->xt_recent);
384#endif 402#endif
385 recent_table_flush(t); 403 recent_table_flush(t);
@@ -471,84 +489,6 @@ static int recent_seq_open(struct inode *inode, struct file *file)
471 return 0; 489 return 0;
472} 490}
473 491
474#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
475static int recent_old_seq_open(struct inode *inode, struct file *filp)
476{
477 static bool warned_of_old;
478
479 if (unlikely(!warned_of_old)) {
480 printk(KERN_INFO KBUILD_MODNAME ": Use of /proc/net/ipt_recent"
481 " is deprecated; use /proc/net/xt_recent.\n");
482 warned_of_old = true;
483 }
484 return recent_seq_open(inode, filp);
485}
486
487static ssize_t recent_old_proc_write(struct file *file,
488 const char __user *input,
489 size_t size, loff_t *loff)
490{
491 const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
492 struct recent_table *t = pde->data;
493 struct recent_entry *e;
494 char buf[sizeof("+255.255.255.255")], *c = buf;
495 union nf_inet_addr addr = {};
496 int add;
497
498 if (size > sizeof(buf))
499 size = sizeof(buf);
500 if (copy_from_user(buf, input, size))
501 return -EFAULT;
502
503 c = skip_spaces(c);
504
505 if (size - (c - buf) < 5)
506 return c - buf;
507 if (!strncmp(c, "clear", 5)) {
508 c += 5;
509 spin_lock_bh(&recent_lock);
510 recent_table_flush(t);
511 spin_unlock_bh(&recent_lock);
512 return c - buf;
513 }
514
515 switch (*c) {
516 case '-':
517 add = 0;
518 c++;
519 break;
520 case '+':
521 c++;
522 default:
523 add = 1;
524 break;
525 }
526 addr.ip = in_aton(c);
527
528 spin_lock_bh(&recent_lock);
529 e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0);
530 if (e == NULL) {
531 if (add)
532 recent_entry_init(t, &addr, NFPROTO_IPV4, 0);
533 } else {
534 if (add)
535 recent_entry_update(t, e);
536 else
537 recent_entry_remove(t, e);
538 }
539 spin_unlock_bh(&recent_lock);
540 return size;
541}
542
543static const struct file_operations recent_old_fops = {
544 .open = recent_old_seq_open,
545 .read = seq_read,
546 .write = recent_old_proc_write,
547 .release = seq_release_private,
548 .owner = THIS_MODULE,
549};
550#endif
551
552static ssize_t 492static ssize_t
553recent_mt_proc_write(struct file *file, const char __user *input, 493recent_mt_proc_write(struct file *file, const char __user *input,
554 size_t size, loff_t *loff) 494 size_t size, loff_t *loff)
@@ -585,7 +525,7 @@ recent_mt_proc_write(struct file *file, const char __user *input,
585 add = true; 525 add = true;
586 break; 526 break;
587 default: 527 default:
588 printk(KERN_INFO KBUILD_MODNAME ": Need +ip, -ip or /\n"); 528 pr_info("Need \"+ip\", \"-ip\" or \"/\"\n");
589 return -EINVAL; 529 return -EINVAL;
590 } 530 }
591 531
@@ -600,8 +540,7 @@ recent_mt_proc_write(struct file *file, const char __user *input,
600 } 540 }
601 541
602 if (!succ) { 542 if (!succ) {
603 printk(KERN_INFO KBUILD_MODNAME ": illegal address written " 543 pr_info("illegal address written to procfs\n");
604 "to procfs\n");
605 return -EINVAL; 544 return -EINVAL;
606 } 545 }
607 546
@@ -637,21 +576,11 @@ static int __net_init recent_proc_net_init(struct net *net)
637 recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net); 576 recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net);
638 if (!recent_net->xt_recent) 577 if (!recent_net->xt_recent)
639 return -ENOMEM; 578 return -ENOMEM;
640#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
641 recent_net->ipt_recent = proc_mkdir("ipt_recent", net->proc_net);
642 if (!recent_net->ipt_recent) {
643 proc_net_remove(net, "xt_recent");
644 return -ENOMEM;
645 }
646#endif
647 return 0; 579 return 0;
648} 580}
649 581
650static void __net_exit recent_proc_net_exit(struct net *net) 582static void __net_exit recent_proc_net_exit(struct net *net)
651{ 583{
652#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
653 proc_net_remove(net, "ipt_recent");
654#endif
655 proc_net_remove(net, "xt_recent"); 584 proc_net_remove(net, "xt_recent");
656} 585}
657#else 586#else
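
The new XT_RECENT_REAP flag (validated at insert time to require a non-zero seconds value) ties garbage collection to rule evaluation: each evaluated packet may evict at most one stale entry, always the head of the LRU list, so the reap is O(1) and never scans the table. A self-contained sketch of that shape; the entry layout is assumed:

#include <linux/jiffies.h>
#include <linux/list.h>

struct demo_entry {
	struct list_head lru_list;	/* oldest entry sits at the head */
	unsigned long last_seen;	/* jiffies of the newest stamp */
};

static void demo_reap_oldest(struct list_head *lru, unsigned long cutoff)
{
	struct demo_entry *e;

	if (list_empty(lru))
		return;
	e = list_first_entry(lru, struct demo_entry, lru_list);
	if (time_after(cutoff, e->last_seen))
		list_del(&e->lru_list);	/* freeing elided for brevity */
}
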
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index a189ada9128f..ef36a56a02c6 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -1,7 +1,9 @@
1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
1#include <linux/module.h> 2#include <linux/module.h>
2#include <linux/skbuff.h> 3#include <linux/skbuff.h>
3#include <net/ip.h> 4#include <net/ip.h>
4#include <net/ipv6.h> 5#include <net/ipv6.h>
6#include <net/sctp/sctp.h>
5#include <linux/sctp.h> 7#include <linux/sctp.h>
6 8
7#include <linux/netfilter/x_tables.h> 9#include <linux/netfilter/x_tables.h>
@@ -15,12 +17,6 @@ MODULE_DESCRIPTION("Xtables: SCTP protocol packet match");
15MODULE_ALIAS("ipt_sctp"); 17MODULE_ALIAS("ipt_sctp");
16MODULE_ALIAS("ip6t_sctp"); 18MODULE_ALIAS("ip6t_sctp");
17 19
18#ifdef DEBUG_SCTP
19#define duprintf(format, args...) printk(format , ## args)
20#else
21#define duprintf(format, args...)
22#endif
23
24#define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ 20#define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
25 || (!!((invflag) & (option)) ^ (cond))) 21 || (!!((invflag) & (option)) ^ (cond)))
26 22
@@ -52,7 +48,7 @@ match_packet(const struct sk_buff *skb,
52 const struct xt_sctp_flag_info *flag_info = info->flag_info; 48 const struct xt_sctp_flag_info *flag_info = info->flag_info;
53 int flag_count = info->flag_count; 49 int flag_count = info->flag_count;
54 50
55#ifdef DEBUG_SCTP 51#ifdef DEBUG
56 int i = 0; 52 int i = 0;
57#endif 53#endif
58 54
@@ -62,17 +58,19 @@ match_packet(const struct sk_buff *skb,
62 do { 58 do {
63 sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); 59 sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
64 if (sch == NULL || sch->length == 0) { 60 if (sch == NULL || sch->length == 0) {
65 duprintf("Dropping invalid SCTP packet.\n"); 61 pr_debug("Dropping invalid SCTP packet.\n");
66 *hotdrop = true; 62 *hotdrop = true;
67 return false; 63 return false;
68 } 64 }
65#ifdef DEBUG
66 pr_debug("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d"
67 "\tflags: %x\n",
68 ++i, offset, sch->type, htons(sch->length),
69 sch->flags);
70#endif
71 offset += WORD_ROUND(ntohs(sch->length));
69 72
70 duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n", 73 pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);
71 ++i, offset, sch->type, htons(sch->length), sch->flags);
72
73 offset += (ntohs(sch->length) + 3) & ~3;
74
75 duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);
76 74
77 if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) { 75 if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) {
78 switch (chunk_match_type) { 76 switch (chunk_match_type) {
@@ -117,24 +115,24 @@ match_packet(const struct sk_buff *skb,
117} 115}
118 116
119static bool 117static bool
120sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par) 118sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
121{ 119{
122 const struct xt_sctp_info *info = par->matchinfo; 120 const struct xt_sctp_info *info = par->matchinfo;
123 const sctp_sctphdr_t *sh; 121 const sctp_sctphdr_t *sh;
124 sctp_sctphdr_t _sh; 122 sctp_sctphdr_t _sh;
125 123
126 if (par->fragoff != 0) { 124 if (par->fragoff != 0) {
127 duprintf("Dropping non-first fragment.. FIXME\n"); 125 pr_debug("Dropping non-first fragment.. FIXME\n");
128 return false; 126 return false;
129 } 127 }
130 128
131 sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh); 129 sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh);
132 if (sh == NULL) { 130 if (sh == NULL) {
133 duprintf("Dropping evil TCP offset=0 tinygram.\n"); 131 pr_debug("Dropping evil TCP offset=0 tinygram.\n");
134 *par->hotdrop = true; 132 par->hotdrop = true;
135 return false; 133 return false;
136 } 134 }
137 duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); 135 pr_debug("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
138 136
139 return SCCHECK(ntohs(sh->source) >= info->spts[0] 137 return SCCHECK(ntohs(sh->source) >= info->spts[0]
140 && ntohs(sh->source) <= info->spts[1], 138 && ntohs(sh->source) <= info->spts[1],
@@ -143,22 +141,26 @@ sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
143 && ntohs(sh->dest) <= info->dpts[1], 141 && ntohs(sh->dest) <= info->dpts[1],
144 XT_SCTP_DEST_PORTS, info->flags, info->invflags) 142 XT_SCTP_DEST_PORTS, info->flags, info->invflags)
145 && SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t), 143 && SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t),
146 info, par->hotdrop), 144 info, &par->hotdrop),
147 XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); 145 XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
148} 146}
149 147
150static bool sctp_mt_check(const struct xt_mtchk_param *par) 148static int sctp_mt_check(const struct xt_mtchk_param *par)
151{ 149{
152 const struct xt_sctp_info *info = par->matchinfo; 150 const struct xt_sctp_info *info = par->matchinfo;
153 151
154 return !(info->flags & ~XT_SCTP_VALID_FLAGS) 152 if (info->flags & ~XT_SCTP_VALID_FLAGS)
155 && !(info->invflags & ~XT_SCTP_VALID_FLAGS) 153 return -EINVAL;
156 && !(info->invflags & ~info->flags) 154 if (info->invflags & ~XT_SCTP_VALID_FLAGS)
157 && ((!(info->flags & XT_SCTP_CHUNK_TYPES)) || 155 return -EINVAL;
158 (info->chunk_match_type & 156 if (info->invflags & ~info->flags)
159 (SCTP_CHUNK_MATCH_ALL 157 return -EINVAL;
160 | SCTP_CHUNK_MATCH_ANY 158 if (!(info->flags & XT_SCTP_CHUNK_TYPES))
161 | SCTP_CHUNK_MATCH_ONLY))); 159 return 0;
160 if (info->chunk_match_type & (SCTP_CHUNK_MATCH_ALL |
161 SCTP_CHUNK_MATCH_ANY | SCTP_CHUNK_MATCH_ONLY))
162 return 0;
163 return -EINVAL;
162} 164}
163 165
164static struct xt_match sctp_mt_reg[] __read_mostly = { 166static struct xt_match sctp_mt_reg[] __read_mostly = {
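
Besides retiring duprintf() in favour of pr_debug(), the chunk walk now uses WORD_ROUND() from <net/sctp/sctp.h> instead of the open-coded (len + 3) & ~3; both round a chunk length up to the 4-byte boundary that RFC 4960 mandates between chunks. One carried-over oddity: the debug print still byte-swaps sch->length with htons() where ntohs() looks intended, exactly as the old duprintf version did. The equivalence, as a tiny illustrative macro:

/* Both forms pad to the next multiple of 4 (RFC 4960 chunk padding). */
#define DEMO_ROUND4(len)	((((unsigned int)(len)) + 3) & ~3u)

/* DEMO_ROUND4(1) == 4,  DEMO_ROUND4(4) == 4,
 * DEMO_ROUND4(5) == 8,  DEMO_ROUND4(8) == 8 */
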
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 6a902564d24f..1ca89908cbad 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -9,7 +9,7 @@
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 * 10 *
11 */ 11 */
12 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netfilter/x_tables.h> 15#include <linux/netfilter/x_tables.h>
@@ -88,7 +88,7 @@ extract_icmp_fields(const struct sk_buff *skb,
88 88
89 89
90static bool 90static bool
91socket_match(const struct sk_buff *skb, const struct xt_match_param *par, 91socket_match(const struct sk_buff *skb, struct xt_action_param *par,
92 const struct xt_socket_mtinfo1 *info) 92 const struct xt_socket_mtinfo1 *info)
93{ 93{
94 const struct iphdr *iph = ip_hdr(skb); 94 const struct iphdr *iph = ip_hdr(skb);
@@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
127 * reply packet of an established SNAT-ted connection. */ 127 * reply packet of an established SNAT-ted connection. */
128 128
129 ct = nf_ct_get(skb, &ctinfo); 129 ct = nf_ct_get(skb, &ctinfo);
130 if (ct && (ct != &nf_conntrack_untracked) && 130 if (ct && !nf_ct_is_untracked(ct) &&
131 ((iph->protocol != IPPROTO_ICMP && 131 ((iph->protocol != IPPROTO_ICMP &&
132 ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || 132 ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
133 (iph->protocol == IPPROTO_ICMP && 133 (iph->protocol == IPPROTO_ICMP &&
@@ -165,8 +165,7 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
165 sk = NULL; 165 sk = NULL;
166 } 166 }
167 167
168 pr_debug("socket match: proto %u %08x:%u -> %08x:%u " 168 pr_debug("proto %u %08x:%u -> %08x:%u (orig %08x:%u) sock %p\n",
169 "(orig %08x:%u) sock %p\n",
170 protocol, ntohl(saddr), ntohs(sport), 169 protocol, ntohl(saddr), ntohs(sport),
171 ntohl(daddr), ntohs(dport), 170 ntohl(daddr), ntohs(dport),
172 ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk); 171 ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk);
@@ -175,13 +174,13 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
175} 174}
176 175
177static bool 176static bool
178socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) 177socket_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
179{ 178{
180 return socket_match(skb, par, NULL); 179 return socket_match(skb, par, NULL);
181} 180}
182 181
183static bool 182static bool
184socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) 183socket_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
185{ 184{
186 return socket_match(skb, par, par->matchinfo); 185 return socket_match(skb, par, par->matchinfo);
187} 186}
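
The untracked-conntrack test changes from comparing against the address of the global nf_conntrack_untracked object to the nf_ct_is_untracked() accessor, which insulates callers from how the untracked state is represented. Assumed shape of the helper in this era of the tree (a status bit on the conntrack itself); treat the body as an illustration, not a quote:

#include <net/netfilter/nf_conntrack.h>

static inline bool demo_ct_is_untracked(const struct nf_conn *ct)
{
	/* assumption: untracked is flagged in ct->status */
	return test_bit(IPS_UNTRACKED_BIT, &ct->status);
}
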
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 4c946cbd731f..a507922d80cd 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -21,66 +21,58 @@ MODULE_ALIAS("ipt_state");
21MODULE_ALIAS("ip6t_state"); 21MODULE_ALIAS("ip6t_state");
22 22
23static bool 23static bool
24state_mt(const struct sk_buff *skb, const struct xt_match_param *par) 24state_mt(const struct sk_buff *skb, struct xt_action_param *par)
25{ 25{
26 const struct xt_state_info *sinfo = par->matchinfo; 26 const struct xt_state_info *sinfo = par->matchinfo;
27 enum ip_conntrack_info ctinfo; 27 enum ip_conntrack_info ctinfo;
28 unsigned int statebit; 28 unsigned int statebit;
29 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
29 30
30 if (nf_ct_is_untracked(skb)) 31 if (!ct)
31 statebit = XT_STATE_UNTRACKED;
32 else if (!nf_ct_get(skb, &ctinfo))
33 statebit = XT_STATE_INVALID; 32 statebit = XT_STATE_INVALID;
34 else 33 else {
35 statebit = XT_STATE_BIT(ctinfo); 34 if (nf_ct_is_untracked(ct))
36 35 statebit = XT_STATE_UNTRACKED;
36 else
37 statebit = XT_STATE_BIT(ctinfo);
38 }
37 return (sinfo->statemask & statebit); 39 return (sinfo->statemask & statebit);
38} 40}
39 41
40static bool state_mt_check(const struct xt_mtchk_param *par) 42static int state_mt_check(const struct xt_mtchk_param *par)
41{ 43{
42 if (nf_ct_l3proto_try_module_get(par->match->family) < 0) { 44 int ret;
43 printk(KERN_WARNING "can't load conntrack support for " 45
44 "proto=%u\n", par->match->family); 46 ret = nf_ct_l3proto_try_module_get(par->family);
45 return false; 47 if (ret < 0)
46 } 48 pr_info("cannot load conntrack support for proto=%u\n",
47 return true; 49 par->family);
50 return ret;
48} 51}
49 52
50static void state_mt_destroy(const struct xt_mtdtor_param *par) 53static void state_mt_destroy(const struct xt_mtdtor_param *par)
51{ 54{
52 nf_ct_l3proto_module_put(par->match->family); 55 nf_ct_l3proto_module_put(par->family);
53} 56}
54 57
55static struct xt_match state_mt_reg[] __read_mostly = { 58static struct xt_match state_mt_reg __read_mostly = {
56 { 59 .name = "state",
57 .name = "state", 60 .family = NFPROTO_UNSPEC,
58 .family = NFPROTO_IPV4, 61 .checkentry = state_mt_check,
59 .checkentry = state_mt_check, 62 .match = state_mt,
60 .match = state_mt, 63 .destroy = state_mt_destroy,
61 .destroy = state_mt_destroy, 64 .matchsize = sizeof(struct xt_state_info),
62 .matchsize = sizeof(struct xt_state_info), 65 .me = THIS_MODULE,
63 .me = THIS_MODULE,
64 },
65 {
66 .name = "state",
67 .family = NFPROTO_IPV6,
68 .checkentry = state_mt_check,
69 .match = state_mt,
70 .destroy = state_mt_destroy,
71 .matchsize = sizeof(struct xt_state_info),
72 .me = THIS_MODULE,
73 },
74}; 66};
75 67
76static int __init state_mt_init(void) 68static int __init state_mt_init(void)
77{ 69{
78 return xt_register_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg)); 70 return xt_register_match(&state_mt_reg);
79} 71}
80 72
81static void __exit state_mt_exit(void) 73static void __exit state_mt_exit(void)
82{ 74{
83 xt_unregister_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg)); 75 xt_unregister_match(&state_mt_reg);
84} 76}
85 77
86module_init(state_mt_init); 78module_init(state_mt_init);
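
Registering with NFPROTO_UNSPEC collapses the duplicated IPV4/IPV6 array into a single xt_match serving both iptables and ip6tables; this works because state_mt() only consults the conntrack, and state_mt_check() can now return nf_ct_l3proto_try_module_get()'s 0-or-negative result directly under the errno convention. A minimal registration sketch with hypothetical handlers:

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>

struct demo_info { __u32 statemask; };	/* hypothetical payload */

static bool demo_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	return true;			/* stand-in match logic */
}

static struct xt_match demo_mt_reg __read_mostly = {
	.name      = "demo",
	.family    = NFPROTO_UNSPEC,	/* one entry serves v4 and v6 */
	.match     = demo_mt,
	.matchsize = sizeof(struct demo_info),
	.me        = THIS_MODULE,
};

static int __init demo_init(void)
{
	return xt_register_match(&demo_mt_reg);
}

static void __exit demo_exit(void)
{
	xt_unregister_match(&demo_mt_reg);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
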
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 937ce0633e99..42ecb71d445f 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -18,8 +18,8 @@
18#include <linux/netfilter/x_tables.h> 18#include <linux/netfilter/x_tables.h>
19 19
20struct xt_statistic_priv { 20struct xt_statistic_priv {
21 uint32_t count; 21 atomic_t count;
22}; 22} ____cacheline_aligned_in_smp;
23 23
24MODULE_LICENSE("GPL"); 24MODULE_LICENSE("GPL");
25MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 25MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
@@ -27,13 +27,12 @@ MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
27MODULE_ALIAS("ipt_statistic"); 27MODULE_ALIAS("ipt_statistic");
28MODULE_ALIAS("ip6t_statistic"); 28MODULE_ALIAS("ip6t_statistic");
29 29
30static DEFINE_SPINLOCK(nth_lock);
31
32static bool 30static bool
33statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) 31statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
34{ 32{
35 const struct xt_statistic_info *info = par->matchinfo; 33 const struct xt_statistic_info *info = par->matchinfo;
36 bool ret = info->flags & XT_STATISTIC_INVERT; 34 bool ret = info->flags & XT_STATISTIC_INVERT;
35 int nval, oval;
37 36
38 switch (info->mode) { 37 switch (info->mode) {
39 case XT_STATISTIC_MODE_RANDOM: 38 case XT_STATISTIC_MODE_RANDOM:
@@ -41,34 +40,32 @@ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
41 ret = !ret; 40 ret = !ret;
42 break; 41 break;
43 case XT_STATISTIC_MODE_NTH: 42 case XT_STATISTIC_MODE_NTH:
44 spin_lock_bh(&nth_lock); 43 do {
45 if (info->master->count++ == info->u.nth.every) { 44 oval = atomic_read(&info->master->count);
46 info->master->count = 0; 45 nval = (oval == info->u.nth.every) ? 0 : oval + 1;
46 } while (atomic_cmpxchg(&info->master->count, oval, nval) != oval);
47 if (nval == 0)
47 ret = !ret; 48 ret = !ret;
48 }
49 spin_unlock_bh(&nth_lock);
50 break; 49 break;
51 } 50 }
52 51
53 return ret; 52 return ret;
54} 53}
55 54
56static bool statistic_mt_check(const struct xt_mtchk_param *par) 55static int statistic_mt_check(const struct xt_mtchk_param *par)
57{ 56{
58 struct xt_statistic_info *info = par->matchinfo; 57 struct xt_statistic_info *info = par->matchinfo;
59 58
60 if (info->mode > XT_STATISTIC_MODE_MAX || 59 if (info->mode > XT_STATISTIC_MODE_MAX ||
61 info->flags & ~XT_STATISTIC_MASK) 60 info->flags & ~XT_STATISTIC_MASK)
62 return false; 61 return -EINVAL;
63 62
64 info->master = kzalloc(sizeof(*info->master), GFP_KERNEL); 63 info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
65 if (info->master == NULL) { 64 if (info->master == NULL)
66 printk(KERN_ERR KBUILD_MODNAME ": Out of memory\n"); 65 return -ENOMEM;
67 return false; 66 atomic_set(&info->master->count, info->u.nth.count);
68 }
69 info->master->count = info->u.nth.count;
70 67
71 return true; 68 return 0;
72} 69}
73 70
74static void statistic_mt_destroy(const struct xt_mtdtor_param *par) 71static void statistic_mt_destroy(const struct xt_mtdtor_param *par)
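
The statistic match trades the global nth_lock for an atomic compare-and-swap loop, and pads the private counter to a cacheline (____cacheline_aligned_in_smp) so counters of different rules do not false-share. The loop guarantees that exactly one CPU observes the wrap to 0 in each cycle of every+1 increments. The counter in isolation:

#include <linux/atomic.h>
#include <linux/types.h>

static bool demo_nth_hit(atomic_t *count, u32 every)
{
	int oval, nval;

	do {
		oval = atomic_read(count);
		nval = (oval == every) ? 0 : oval + 1;
	} while (atomic_cmpxchg(count, oval, nval) != oval);
	/* cmpxchg returns the old value; a mismatch means another
	 * CPU raced in between, so recompute and retry */

	return nval == 0;	/* true once per every+1 packets */
}
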
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 96801ffd8af8..d3c48b14ab94 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -23,16 +23,14 @@ MODULE_ALIAS("ipt_string");
23MODULE_ALIAS("ip6t_string"); 23MODULE_ALIAS("ip6t_string");
24 24
25static bool 25static bool
26string_mt(const struct sk_buff *skb, const struct xt_match_param *par) 26string_mt(const struct sk_buff *skb, struct xt_action_param *par)
27{ 27{
28 const struct xt_string_info *conf = par->matchinfo; 28 const struct xt_string_info *conf = par->matchinfo;
29 struct ts_state state; 29 struct ts_state state;
30 int invert; 30 bool invert;
31 31
32 memset(&state, 0, sizeof(struct ts_state)); 32 memset(&state, 0, sizeof(struct ts_state));
33 33 invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT;
34 invert = (par->match->revision == 0 ? conf->u.v0.invert :
35 conf->u.v1.flags & XT_STRING_FLAG_INVERT);
36 34
37 return (skb_find_text((struct sk_buff *)skb, conf->from_offset, 35 return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
38 conf->to_offset, conf->config, &state) 36 conf->to_offset, conf->config, &state)
@@ -41,7 +39,7 @@ string_mt(const struct sk_buff *skb, const struct xt_match_param *par)
41 39
42#define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m)) 40#define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
43 41
44static bool string_mt_check(const struct xt_mtchk_param *par) 42static int string_mt_check(const struct xt_mtchk_param *par)
45{ 43{
46 struct xt_string_info *conf = par->matchinfo; 44 struct xt_string_info *conf = par->matchinfo;
47 struct ts_config *ts_conf; 45 struct ts_config *ts_conf;
@@ -49,26 +47,23 @@ static bool string_mt_check(const struct xt_mtchk_param *par)
49 47
50 /* Damn, can't handle this case properly with iptables... */ 48 /* Damn, can't handle this case properly with iptables... */
51 if (conf->from_offset > conf->to_offset) 49 if (conf->from_offset > conf->to_offset)
52 return false; 50 return -EINVAL;
53 if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0') 51 if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0')
54 return false; 52 return -EINVAL;
55 if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) 53 if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
56 return false; 54 return -EINVAL;
57 if (par->match->revision == 1) { 55 if (conf->u.v1.flags &
58 if (conf->u.v1.flags & 56 ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT))
59 ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT)) 57 return -EINVAL;
60 return false; 58 if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE)
61 if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE) 59 flags |= TS_IGNORECASE;
62 flags |= TS_IGNORECASE;
63 }
64 ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, 60 ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
65 GFP_KERNEL, flags); 61 GFP_KERNEL, flags);
66 if (IS_ERR(ts_conf)) 62 if (IS_ERR(ts_conf))
67 return false; 63 return PTR_ERR(ts_conf);
68 64
69 conf->config = ts_conf; 65 conf->config = ts_conf;
70 66 return 0;
71 return true;
72} 67}
73 68
74static void string_mt_destroy(const struct xt_mtdtor_param *par) 69static void string_mt_destroy(const struct xt_mtdtor_param *par)
@@ -76,38 +71,25 @@ static void string_mt_destroy(const struct xt_mtdtor_param *par)
76 textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config); 71 textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config);
77} 72}
78 73
79static struct xt_match xt_string_mt_reg[] __read_mostly = { 74static struct xt_match xt_string_mt_reg __read_mostly = {
80 { 75 .name = "string",
81 .name = "string", 76 .revision = 1,
82 .revision = 0, 77 .family = NFPROTO_UNSPEC,
83 .family = NFPROTO_UNSPEC, 78 .checkentry = string_mt_check,
84 .checkentry = string_mt_check, 79 .match = string_mt,
85 .match = string_mt, 80 .destroy = string_mt_destroy,
86 .destroy = string_mt_destroy, 81 .matchsize = sizeof(struct xt_string_info),
87 .matchsize = sizeof(struct xt_string_info), 82 .me = THIS_MODULE,
88 .me = THIS_MODULE
89 },
90 {
91 .name = "string",
92 .revision = 1,
93 .family = NFPROTO_UNSPEC,
94 .checkentry = string_mt_check,
95 .match = string_mt,
96 .destroy = string_mt_destroy,
97 .matchsize = sizeof(struct xt_string_info),
98 .me = THIS_MODULE
99 },
100}; 83};
101 84
102static int __init string_mt_init(void) 85static int __init string_mt_init(void)
103{ 86{
104 return xt_register_matches(xt_string_mt_reg, 87 return xt_register_match(&xt_string_mt_reg);
105 ARRAY_SIZE(xt_string_mt_reg));
106} 88}
107 89
108static void __exit string_mt_exit(void) 90static void __exit string_mt_exit(void)
109{ 91{
110 xt_unregister_matches(xt_string_mt_reg, ARRAY_SIZE(xt_string_mt_reg)); 92 xt_unregister_match(&xt_string_mt_reg);
111} 93}
112 94
113module_init(string_mt_init); 95module_init(string_mt_init);
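
Dropping revision 0 removes the last reader of u.v0.invert: the kernel now accepts only revision-1 string rules, where inversion is one bit in a flags field, and userspace still emitting revision 0 will have its rules refused at insert time. The two layouts as the union accessors imply them (field names assumed):

#include <linux/types.h>

struct demo_string_info {
	/* pattern, offsets, algorithm name ... */
	union {
		struct {
			__u8 invert;	/* revision 0: dedicated byte, now gone */
		} v0;
		struct {
			__u8 flags;	/* revision 1: INVERT is one flag bit */
		} v1;
	} u;
};
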
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 4809b34b10f8..c53d4d18eadf 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tcpmss");
25MODULE_ALIAS("ip6t_tcpmss"); 25MODULE_ALIAS("ip6t_tcpmss");
26 26
27static bool 27static bool
28tcpmss_mt(const struct sk_buff *skb, const struct xt_match_param *par) 28tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par)
29{ 29{
30 const struct xt_tcpmss_match_info *info = par->matchinfo; 30 const struct xt_tcpmss_match_info *info = par->matchinfo;
31 const struct tcphdr *th; 31 const struct tcphdr *th;
@@ -73,7 +73,7 @@ out:
73 return info->invert; 73 return info->invert;
74 74
75dropit: 75dropit:
76 *par->hotdrop = true; 76 par->hotdrop = true;
77 return false; 77 return false;
78} 78}
79 79
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 1ebdc4934eed..c14d4645daa3 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -1,3 +1,4 @@
1#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
1#include <linux/types.h> 2#include <linux/types.h>
2#include <linux/module.h> 3#include <linux/module.h>
3#include <net/ip.h> 4#include <net/ip.h>
@@ -19,13 +20,6 @@ MODULE_ALIAS("ipt_tcp");
19MODULE_ALIAS("ip6t_udp"); 20MODULE_ALIAS("ip6t_udp");
20MODULE_ALIAS("ip6t_tcp"); 21MODULE_ALIAS("ip6t_tcp");
21 22
22#ifdef DEBUG_IP_FIREWALL_USER
23#define duprintf(format, args...) printk(format , ## args)
24#else
25#define duprintf(format, args...)
26#endif
27
28
29/* Returns 1 if the port is matched by the range, 0 otherwise */ 23/* Returns 1 if the port is matched by the range, 0 otherwise */
30static inline bool 24static inline bool
31port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert) 25port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert)
@@ -46,7 +40,7 @@ tcp_find_option(u_int8_t option,
46 u_int8_t _opt[60 - sizeof(struct tcphdr)]; 40 u_int8_t _opt[60 - sizeof(struct tcphdr)];
47 unsigned int i; 41 unsigned int i;
48 42
49 duprintf("tcp_match: finding option\n"); 43 pr_debug("finding option\n");
50 44
51 if (!optlen) 45 if (!optlen)
52 return invert; 46 return invert;
@@ -68,7 +62,7 @@ tcp_find_option(u_int8_t option,
68 return invert; 62 return invert;
69} 63}
70 64
71static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) 65static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
72{ 66{
73 const struct tcphdr *th; 67 const struct tcphdr *th;
74 struct tcphdr _tcph; 68 struct tcphdr _tcph;
@@ -82,8 +76,8 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
82 flag overwrite to pass the direction checks. 76 flag overwrite to pass the direction checks.
83 */ 77 */
84 if (par->fragoff == 1) { 78 if (par->fragoff == 1) {
85 duprintf("Dropping evil TCP offset=1 frag.\n"); 79 pr_debug("Dropping evil TCP offset=1 frag.\n");
86 *par->hotdrop = true; 80 par->hotdrop = true;
87 } 81 }
88 /* Must not be a fragment. */ 82 /* Must not be a fragment. */
89 return false; 83 return false;
@@ -95,8 +89,8 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
95 if (th == NULL) { 89 if (th == NULL) {
96 /* We've been asked to examine this packet, and we 90 /* We've been asked to examine this packet, and we
97 can't. Hence, no choice but to drop. */ 91 can't. Hence, no choice but to drop. */
98 duprintf("Dropping evil TCP offset=0 tinygram.\n"); 92 pr_debug("Dropping evil TCP offset=0 tinygram.\n");
99 *par->hotdrop = true; 93 par->hotdrop = true;
100 return false; 94 return false;
101 } 95 }
102 96
@@ -114,27 +108,27 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
114 return false; 108 return false;
115 if (tcpinfo->option) { 109 if (tcpinfo->option) {
116 if (th->doff * 4 < sizeof(_tcph)) { 110 if (th->doff * 4 < sizeof(_tcph)) {
117 *par->hotdrop = true; 111 par->hotdrop = true;
118 return false; 112 return false;
119 } 113 }
120 if (!tcp_find_option(tcpinfo->option, skb, par->thoff, 114 if (!tcp_find_option(tcpinfo->option, skb, par->thoff,
121 th->doff*4 - sizeof(_tcph), 115 th->doff*4 - sizeof(_tcph),
122 tcpinfo->invflags & XT_TCP_INV_OPTION, 116 tcpinfo->invflags & XT_TCP_INV_OPTION,
123 par->hotdrop)) 117 &par->hotdrop))
124 return false; 118 return false;
125 } 119 }
126 return true; 120 return true;
127} 121}
128 122
129static bool tcp_mt_check(const struct xt_mtchk_param *par) 123static int tcp_mt_check(const struct xt_mtchk_param *par)
130{ 124{
131 const struct xt_tcp *tcpinfo = par->matchinfo; 125 const struct xt_tcp *tcpinfo = par->matchinfo;
132 126
133 /* Must specify no unknown invflags */ 127 /* Must specify no unknown invflags */
134 return !(tcpinfo->invflags & ~XT_TCP_INV_MASK); 128 return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0;
135} 129}
136 130
137static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par) 131static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par)
138{ 132{
139 const struct udphdr *uh; 133 const struct udphdr *uh;
140 struct udphdr _udph; 134 struct udphdr _udph;
@@ -148,8 +142,8 @@ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
148 if (uh == NULL) { 142 if (uh == NULL) {
149 /* We've been asked to examine this packet, and we 143 /* We've been asked to examine this packet, and we
150 can't. Hence, no choice but to drop. */ 144 can't. Hence, no choice but to drop. */
151 duprintf("Dropping evil UDP tinygram.\n"); 145 pr_debug("Dropping evil UDP tinygram.\n");
152 *par->hotdrop = true; 146 par->hotdrop = true;
153 return false; 147 return false;
154 } 148 }
155 149
@@ -161,12 +155,12 @@ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
161 !!(udpinfo->invflags & XT_UDP_INV_DSTPT)); 155 !!(udpinfo->invflags & XT_UDP_INV_DSTPT));
162} 156}
163 157
164static bool udp_mt_check(const struct xt_mtchk_param *par) 158static int udp_mt_check(const struct xt_mtchk_param *par)
165{ 159{
166 const struct xt_udp *udpinfo = par->matchinfo; 160 const struct xt_udp *udpinfo = par->matchinfo;
167 161
168 /* Must specify no unknown invflags */ 162 /* Must specify no unknown invflags */
169 return !(udpinfo->invflags & ~XT_UDP_INV_MASK); 163 return (udpinfo->invflags & ~XT_UDP_INV_MASK) ? -EINVAL : 0;
170} 164}
171 165
172static struct xt_match tcpudp_mt_reg[] __read_mostly = { 166static struct xt_match tcpudp_mt_reg[] __read_mostly = {
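
The same hotdrop rewrite threads through tcp_mt()/udp_mt() as everywhere else in this series: with the parameter block no longer const, hotdrop shrinks from a bool * that had to be dereferenced to a plain member written directly, and helpers such as tcp_find_option() now take &par->hotdrop. Before and after in one hypothetical match:

#include <linux/skbuff.h>
#include <linux/netfilter/x_tables.h>

static bool demo_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	if (skb->len < 4) {		/* stand-in for a malformed header */
		par->hotdrop = true;	/* was: *par->hotdrop = true; */
		return false;
	}
	return true;
}
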
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 93acaa59d108..c48975ff8ea2 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -1,7 +1,6 @@
1/* 1/*
2 * xt_time 2 * xt_time
3 * Copyright © CC Computer Consultants GmbH, 2007 3 * Copyright © CC Computer Consultants GmbH, 2007
4 * Contact: <jengelh@computergmbh.de>
5 * 4 *
6 * based on ipt_time by Fabrice MARIE <fabrice@netfilter.org> 5 * based on ipt_time by Fabrice MARIE <fabrice@netfilter.org>
7 * This is a module which is used for time matching 6 * This is a module which is used for time matching
@@ -149,11 +148,10 @@ static void localtime_3(struct xtm *r, time_t time)
149 } 148 }
150 149
151 r->month = i + 1; 150 r->month = i + 1;
152 return;
153} 151}
154 152
155static bool 153static bool
156time_mt(const struct sk_buff *skb, const struct xt_match_param *par) 154time_mt(const struct sk_buff *skb, struct xt_action_param *par)
157{ 155{
158 const struct xt_time_info *info = par->matchinfo; 156 const struct xt_time_info *info = par->matchinfo;
159 unsigned int packet_time; 157 unsigned int packet_time;
@@ -218,18 +216,18 @@ time_mt(const struct sk_buff *skb, const struct xt_match_param *par)
218 return true; 216 return true;
219} 217}
220 218
221static bool time_mt_check(const struct xt_mtchk_param *par) 219static int time_mt_check(const struct xt_mtchk_param *par)
222{ 220{
223 const struct xt_time_info *info = par->matchinfo; 221 const struct xt_time_info *info = par->matchinfo;
224 222
225 if (info->daytime_start > XT_TIME_MAX_DAYTIME || 223 if (info->daytime_start > XT_TIME_MAX_DAYTIME ||
226 info->daytime_stop > XT_TIME_MAX_DAYTIME) { 224 info->daytime_stop > XT_TIME_MAX_DAYTIME) {
227 printk(KERN_WARNING "xt_time: invalid argument - start or " 225 pr_info("invalid argument - start or "
228 "stop time greater than 23:59:59\n"); 226 "stop time greater than 23:59:59\n");
229 return false; 227 return -EDOM;
230 } 228 }
231 229
232 return true; 230 return 0;
233} 231}
234 232
235static struct xt_match xt_time_mt_reg __read_mostly = { 233static struct xt_match xt_time_mt_reg __read_mostly = {
@@ -264,7 +262,7 @@ static void __exit time_mt_exit(void)
264 262
265module_init(time_mt_init); 263module_init(time_mt_init);
266module_exit(time_mt_exit); 264module_exit(time_mt_exit);
267MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); 265MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
268MODULE_DESCRIPTION("Xtables: time-based matching"); 266MODULE_DESCRIPTION("Xtables: time-based matching");
269MODULE_LICENSE("GPL"); 267MODULE_LICENSE("GPL");
270MODULE_ALIAS("ipt_time"); 268MODULE_ALIAS("ipt_time");
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index 24a527624500..a95b50342dbb 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -3,7 +3,6 @@
  *
  * Original author: Don Cohen <don@isis.cs3-inc.com>
  * (C) CC Computer Consultants GmbH, 2007
- * Contact: <jengelh@computergmbh.de>
  */
 
 #include <linux/module.h>
@@ -87,7 +86,7 @@ static bool u32_match_it(const struct xt_u32 *data,
 	return true;
 }
 
-static bool u32_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_u32 *data = par->matchinfo;
 	bool ret;
@@ -117,7 +116,7 @@ static void __exit u32_mt_exit(void)
 
 module_init(u32_mt_init);
 module_exit(u32_mt_exit);
-MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
 MODULE_DESCRIPTION("Xtables: arbitrary byte matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_u32");
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index 07ae7fd82be1..1c1c093cf279 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -130,7 +130,6 @@ static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
 					     int src, const char *dev,
 					     __be32 addr, __be32 mask)
 {
-	return;
 }
 #endif
 
@@ -203,7 +202,6 @@ static inline void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
 					     const struct in6_addr *addr,
 					     const struct in6_addr *mask)
 {
-	return;
 }
 #endif
 #endif /* IPV6 */
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index a3d64aabe2f7..e2b0a680dd56 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -670,7 +670,6 @@ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
 
 unlhsh_condremove_failure:
 	spin_unlock(&netlbl_unlhsh_lock);
-	return;
 }
 
 /**
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 795424396aff..2cbf380377d5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -545,7 +545,7 @@ static int netlink_autobind(struct socket *sock)
 	struct hlist_head *head;
 	struct sock *osk;
 	struct hlist_node *node;
-	s32 pid = current->tgid;
+	s32 pid = task_tgid_vnr(current);
 	int err;
 	static s32 rover = -4097;
 
@@ -978,6 +978,8 @@ struct netlink_broadcast_data {
 	int delivered;
 	gfp_t allocation;
 	struct sk_buff *skb, *skb2;
+	int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
+	void *tx_data;
 };
 
 static inline int do_one_broadcast(struct sock *sk,
@@ -1020,6 +1022,9 @@ static inline int do_one_broadcast(struct sock *sk,
 		p->failure = 1;
 		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
 			p->delivery_failure = 1;
+	} else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+		kfree_skb(p->skb2);
+		p->skb2 = NULL;
 	} else if (sk_filter(sk, p->skb2)) {
 		kfree_skb(p->skb2);
 		p->skb2 = NULL;
@@ -1038,8 +1043,10 @@ out:
 	return 0;
 }
 
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
-		      u32 group, gfp_t allocation)
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
+	u32 group, gfp_t allocation,
+	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
+	void *filter_data)
 {
 	struct net *net = sock_net(ssk);
 	struct netlink_broadcast_data info;
@@ -1059,6 +1066,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
 	info.allocation = allocation;
 	info.skb = skb;
 	info.skb2 = NULL;
+	info.tx_filter = filter;
+	info.tx_data = filter_data;
 
 	/* While we sleep in clone, do not allow to change socket list */
 
@@ -1067,14 +1076,15 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
 	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
 		do_one_broadcast(sk, &info);
 
-	kfree_skb(skb);
+	consume_skb(skb);
 
 	netlink_unlock_table();
 
-	kfree_skb(info.skb2);
-
-	if (info.delivery_failure)
+	if (info.delivery_failure) {
+		kfree_skb(info.skb2);
 		return -ENOBUFS;
+	} else
+		consume_skb(info.skb2);
 
 	if (info.delivered) {
 		if (info.congested && (allocation & __GFP_WAIT))
@@ -1083,6 +1093,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
 	}
 	return -ESRCH;
 }
+EXPORT_SYMBOL(netlink_broadcast_filtered);
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+		      u32 group, gfp_t allocation)
+{
+	return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
+		NULL, NULL);
+}
 EXPORT_SYMBOL(netlink_broadcast);
 
 struct netlink_set_err_data {
@@ -1306,19 +1324,23 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (msg->msg_flags&MSG_OOB)
 		return -EOPNOTSUPP;
 
-	if (NULL == siocb->scm)
+	if (NULL == siocb->scm) {
 		siocb->scm = &scm;
+		memset(&scm, 0, sizeof(scm));
+	}
 	err = scm_send(sock, msg, siocb->scm);
 	if (err < 0)
 		return err;
 
 	if (msg->msg_namelen) {
+		err = -EINVAL;
 		if (addr->nl_family != AF_NETLINK)
-			return -EINVAL;
+			goto out;
 		dst_pid = addr->nl_pid;
 		dst_group = ffs(addr->nl_groups);
+		err = -EPERM;
 		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
-			return -EPERM;
+			goto out;
 	} else {
 		dst_pid = nlk->dst_pid;
 		dst_group = nlk->dst_group;
@@ -1370,6 +1392,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
 
 out:
+	scm_destroy(siocb->scm);
 	return err;
 }
 
@@ -1383,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	struct netlink_sock *nlk = nlk_sk(sk);
 	int noblock = flags&MSG_DONTWAIT;
 	size_t copied;
-	struct sk_buff *skb, *frag __maybe_unused = NULL;
+	struct sk_buff *skb;
 	int err;
 
 	if (flags&MSG_OOB)
@@ -1418,7 +1441,21 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 			kfree_skb(skb);
 			skb = compskb;
 		} else {
-			frag = skb_shinfo(skb)->frag_list;
+			/*
+			 * Before setting frag_list to NULL, we must get a
+			 * private copy of skb if shared (because of MSG_PEEK)
+			 */
+			if (skb_shared(skb)) {
+				struct sk_buff *nskb;
+
+				nskb = pskb_copy(skb, GFP_KERNEL);
+				kfree_skb(skb);
+				skb = nskb;
+				err = -ENOMEM;
+				if (!skb)
+					goto out;
+			}
+			kfree_skb(skb_shinfo(skb)->frag_list);
 			skb_shinfo(skb)->frag_list = NULL;
 		}
 	}
@@ -1455,10 +1492,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
 
-#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
-	skb_shinfo(skb)->frag_list = frag;
-#endif
-
 	skb_free_datagram(sk, skb);
 
 	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
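
For reference, a sketch of how a caller might use the netlink_broadcast_filtered() hook introduced above. Only the exported function comes from this patch; the filter callback, its policy, and the "data" cookie below are invented for illustration. A nonzero return from the filter makes do_one_broadcast() skip that subscriber.

    #include <linux/netlink.h>
    #include <net/sock.h>
    #include <net/net_namespace.h>

    /* hypothetical policy: deliver only to sockets in the given namespace */
    static int example_filter(struct sock *dsk, struct sk_buff *skb, void *data)
    {
        return sock_net(dsk) != (struct net *)data;    /* nonzero = skip */
    }

    static int example_send(struct sock *ssk, struct sk_buff *skb, struct net *net)
    {
        return netlink_broadcast_filtered(ssk, skb, 0, 1, GFP_KERNEL,
                                          example_filter, net);
    }
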
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 06438fa2b1e5..26ed3e8587c2 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -21,15 +21,17 @@
 
 static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
 
-static inline void genl_lock(void)
+void genl_lock(void)
 {
 	mutex_lock(&genl_mutex);
 }
+EXPORT_SYMBOL(genl_lock);
 
-static inline void genl_unlock(void)
+void genl_unlock(void)
 {
 	mutex_unlock(&genl_mutex);
 }
+EXPORT_SYMBOL(genl_unlock);
 
 #define GENL_FAM_TAB_SIZE	16
 #define GENL_FAM_TAB_MASK	(GENL_FAM_TAB_SIZE - 1)
@@ -301,6 +303,7 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops)
 errout:
 	return err;
 }
+EXPORT_SYMBOL(genl_register_ops);
 
 /**
  * genl_unregister_ops - unregister generic netlink operations
@@ -335,6 +338,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops)
 
 	return -ENOENT;
 }
+EXPORT_SYMBOL(genl_unregister_ops);
 
 /**
  * genl_register_family - register a generic netlink family
@@ -403,6 +407,7 @@ errout_locked:
 errout:
 	return err;
 }
+EXPORT_SYMBOL(genl_register_family);
 
 /**
  * genl_register_family_with_ops - register a generic netlink family
@@ -483,6 +488,7 @@ int genl_unregister_family(struct genl_family *family)
 
 	return -ENOENT;
 }
+EXPORT_SYMBOL(genl_unregister_family);
 
 static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
@@ -871,11 +877,7 @@ static int __init genl_init(void)
 	for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
 		INIT_LIST_HEAD(&family_ht[i]);
 
-	err = genl_register_family(&genl_ctrl);
-	if (err < 0)
-		goto problem;
-
-	err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops);
+	err = genl_register_family_with_ops(&genl_ctrl, &genl_ctrl_ops, 1);
 	if (err < 0)
 		goto problem;
 
@@ -897,11 +899,6 @@ problem:
 
 subsys_initcall(genl_init);
 
-EXPORT_SYMBOL(genl_register_ops);
-EXPORT_SYMBOL(genl_unregister_ops);
-EXPORT_SYMBOL(genl_register_family);
-EXPORT_SYMBOL(genl_unregister_family);
-
 static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group,
 			 gfp_t flags)
 {
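
For reference, a sketch of the one-call registration genl_init() now uses; the family and op below are hypothetical placeholders, not from this patch.

    #include <linux/kernel.h>
    #include <net/genetlink.h>

    static int example_doit(struct sk_buff *skb, struct genl_info *info)
    {
        return 0;    /* no-op handler, illustration only */
    }

    static struct genl_ops example_ops[] = {
        { .cmd = 1, .doit = example_doit, },
    };

    static struct genl_family example_family = {
        .id      = GENL_ID_GENERATE,
        .name    = "example",
        .version = 1,
        .maxattr = 0,
    };

    static int __init example_genl_init(void)
    {
        /* replaces genl_register_family() followed by genl_register_ops() */
        return genl_register_family_with_ops(&example_family, example_ops,
                                             ARRAY_SIZE(example_ops));
    }
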
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index fa07f044b599..06cb02796a0e 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -739,7 +739,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 		DEFINE_WAIT(wait);
 
 		for (;;) {
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
@@ -752,7 +752,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 			err = -ERESTARTSYS;
 			break;
 		}
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 		if (err)
 			goto out_release;
 	}
@@ -798,7 +798,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 	 * hooked into the SABM we saved
 	 */
 	for (;;) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
 			break;
@@ -816,7 +816,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 		err = -ERESTARTSYS;
 		break;
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (err)
 		goto out_release;
 
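
The sk_sleep() conversions here, and in the phonet, rds and rose hunks further down, all take the same shape: the accessor replaces direct sk->sk_sleep (or sk->sk_socket->wait) dereferences so the wait-queue's location can change without touching every protocol. A condensed sketch of the wait loop under the accessor; the condition and function name are illustrative only:

    #include <linux/sched.h>
    #include <linux/wait.h>
    #include <net/sock.h>
    #include <net/tcp_states.h>

    static int example_wait_established(struct sock *sk)
    {
        DEFINE_WAIT(wait);
        int err = 0;

        for (;;) {
            /* sk_sleep(sk) rather than sk->sk_sleep */
            prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
            if (sk->sk_state == TCP_ESTABLISHED)
                break;
            if (signal_pending(current)) {
                err = -ERESTARTSYS;
                break;
            }
            release_sock(sk);
            schedule();
            lock_sock(sk);
        }
        finish_wait(sk_sleep(sk), &wait);
        return err;
    }
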
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 243946d4809d..9a17f28b1253 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -82,6 +82,8 @@
 #include <linux/mutex.h>
 #include <linux/if_vlan.h>
 #include <linux/virtio_net.h>
+#include <linux/errqueue.h>
+#include <linux/net_tstamp.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -201,6 +203,7 @@ struct packet_sock {
 	unsigned int		tp_hdrlen;
 	unsigned int		tp_reserve;
 	unsigned int		tp_loss:1;
+	unsigned int		tp_tstamp;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
 };
 
@@ -315,6 +318,8 @@ static inline struct packet_sock *pkt_sk(struct sock *sk)
 
 static void packet_sock_destruct(struct sock *sk)
 {
+	skb_queue_purge(&sk->sk_error_queue);
+
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 
@@ -483,6 +488,9 @@ retry:
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
+	err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+	if (err < 0)
+		goto out_unlock;
 
 	dev_queue_xmit(skb);
 	rcu_read_unlock();
@@ -650,6 +658,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct sk_buff *copy_skb = NULL;
 	struct timeval tv;
 	struct timespec ts;
+	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -731,7 +740,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h1->tp_snaplen = snaplen;
 		h.h1->tp_mac = macoff;
 		h.h1->tp_net = netoff;
-		if (skb->tstamp.tv64)
+		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
+				&& shhwtstamps->syststamp.tv64)
+			tv = ktime_to_timeval(shhwtstamps->syststamp);
+		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
+				&& shhwtstamps->hwtstamp.tv64)
+			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
+		else if (skb->tstamp.tv64)
 			tv = ktime_to_timeval(skb->tstamp);
 		else
 			do_gettimeofday(&tv);
@@ -744,7 +759,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h2->tp_snaplen = snaplen;
 		h.h2->tp_mac = macoff;
 		h.h2->tp_net = netoff;
-		if (skb->tstamp.tv64)
+		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
+				&& shhwtstamps->syststamp.tv64)
+			ts = ktime_to_timespec(shhwtstamps->syststamp);
+		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
+				&& shhwtstamps->hwtstamp.tv64)
+			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
+		else if (skb->tstamp.tv64)
 			ts = ktime_to_timespec(skb->tstamp);
 		else
 			getnstimeofday(&ts);
@@ -1188,6 +1209,9 @@ static int packet_snd(struct socket *sock,
 	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
 	if (err)
 		goto out_free;
+	err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+	if (err < 0)
+		goto out_free;
 
 	skb->protocol = proto;
 	skb->dev = dev;
@@ -1487,6 +1511,51 @@ out:
 	return err;
 }
 
+static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb, *skb2;
+	int copied, err;
+
+	err = -EAGAIN;
+	skb = skb_dequeue(&sk->sk_error_queue);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free_skb;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	serr = SKB_EXT_ERR(skb);
+	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
+		 sizeof(serr->ee), &serr->ee);
+
+	msg->msg_flags |= MSG_ERRQUEUE;
+	err = copied;
+
+	/* Reset and regenerate socket error */
+	spin_lock_bh(&sk->sk_error_queue.lock);
+	sk->sk_err = 0;
+	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
+		spin_unlock_bh(&sk->sk_error_queue.lock);
+		sk->sk_error_report(sk);
+	} else
+		spin_unlock_bh(&sk->sk_error_queue.lock);
+
+out_free_skb:
+	kfree_skb(skb);
+out:
+	return err;
+}
+
 /*
  *	Pull a packet from our receive queue and hand it to the user.
  *	If necessary we block.
@@ -1502,7 +1571,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	int vnet_hdr_len = 0;
 
 	err = -EINVAL;
-	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
+	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
 		goto out;
 
 #if 0
@@ -1511,6 +1580,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return -ENODEV;
 #endif
 
+	if (flags & MSG_ERRQUEUE) {
+		err = packet_recv_error(sk, msg, len);
+		goto out;
+	}
+
 	/*
 	 *	Call the generic datagram receiver. This handles all sorts
 	 *	of horrible races and re-entrancy so we can forget about it
@@ -1692,9 +1766,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 		if (i->alen != dev->addr_len)
 			return -EINVAL;
 		if (what > 0)
-			return dev_mc_add(dev, i->addr, i->alen, 0);
+			return dev_mc_add(dev, i->addr);
 		else
-			return dev_mc_delete(dev, i->addr, i->alen, 0);
+			return dev_mc_del(dev, i->addr);
 		break;
 	case PACKET_MR_PROMISC:
 		return dev_set_promiscuity(dev, what);
@@ -1706,9 +1780,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 		if (i->alen != dev->addr_len)
 			return -EINVAL;
 		if (what > 0)
-			return dev_unicast_add(dev, i->addr);
+			return dev_uc_add(dev, i->addr);
 		else
-			return dev_unicast_delete(dev, i->addr);
+			return dev_uc_del(dev, i->addr);
 		break;
 	default:
 		break;
@@ -1968,6 +2042,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		po->has_vnet_hdr = !!val;
 		return 0;
 	}
+	case PACKET_TIMESTAMP:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		po->tp_tstamp = val;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -2060,6 +2146,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		val = po->tp_loss;
 		data = &val;
 		break;
+	case PACKET_TIMESTAMP:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = po->tp_tstamp;
+		data = &val;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e2a95762abd3..b2a3ae6cad78 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -626,6 +626,7 @@ static void pep_sock_close(struct sock *sk, long timeout)
 	struct pep_sock *pn = pep_sk(sk);
 	int ifindex = 0;
 
+	sock_hold(sk); /* keep a reference after sk_common_release() */
 	sk_common_release(sk);
 
 	lock_sock(sk);
@@ -644,6 +645,7 @@ static void pep_sock_close(struct sock *sk, long timeout)
 
 	if (ifindex)
 		gprs_detach(sk);
+	sock_put(sk);
 }
 
 static int pep_wait_connreq(struct sock *sk, int noblock)
@@ -664,12 +666,12 @@ static int pep_wait_connreq(struct sock *sk, int noblock)
 		if (signal_pending(tsk))
 			return sock_intr_errno(timeo);
 
-		prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait,
+		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
 					  TASK_INTERRUPTIBLE);
 		release_sock(sk);
 		timeo = schedule_timeout(timeo);
 		lock_sock(sk);
-		finish_wait(&sk->sk_socket->wait, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 	}
 
 	return 0;
@@ -696,6 +698,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
 		newsk = NULL;
 		goto out;
 	}
+	kfree_skb(oskb);
 
 	sock_hold(sk);
 	pep_sk(newsk)->listener = sk;
@@ -910,10 +913,10 @@ disabled:
 			goto out;
 		}
 
-		prepare_to_wait(&sk->sk_socket->wait, &wait,
+		prepare_to_wait(sk_sleep(sk), &wait,
 				TASK_INTERRUPTIBLE);
 		done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits));
-		finish_wait(&sk->sk_socket->wait, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 
 		if (sk->sk_state != TCP_ESTABLISHED)
 			goto disabled;
@@ -1043,12 +1046,12 @@ static void pep_sock_unhash(struct sock *sk)
 	lock_sock(sk);
 	if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) {
 		skparent = pn->listener;
-		sk_del_node_init(sk);
 		release_sock(sk);
 
-		sk = skparent;
 		pn = pep_sk(skparent);
-		lock_sock(sk);
+		lock_sock(skparent);
+		sk_del_node_init(sk);
+		sk = skparent;
 	}
 	/* Unhash a listening sock only when it is closed
 	 * and all of its active connected pipes are closed. */
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 9b4ced6e0968..b18e48fae975 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -46,9 +46,16 @@ struct phonet_net {
 
 int phonet_net_id __read_mostly;
 
+static struct phonet_net *phonet_pernet(struct net *net)
+{
+	BUG_ON(!net);
+
+	return net_generic(net, phonet_net_id);
+}
+
 struct phonet_device_list *phonet_device_list(struct net *net)
 {
-	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(net);
 	return &pnn->pndevs;
 }
 
@@ -155,6 +162,14 @@ int phonet_address_add(struct net_device *dev, u8 addr)
 	return err;
 }
 
+static void phonet_device_rcu_free(struct rcu_head *head)
+{
+	struct phonet_device *pnd;
+
+	pnd = container_of(head, struct phonet_device, rcu);
+	kfree(pnd);
+}
+
 int phonet_address_del(struct net_device *dev, u8 addr)
 {
 	struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
@@ -172,10 +187,9 @@ int phonet_address_del(struct net_device *dev, u8 addr)
 		pnd = NULL;
 	mutex_unlock(&pndevs->lock);
 
-	if (pnd) {
-		synchronize_rcu();
-		kfree(pnd);
-	}
+	if (pnd)
+		call_rcu(&pnd->rcu, phonet_device_rcu_free);
+
 	return err;
 }
 
@@ -261,7 +275,7 @@ static int phonet_device_autoconf(struct net_device *dev)
 
 static void phonet_route_autodel(struct net_device *dev)
 {
-	struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(dev_net(dev));
 	unsigned i;
 	DECLARE_BITMAP(deleted, 64);
 
@@ -313,7 +327,7 @@ static struct notifier_block phonet_device_notifier = {
 /* Per-namespace Phonet devices handling */
 static int __net_init phonet_init_net(struct net *net)
 {
-	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(net);
 
 	if (!proc_net_fops_create(net, "phonet", 0, &pn_sock_seq_fops))
 		return -ENOMEM;
@@ -326,7 +340,7 @@ static int __net_init phonet_init_net(struct net *net)
 
 static void __net_exit phonet_exit_net(struct net *net)
 {
-	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(net);
 	struct net_device *dev;
 	unsigned i;
 
@@ -376,7 +390,7 @@ void phonet_device_exit(void)
 
 int phonet_route_add(struct net_device *dev, u8 daddr)
 {
-	struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(dev_net(dev));
 	struct phonet_routes *routes = &pnn->routes;
 	int err = -EEXIST;
 
@@ -393,7 +407,7 @@ int phonet_route_add(struct net_device *dev, u8 daddr)
 
 int phonet_route_del(struct net_device *dev, u8 daddr)
 {
-	struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(dev_net(dev));
 	struct phonet_routes *routes = &pnn->routes;
 
 	daddr = daddr >> 2;
@@ -413,7 +427,7 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
 
 struct net_device *phonet_route_get(struct net *net, u8 daddr)
 {
-	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(net);
 	struct phonet_routes *routes = &pnn->routes;
 	struct net_device *dev;
 
@@ -428,7 +442,7 @@ struct net_device *phonet_route_get(struct net *net, u8 daddr)
 
 struct net_device *phonet_route_output(struct net *net, u8 daddr)
 {
-	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(net);
 	struct phonet_routes *routes = &pnn->routes;
 	struct net_device *dev;
 
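
The phonet_address_del() change above is an instance of a general pattern: embed an rcu_head in the object and free it from a callback via call_rcu(), instead of blocking the caller in synchronize_rcu(). A sketch, with an illustrative struct that is not from this patch:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct example {
        int value;
        struct rcu_head rcu;    /* reclamation handle embedded in object */
    };

    static void example_rcu_free(struct rcu_head *head)
    {
        kfree(container_of(head, struct example, rcu));
    }

    static void example_del(struct example *e)
    {
        /* unlink e from all RCU-visible structures first, then: */
        call_rcu(&e->rcu, example_rcu_free);    /* no blocking grace period */
    }
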
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index c785bfd0744f..6e9848bf0370 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -265,7 +265,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
 	struct pep_sock *pn = pep_sk(sk);
 	unsigned int mask = 0;
 
-	poll_wait(file, &sock->wait, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
 	switch (sk->sk_state) {
 	case TCP_LISTEN:
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index f81862baf4d0..aebfecbdb841 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -158,9 +158,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
 	unsigned int mask = 0;
 	unsigned long flags;
 
-	poll_wait(file, sk->sk_sleep, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
-	poll_wait(file, &rds_poll_waitq, wait);
+	if (rs->rs_seen_congestion)
+		poll_wait(file, &rds_poll_waitq, wait);
 
 	read_lock_irqsave(&rs->rs_recv_lock, flags);
 	if (!rs->rs_cong_monitor) {
@@ -182,6 +183,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
 		mask |= (POLLOUT | POLLWRNORM);
 	read_unlock_irqrestore(&rs->rs_recv_lock, flags);
 
+	/* clear state any time we wake a seen-congested socket */
+	if (mask)
+		rs->rs_seen_congestion = 0;
+
 	return mask;
 }
 
@@ -447,7 +452,6 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
 			      struct rds_info_lengths *lens)
 {
 	struct rds_sock *rs;
-	struct sock *sk;
 	struct rds_incoming *inc;
 	unsigned long flags;
 	unsigned int total = 0;
@@ -457,7 +461,6 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
 	spin_lock_irqsave(&rds_sock_lock, flags);
 
 	list_for_each_entry(rs, &rds_sock_list, rs_item) {
-		sk = rds_rs_to_sk(rs);
 		read_lock(&rs->rs_recv_lock);
 
 		/* XXX too lazy to maintain counts.. */
diff --git a/net/rds/cong.c b/net/rds/cong.c
index f1da27ceb064..0871a29f0780 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -219,8 +219,6 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
 	spin_lock_irqsave(&rds_cong_lock, flags);
 
 	list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
-		if (conn->c_loopback)
-			continue;
 		if (!test_and_set_bit(0, &conn->c_map_queued)) {
 			rds_stats_inc(s_cong_update_queued);
 			queue_delayed_work(rds_wq, &conn->c_send_w, 0);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 88d0856cb797..f68832798db2 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -204,9 +204,10 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
 		rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
 		break;
 	default:
-		rds_ib_conn_error(conn, "RDS/IB: Fatal QP Event %u "
+		rdsdebug("Fatal QP Event %u "
 			"- connection %pI4->%pI4, reconnecting\n",
 			event->event, &conn->c_laddr, &conn->c_faddr);
+		rds_conn_drop(conn);
 		break;
 	}
 }
@@ -474,6 +475,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 	err = rds_ib_setup_qp(conn);
 	if (err) {
 		rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
+		mutex_unlock(&conn->c_cm_lock);
 		goto out;
 	}
 
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 059989fdb7d7..a54cd63f9e35 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -235,8 +235,8 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
 {
 	flush_workqueue(rds_wq);
 	rds_ib_flush_mr_pool(pool, 1);
-	BUG_ON(atomic_read(&pool->item_count));
-	BUG_ON(atomic_read(&pool->free_pinned));
+	WARN_ON(atomic_read(&pool->item_count));
+	WARN_ON(atomic_read(&pool->free_pinned));
 	kfree(pool);
 }
 
@@ -441,6 +441,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
 
 			/* FIXME we need a way to tell a r/w MR
 			 * from a r/o MR */
+			BUG_ON(in_interrupt());
 			set_page_dirty(page);
 			put_page(page);
 		}
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index c7dd11b835f0..c74e9904a6b2 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -469,8 +469,8 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
 		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
 
 		rds_ib_stats_inc(s_ib_ack_send_failure);
-		/* Need to finesse this later. */
-		BUG();
+
+		rds_ib_conn_error(ic->conn, "sending ack failed\n");
 	} else
 		rds_ib_stats_inc(s_ib_ack_sent);
 }
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index a10fab6886d1..17fa80803ab0 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -243,8 +243,12 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
 			struct rds_message *rm;
 
 			rm = rds_send_get_message(conn, send->s_op);
-			if (rm)
+			if (rm) {
+				if (rm->m_rdma_op)
+					rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
 				rds_ib_send_rdma_complete(rm, wc.status);
+				rds_message_put(rm);
+			}
 		}
 
 		oldest = (oldest + 1) % ic->i_send_ring.w_nr;
@@ -482,6 +486,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 	BUG_ON(off % RDS_FRAG_SIZE);
 	BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
 
+	/* Do not send cong updates to IB loopback */
+	if (conn->c_loopback
+	    && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
+		rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
+		return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+	}
+
 	/* FIXME we may overallocate here */
 	if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
 		i = 1;
@@ -574,8 +585,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 		rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
 		adv_credits += posted;
 		BUG_ON(adv_credits > 255);
-	} else if (ic->i_rm != rm)
-		BUG();
+	}
 
 	send = &ic->i_sends[pos];
 	first = send;
@@ -714,8 +724,8 @@ add_header:
 			ic->i_rm = prev->s_rm;
 			prev->s_rm = NULL;
 		}
-		/* Finesse this later */
-		BUG();
+
+		rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
 		goto out;
 	}
 
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 3e9460f935d8..b5dd6ac39be8 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -157,9 +157,11 @@ static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
 	case IB_EVENT_QP_REQ_ERR:
 	case IB_EVENT_QP_FATAL:
 	default:
-		rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n",
+		rdsdebug("Fatal QP Event %u "
+			"- connection %pI4->%pI4, reconnecting\n",
 			event->event, &conn->c_laddr,
 			&conn->c_faddr);
+		rds_conn_drop(conn);
 		break;
 	}
 }
@@ -450,6 +452,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
 	err = rds_iw_setup_qp(conn);
 	if (err) {
 		rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err);
+		mutex_unlock(&conn->c_cm_lock);
 		goto out;
 	}
 
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index da43ee840ca3..3d479067d54d 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -469,8 +469,8 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi
 		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
 
 		rds_iw_stats_inc(s_iw_ack_send_failure);
-		/* Need to finesse this later. */
-		BUG();
+
+		rds_iw_conn_error(ic->conn, "sending ack failed\n");
 	} else
 		rds_iw_stats_inc(s_iw_ack_sent);
 }
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 1379e9d66a78..52182ff7519e 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -616,8 +616,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 		rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
 		adv_credits += posted;
 		BUG_ON(adv_credits > 255);
-	} else if (ic->i_rm != rm)
-		BUG();
+	}
 
 	send = &ic->i_sends[pos];
 	first = send;
diff --git a/net/rds/loop.c b/net/rds/loop.c
index 0d7a159158b8..dd9879379457 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -81,16 +81,9 @@ static int rds_loop_xmit_cong_map(struct rds_connection *conn,
 				  struct rds_cong_map *map,
 				  unsigned long offset)
 {
-	unsigned long i;
-
 	BUG_ON(offset);
 	BUG_ON(map != conn->c_lcong);
 
-	for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
-		memcpy((void *)conn->c_fcong->m_page_addrs[i],
-		       (void *)map->m_page_addrs[i], PAGE_SIZE);
-	}
-
 	rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
 
 	return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 5ce9437cad67..75fd13bb631b 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -439,8 +439,10 @@ void rds_rdma_free_op(struct rds_rdma_op *ro)
 		/* Mark page dirty if it was possibly modified, which
 		 * is the case for a RDMA_READ which copies from remote
 		 * to local memory */
-		if (!ro->r_write)
+		if (!ro->r_write) {
+			BUG_ON(in_interrupt());
 			set_page_dirty(page);
+		}
 		put_page(page);
 	}
 
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 7b155081b4dc..e599ba2f950d 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -101,7 +101,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 		break;
 
 	case RDMA_CM_EVENT_DISCONNECTED:
-		printk(KERN_WARNING "RDS/RDMA: DISCONNECT event - dropping connection "
+		rdsdebug("DISCONNECT event - dropping connection "
 			"%pI4->%pI4\n", &conn->c_laddr,
 			&conn->c_faddr);
 		rds_conn_drop(conn);
@@ -109,8 +109,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 
 	default:
 		/* things like device disconnect? */
-		printk(KERN_ERR "unknown event %u\n", event->event);
-		BUG();
+		printk(KERN_ERR "RDS: unknown event %u!\n", event->event);
 		break;
 	}
 
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 85d6f897ecc7..c224b5bb3ba9 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -388,6 +388,8 @@ struct rds_sock {
 
 	/* flag indicating we were congested or not */
 	int			rs_congested;
+	/* seen congestion (ENOBUFS) when sending? */
+	int			rs_seen_congestion;
 
 	/* rs_lock protects all these adjacent members before the newline */
 	spinlock_t		rs_lock;
@@ -490,7 +492,7 @@ void rds_sock_put(struct rds_sock *rs);
 void rds_wake_sk_sleep(struct rds_sock *rs);
 static inline void __rds_wake_sk_sleep(struct sock *sk)
 {
-	wait_queue_head_t *waitq = sk->sk_sleep;
+	wait_queue_head_t *waitq = sk_sleep(sk);
 
 	if (!sock_flag(sk, SOCK_DEAD) && waitq)
 		wake_up(waitq);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index e2a2b9344f7b..795a00b7f2cb 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -432,7 +432,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 			break;
 		}
 
-		timeo = wait_event_interruptible_timeout(*sk->sk_sleep,
+		timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
 					(!list_empty(&rs->rs_notify_queue) ||
 					 rs->rs_cong_notify ||
 					 rds_next_incoming(rs, &inc)), timeo);
diff --git a/net/rds/send.c b/net/rds/send.c
index f04b929ded92..9c1c6bcaa6c9 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -508,12 +508,13 @@ EXPORT_SYMBOL_GPL(rds_send_get_message);
  */
 void rds_send_remove_from_sock(struct list_head *messages, int status)
 {
-	unsigned long flags = 0; /* silence gcc :P */
+	unsigned long flags;
 	struct rds_sock *rs = NULL;
 	struct rds_message *rm;
 
-	local_irq_save(flags);
 	while (!list_empty(messages)) {
+		int was_on_sock = 0;
+
 		rm = list_entry(messages->next, struct rds_message,
 				m_conn_item);
 		list_del_init(&rm->m_conn_item);
@@ -528,20 +529,19 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
 		 * while we're messing with it. It does not prevent the
 		 * message from being removed from the socket, though.
 		 */
-		spin_lock(&rm->m_rs_lock);
+		spin_lock_irqsave(&rm->m_rs_lock, flags);
 		if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags))
 			goto unlock_and_drop;
 
 		if (rs != rm->m_rs) {
 			if (rs) {
-				spin_unlock(&rs->rs_lock);
 				rds_wake_sk_sleep(rs);
 				sock_put(rds_rs_to_sk(rs));
 			}
 			rs = rm->m_rs;
-			spin_lock(&rs->rs_lock);
 			sock_hold(rds_rs_to_sk(rs));
 		}
+		spin_lock(&rs->rs_lock);
 
 		if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
 			struct rds_rdma_op *ro = rm->m_rdma_op;
@@ -558,21 +558,22 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
 				notifier->n_status = status;
 				rm->m_rdma_op->r_notifier = NULL;
 			}
-			rds_message_put(rm);
+			was_on_sock = 1;
 			rm->m_rs = NULL;
 		}
+		spin_unlock(&rs->rs_lock);
 
 unlock_and_drop:
-		spin_unlock(&rm->m_rs_lock);
+		spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 		rds_message_put(rm);
+		if (was_on_sock)
+			rds_message_put(rm);
 	}
 
 	if (rs) {
-		spin_unlock(&rs->rs_lock);
 		rds_wake_sk_sleep(rs);
 		sock_put(rds_rs_to_sk(rs));
 	}
-	local_irq_restore(flags);
 }
 
 /*
@@ -634,9 +635,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 		list_move(&rm->m_sock_item, &list);
 		rds_send_sndbuf_remove(rs, rm);
 		clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
-
-		/* If this is a RDMA operation, notify the app. */
-		__rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
 	}
 
 	/* order flag updates with the rs lock */
@@ -645,9 +643,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 
 	spin_unlock_irqrestore(&rs->rs_lock, flags);
 
-	if (wake)
-		rds_wake_sk_sleep(rs);
-
 	conn = NULL;
 
 	/* now remove the messages from the conn list as needed */
@@ -655,6 +650,10 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 		/* We do this here rather than in the loop above, so that
 		 * we don't have to nest m_rs_lock under rs->rs_lock */
 		spin_lock_irqsave(&rm->m_rs_lock, flags2);
+		/* If this is a RDMA operation, notify the app. */
+		spin_lock(&rs->rs_lock);
+		__rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
+		spin_unlock(&rs->rs_lock);
 		rm->m_rs = NULL;
 		spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
 
@@ -683,6 +682,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 	if (conn)
 		spin_unlock_irqrestore(&conn->c_lock, flags);
 
+	if (wake)
+		rds_wake_sk_sleep(rs);
+
 	while (!list_empty(&list)) {
 		rm = list_entry(list.next, struct rds_message, m_sock_item);
 		list_del_init(&rm->m_sock_item);
@@ -816,7 +818,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	int ret = 0;
 	int queued = 0, allocated_mr = 0;
 	int nonblock = msg->msg_flags & MSG_DONTWAIT;
-	long timeo = sock_rcvtimeo(sk, nonblock);
+	long timeo = sock_sndtimeo(sk, nonblock);
 
 	/* Mirror Linux UDP mirror of BSD error message compatibility */
 	/* XXX: Perhaps MSG_MORE someday */
@@ -895,8 +897,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
 
 	ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
-	if (ret)
+	if (ret) {
+		rs->rs_seen_congestion = 1;
 		goto out;
+	}
 
 	while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port,
 				  dport, &queued)) {
@@ -911,7 +915,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 			goto out;
 		}
 
-		timeo = wait_event_interruptible_timeout(*sk->sk_sleep,
+		timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
 					rds_send_queue_rm(rs, conn, rm,
 							  rs->rs_bound_port,
 							  dport,
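
The rds_send_remove_from_sock() rework above folds the function-wide local_irq_save()/spin_lock() pair into a per-iteration spin_lock_irqsave(), tying the irq-disabled window to the lock's scope. The idiom, sketched with an illustrative lock and function that are not from this patch:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(example_lock);

    static void example_work(void)
    {
        unsigned long flags;

        /* was: local_irq_save(flags); spin_lock(&example_lock); */
        spin_lock_irqsave(&example_lock, flags);
        /* ... touch state shared with interrupt context ... */
        spin_unlock_irqrestore(&example_lock, flags);
    }
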
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 056256285987..c397524c039c 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -141,7 +141,7 @@ void rds_tcp_conn_shutdown(struct rds_connection *conn)
 
 		release_sock(sock->sk);
 		sock_release(sock);
-	};
+	}
 
 	if (tc->t_tinc) {
 		rds_inc_put(&tc->t_tinc->ti_inc);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e08ec912d8b0..1aba6878fa5d 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -98,6 +98,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
 			goto out;
 		}
 
+		rds_stats_add(s_copy_to_user, to_copy);
 		size -= to_copy;
 		ret += to_copy;
 		skb_off += to_copy;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 34fdcc059e54..a28b895ff0d1 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -240,7 +240,9 @@ void rds_tcp_write_space(struct sock *sk)
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
 	rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
-	queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+
 out:
 	read_unlock(&sk->sk_callback_lock);
 
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 00fa10e59af8..786c20eaaf5e 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -259,7 +259,7 @@ void rds_threads_exit(void)
 
 int __init rds_threads_init(void)
 {
-	rds_wq = create_singlethread_workqueue("krdsd");
+	rds_wq = create_workqueue("krdsd");
 	if (rds_wq == NULL)
 		return -ENOMEM;
 
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index a9fa86f65983..51875a0c5d48 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -629,6 +629,49 @@ static ssize_t rfkill_persistent_show(struct device *dev,
629 return sprintf(buf, "%d\n", rfkill->persistent); 629 return sprintf(buf, "%d\n", rfkill->persistent);
630} 630}
631 631
632static ssize_t rfkill_hard_show(struct device *dev,
633 struct device_attribute *attr,
634 char *buf)
635{
636 struct rfkill *rfkill = to_rfkill(dev);
637
638 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0 );
639}
640
641static ssize_t rfkill_soft_show(struct device *dev,
642 struct device_attribute *attr,
643 char *buf)
644{
645 struct rfkill *rfkill = to_rfkill(dev);
646
647 return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0 );
648}
649
650static ssize_t rfkill_soft_store(struct device *dev,
651 struct device_attribute *attr,
652 const char *buf, size_t count)
653{
654 struct rfkill *rfkill = to_rfkill(dev);
655 unsigned long state;
656 int err;
657
658 if (!capable(CAP_NET_ADMIN))
659 return -EPERM;
660
661 err = strict_strtoul(buf, 0, &state);
662 if (err)
663 return err;
664
665 if (state > 1 )
666 return -EINVAL;
667
668 mutex_lock(&rfkill_global_mutex);
669 rfkill_set_block(rfkill, state);
670 mutex_unlock(&rfkill_global_mutex);
671
672 return err ?: count;
673}
674
632static u8 user_state_from_blocked(unsigned long state) 675static u8 user_state_from_blocked(unsigned long state)
633{ 676{
634 if (state & RFKILL_BLOCK_HW) 677 if (state & RFKILL_BLOCK_HW)
@@ -644,14 +687,8 @@ static ssize_t rfkill_state_show(struct device *dev,
644 char *buf) 687 char *buf)
645{ 688{
646 struct rfkill *rfkill = to_rfkill(dev); 689 struct rfkill *rfkill = to_rfkill(dev);
647 unsigned long flags;
648 u32 state;
649
650 spin_lock_irqsave(&rfkill->lock, flags);
651 state = rfkill->state;
652 spin_unlock_irqrestore(&rfkill->lock, flags);
653 690
654 return sprintf(buf, "%d\n", user_state_from_blocked(state)); 691 return sprintf(buf, "%d\n", user_state_from_blocked(rfkill->state));
655} 692}
656 693
657static ssize_t rfkill_state_store(struct device *dev, 694static ssize_t rfkill_state_store(struct device *dev,
@@ -701,6 +738,8 @@ static struct device_attribute rfkill_dev_attrs[] = {
701 __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL), 738 __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL),
702 __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), 739 __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
703 __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), 740 __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
741 __ATTR(soft, S_IRUGO|S_IWUSR, rfkill_soft_show, rfkill_soft_store),
742 __ATTR(hard, S_IRUGO, rfkill_hard_show, NULL),
704 __ATTR_NULL 743 __ATTR_NULL
705}; 744};
706 745
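
The two new attributes split the block state that the combined `state` file used to fold together: `hard` is read-only, while `soft` accepts 0/1 from a CAP_NET_ADMIN writer. A hypothetical userspace sketch; the rfkill0 sysfs path is an assumption for illustration:

#include <stdio.h>

int main(void)
{
        int hard = -1;
        FILE *f = fopen("/sys/class/rfkill/rfkill0/hard", "r");

        if (f) {
                if (fscanf(f, "%d", &hard) != 1)
                        hard = -1;
                fclose(f);
        }
        printf("hard block: %d\n", hard);

        f = fopen("/sys/class/rfkill/rfkill0/soft", "w");
        if (f) {
                fputs("1\n", f);        /* request a soft block */
                fclose(f);
        }
        return 0;
}
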
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 4fb711a035f4..8e45e76a95f5 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -845,7 +845,7 @@ rose_try_next_neigh:
845 DEFINE_WAIT(wait); 845 DEFINE_WAIT(wait);
846 846
847 for (;;) { 847 for (;;) {
848 prepare_to_wait(sk->sk_sleep, &wait, 848 prepare_to_wait(sk_sleep(sk), &wait,
849 TASK_INTERRUPTIBLE); 849 TASK_INTERRUPTIBLE);
850 if (sk->sk_state != TCP_SYN_SENT) 850 if (sk->sk_state != TCP_SYN_SENT)
851 break; 851 break;
@@ -858,7 +858,7 @@ rose_try_next_neigh:
858 err = -ERESTARTSYS; 858 err = -ERESTARTSYS;
859 break; 859 break;
860 } 860 }
861 finish_wait(sk->sk_sleep, &wait); 861 finish_wait(sk_sleep(sk), &wait);
862 862
863 if (err) 863 if (err)
864 goto out_release; 864 goto out_release;
@@ -911,7 +911,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
911 * hooked into the SABM we saved 911 * hooked into the SABM we saved
912 */ 912 */
913 for (;;) { 913 for (;;) {
914 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 914 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
915 915
916 skb = skb_dequeue(&sk->sk_receive_queue); 916 skb = skb_dequeue(&sk->sk_receive_queue);
917 if (skb) 917 if (skb)
@@ -930,7 +930,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
930 err = -ERESTARTSYS; 930 err = -ERESTARTSYS;
931 break; 931 break;
932 } 932 }
933 finish_wait(sk->sk_sleep, &wait); 933 finish_wait(sk_sleep(sk), &wait);
934 if (err) 934 if (err)
935 goto out_release; 935 goto out_release;
936 936
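
These hunks are part of the tree-wide switch from dereferencing sk->sk_sleep to the sk_sleep() accessor, which lets the wait-queue head move into the RCU-managed struct socket_wq. The surrounding wait loop is untouched; its shape, sketched with a hypothetical wake-up condition:

        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                if (connection_ready(sk))       /* hypothetical predicate */
                        break;
                if (signal_pending(current)) {
                        err = -ERESTARTSYS;
                        break;
                }
                schedule();
        }
        finish_wait(sk_sleep(sk), &wait);
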
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index cbc244a128bd..b4fdaac233f7 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -109,7 +109,9 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route,
109 init_timer(&rose_neigh->t0timer); 109 init_timer(&rose_neigh->t0timer);
110 110
111 if (rose_route->ndigis != 0) { 111 if (rose_route->ndigis != 0) {
112 if ((rose_neigh->digipeat = kmalloc(sizeof(ax25_digi), GFP_KERNEL)) == NULL) { 112 rose_neigh->digipeat =
113 kmalloc(sizeof(ax25_digi), GFP_ATOMIC);
114 if (rose_neigh->digipeat == NULL) {
113 kfree(rose_neigh); 115 kfree(rose_neigh);
114 res = -ENOMEM; 116 res = -ENOMEM;
115 goto out; 117 goto out;
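
rose_add_node() is called with a spinlock held, so the nested allocation may not sleep: GFP_KERNEL can block in reclaim, GFP_ATOMIC cannot. The constraint in miniature (lock name hypothetical):

        spin_lock_bh(&some_lock);
        obj = kmalloc(sizeof(*obj), GFP_ATOMIC); /* may fail, never sleeps */
        if (obj == NULL) {
                spin_unlock_bh(&some_lock);
                return -ENOMEM;
        }
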
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index c060095b27ce..0b9bb2085ce4 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -62,13 +62,15 @@ static inline int rxrpc_writable(struct sock *sk)
62static void rxrpc_write_space(struct sock *sk) 62static void rxrpc_write_space(struct sock *sk)
63{ 63{
64 _enter("%p", sk); 64 _enter("%p", sk);
65 read_lock(&sk->sk_callback_lock); 65 rcu_read_lock();
66 if (rxrpc_writable(sk)) { 66 if (rxrpc_writable(sk)) {
67 if (sk_has_sleeper(sk)) 67 struct socket_wq *wq = rcu_dereference(sk->sk_wq);
68 wake_up_interruptible(sk->sk_sleep); 68
69 if (wq_has_sleeper(wq))
70 wake_up_interruptible(&wq->wait);
69 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); 71 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
70 } 72 }
71 read_unlock(&sk->sk_callback_lock); 73 rcu_read_unlock();
72} 74}
73 75
74/* 76/*
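
The callback drops sk_callback_lock in favour of the RCU-protected socket_wq introduced by the core sk_wq conversion. The generic shape of a write-space callback after that change, as a sketch:

static void example_write_space(struct sock *sk)
{
        struct socket_wq *wq;

        rcu_read_lock();
        wq = rcu_dereference(sk->sk_wq);
        if (wq_has_sleeper(wq))         /* tolerates wq == NULL */
                wake_up_interruptible(&wq->wait);
        sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
        rcu_read_unlock();
}
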
@@ -589,7 +591,7 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
589 unsigned int mask; 591 unsigned int mask;
590 struct sock *sk = sock->sk; 592 struct sock *sk = sock->sk;
591 593
592 sock_poll_wait(file, sk->sk_sleep, wait); 594 sock_poll_wait(file, sk_sleep(sk), wait);
593 mask = 0; 595 mask = 0;
594 596
595 /* the socket is readable if there are any messages waiting on the Rx 597 /* the socket is readable if there are any messages waiting on the Rx
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index 2714da167fb8..b6ffe4e1b84a 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -245,6 +245,9 @@ static void rxrpc_resend_timer(struct rxrpc_call *call)
245 _enter("%d,%d,%d", 245 _enter("%d,%d,%d",
246 call->acks_tail, call->acks_unacked, call->acks_head); 246 call->acks_tail, call->acks_unacked, call->acks_head);
247 247
248 if (call->state >= RXRPC_CALL_COMPLETE)
249 return;
250
248 resend = 0; 251 resend = 0;
249 resend_at = 0; 252 resend_at = 0;
250 253
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index 909d092de9f4..bf656c230ba9 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -786,6 +786,7 @@ static void rxrpc_call_life_expired(unsigned long _call)
786 786
787/* 787/*
788 * handle resend timer expiry 788 * handle resend timer expiry
789 * - may not take call->state_lock as this can deadlock against del_timer_sync()
789 */ 790 */
790static void rxrpc_resend_time_expired(unsigned long _call) 791static void rxrpc_resend_time_expired(unsigned long _call)
791{ 792{
@@ -796,12 +797,9 @@ static void rxrpc_resend_time_expired(unsigned long _call)
796 if (call->state >= RXRPC_CALL_COMPLETE) 797 if (call->state >= RXRPC_CALL_COMPLETE)
797 return; 798 return;
798 799
799 read_lock_bh(&call->state_lock);
800 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); 800 clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
801 if (call->state < RXRPC_CALL_COMPLETE && 801 if (!test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
802 !test_and_set_bit(RXRPC_CALL_RESEND_TIMER, &call->events))
803 rxrpc_queue_call(call); 802 rxrpc_queue_call(call);
804 read_unlock_bh(&call->state_lock);
805} 803}
806 804
807/* 805/*
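
The added comment names the hazard: del_timer_sync() waits for the running handler to return, so a handler that blocks on a lock held by the del_timer_sync() caller deadlocks. Keeping the handler to atomic bitops sidesteps it; a generic sketch with hypothetical names:

static void obj_timer_expired(unsigned long data)
{
        struct obj *o = (struct obj *)data;     /* hypothetical type */

        /* no locks here: set a flag atomically and defer to a worker,
         * which re-validates the state under proper locking */
        if (!test_and_set_bit(OBJ_EV_PENDING, &o->events))
                queue_work(obj_wq, &o->work);   /* hypothetical queue */
}
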
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index f0f85b0123f7..9f1729bd60de 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -64,8 +64,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
64 return; 64 return;
65 } 65 }
66 66
67 peer->if_mtu = dst_mtu(&rt->u.dst); 67 peer->if_mtu = dst_mtu(&rt->dst);
68 dst_release(&rt->u.dst); 68 dst_release(&rt->dst);
69 69
70 _leave(" [if_mtu %u]", peer->if_mtu); 70 _leave(" [if_mtu %u]", peer->if_mtu);
71} 71}
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 60c2b94e6b54..0c65013e3bfe 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -91,7 +91,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
91 91
92 /* wait for a message to turn up */ 92 /* wait for a message to turn up */
93 release_sock(&rx->sk); 93 release_sock(&rx->sk);
94 prepare_to_wait_exclusive(rx->sk.sk_sleep, &wait, 94 prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait,
95 TASK_INTERRUPTIBLE); 95 TASK_INTERRUPTIBLE);
96 ret = sock_error(&rx->sk); 96 ret = sock_error(&rx->sk);
97 if (ret) 97 if (ret)
@@ -102,7 +102,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
102 goto wait_interrupted; 102 goto wait_interrupted;
103 timeo = schedule_timeout(timeo); 103 timeo = schedule_timeout(timeo);
104 } 104 }
105 finish_wait(rx->sk.sk_sleep, &wait); 105 finish_wait(sk_sleep(&rx->sk), &wait);
106 lock_sock(&rx->sk); 106 lock_sock(&rx->sk);
107 continue; 107 continue;
108 } 108 }
@@ -356,7 +356,7 @@ csum_copy_error:
356wait_interrupted: 356wait_interrupted:
357 ret = sock_intr_errno(timeo); 357 ret = sock_intr_errno(timeo);
358wait_error: 358wait_error:
359 finish_wait(rx->sk.sk_sleep, &wait); 359 finish_wait(sk_sleep(&rx->sk), &wait);
360 if (continue_call) 360 if (continue_call)
361 rxrpc_put_call(continue_call); 361 rxrpc_put_call(continue_call);
362 if (copied) 362 if (copied)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d8e0171d9a4b..23b25f89e7e0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,6 +26,11 @@
26#include <net/act_api.h> 26#include <net/act_api.h>
27#include <net/netlink.h> 27#include <net/netlink.h>
28 28
29static void tcf_common_free_rcu(struct rcu_head *head)
30{
31 kfree(container_of(head, struct tcf_common, tcfc_rcu));
32}
33
29void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) 34void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
30{ 35{
31 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); 36 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -38,7 +43,11 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
38 write_unlock_bh(hinfo->lock); 43 write_unlock_bh(hinfo->lock);
39 gen_kill_estimator(&p->tcfc_bstats, 44 gen_kill_estimator(&p->tcfc_bstats,
40 &p->tcfc_rate_est); 45 &p->tcfc_rate_est);
41 kfree(p); 46 /*
47 * gen_estimator est_timer() might access p->tcfc_lock
48 * or bstats, wait a RCU grace period before freeing p
49 */
50 call_rcu(&p->tcfc_rcu, tcf_common_free_rcu);
42 return; 51 return;
43 } 52 }
44 } 53 }
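
Because the gen_estimator timer can still dereference the action's lock and bstats after it is unhashed, the free is deferred by one RCU grace period through an rcu_head embedded in tcf_common. The idiom in isolation (struct name hypothetical):

struct foo {
        spinlock_t lock;        /* still readable by late readers */
        struct rcu_head rcu;
};

static void foo_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct foo, rcu));
}

static void foo_destroy(struct foo *p)
{
        /* unlink p first so no new readers can find it, then: */
        call_rcu(&p->rcu, foo_free_rcu);
}
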
@@ -153,7 +162,7 @@ int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
153 } else if (type == RTM_GETACTION) { 162 } else if (type == RTM_GETACTION) {
154 return tcf_dump_walker(skb, cb, a, hinfo); 163 return tcf_dump_walker(skb, cb, a, hinfo);
155 } else { 164 } else {
156 printk("tcf_generic_walker: unknown action %d\n", type); 165 WARN(1, "tcf_generic_walker: unknown action %d\n", type);
157 return -EINVAL; 166 return -EINVAL;
158 } 167 }
159} 168}
@@ -403,8 +412,9 @@ void tcf_action_destroy(struct tc_action *act, int bind)
403 module_put(a->ops->owner); 412 module_put(a->ops->owner);
404 act = act->next; 413 act = act->next;
405 kfree(a); 414 kfree(a);
406 } else { /*FIXME: Remove later - catch insertion bugs*/ 415 } else {
407 printk("tcf_action_destroy: BUG? destroying NULL ops\n"); 416 /*FIXME: Remove later - catch insertion bugs*/
417 WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n");
408 act = act->next; 418 act = act->next;
409 kfree(a); 419 kfree(a);
410 } 420 }
@@ -668,7 +678,8 @@ nlmsg_failure:
668} 678}
669 679
670static int 680static int
671act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) 681act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
682 struct tc_action *a, int event)
672{ 683{
673 struct sk_buff *skb; 684 struct sk_buff *skb;
674 685
@@ -680,7 +691,7 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
680 return -EINVAL; 691 return -EINVAL;
681 } 692 }
682 693
683 return rtnl_unicast(skb, &init_net, pid); 694 return rtnl_unicast(skb, net, pid);
684} 695}
685 696
686static struct tc_action * 697static struct tc_action *
@@ -743,14 +754,15 @@ static struct tc_action *create_a(int i)
743 754
744 act = kzalloc(sizeof(*act), GFP_KERNEL); 755 act = kzalloc(sizeof(*act), GFP_KERNEL);
745 if (act == NULL) { 756 if (act == NULL) {
746 printk("create_a: failed to alloc!\n"); 757 pr_debug("create_a: failed to alloc!\n");
747 return NULL; 758 return NULL;
748 } 759 }
749 act->order = i; 760 act->order = i;
750 return act; 761 return act;
751} 762}
752 763
753static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid) 764static int tca_action_flush(struct net *net, struct nlattr *nla,
765 struct nlmsghdr *n, u32 pid)
754{ 766{
755 struct sk_buff *skb; 767 struct sk_buff *skb;
756 unsigned char *b; 768 unsigned char *b;
@@ -764,13 +776,13 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
764 int err = -ENOMEM; 776 int err = -ENOMEM;
765 777
766 if (a == NULL) { 778 if (a == NULL) {
767 printk("tca_action_flush: couldnt create tc_action\n"); 779 pr_debug("tca_action_flush: couldnt create tc_action\n");
768 return err; 780 return err;
769 } 781 }
770 782
771 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 783 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
772 if (!skb) { 784 if (!skb) {
773 printk("tca_action_flush: failed skb alloc\n"); 785 pr_debug("tca_action_flush: failed skb alloc\n");
774 kfree(a); 786 kfree(a);
775 return err; 787 return err;
776 } 788 }
@@ -809,7 +821,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
809 nlh->nlmsg_flags |= NLM_F_ROOT; 821 nlh->nlmsg_flags |= NLM_F_ROOT;
810 module_put(a->ops->owner); 822 module_put(a->ops->owner);
811 kfree(a); 823 kfree(a);
812 err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 824 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
813 if (err > 0) 825 if (err > 0)
814 return 0; 826 return 0;
815 827
@@ -826,7 +838,8 @@ noflush_out:
826} 838}
827 839
828static int 840static int
829tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event) 841tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
842 u32 pid, int event)
830{ 843{
831 int i, ret; 844 int i, ret;
832 struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; 845 struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
@@ -838,7 +851,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
838 851
839 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { 852 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
840 if (tb[1] != NULL) 853 if (tb[1] != NULL)
841 return tca_action_flush(tb[1], n, pid); 854 return tca_action_flush(net, tb[1], n, pid);
842 else 855 else
843 return -EINVAL; 856 return -EINVAL;
844 } 857 }
@@ -859,7 +872,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
859 } 872 }
860 873
861 if (event == RTM_GETACTION) 874 if (event == RTM_GETACTION)
862 ret = act_get_notify(pid, n, head, event); 875 ret = act_get_notify(net, pid, n, head, event);
863 else { /* delete */ 876 else { /* delete */
864 struct sk_buff *skb; 877 struct sk_buff *skb;
865 878
@@ -878,7 +891,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
878 891
879 /* now do the delete */ 892 /* now do the delete */
880 tcf_action_destroy(head, 0); 893 tcf_action_destroy(head, 0);
881 ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, 894 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
882 n->nlmsg_flags&NLM_F_ECHO); 895 n->nlmsg_flags&NLM_F_ECHO);
883 if (ret > 0) 896 if (ret > 0)
884 return 0; 897 return 0;
@@ -889,8 +902,8 @@ err:
889 return ret; 902 return ret;
890} 903}
891 904
892static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, 905static int tcf_add_notify(struct net *net, struct tc_action *a,
893 u16 flags) 906 u32 pid, u32 seq, int event, u16 flags)
894{ 907{
895 struct tcamsg *t; 908 struct tcamsg *t;
896 struct nlmsghdr *nlh; 909 struct nlmsghdr *nlh;
@@ -923,7 +936,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
923 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 936 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
924 NETLINK_CB(skb).dst_group = RTNLGRP_TC; 937 NETLINK_CB(skb).dst_group = RTNLGRP_TC;
925 938
926 err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); 939 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
927 if (err > 0) 940 if (err > 0)
928 err = 0; 941 err = 0;
929 return err; 942 return err;
@@ -936,7 +949,8 @@ nlmsg_failure:
936 949
937 950
938static int 951static int
939tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr) 952tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
953 u32 pid, int ovr)
940{ 954{
941 int ret = 0; 955 int ret = 0;
942 struct tc_action *act; 956 struct tc_action *act;
@@ -954,7 +968,7 @@ tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr)
954 /* dump then free all the actions after update; inserted policy 968 /* dump then free all the actions after update; inserted policy
955 * stays intact 969 * stays intact
956 * */ 970 * */
957 ret = tcf_add_notify(act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); 971 ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
958 for (a = act; a; a = act) { 972 for (a = act; a; a = act) {
959 act = a->next; 973 act = a->next;
960 kfree(a); 974 kfree(a);
@@ -970,15 +984,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
970 u32 pid = skb ? NETLINK_CB(skb).pid : 0; 984 u32 pid = skb ? NETLINK_CB(skb).pid : 0;
971 int ret = 0, ovr = 0; 985 int ret = 0, ovr = 0;
972 986
973 if (!net_eq(net, &init_net))
974 return -EINVAL;
975
976 ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); 987 ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
977 if (ret < 0) 988 if (ret < 0)
978 return ret; 989 return ret;
979 990
980 if (tca[TCA_ACT_TAB] == NULL) { 991 if (tca[TCA_ACT_TAB] == NULL) {
981 printk("tc_ctl_action: received NO action attribs\n"); 992 pr_notice("tc_ctl_action: received NO action attribs\n");
982 return -EINVAL; 993 return -EINVAL;
983 } 994 }
984 995
@@ -995,15 +1006,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
995 if (n->nlmsg_flags&NLM_F_REPLACE) 1006 if (n->nlmsg_flags&NLM_F_REPLACE)
996 ovr = 1; 1007 ovr = 1;
997replay: 1008replay:
998 ret = tcf_action_add(tca[TCA_ACT_TAB], n, pid, ovr); 1009 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
999 if (ret == -EAGAIN) 1010 if (ret == -EAGAIN)
1000 goto replay; 1011 goto replay;
1001 break; 1012 break;
1002 case RTM_DELACTION: 1013 case RTM_DELACTION:
1003 ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_DELACTION); 1014 ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
1015 pid, RTM_DELACTION);
1004 break; 1016 break;
1005 case RTM_GETACTION: 1017 case RTM_GETACTION:
1006 ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_GETACTION); 1018 ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
1019 pid, RTM_GETACTION);
1007 break; 1020 break;
1008 default: 1021 default:
1009 BUG(); 1022 BUG();
@@ -1043,7 +1056,6 @@ find_dump_kind(const struct nlmsghdr *n)
1043static int 1056static int
1044tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) 1057tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1045{ 1058{
1046 struct net *net = sock_net(skb->sk);
1047 struct nlmsghdr *nlh; 1059 struct nlmsghdr *nlh;
1048 unsigned char *b = skb_tail_pointer(skb); 1060 unsigned char *b = skb_tail_pointer(skb);
1049 struct nlattr *nest; 1061 struct nlattr *nest;
@@ -1053,11 +1065,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1053 struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); 1065 struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
1054 struct nlattr *kind = find_dump_kind(cb->nlh); 1066 struct nlattr *kind = find_dump_kind(cb->nlh);
1055 1067
1056 if (!net_eq(net, &init_net))
1057 return 0;
1058
1059 if (kind == NULL) { 1068 if (kind == NULL) {
1060 printk("tc_dump_action: action bad kind\n"); 1069 pr_info("tc_dump_action: action bad kind\n");
1061 return 0; 1070 return 0;
1062 } 1071 }
1063 1072
@@ -1070,7 +1079,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1070 a.ops = a_o; 1079 a.ops = a_o;
1071 1080
1072 if (a_o->walk == NULL) { 1081 if (a_o->walk == NULL) {
1073 printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind); 1082 WARN(1, "tc_dump_action: %s !capable of dumping table\n",
1083 a_o->kind);
1074 goto nla_put_failure; 1084 goto nla_put_failure;
1075 } 1085 }
1076 1086
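
Threading struct net through act_get_notify(), tca_action_flush(), tca_action_gd() and tcf_add_notify() is what allows the !net_eq(net, &init_net) guards to be dropped: notifications now target the requester's namespace instead of being hardwired to init_net. The entry-point pattern, sketched (example_notify is hypothetical):

static int example_ctl(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
        struct net *net = sock_net(skb->sk);    /* requester's namespace */

        /* ... parse, build a reply skb, then notify within that net ... */
        return example_notify(net, n, NETLINK_CB(skb).pid);
}
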
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e7f796aec657..8406c6654990 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -202,9 +202,9 @@ MODULE_LICENSE("GPL");
202static int __init gact_init_module(void) 202static int __init gact_init_module(void)
203{ 203{
204#ifdef CONFIG_GACT_PROB 204#ifdef CONFIG_GACT_PROB
205 printk("GACT probability on\n"); 205 printk(KERN_INFO "GACT probability on\n");
206#else 206#else
207 printk("GACT probability NOT on\n"); 207 printk(KERN_INFO "GACT probability NOT on\n");
208#endif 208#endif
209 return tcf_register_action(&act_gact_ops); 209 return tcf_register_action(&act_gact_ops);
210} 210}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index da27a170b6b7..c7e59e6ec349 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -47,8 +47,8 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
47 47
48 target = xt_request_find_target(AF_INET, t->u.user.name, 48 target = xt_request_find_target(AF_INET, t->u.user.name,
49 t->u.user.revision); 49 t->u.user.revision);
50 if (!target) 50 if (IS_ERR(target))
51 return -ENOENT; 51 return PTR_ERR(target);
52 52
53 t->u.kernel.target = target; 53 t->u.kernel.target = target;
54 par.table = table; 54 par.table = table;
@@ -199,7 +199,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
199{ 199{
200 int ret = 0, result = 0; 200 int ret = 0, result = 0;
201 struct tcf_ipt *ipt = a->priv; 201 struct tcf_ipt *ipt = a->priv;
202 struct xt_target_param par; 202 struct xt_action_param par;
203 203
204 if (skb_cloned(skb)) { 204 if (skb_cloned(skb)) {
205 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 205 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -235,7 +235,8 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
235 break; 235 break;
236 default: 236 default:
237 if (net_ratelimit()) 237 if (net_ratelimit())
238 printk("Bogus netfilter code %d assume ACCEPT\n", ret); 238 pr_notice("tc filter: Bogus netfilter code"
239 " %d assume ACCEPT\n", ret);
239 result = TC_POLICE_OK; 240 result = TC_POLICE_OK;
240 break; 241 break;
241 } 242 }
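
xt_request_find_target() stopped returning NULL and now reports failures as ERR_PTR() codes, hence the caller moves to the IS_ERR()/PTR_ERR() convention:

        struct xt_target *target;

        target = xt_request_find_target(AF_INET, name, revision);
        if (IS_ERR(target))             /* NULL is no longer returned */
                return PTR_ERR(target);
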
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c046682054eb..11f195af2da0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -33,6 +33,7 @@
33static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; 33static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1];
34static u32 mirred_idx_gen; 34static u32 mirred_idx_gen;
35static DEFINE_RWLOCK(mirred_lock); 35static DEFINE_RWLOCK(mirred_lock);
36static LIST_HEAD(mirred_list);
36 37
37static struct tcf_hashinfo mirred_hash_info = { 38static struct tcf_hashinfo mirred_hash_info = {
38 .htab = tcf_mirred_ht, 39 .htab = tcf_mirred_ht,
@@ -47,7 +48,9 @@ static inline int tcf_mirred_release(struct tcf_mirred *m, int bind)
47 m->tcf_bindcnt--; 48 m->tcf_bindcnt--;
48 m->tcf_refcnt--; 49 m->tcf_refcnt--;
49 if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { 50 if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
50 dev_put(m->tcfm_dev); 51 list_del(&m->tcfm_list);
52 if (m->tcfm_dev)
53 dev_put(m->tcfm_dev);
51 tcf_hash_destroy(&m->common, &mirred_hash_info); 54 tcf_hash_destroy(&m->common, &mirred_hash_info);
52 return 1; 55 return 1;
53 } 56 }
@@ -134,8 +137,10 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
134 m->tcfm_ok_push = ok_push; 137 m->tcfm_ok_push = ok_push;
135 } 138 }
136 spin_unlock_bh(&m->tcf_lock); 139 spin_unlock_bh(&m->tcf_lock);
137 if (ret == ACT_P_CREATED) 140 if (ret == ACT_P_CREATED) {
141 list_add(&m->tcfm_list, &mirred_list);
138 tcf_hash_insert(pc, &mirred_hash_info); 142 tcf_hash_insert(pc, &mirred_hash_info);
143 }
139 144
140 return ret; 145 return ret;
141} 146}
@@ -160,22 +165,27 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
160 165
161 spin_lock(&m->tcf_lock); 166 spin_lock(&m->tcf_lock);
162 m->tcf_tm.lastuse = jiffies; 167 m->tcf_tm.lastuse = jiffies;
168 m->tcf_bstats.bytes += qdisc_pkt_len(skb);
169 m->tcf_bstats.packets++;
163 170
164 dev = m->tcfm_dev; 171 dev = m->tcfm_dev;
172 if (!dev) {
173 printk_once(KERN_NOTICE "tc mirred: target device is gone\n");
174 goto out;
175 }
176
165 if (!(dev->flags & IFF_UP)) { 177 if (!(dev->flags & IFF_UP)) {
166 if (net_ratelimit()) 178 if (net_ratelimit())
167 printk("mirred to Houston: device %s is gone!\n", 179 pr_notice("tc mirred to Houston: device %s is down\n",
168 dev->name); 180 dev->name);
169 goto out; 181 goto out;
170 } 182 }
171 183
172 skb2 = skb_act_clone(skb, GFP_ATOMIC); 184 at = G_TC_AT(skb->tc_verd);
185 skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
173 if (skb2 == NULL) 186 if (skb2 == NULL)
174 goto out; 187 goto out;
175 188
176 m->tcf_bstats.bytes += qdisc_pkt_len(skb2);
177 m->tcf_bstats.packets++;
178 at = G_TC_AT(skb->tc_verd);
179 if (!(at & AT_EGRESS)) { 189 if (!(at & AT_EGRESS)) {
180 if (m->tcfm_ok_push) 190 if (m->tcfm_ok_push)
181 skb_push(skb2, skb2->dev->hard_header_len); 191 skb_push(skb2, skb2->dev->hard_header_len);
@@ -185,16 +195,14 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
185 if (m->tcfm_eaction != TCA_EGRESS_MIRROR) 195 if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
186 skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); 196 skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
187 197
188 skb2->dev = dev;
189 skb2->skb_iif = skb->dev->ifindex; 198 skb2->skb_iif = skb->dev->ifindex;
199 skb2->dev = dev;
190 dev_queue_xmit(skb2); 200 dev_queue_xmit(skb2);
191 err = 0; 201 err = 0;
192 202
193out: 203out:
194 if (err) { 204 if (err) {
195 m->tcf_qstats.overlimits++; 205 m->tcf_qstats.overlimits++;
196 m->tcf_bstats.bytes += qdisc_pkt_len(skb);
197 m->tcf_bstats.packets++;
198 /* should we be asking for packet to be dropped? 206 /* should we be asking for packet to be dropped?
199 * may make sense for redirect case only 207 * may make sense for redirect case only
200 */ 208 */
@@ -232,6 +240,28 @@ nla_put_failure:
232 return -1; 240 return -1;
233} 241}
234 242
243static int mirred_device_event(struct notifier_block *unused,
244 unsigned long event, void *ptr)
245{
246 struct net_device *dev = ptr;
247 struct tcf_mirred *m;
248
249 if (event == NETDEV_UNREGISTER)
250 list_for_each_entry(m, &mirred_list, tcfm_list) {
251 if (m->tcfm_dev == dev) {
252 dev_put(dev);
253 m->tcfm_dev = NULL;
254 }
255 }
256
257 return NOTIFY_DONE;
258}
259
260static struct notifier_block mirred_device_notifier = {
261 .notifier_call = mirred_device_event,
262};
263
264
235static struct tc_action_ops act_mirred_ops = { 265static struct tc_action_ops act_mirred_ops = {
236 .kind = "mirred", 266 .kind = "mirred",
237 .hinfo = &mirred_hash_info, 267 .hinfo = &mirred_hash_info,
@@ -252,12 +282,17 @@ MODULE_LICENSE("GPL");
252 282
253static int __init mirred_init_module(void) 283static int __init mirred_init_module(void)
254{ 284{
255 printk("Mirror/redirect action on\n"); 285 int err = register_netdevice_notifier(&mirred_device_notifier);
286 if (err)
287 return err;
288
289 pr_info("Mirror/redirect action on\n");
256 return tcf_register_action(&act_mirred_ops); 290 return tcf_register_action(&act_mirred_ops);
257} 291}
258 292
259static void __exit mirred_cleanup_module(void) 293static void __exit mirred_cleanup_module(void)
260{ 294{
295 unregister_netdevice_notifier(&mirred_device_notifier);
261 tcf_unregister_action(&act_mirred_ops); 296 tcf_unregister_action(&act_mirred_ops);
262} 297}
263 298
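
With the instance list plus notifier, a mirred action can now outlive its target device: NETDEV_UNREGISTER releases the reference and NULLs tcfm_dev, and the fast path above logs once and drops rather than touching a dead device. Note that with the notifier API of this era, ptr is the struct net_device itself; registration in miniature:

static int ex_device_event(struct notifier_block *unused,
                           unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;   /* pre-netdev_notifier_info API */

        if (event == NETDEV_UNREGISTER)
                pr_debug("%s is unregistering\n", dev->name);
        return NOTIFY_DONE;
}

static struct notifier_block ex_nb = {
        .notifier_call = ex_device_event,
};

/* in module init, before the action is exposed: */
err = register_netdevice_notifier(&ex_nb);
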
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index d885ba311564..509a2d53a99d 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -114,6 +114,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
114 int egress; 114 int egress;
115 int action; 115 int action;
116 int ihl; 116 int ihl;
117 int noff;
117 118
118 spin_lock(&p->tcf_lock); 119 spin_lock(&p->tcf_lock);
119 120
@@ -132,7 +133,8 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
132 if (unlikely(action == TC_ACT_SHOT)) 133 if (unlikely(action == TC_ACT_SHOT))
133 goto drop; 134 goto drop;
134 135
135 if (!pskb_may_pull(skb, sizeof(*iph))) 136 noff = skb_network_offset(skb);
137 if (!pskb_may_pull(skb, sizeof(*iph) + noff))
136 goto drop; 138 goto drop;
137 139
138 iph = ip_hdr(skb); 140 iph = ip_hdr(skb);
@@ -144,7 +146,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
144 146
145 if (!((old_addr ^ addr) & mask)) { 147 if (!((old_addr ^ addr) & mask)) {
146 if (skb_cloned(skb) && 148 if (skb_cloned(skb) &&
147 !skb_clone_writable(skb, sizeof(*iph)) && 149 !skb_clone_writable(skb, sizeof(*iph) + noff) &&
148 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 150 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
149 goto drop; 151 goto drop;
150 152
@@ -159,6 +161,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
159 iph->daddr = new_addr; 161 iph->daddr = new_addr;
160 162
161 csum_replace4(&iph->check, addr, new_addr); 163 csum_replace4(&iph->check, addr, new_addr);
164 } else if ((iph->frag_off & htons(IP_OFFSET)) ||
165 iph->protocol != IPPROTO_ICMP) {
166 goto out;
162 } 167 }
163 168
164 ihl = iph->ihl * 4; 169 ihl = iph->ihl * 4;
@@ -169,9 +174,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
169 { 174 {
170 struct tcphdr *tcph; 175 struct tcphdr *tcph;
171 176
172 if (!pskb_may_pull(skb, ihl + sizeof(*tcph)) || 177 if (!pskb_may_pull(skb, ihl + sizeof(*tcph) + noff) ||
173 (skb_cloned(skb) && 178 (skb_cloned(skb) &&
174 !skb_clone_writable(skb, ihl + sizeof(*tcph)) && 179 !skb_clone_writable(skb, ihl + sizeof(*tcph) + noff) &&
175 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 180 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
176 goto drop; 181 goto drop;
177 182
@@ -183,9 +188,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
183 { 188 {
184 struct udphdr *udph; 189 struct udphdr *udph;
185 190
186 if (!pskb_may_pull(skb, ihl + sizeof(*udph)) || 191 if (!pskb_may_pull(skb, ihl + sizeof(*udph) + noff) ||
187 (skb_cloned(skb) && 192 (skb_cloned(skb) &&
188 !skb_clone_writable(skb, ihl + sizeof(*udph)) && 193 !skb_clone_writable(skb, ihl + sizeof(*udph) + noff) &&
189 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) 194 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
190 goto drop; 195 goto drop;
191 196
@@ -202,7 +207,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
202 { 207 {
203 struct icmphdr *icmph; 208 struct icmphdr *icmph;
204 209
205 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph))) 210 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + noff))
206 goto drop; 211 goto drop;
207 212
208 icmph = (void *)(skb_network_header(skb) + ihl); 213 icmph = (void *)(skb_network_header(skb) + ihl);
@@ -212,6 +217,11 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
212 (icmph->type != ICMP_PARAMETERPROB)) 217 (icmph->type != ICMP_PARAMETERPROB))
213 break; 218 break;
214 219
220 if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph) +
221 noff))
222 goto drop;
223
224 icmph = (void *)(skb_network_header(skb) + ihl);
215 iph = (void *)(icmph + 1); 225 iph = (void *)(icmph + 1);
216 if (egress) 226 if (egress)
217 addr = iph->daddr; 227 addr = iph->daddr;
@@ -222,8 +232,8 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
222 break; 232 break;
223 233
224 if (skb_cloned(skb) && 234 if (skb_cloned(skb) &&
225 !skb_clone_writable(skb, 235 !skb_clone_writable(skb, ihl + sizeof(*icmph) +
226 ihl + sizeof(*icmph) + sizeof(*iph)) && 236 sizeof(*iph) + noff) &&
227 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 237 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
228 goto drop; 238 goto drop;
229 239
@@ -240,13 +250,14 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
240 iph->saddr = new_addr; 250 iph->saddr = new_addr;
241 251
242 inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr, 252 inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
243 1); 253 0);
244 break; 254 break;
245 } 255 }
246 default: 256 default:
247 break; 257 break;
248 } 258 }
249 259
260out:
250 return action; 261 return action;
251 262
252drop: 263drop:
@@ -261,40 +272,29 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
261{ 272{
262 unsigned char *b = skb_tail_pointer(skb); 273 unsigned char *b = skb_tail_pointer(skb);
263 struct tcf_nat *p = a->priv; 274 struct tcf_nat *p = a->priv;
264 struct tc_nat *opt; 275 struct tc_nat opt;
265 struct tcf_t t; 276 struct tcf_t t;
266 int s;
267 277
268 s = sizeof(*opt); 278 opt.old_addr = p->old_addr;
279 opt.new_addr = p->new_addr;
280 opt.mask = p->mask;
281 opt.flags = p->flags;
269 282
270 /* netlink spinlocks held above us - must use ATOMIC */ 283 opt.index = p->tcf_index;
271 opt = kzalloc(s, GFP_ATOMIC); 284 opt.action = p->tcf_action;
272 if (unlikely(!opt)) 285 opt.refcnt = p->tcf_refcnt - ref;
273 return -ENOBUFS; 286 opt.bindcnt = p->tcf_bindcnt - bind;
274 287
275 opt->old_addr = p->old_addr; 288 NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt);
276 opt->new_addr = p->new_addr;
277 opt->mask = p->mask;
278 opt->flags = p->flags;
279
280 opt->index = p->tcf_index;
281 opt->action = p->tcf_action;
282 opt->refcnt = p->tcf_refcnt - ref;
283 opt->bindcnt = p->tcf_bindcnt - bind;
284
285 NLA_PUT(skb, TCA_NAT_PARMS, s, opt);
286 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); 289 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
287 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); 290 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
288 t.expires = jiffies_to_clock_t(p->tcf_tm.expires); 291 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
289 NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t); 292 NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);
290 293
291 kfree(opt);
292
293 return skb->len; 294 return skb->len;
294 295
295nla_put_failure: 296nla_put_failure:
296 nlmsg_trim(skb, b); 297 nlmsg_trim(skb, b);
297 kfree(opt);
298 return -1; 298 return -1;
299} 299}
300 300
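
The recurring `+ noff` terms account for skb_network_offset(): pskb_may_pull() and skb_clone_writable() count bytes from skb->data, which on egress paths may still sit at the link-layer header, so validating a network header must include that offset. The pattern, sketched:

        int noff = skb_network_offset(skb);     /* > 0 when skb->data is
                                                   before the IP header */
        struct iphdr *iph;

        if (!pskb_may_pull(skb, sizeof(*iph) + noff))
                goto drop;
        iph = ip_hdr(skb);
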
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b7dcfedc802e..a0593c9640db 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -125,16 +125,15 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
125{ 125{
126 struct tcf_pedit *p = a->priv; 126 struct tcf_pedit *p = a->priv;
127 int i, munged = 0; 127 int i, munged = 0;
128 u8 *pptr; 128 unsigned int off;
129 129
130 if (!(skb->tc_verd & TC_OK2MUNGE)) { 130 if (skb_cloned(skb)) {
131 /* should we set skb->cloned? */
132 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { 131 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
133 return p->tcf_action; 132 return p->tcf_action;
134 } 133 }
135 } 134 }
136 135
137 pptr = skb_network_header(skb); 136 off = skb_network_offset(skb);
138 137
139 spin_lock(&p->tcf_lock); 138 spin_lock(&p->tcf_lock);
140 139
@@ -144,41 +143,46 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
144 struct tc_pedit_key *tkey = p->tcfp_keys; 143 struct tc_pedit_key *tkey = p->tcfp_keys;
145 144
146 for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { 145 for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
147 u32 *ptr; 146 u32 *ptr, _data;
148 int offset = tkey->off; 147 int offset = tkey->off;
149 148
150 if (tkey->offmask) { 149 if (tkey->offmask) {
151 if (skb->len > tkey->at) { 150 char *d, _d;
152 char *j = pptr + tkey->at; 151
153 offset += ((*j & tkey->offmask) >> 152 d = skb_header_pointer(skb, off + tkey->at, 1,
154 tkey->shift); 153 &_d);
155 } else { 154 if (!d)
156 goto bad; 155 goto bad;
157 } 156 offset += (*d & tkey->offmask) >> tkey->shift;
158 } 157 }
159 158
160 if (offset % 4) { 159 if (offset % 4) {
161 printk("offset must be on 32 bit boundaries\n"); 160 pr_info("tc filter pedit"
161 " offset must be on 32 bit boundaries\n");
162 goto bad; 162 goto bad;
163 } 163 }
164 if (offset > 0 && offset > skb->len) { 164 if (offset > 0 && offset > skb->len) {
165 printk("offset %d cant exceed pkt length %d\n", 165 pr_info("tc filter pedit"
166 " offset %d cant exceed pkt length %d\n",
166 offset, skb->len); 167 offset, skb->len);
167 goto bad; 168 goto bad;
168 } 169 }
169 170
170 ptr = (u32 *)(pptr+offset); 171 ptr = skb_header_pointer(skb, off + offset, 4, &_data);
172 if (!ptr)
173 goto bad;
171 /* just do it, baby */ 174 /* just do it, baby */
172 *ptr = ((*ptr & tkey->mask) ^ tkey->val); 175 *ptr = ((*ptr & tkey->mask) ^ tkey->val);
176 if (ptr == &_data)
177 skb_store_bits(skb, off + offset, ptr, 4);
173 munged++; 178 munged++;
174 } 179 }
175 180
176 if (munged) 181 if (munged)
177 skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); 182 skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
178 goto done; 183 goto done;
179 } else { 184 } else
180 printk("pedit BUG: index %d\n", p->tcf_index); 185 WARN(1, "pedit BUG: index %d\n", p->tcf_index);
181 }
182 186
183bad: 187bad:
184 p->tcf_qstats.overlimits++; 188 p->tcf_qstats.overlimits++;
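
The rewritten loop copes with non-linear skbs: skb_header_pointer() hands back either a direct pointer into the linear area or a copy placed in the caller's buffer, and only the copy case needs an explicit write-back. The read-modify-write idiom in isolation (variables as in the hunk):

        u32 *ptr, _data;

        ptr = skb_header_pointer(skb, off + offset, 4, &_data);
        if (ptr == NULL)
                goto bad;
        *ptr = (*ptr & tkey->mask) ^ tkey->val;
        if (ptr == &_data)      /* the bytes were copied out, ... */
                skb_store_bits(skb, off + offset, ptr, 4); /* write back */
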
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 654f73dff7c1..537a48732e9e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -97,6 +97,11 @@ nla_put_failure:
97 goto done; 97 goto done;
98} 98}
99 99
100static void tcf_police_free_rcu(struct rcu_head *head)
101{
102 kfree(container_of(head, struct tcf_police, tcf_rcu));
103}
104
100static void tcf_police_destroy(struct tcf_police *p) 105static void tcf_police_destroy(struct tcf_police *p)
101{ 106{
102 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); 107 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -113,7 +118,11 @@ static void tcf_police_destroy(struct tcf_police *p)
113 qdisc_put_rtab(p->tcfp_R_tab); 118 qdisc_put_rtab(p->tcfp_R_tab);
114 if (p->tcfp_P_tab) 119 if (p->tcfp_P_tab)
115 qdisc_put_rtab(p->tcfp_P_tab); 120 qdisc_put_rtab(p->tcfp_P_tab);
116 kfree(p); 121 /*
122 * gen_estimator est_timer() might access p->tcf_lock
123 * or bstats, wait a RCU grace period before freeing p
124 */
125 call_rcu(&p->tcf_rcu, tcf_police_free_rcu);
117 return; 126 return;
118 } 127 }
119 } 128 }
@@ -397,6 +406,7 @@ static void __exit
397police_cleanup_module(void) 406police_cleanup_module(void)
398{ 407{
399 tcf_unregister_action(&act_police_ops); 408 tcf_unregister_action(&act_police_ops);
409 rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
400} 410}
401 411
402module_init(police_init_module); 412module_init(police_init_module);
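
Since tcf_police_destroy() now frees through call_rcu(), module unload has to drain any queued callbacks before the callback's code is discarded; rcu_barrier(), unlike synchronize_rcu(), waits for pending callbacks to complete. A sketch (unregister_users is hypothetical):

static void __exit example_exit(void)
{
        unregister_users();     /* stop new call_rcu() submissions */
        rcu_barrier();          /* wait for already-queued callbacks */
}
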
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 622ca809c15c..4a1d640b0cf1 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -49,7 +49,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
49 * Example if this was the 3rd packet and the string was "hello" 49 * Example if this was the 3rd packet and the string was "hello"
50 * then it would look like "hello_3" (without quotes) 50 * then it would look like "hello_3" (without quotes)
51 **/ 51 **/
52 printk("simple: %s_%d\n", 52 pr_info("simple: %s_%d\n",
53 (char *)d->tcfd_defdata, d->tcf_bstats.packets); 53 (char *)d->tcfd_defdata, d->tcf_bstats.packets);
54 spin_unlock(&d->tcf_lock); 54 spin_unlock(&d->tcf_lock);
55 return d->tcf_action; 55 return d->tcf_action;
@@ -73,10 +73,10 @@ static int tcf_simp_release(struct tcf_defact *d, int bind)
73 73
74static int alloc_defdata(struct tcf_defact *d, char *defdata) 74static int alloc_defdata(struct tcf_defact *d, char *defdata)
75{ 75{
76 d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL); 76 d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL);
77 if (unlikely(!d->tcfd_defdata)) 77 if (unlikely(!d->tcfd_defdata))
78 return -ENOMEM; 78 return -ENOMEM;
79 79 strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
80 return 0; 80 return 0;
81} 81}
82 82
@@ -205,7 +205,7 @@ static int __init simp_init_module(void)
205{ 205{
206 int ret = tcf_register_action(&act_simp_ops); 206 int ret = tcf_register_action(&act_simp_ops);
207 if (!ret) 207 if (!ret)
208 printk("Simple TC action Loaded\n"); 208 pr_info("Simple TC action Loaded\n");
209 return ret; 209 return ret;
210} 210}
211 211
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f082b27ff46d..5fd0c28ef79a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -99,8 +99,9 @@ out:
99} 99}
100EXPORT_SYMBOL(unregister_tcf_proto_ops); 100EXPORT_SYMBOL(unregister_tcf_proto_ops);
101 101
102static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, 102static int tfilter_notify(struct net *net, struct sk_buff *oskb,
103 struct tcf_proto *tp, unsigned long fh, int event); 103 struct nlmsghdr *n, struct tcf_proto *tp,
104 unsigned long fh, int event);
104 105
105 106
106/* Select new prio value from the range, managed by kernel. */ 107/* Select new prio value from the range, managed by kernel. */
@@ -138,9 +139,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
138 int err; 139 int err;
139 int tp_created = 0; 140 int tp_created = 0;
140 141
141 if (!net_eq(net, &init_net))
142 return -EINVAL;
143
144replay: 142replay:
145 t = NLMSG_DATA(n); 143 t = NLMSG_DATA(n);
146 protocol = TC_H_MIN(t->tcm_info); 144 protocol = TC_H_MIN(t->tcm_info);
@@ -159,7 +157,7 @@ replay:
159 /* Find head of filter chain. */ 157 /* Find head of filter chain. */
160 158
161 /* Find link */ 159 /* Find link */
162 dev = __dev_get_by_index(&init_net, t->tcm_ifindex); 160 dev = __dev_get_by_index(net, t->tcm_ifindex);
163 if (dev == NULL) 161 if (dev == NULL)
164 return -ENODEV; 162 return -ENODEV;
165 163
@@ -283,7 +281,7 @@ replay:
283 *back = tp->next; 281 *back = tp->next;
284 spin_unlock_bh(root_lock); 282 spin_unlock_bh(root_lock);
285 283
286 tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); 284 tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
287 tcf_destroy(tp); 285 tcf_destroy(tp);
288 err = 0; 286 err = 0;
289 goto errout; 287 goto errout;
@@ -306,10 +304,10 @@ replay:
306 case RTM_DELTFILTER: 304 case RTM_DELTFILTER:
307 err = tp->ops->delete(tp, fh); 305 err = tp->ops->delete(tp, fh);
308 if (err == 0) 306 if (err == 0)
309 tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER); 307 tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
310 goto errout; 308 goto errout;
311 case RTM_GETTFILTER: 309 case RTM_GETTFILTER:
312 err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); 310 err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
313 goto errout; 311 goto errout;
314 default: 312 default:
315 err = -EINVAL; 313 err = -EINVAL;
@@ -325,7 +323,7 @@ replay:
325 *back = tp; 323 *back = tp;
326 spin_unlock_bh(root_lock); 324 spin_unlock_bh(root_lock);
327 } 325 }
328 tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER); 326 tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
329 } else { 327 } else {
330 if (tp_created) 328 if (tp_created)
331 tcf_destroy(tp); 329 tcf_destroy(tp);
@@ -371,8 +369,9 @@ nla_put_failure:
371 return -1; 369 return -1;
372} 370}
373 371
374static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, 372static int tfilter_notify(struct net *net, struct sk_buff *oskb,
375 struct tcf_proto *tp, unsigned long fh, int event) 373 struct nlmsghdr *n, struct tcf_proto *tp,
374 unsigned long fh, int event)
376{ 375{
377 struct sk_buff *skb; 376 struct sk_buff *skb;
378 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; 377 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -386,7 +385,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
386 return -EINVAL; 385 return -EINVAL;
387 } 386 }
388 387
389 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, 388 return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
390 n->nlmsg_flags & NLM_F_ECHO); 389 n->nlmsg_flags & NLM_F_ECHO);
391} 390}
392 391
@@ -419,12 +418,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
419 const struct Qdisc_class_ops *cops; 418 const struct Qdisc_class_ops *cops;
420 struct tcf_dump_args arg; 419 struct tcf_dump_args arg;
421 420
422 if (!net_eq(net, &init_net))
423 return 0;
424
425 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 421 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
426 return skb->len; 422 return skb->len;
427 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 423 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
428 return skb->len; 424 return skb->len;
429 425
430 if (!tcm->tcm_parent) 426 if (!tcm->tcm_parent)
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 221180384fd7..78ef2c5e130b 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -16,14 +16,11 @@
16#include <linux/errno.h> 16#include <linux/errno.h>
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/cgroup.h> 18#include <linux/cgroup.h>
19#include <linux/rcupdate.h>
19#include <net/rtnetlink.h> 20#include <net/rtnetlink.h>
20#include <net/pkt_cls.h> 21#include <net/pkt_cls.h>
21 22#include <net/sock.h>
22struct cgroup_cls_state 23#include <net/cls_cgroup.h>
23{
24 struct cgroup_subsys_state css;
25 u32 classid;
26};
27 24
28static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, 25static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
29 struct cgroup *cgrp); 26 struct cgroup *cgrp);
@@ -112,6 +109,10 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
112 struct cls_cgroup_head *head = tp->root; 109 struct cls_cgroup_head *head = tp->root;
113 u32 classid; 110 u32 classid;
114 111
112 rcu_read_lock();
113 classid = task_cls_state(current)->classid;
114 rcu_read_unlock();
115
115 /* 116 /*
116 * Due to the nature of the classifier it is required to ignore all 117 * Due to the nature of the classifier it is required to ignore all
117 * packets originating from softirq context as accessing `current' 118 * packets originating from softirq context as accessing `current'
@@ -122,12 +123,12 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
122 * calls by looking at the number of nested bh disable calls because 123 * calls by looking at the number of nested bh disable calls because
123 * softirqs always disables bh. 124 * softirqs always disables bh.
124 */ 125 */
125 if (softirq_count() != SOFTIRQ_OFFSET) 126 if (softirq_count() != SOFTIRQ_OFFSET) {
126 return -1; 127 /* If there is an sk_classid we'll use that. */
127 128 if (!skb->sk)
128 rcu_read_lock(); 129 return -1;
129 classid = task_cls_state(current)->classid; 130 classid = skb->sk->sk_classid;
130 rcu_read_unlock(); 131 }
131 132
132 if (!classid) 133 if (!classid)
133 return -1; 134 return -1;
@@ -289,18 +290,35 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
289 290
290static int __init init_cgroup_cls(void) 291static int __init init_cgroup_cls(void)
291{ 292{
292 int ret = register_tcf_proto_ops(&cls_cgroup_ops); 293 int ret;
293 if (ret) 294
294 return ret;
295 ret = cgroup_load_subsys(&net_cls_subsys); 295 ret = cgroup_load_subsys(&net_cls_subsys);
296 if (ret) 296 if (ret)
297 unregister_tcf_proto_ops(&cls_cgroup_ops); 297 goto out;
298
299#ifndef CONFIG_NET_CLS_CGROUP
300 /* We can't use rcu_assign_pointer because this is an int. */
301 smp_wmb();
302 net_cls_subsys_id = net_cls_subsys.subsys_id;
303#endif
304
305 ret = register_tcf_proto_ops(&cls_cgroup_ops);
306 if (ret)
307 cgroup_unload_subsys(&net_cls_subsys);
308
309out:
298 return ret; 310 return ret;
299} 311}
300 312
301static void __exit exit_cgroup_cls(void) 313static void __exit exit_cgroup_cls(void)
302{ 314{
303 unregister_tcf_proto_ops(&cls_cgroup_ops); 315 unregister_tcf_proto_ops(&cls_cgroup_ops);
316
317#ifndef CONFIG_NET_CLS_CGROUP
318 net_cls_subsys_id = -1;
319 synchronize_rcu();
320#endif
321
304 cgroup_unload_subsys(&net_cls_subsys); 322 cgroup_unload_subsys(&net_cls_subsys);
305} 323}
306 324
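
In softirq context `current` is just whichever task happened to be interrupted, so the classifier now trusts it only in process context and otherwise falls back to the classid the owning process left on the socket. The decision, sketched with the same softirq test the hunk uses:

        u32 classid;

        rcu_read_lock();
        classid = task_cls_state(current)->classid;
        rcu_read_unlock();

        if (softirq_count() != SOFTIRQ_OFFSET) {
                if (skb->sk == NULL)
                        return -1;              /* nothing trustworthy */
                classid = skb->sk->sk_classid;  /* cached by the sender */
        }
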
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6ed61b10e002..e17096e3913c 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -65,37 +65,47 @@ static inline u32 addr_fold(void *addr)
65 return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0); 65 return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
66} 66}
67 67
68static u32 flow_get_src(const struct sk_buff *skb) 68static u32 flow_get_src(struct sk_buff *skb)
69{ 69{
70 switch (skb->protocol) { 70 switch (skb->protocol) {
71 case htons(ETH_P_IP): 71 case htons(ETH_P_IP):
72 return ntohl(ip_hdr(skb)->saddr); 72 if (pskb_network_may_pull(skb, sizeof(struct iphdr)))
73 return ntohl(ip_hdr(skb)->saddr);
74 break;
73 case htons(ETH_P_IPV6): 75 case htons(ETH_P_IPV6):
74 return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]); 76 if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
75 default: 77 return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
76 return addr_fold(skb->sk); 78 break;
77 } 79 }
80
81 return addr_fold(skb->sk);
78} 82}
79 83
80static u32 flow_get_dst(const struct sk_buff *skb) 84static u32 flow_get_dst(struct sk_buff *skb)
81{ 85{
82 switch (skb->protocol) { 86 switch (skb->protocol) {
83 case htons(ETH_P_IP): 87 case htons(ETH_P_IP):
84 return ntohl(ip_hdr(skb)->daddr); 88 if (pskb_network_may_pull(skb, sizeof(struct iphdr)))
89 return ntohl(ip_hdr(skb)->daddr);
90 break;
85 case htons(ETH_P_IPV6): 91 case htons(ETH_P_IPV6):
86 return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]); 92 if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
87 default: 93 return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
88 return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; 94 break;
89 } 95 }
96
97 return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
90} 98}
91 99
92static u32 flow_get_proto(const struct sk_buff *skb) 100static u32 flow_get_proto(struct sk_buff *skb)
93{ 101{
94 switch (skb->protocol) { 102 switch (skb->protocol) {
95 case htons(ETH_P_IP): 103 case htons(ETH_P_IP):
96 return ip_hdr(skb)->protocol; 104 return pskb_network_may_pull(skb, sizeof(struct iphdr)) ?
105 ip_hdr(skb)->protocol : 0;
97 case htons(ETH_P_IPV6): 106 case htons(ETH_P_IPV6):
98 return ipv6_hdr(skb)->nexthdr; 107 return pskb_network_may_pull(skb, sizeof(struct ipv6hdr)) ?
108 ipv6_hdr(skb)->nexthdr : 0;
99 default: 109 default:
100 return 0; 110 return 0;
101 } 111 }
@@ -116,58 +126,64 @@ static int has_ports(u8 protocol)
116 } 126 }
117} 127}
118 128
119static u32 flow_get_proto_src(const struct sk_buff *skb) 129static u32 flow_get_proto_src(struct sk_buff *skb)
120{ 130{
121 u32 res = 0;
122
123 switch (skb->protocol) { 131 switch (skb->protocol) {
124 case htons(ETH_P_IP): { 132 case htons(ETH_P_IP): {
125 struct iphdr *iph = ip_hdr(skb); 133 struct iphdr *iph;
126 134
135 if (!pskb_network_may_pull(skb, sizeof(*iph)))
136 break;
137 iph = ip_hdr(skb);
127 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 138 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
128 has_ports(iph->protocol)) 139 has_ports(iph->protocol) &&
129 res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); 140 pskb_network_may_pull(skb, iph->ihl * 4 + 2))
141 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
130 break; 142 break;
131 } 143 }
132 case htons(ETH_P_IPV6): { 144 case htons(ETH_P_IPV6): {
133 struct ipv6hdr *iph = ipv6_hdr(skb); 145 struct ipv6hdr *iph;
134 146
147 if (!pskb_network_may_pull(skb, sizeof(*iph) + 2))
148 break;
149 iph = ipv6_hdr(skb);
135 if (has_ports(iph->nexthdr)) 150 if (has_ports(iph->nexthdr))
136 res = ntohs(*(__be16 *)&iph[1]); 151 return ntohs(*(__be16 *)&iph[1]);
137 break; 152 break;
138 } 153 }
139 default:
140 res = addr_fold(skb->sk);
141 } 154 }
142 155
143 return res; 156 return addr_fold(skb->sk);
144} 157}
145 158
146static u32 flow_get_proto_dst(const struct sk_buff *skb) 159static u32 flow_get_proto_dst(struct sk_buff *skb)
147{ 160{
148 u32 res = 0;
149
150 switch (skb->protocol) { 161 switch (skb->protocol) {
151 case htons(ETH_P_IP): { 162 case htons(ETH_P_IP): {
152 struct iphdr *iph = ip_hdr(skb); 163 struct iphdr *iph;
153 164
165 if (!pskb_network_may_pull(skb, sizeof(*iph)))
166 break;
167 iph = ip_hdr(skb);
154 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 168 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
155 has_ports(iph->protocol)) 169 has_ports(iph->protocol) &&
156 res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); 170 pskb_network_may_pull(skb, iph->ihl * 4 + 4))
171 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
157 break; 172 break;
158 } 173 }
159 case htons(ETH_P_IPV6): { 174 case htons(ETH_P_IPV6): {
160 struct ipv6hdr *iph = ipv6_hdr(skb); 175 struct ipv6hdr *iph;
161 176
177 if (!pskb_network_may_pull(skb, sizeof(*iph) + 4))
178 break;
179 iph = ipv6_hdr(skb);
162 if (has_ports(iph->nexthdr)) 180 if (has_ports(iph->nexthdr))
163 res = ntohs(*(__be16 *)((void *)&iph[1] + 2)); 181 return ntohs(*(__be16 *)((void *)&iph[1] + 2));
164 break; 182 break;
165 } 183 }
166 default:
167 res = addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
168 } 184 }
169 185
170 return res; 186 return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
171} 187}
172 188
173static u32 flow_get_iif(const struct sk_buff *skb) 189static u32 flow_get_iif(const struct sk_buff *skb)
@@ -211,7 +227,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
211}) 227})
212#endif 228#endif
213 229
214static u32 flow_get_nfct_src(const struct sk_buff *skb) 230static u32 flow_get_nfct_src(struct sk_buff *skb)
215{ 231{
216 switch (skb->protocol) { 232 switch (skb->protocol) {
217 case htons(ETH_P_IP): 233 case htons(ETH_P_IP):
@@ -223,7 +239,7 @@ fallback:
223 return flow_get_src(skb); 239 return flow_get_src(skb);
224} 240}
225 241
226static u32 flow_get_nfct_dst(const struct sk_buff *skb) 242static u32 flow_get_nfct_dst(struct sk_buff *skb)
227{ 243{
228 switch (skb->protocol) { 244 switch (skb->protocol) {
229 case htons(ETH_P_IP): 245 case htons(ETH_P_IP):
@@ -235,14 +251,14 @@ fallback:
235 return flow_get_dst(skb); 251 return flow_get_dst(skb);
236} 252}
237 253
238static u32 flow_get_nfct_proto_src(const struct sk_buff *skb) 254static u32 flow_get_nfct_proto_src(struct sk_buff *skb)
239{ 255{
240 return ntohs(CTTUPLE(skb, src.u.all)); 256 return ntohs(CTTUPLE(skb, src.u.all));
241fallback: 257fallback:
242 return flow_get_proto_src(skb); 258 return flow_get_proto_src(skb);
243} 259}
244 260
245static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb) 261static u32 flow_get_nfct_proto_dst(struct sk_buff *skb)
246{ 262{
247 return ntohs(CTTUPLE(skb, dst.u.all)); 263 return ntohs(CTTUPLE(skb, dst.u.all));
248fallback: 264fallback:
@@ -281,7 +297,7 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
281 return tag & VLAN_VID_MASK; 297 return tag & VLAN_VID_MASK;
282} 298}
283 299
284static u32 flow_key_get(const struct sk_buff *skb, int key) 300static u32 flow_key_get(struct sk_buff *skb, int key)
285{ 301{
286 switch (key) { 302 switch (key) {
287 case FLOW_KEY_SRC: 303 case FLOW_KEY_SRC:
@@ -602,7 +618,6 @@ static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
602 618
603static void flow_put(struct tcf_proto *tp, unsigned long f) 619static void flow_put(struct tcf_proto *tp, unsigned long f)
604{ 620{
605 return;
606} 621}
607 622
608static int flow_dump(struct tcf_proto *tp, unsigned long fh, 623static int flow_dump(struct tcf_proto *tp, unsigned long fh,
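
Each key extractor now calls pskb_network_may_pull() before touching a header, which linearizes just the bytes it needs past the network header and otherwise falls back to the socket/dst fold rather than reading past the data. One extractor, reduced to its core:

static u32 example_get_src(struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IP) &&
            pskb_network_may_pull(skb, sizeof(struct iphdr)))
                return ntohl(ip_hdr(skb)->saddr);

        return addr_fold(skb->sk);      /* fallback, as in the hunk */
}
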
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index dd9414e44200..425a1790b048 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -143,9 +143,17 @@ static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
143 u8 tunnelid = 0; 143 u8 tunnelid = 0;
144 u8 *xprt; 144 u8 *xprt;
145#if RSVP_DST_LEN == 4 145#if RSVP_DST_LEN == 4
146 struct ipv6hdr *nhptr = ipv6_hdr(skb); 146 struct ipv6hdr *nhptr;
147
148 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
149 return -1;
150 nhptr = ipv6_hdr(skb);
147#else 151#else
148 struct iphdr *nhptr = ip_hdr(skb); 152 struct iphdr *nhptr;
153
154 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
155 return -1;
156 nhptr = ip_hdr(skb);
149#endif 157#endif
150 158
151restart: 159restart:
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 17c5dfc67320..7416a5c73b2a 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -98,11 +98,11 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
98{ 98{
99 struct { 99 struct {
100 struct tc_u_knode *knode; 100 struct tc_u_knode *knode;
101 u8 *ptr; 101 unsigned int off;
102 } stack[TC_U32_MAXDEPTH]; 102 } stack[TC_U32_MAXDEPTH];
103 103
104 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; 104 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
105 u8 *ptr = skb_network_header(skb); 105 unsigned int off = skb_network_offset(skb);
106 struct tc_u_knode *n; 106 struct tc_u_knode *n;
107 int sdepth = 0; 107 int sdepth = 0;
108 int off2 = 0; 108 int off2 = 0;
@@ -134,8 +134,16 @@ next_knode:
134#endif 134#endif
135 135
136 for (i = n->sel.nkeys; i>0; i--, key++) { 136 for (i = n->sel.nkeys; i>0; i--, key++) {
137 int toff = off + key->off + (off2 & key->offmask);
138 __be32 *data, _data;
137 139
138 if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) { 140 if (skb_headroom(skb) + toff < 0)
141 goto out;
142
143 data = skb_header_pointer(skb, toff, 4, &_data);
144 if (!data)
145 goto out;
146 if ((*data ^ key->val) & key->mask) {
139 n = n->next; 147 n = n->next;
140 goto next_knode; 148 goto next_knode;
141 } 149 }
@@ -174,29 +182,45 @@ check_terminal:
174 if (sdepth >= TC_U32_MAXDEPTH) 182 if (sdepth >= TC_U32_MAXDEPTH)
175 goto deadloop; 183 goto deadloop;
176 stack[sdepth].knode = n; 184 stack[sdepth].knode = n;
177 stack[sdepth].ptr = ptr; 185 stack[sdepth].off = off;
178 sdepth++; 186 sdepth++;
179 187
180 ht = n->ht_down; 188 ht = n->ht_down;
181 sel = 0; 189 sel = 0;
182 if (ht->divisor) 190 if (ht->divisor) {
183 sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift); 191 __be32 *data, _data;
184 192
193 data = skb_header_pointer(skb, off + n->sel.hoff, 4,
194 &_data);
195 if (!data)
196 goto out;
197 sel = ht->divisor & u32_hash_fold(*data, &n->sel,
198 n->fshift);
199 }
185 if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) 200 if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
186 goto next_ht; 201 goto next_ht;
187 202
188 if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { 203 if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
189 off2 = n->sel.off + 3; 204 off2 = n->sel.off + 3;
190 if (n->sel.flags&TC_U32_VAROFFSET) 205 if (n->sel.flags & TC_U32_VAROFFSET) {
191 off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift; 206 __be16 *data, _data;
207
208 data = skb_header_pointer(skb,
209 off + n->sel.offoff,
210 2, &_data);
211 if (!data)
212 goto out;
213 off2 += ntohs(n->sel.offmask & *data) >>
214 n->sel.offshift;
215 }
192 off2 &= ~3; 216 off2 &= ~3;
193 } 217 }
194 if (n->sel.flags&TC_U32_EAT) { 218 if (n->sel.flags&TC_U32_EAT) {
195 ptr += off2; 219 off += off2;
196 off2 = 0; 220 off2 = 0;
197 } 221 }
198 222
199 if (ptr < skb_tail_pointer(skb)) 223 if (off < skb->len)
200 goto next_ht; 224 goto next_ht;
201 } 225 }
202 226
@@ -204,14 +228,15 @@ check_terminal:
204 if (sdepth--) { 228 if (sdepth--) {
205 n = stack[sdepth].knode; 229 n = stack[sdepth].knode;
206 ht = n->ht_up; 230 ht = n->ht_up;
207 ptr = stack[sdepth].ptr; 231 off = stack[sdepth].off;
208 goto check_terminal; 232 goto check_terminal;
209 } 233 }
234out:
210 return -1; 235 return -1;
211 236
212deadloop: 237deadloop:
213 if (net_ratelimit()) 238 if (net_ratelimit())
214 printk("cls_u32: dead loop\n"); 239 printk(KERN_WARNING "cls_u32: dead loop\n");
215 return -1; 240 return -1;
216} 241}
217 242
@@ -768,15 +793,15 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
768 793
769static int __init init_u32(void) 794static int __init init_u32(void)
770{ 795{
771 printk("u32 classifier\n"); 796 pr_info("u32 classifier\n");
772#ifdef CONFIG_CLS_U32_PERF 797#ifdef CONFIG_CLS_U32_PERF
773 printk(" Performance counters on\n"); 798 pr_info(" Performance counters on\n");
774#endif 799#endif
775#ifdef CONFIG_NET_CLS_IND 800#ifdef CONFIG_NET_CLS_IND
776 printk(" input device check on \n"); 801 pr_info(" input device check on\n");
777#endif 802#endif
778#ifdef CONFIG_NET_CLS_ACT 803#ifdef CONFIG_NET_CLS_ACT
779 printk(" Actions configured \n"); 804 pr_info(" Actions configured\n");
780#endif 805#endif
781 return register_tcf_proto_ops(&cls_u32_ops); 806 return register_tcf_proto_ops(&cls_u32_ops);
782} 807}
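The u32 rework above replaces cached header pointers with offsets resolved through skb_header_pointer(), which copies the requested bytes into a stack buffer when they live in paged, non-linear skb data and returns NULL when the offset runs past the packet. A minimal sketch of the key-match step, with invented names:

	static bool u32_key_matches(struct sk_buff *skb, unsigned int off,
				    __be32 val, __be32 mask)
	{
		__be32 *data, _data;

		data = skb_header_pointer(skb, off, sizeof(_data), &_data);
		if (!data)
			return false;	/* offset past the end of the packet */
		return ((*data ^ val) & mask) == 0;
	}

Offsets stay meaningful even if skb->data is reallocated between keys; the raw pointer arithmetic being removed did not.
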
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index e782bdeedc58..5e37da961f80 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -527,7 +527,8 @@ pop_stack:
527 527
528stack_overflow: 528stack_overflow:
529 if (net_ratelimit()) 529 if (net_ratelimit())
530 printk("Local stack overflow, increase NET_EMATCH_STACK\n"); 530 printk(KERN_WARNING "tc ematch: local stack overflow,"
531 " increase NET_EMATCH_STACK\n");
531 return -1; 532 return -1;
532} 533}
533EXPORT_SYMBOL(__tcf_em_tree_match); 534EXPORT_SYMBOL(__tcf_em_tree_match);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 145268ca57cf..b9e8c3b7d406 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -35,10 +35,12 @@
35#include <net/netlink.h> 35#include <net/netlink.h>
36#include <net/pkt_sched.h> 36#include <net/pkt_sched.h>
37 37
38static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, 38static int qdisc_notify(struct net *net, struct sk_buff *oskb,
39 struct nlmsghdr *n, u32 clid,
39 struct Qdisc *old, struct Qdisc *new); 40 struct Qdisc *old, struct Qdisc *new);
40static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, 41static int tclass_notify(struct net *net, struct sk_buff *oskb,
41 struct Qdisc *q, unsigned long cl, int event); 42 struct nlmsghdr *n, struct Qdisc *q,
43 unsigned long cl, int event);
42 44
43/* 45/*
44 46
@@ -639,11 +641,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
639} 641}
640EXPORT_SYMBOL(qdisc_tree_decrease_qlen); 642EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
641 643
642static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid, 644static void notify_and_destroy(struct net *net, struct sk_buff *skb,
645 struct nlmsghdr *n, u32 clid,
643 struct Qdisc *old, struct Qdisc *new) 646 struct Qdisc *old, struct Qdisc *new)
644{ 647{
645 if (new || old) 648 if (new || old)
646 qdisc_notify(skb, n, clid, old, new); 649 qdisc_notify(net, skb, n, clid, old, new);
647 650
648 if (old) 651 if (old)
649 qdisc_destroy(old); 652 qdisc_destroy(old);
@@ -663,6 +666,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
663 struct Qdisc *new, struct Qdisc *old) 666 struct Qdisc *new, struct Qdisc *old)
664{ 667{
665 struct Qdisc *q = old; 668 struct Qdisc *q = old;
669 struct net *net = dev_net(dev);
666 int err = 0; 670 int err = 0;
667 671
668 if (parent == NULL) { 672 if (parent == NULL) {
@@ -699,12 +703,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
699 } 703 }
700 704
701 if (!ingress) { 705 if (!ingress) {
702 notify_and_destroy(skb, n, classid, dev->qdisc, new); 706 notify_and_destroy(net, skb, n, classid,
707 dev->qdisc, new);
703 if (new && !new->ops->attach) 708 if (new && !new->ops->attach)
704 atomic_inc(&new->refcnt); 709 atomic_inc(&new->refcnt);
705 dev->qdisc = new ? : &noop_qdisc; 710 dev->qdisc = new ? : &noop_qdisc;
706 } else { 711 } else {
707 notify_and_destroy(skb, n, classid, old, new); 712 notify_and_destroy(net, skb, n, classid, old, new);
708 } 713 }
709 714
710 if (dev->flags & IFF_UP) 715 if (dev->flags & IFF_UP)
@@ -722,7 +727,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
722 err = -ENOENT; 727 err = -ENOENT;
723 } 728 }
724 if (!err) 729 if (!err)
725 notify_and_destroy(skb, n, classid, old, new); 730 notify_and_destroy(net, skb, n, classid, old, new);
726 } 731 }
727 return err; 732 return err;
728} 733}
@@ -948,10 +953,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
948 struct Qdisc *p = NULL; 953 struct Qdisc *p = NULL;
949 int err; 954 int err;
950 955
951 if (!net_eq(net, &init_net)) 956 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
952 return -EINVAL;
953
954 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
955 return -ENODEV; 957 return -ENODEV;
956 958
957 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 959 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -991,7 +993,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
991 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) 993 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
992 return err; 994 return err;
993 } else { 995 } else {
994 qdisc_notify(skb, n, clid, NULL, q); 996 qdisc_notify(net, skb, n, clid, NULL, q);
995 } 997 }
996 return 0; 998 return 0;
997} 999}
@@ -1010,16 +1012,13 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1010 struct Qdisc *q, *p; 1012 struct Qdisc *q, *p;
1011 int err; 1013 int err;
1012 1014
1013 if (!net_eq(net, &init_net))
1014 return -EINVAL;
1015
1016replay: 1015replay:
1017 /* Reinit, just in case something touches this. */ 1016 /* Reinit, just in case something touches this. */
1018 tcm = NLMSG_DATA(n); 1017 tcm = NLMSG_DATA(n);
1019 clid = tcm->tcm_parent; 1018 clid = tcm->tcm_parent;
1020 q = p = NULL; 1019 q = p = NULL;
1021 1020
1022 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 1021 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
1023 return -ENODEV; 1022 return -ENODEV;
1024 1023
1025 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1024 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1106,7 +1105,7 @@ replay:
1106 return -EINVAL; 1105 return -EINVAL;
1107 err = qdisc_change(q, tca); 1106 err = qdisc_change(q, tca);
1108 if (err == 0) 1107 if (err == 0)
1109 qdisc_notify(skb, n, clid, NULL, q); 1108 qdisc_notify(net, skb, n, clid, NULL, q);
1110 return err; 1109 return err;
1111 1110
1112create_n_graft: 1111create_n_graft:
@@ -1196,8 +1195,14 @@ nla_put_failure:
1196 return -1; 1195 return -1;
1197} 1196}
1198 1197
1199static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, 1198static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1200 u32 clid, struct Qdisc *old, struct Qdisc *new) 1199{
1200 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1201}
1202
1203static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1204 struct nlmsghdr *n, u32 clid,
1205 struct Qdisc *old, struct Qdisc *new)
1201{ 1206{
1202 struct sk_buff *skb; 1207 struct sk_buff *skb;
1203 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; 1208 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -1206,28 +1211,23 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1206 if (!skb) 1211 if (!skb)
1207 return -ENOBUFS; 1212 return -ENOBUFS;
1208 1213
1209 if (old && old->handle) { 1214 if (old && !tc_qdisc_dump_ignore(old)) {
1210 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) 1215 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
1211 goto err_out; 1216 goto err_out;
1212 } 1217 }
1213 if (new) { 1218 if (new && !tc_qdisc_dump_ignore(new)) {
1214 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) 1219 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1215 goto err_out; 1220 goto err_out;
1216 } 1221 }
1217 1222
1218 if (skb->len) 1223 if (skb->len)
1219 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1224 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1220 1225
1221err_out: 1226err_out:
1222 kfree_skb(skb); 1227 kfree_skb(skb);
1223 return -EINVAL; 1228 return -EINVAL;
1224} 1229}
1225 1230
1226static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1227{
1228 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1229}
1230
1231static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, 1231static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1232 struct netlink_callback *cb, 1232 struct netlink_callback *cb,
1233 int *q_idx_p, int s_q_idx) 1233 int *q_idx_p, int s_q_idx)
@@ -1275,15 +1275,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1275 int s_idx, s_q_idx; 1275 int s_idx, s_q_idx;
1276 struct net_device *dev; 1276 struct net_device *dev;
1277 1277
1278 if (!net_eq(net, &init_net))
1279 return 0;
1280
1281 s_idx = cb->args[0]; 1278 s_idx = cb->args[0];
1282 s_q_idx = q_idx = cb->args[1]; 1279 s_q_idx = q_idx = cb->args[1];
1283 1280
1284 rcu_read_lock(); 1281 rcu_read_lock();
1285 idx = 0; 1282 idx = 0;
1286 for_each_netdev_rcu(&init_net, dev) { 1283 for_each_netdev_rcu(net, dev) {
1287 struct netdev_queue *dev_queue; 1284 struct netdev_queue *dev_queue;
1288 1285
1289 if (idx < s_idx) 1286 if (idx < s_idx)
@@ -1335,10 +1332,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1335 u32 qid = TC_H_MAJ(clid); 1332 u32 qid = TC_H_MAJ(clid);
1336 int err; 1333 int err;
1337 1334
1338 if (!net_eq(net, &init_net)) 1335 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
1339 return -EINVAL;
1340
1341 if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
1342 return -ENODEV; 1336 return -ENODEV;
1343 1337
1344 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1338 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1419,10 +1413,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1419 if (cops->delete) 1413 if (cops->delete)
1420 err = cops->delete(q, cl); 1414 err = cops->delete(q, cl);
1421 if (err == 0) 1415 if (err == 0)
1422 tclass_notify(skb, n, q, cl, RTM_DELTCLASS); 1416 tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
1423 goto out; 1417 goto out;
1424 case RTM_GETTCLASS: 1418 case RTM_GETTCLASS:
1425 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS); 1419 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1426 goto out; 1420 goto out;
1427 default: 1421 default:
1428 err = -EINVAL; 1422 err = -EINVAL;
@@ -1435,7 +1429,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1435 if (cops->change) 1429 if (cops->change)
1436 err = cops->change(q, clid, pid, tca, &new_cl); 1430 err = cops->change(q, clid, pid, tca, &new_cl);
1437 if (err == 0) 1431 if (err == 0)
1438 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS); 1432 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1439 1433
1440out: 1434out:
1441 if (cl) 1435 if (cl)
@@ -1487,8 +1481,9 @@ nla_put_failure:
1487 return -1; 1481 return -1;
1488} 1482}
1489 1483
1490static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, 1484static int tclass_notify(struct net *net, struct sk_buff *oskb,
1491 struct Qdisc *q, unsigned long cl, int event) 1485 struct nlmsghdr *n, struct Qdisc *q,
1486 unsigned long cl, int event)
1492{ 1487{
1493 struct sk_buff *skb; 1488 struct sk_buff *skb;
1494 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; 1489 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -1502,7 +1497,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1502 return -EINVAL; 1497 return -EINVAL;
1503 } 1498 }
1504 1499
1505 return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1500 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
1506} 1501}
1507 1502
1508struct qdisc_dump_args 1503struct qdisc_dump_args
@@ -1577,12 +1572,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1577 struct net_device *dev; 1572 struct net_device *dev;
1578 int t, s_t; 1573 int t, s_t;
1579 1574
1580 if (!net_eq(net, &init_net))
1581 return 0;
1582
1583 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 1575 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1584 return 0; 1576 return 0;
1585 if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) 1577 if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
1586 return 0; 1578 return 0;
1587 1579
1588 s_t = cb->args[0]; 1580 s_t = cb->args[0];
@@ -1645,9 +1637,12 @@ reclassify:
1645 tp = otp; 1637 tp = otp;
1646 1638
1647 if (verd++ >= MAX_REC_LOOP) { 1639 if (verd++ >= MAX_REC_LOOP) {
1648 printk("rule prio %u protocol %02x reclassify loop, " 1640 if (net_ratelimit())
1649 "packet dropped\n", 1641 printk(KERN_NOTICE
1650 tp->prio&0xffff, ntohs(tp->protocol)); 1642 "%s: packet reclassify loop"
1643 " rule prio %u protocol %02x\n",
1644 tp->q->ops->id,
1645 tp->prio & 0xffff, ntohs(tp->protocol));
1651 return TC_ACT_SHOT; 1646 return TC_ACT_SHOT;
1652 } 1647 }
1653 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); 1648 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
@@ -1692,7 +1687,7 @@ static int psched_show(struct seq_file *seq, void *v)
1692 1687
1693static int psched_open(struct inode *inode, struct file *file) 1688static int psched_open(struct inode *inode, struct file *file)
1694{ 1689{
1695 return single_open(file, psched_show, PDE(inode)->data); 1690 return single_open(file, psched_show, NULL);
1696} 1691}
1697 1692
1698static const struct file_operations psched_fops = { 1693static const struct file_operations psched_fops = {
@@ -1702,15 +1697,53 @@ static const struct file_operations psched_fops = {
1702 .llseek = seq_lseek, 1697 .llseek = seq_lseek,
1703 .release = single_release, 1698 .release = single_release,
1704}; 1699};
1700
1701static int __net_init psched_net_init(struct net *net)
1702{
1703 struct proc_dir_entry *e;
1704
1705 e = proc_net_fops_create(net, "psched", 0, &psched_fops);
1706 if (e == NULL)
1707 return -ENOMEM;
1708
1709 return 0;
1710}
1711
1712static void __net_exit psched_net_exit(struct net *net)
1713{
1714 proc_net_remove(net, "psched");
1715}
1716#else
1717static int __net_init psched_net_init(struct net *net)
1718{
1719 return 0;
1720}
1721
1722static void __net_exit psched_net_exit(struct net *net)
1723{
1724}
1705#endif 1725#endif
1706 1726
1727static struct pernet_operations psched_net_ops = {
1728 .init = psched_net_init,
1729 .exit = psched_net_exit,
1730};
1731
1707static int __init pktsched_init(void) 1732static int __init pktsched_init(void)
1708{ 1733{
1734 int err;
1735
1736 err = register_pernet_subsys(&psched_net_ops);
1737 if (err) {
1738 printk(KERN_ERR "pktsched_init: "
1739 "cannot initialize per netns operations\n");
1740 return err;
1741 }
1742
1709 register_qdisc(&pfifo_qdisc_ops); 1743 register_qdisc(&pfifo_qdisc_ops);
1710 register_qdisc(&bfifo_qdisc_ops); 1744 register_qdisc(&bfifo_qdisc_ops);
1711 register_qdisc(&pfifo_head_drop_qdisc_ops); 1745 register_qdisc(&pfifo_head_drop_qdisc_ops);
1712 register_qdisc(&mq_qdisc_ops); 1746 register_qdisc(&mq_qdisc_ops);
1713 proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
1714 1747
1715 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); 1748 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1716 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); 1749 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
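The sch_api changes above are a netns conversion: every handler is told which struct net it is acting on, device lookups use that namespace instead of init_net, and /proc/net/psched becomes per namespace via pernet_operations. A condensed sketch of the registration pattern, with example_* names standing in for the psched ones and the file_operations assumed defined elsewhere:

	static const struct file_operations example_fops;	/* assumed */

	static int __net_init example_net_init(struct net *net)
	{
		if (!proc_net_fops_create(net, "example", 0, &example_fops))
			return -ENOMEM;
		return 0;
	}

	static void __net_exit example_net_exit(struct net *net)
	{
		proc_net_remove(net, "example");
	}

	static struct pernet_operations example_net_ops = {
		.init = example_net_init,
		.exit = example_net_exit,
	};

	/* in module init: runs example_net_init() once per namespace,
	 * including namespaces created later. */
	err = register_pernet_subsys(&example_net_ops);
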
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index fcbb86a486a2..e114f23d5eae 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -52,7 +52,7 @@ struct atm_flow_data {
52 int ref; /* reference count */ 52 int ref; /* reference count */
53 struct gnet_stats_basic_packed bstats; 53 struct gnet_stats_basic_packed bstats;
54 struct gnet_stats_queue qstats; 54 struct gnet_stats_queue qstats;
55 struct atm_flow_data *next; 55 struct list_head list;
56 struct atm_flow_data *excess; /* flow for excess traffic; 56 struct atm_flow_data *excess; /* flow for excess traffic;
57 NULL to set CLP instead */ 57 NULL to set CLP instead */
58 int hdr_len; 58 int hdr_len;
@@ -61,34 +61,23 @@ struct atm_flow_data {
61 61
62struct atm_qdisc_data { 62struct atm_qdisc_data {
63 struct atm_flow_data link; /* unclassified skbs go here */ 63 struct atm_flow_data link; /* unclassified skbs go here */
64 struct atm_flow_data *flows; /* NB: "link" is also on this 64 struct list_head flows; /* NB: "link" is also on this
65 list */ 65 list */
66 struct tasklet_struct task; /* dequeue tasklet */ 66 struct tasklet_struct task; /* dequeue tasklet */
67}; 67};
68 68
69/* ------------------------- Class/flow operations ------------------------- */ 69/* ------------------------- Class/flow operations ------------------------- */
70 70
71static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow)
72{
73 struct atm_flow_data *walk;
74
75 pr_debug("find_flow(qdisc %p,flow %p)\n", qdisc, flow);
76 for (walk = qdisc->flows; walk; walk = walk->next)
77 if (walk == flow)
78 return 1;
79 pr_debug("find_flow: not found\n");
80 return 0;
81}
82
83static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) 71static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
84{ 72{
85 struct atm_qdisc_data *p = qdisc_priv(sch); 73 struct atm_qdisc_data *p = qdisc_priv(sch);
86 struct atm_flow_data *flow; 74 struct atm_flow_data *flow;
87 75
88 for (flow = p->flows; flow; flow = flow->next) 76 list_for_each_entry(flow, &p->flows, list) {
89 if (flow->classid == classid) 77 if (flow->classid == classid)
90 break; 78 return flow;
91 return flow; 79 }
80 return NULL;
92} 81}
93 82
94static int atm_tc_graft(struct Qdisc *sch, unsigned long arg, 83static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
@@ -99,7 +88,7 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
99 88
100 pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n", 89 pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
101 sch, p, flow, new, old); 90 sch, p, flow, new, old);
102 if (!find_flow(p, flow)) 91 if (list_empty(&flow->list))
103 return -EINVAL; 92 return -EINVAL;
104 if (!new) 93 if (!new)
105 new = &noop_qdisc; 94 new = &noop_qdisc;
@@ -146,20 +135,12 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
146{ 135{
147 struct atm_qdisc_data *p = qdisc_priv(sch); 136 struct atm_qdisc_data *p = qdisc_priv(sch);
148 struct atm_flow_data *flow = (struct atm_flow_data *)cl; 137 struct atm_flow_data *flow = (struct atm_flow_data *)cl;
149 struct atm_flow_data **prev;
150 138
151 pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); 139 pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
152 if (--flow->ref) 140 if (--flow->ref)
153 return; 141 return;
154 pr_debug("atm_tc_put: destroying\n"); 142 pr_debug("atm_tc_put: destroying\n");
155 for (prev = &p->flows; *prev; prev = &(*prev)->next) 143 list_del_init(&flow->list);
156 if (*prev == flow)
157 break;
158 if (!*prev) {
159 printk(KERN_CRIT "atm_tc_put: class %p not found\n", flow);
160 return;
161 }
162 *prev = flow->next;
163 pr_debug("atm_tc_put: qdisc %p\n", flow->q); 144 pr_debug("atm_tc_put: qdisc %p\n", flow->q);
164 qdisc_destroy(flow->q); 145 qdisc_destroy(flow->q);
165 tcf_destroy_chain(&flow->filter_list); 146 tcf_destroy_chain(&flow->filter_list);
@@ -274,7 +255,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
274 error = -EINVAL; 255 error = -EINVAL;
275 goto err_out; 256 goto err_out;
276 } 257 }
277 if (find_flow(p, flow)) { 258 if (!list_empty(&flow->list)) {
278 error = -EEXIST; 259 error = -EEXIST;
279 goto err_out; 260 goto err_out;
280 } 261 }
@@ -313,8 +294,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
313 flow->classid = classid; 294 flow->classid = classid;
314 flow->ref = 1; 295 flow->ref = 1;
315 flow->excess = excess; 296 flow->excess = excess;
316 flow->next = p->link.next; 297 list_add(&flow->list, &p->link.list);
317 p->link.next = flow;
318 flow->hdr_len = hdr_len; 298 flow->hdr_len = hdr_len;
319 if (hdr) 299 if (hdr)
320 memcpy(flow->hdr, hdr, hdr_len); 300 memcpy(flow->hdr, hdr, hdr_len);
@@ -335,7 +315,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
335 struct atm_flow_data *flow = (struct atm_flow_data *)arg; 315 struct atm_flow_data *flow = (struct atm_flow_data *)arg;
336 316
337 pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); 317 pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
338 if (!find_flow(qdisc_priv(sch), flow)) 318 if (list_empty(&flow->list))
339 return -EINVAL; 319 return -EINVAL;
340 if (flow->filter_list || flow == &p->link) 320 if (flow->filter_list || flow == &p->link)
341 return -EBUSY; 321 return -EBUSY;
@@ -361,12 +341,12 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
361 pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker); 341 pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
362 if (walker->stop) 342 if (walker->stop)
363 return; 343 return;
364 for (flow = p->flows; flow; flow = flow->next) { 344 list_for_each_entry(flow, &p->flows, list) {
365 if (walker->count >= walker->skip) 345 if (walker->count >= walker->skip &&
366 if (walker->fn(sch, (unsigned long)flow, walker) < 0) { 346 walker->fn(sch, (unsigned long)flow, walker) < 0) {
367 walker->stop = 1; 347 walker->stop = 1;
368 break; 348 break;
369 } 349 }
370 walker->count++; 350 walker->count++;
371 } 351 }
372} 352}
@@ -385,16 +365,17 @@ static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
385static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) 365static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
386{ 366{
387 struct atm_qdisc_data *p = qdisc_priv(sch); 367 struct atm_qdisc_data *p = qdisc_priv(sch);
388 struct atm_flow_data *flow = NULL; /* @@@ */ 368 struct atm_flow_data *flow;
389 struct tcf_result res; 369 struct tcf_result res;
390 int result; 370 int result;
391 int ret = NET_XMIT_POLICED; 371 int ret = NET_XMIT_POLICED;
392 372
393 pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 373 pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
394 result = TC_POLICE_OK; /* be nice to gcc */ 374 result = TC_POLICE_OK; /* be nice to gcc */
375 flow = NULL;
395 if (TC_H_MAJ(skb->priority) != sch->handle || 376 if (TC_H_MAJ(skb->priority) != sch->handle ||
396 !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) 377 !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
397 for (flow = p->flows; flow; flow = flow->next) 378 list_for_each_entry(flow, &p->flows, list) {
398 if (flow->filter_list) { 379 if (flow->filter_list) {
399 result = tc_classify_compat(skb, 380 result = tc_classify_compat(skb,
400 flow->filter_list, 381 flow->filter_list,
@@ -404,8 +385,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
404 flow = (struct atm_flow_data *)res.class; 385 flow = (struct atm_flow_data *)res.class;
405 if (!flow) 386 if (!flow)
406 flow = lookup_flow(sch, res.classid); 387 flow = lookup_flow(sch, res.classid);
407 break; 388 goto done;
408 } 389 }
390 }
391 flow = NULL;
392 done:
393 ;
394 }
409 if (!flow) 395 if (!flow)
410 flow = &p->link; 396 flow = &p->link;
411 else { 397 else {
@@ -477,7 +463,9 @@ static void sch_atm_dequeue(unsigned long data)
477 struct sk_buff *skb; 463 struct sk_buff *skb;
478 464
479 pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p); 465 pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
480 for (flow = p->link.next; flow; flow = flow->next) 466 list_for_each_entry(flow, &p->flows, list) {
467 if (flow == &p->link)
468 continue;
481 /* 469 /*
482 * If traffic is properly shaped, this won't generate nasty 470 * If traffic is properly shaped, this won't generate nasty
483 * little bursts. Otherwise, it may ... (but that's okay) 471 * little bursts. Otherwise, it may ... (but that's okay)
@@ -512,6 +500,7 @@ static void sch_atm_dequeue(unsigned long data)
512 /* atm.atm_options are already set by atm_tc_enqueue */ 500 /* atm.atm_options are already set by atm_tc_enqueue */
513 flow->vcc->send(flow->vcc, skb); 501 flow->vcc->send(flow->vcc, skb);
514 } 502 }
503 }
515} 504}
516 505
517static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) 506static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
@@ -543,9 +532,10 @@ static unsigned int atm_tc_drop(struct Qdisc *sch)
543 unsigned int len; 532 unsigned int len;
544 533
545 pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p); 534 pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
546 for (flow = p->flows; flow; flow = flow->next) 535 list_for_each_entry(flow, &p->flows, list) {
547 if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q))) 536 if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
548 return len; 537 return len;
538 }
549 return 0; 539 return 0;
550} 540}
551 541
@@ -554,7 +544,9 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
554 struct atm_qdisc_data *p = qdisc_priv(sch); 544 struct atm_qdisc_data *p = qdisc_priv(sch);
555 545
556 pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt); 546 pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
557 p->flows = &p->link; 547 INIT_LIST_HEAD(&p->flows);
548 INIT_LIST_HEAD(&p->link.list);
549 list_add(&p->link.list, &p->flows);
558 p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 550 p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
559 &pfifo_qdisc_ops, sch->handle); 551 &pfifo_qdisc_ops, sch->handle);
560 if (!p->link.q) 552 if (!p->link.q)
@@ -565,7 +557,6 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
565 p->link.sock = NULL; 557 p->link.sock = NULL;
566 p->link.classid = sch->handle; 558 p->link.classid = sch->handle;
567 p->link.ref = 1; 559 p->link.ref = 1;
568 p->link.next = NULL;
569 tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); 560 tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
570 return 0; 561 return 0;
571} 562}
@@ -576,7 +567,7 @@ static void atm_tc_reset(struct Qdisc *sch)
576 struct atm_flow_data *flow; 567 struct atm_flow_data *flow;
577 568
578 pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p); 569 pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
579 for (flow = p->flows; flow; flow = flow->next) 570 list_for_each_entry(flow, &p->flows, list)
580 qdisc_reset(flow->q); 571 qdisc_reset(flow->q);
581 sch->q.qlen = 0; 572 sch->q.qlen = 0;
582} 573}
@@ -584,24 +575,17 @@ static void atm_tc_reset(struct Qdisc *sch)
584static void atm_tc_destroy(struct Qdisc *sch) 575static void atm_tc_destroy(struct Qdisc *sch)
585{ 576{
586 struct atm_qdisc_data *p = qdisc_priv(sch); 577 struct atm_qdisc_data *p = qdisc_priv(sch);
587 struct atm_flow_data *flow; 578 struct atm_flow_data *flow, *tmp;
588 579
589 pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); 580 pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
590 for (flow = p->flows; flow; flow = flow->next) 581 list_for_each_entry(flow, &p->flows, list)
591 tcf_destroy_chain(&flow->filter_list); 582 tcf_destroy_chain(&flow->filter_list);
592 583
593 /* races ? */ 584 list_for_each_entry_safe(flow, tmp, &p->flows, list) {
594 while ((flow = p->flows)) {
595 if (flow->ref > 1) 585 if (flow->ref > 1)
596 printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, 586 printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
597 flow->ref); 587 flow->ref);
598 atm_tc_put(sch, (unsigned long)flow); 588 atm_tc_put(sch, (unsigned long)flow);
599 if (p->flows == flow) {
600 printk(KERN_ERR "atm_destroy: putting flow %p didn't "
601 "kill it\n", flow);
602 p->flows = flow->next; /* brute force */
603 break;
604 }
605 } 589 }
606 tasklet_kill(&p->task); 590 tasklet_kill(&p->task);
607} 591}
@@ -615,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
615 599
616 pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n", 600 pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
617 sch, p, flow, skb, tcm); 601 sch, p, flow, skb, tcm);
618 if (!find_flow(p, flow)) 602 if (list_empty(&flow->list))
619 return -EINVAL; 603 return -EINVAL;
620 tcm->tcm_handle = flow->classid; 604 tcm->tcm_handle = flow->classid;
621 tcm->tcm_info = flow->q->handle; 605 tcm->tcm_info = flow->q->handle;
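The sch_atm conversion above swaps a hand-rolled singly linked flow list for the generic list_head, and it leans on a property worth spelling out: list_del_init() leaves the removed node linked to itself, so list_empty(&node->list) doubles as a constant-time "is this flow registered?" test, replacing the deleted O(n) find_flow() walk. A small sketch with invented types:

	#include <linux/list.h>

	struct flow {
		struct list_head list;
		u32 classid;
	};

	static struct flow *flow_lookup(struct list_head *flows, u32 classid)
	{
		struct flow *f;

		list_for_each_entry(f, flows, list) {
			if (f->classid == classid)
				return f;
		}
		return NULL;
	}

	static void flow_unregister(struct flow *f)
	{
		list_del_init(&f->list);  /* list_empty(&f->list) is now true */
	}
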
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ff4dd53eeff0..2aeb3a4386a1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,6 +26,7 @@
26#include <linux/list.h> 26#include <linux/list.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <net/pkt_sched.h> 28#include <net/pkt_sched.h>
29#include <net/dst.h>
29 30
30/* Main transmission queue. */ 31/* Main transmission queue. */
31 32
@@ -40,6 +41,7 @@
40 41
41static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) 42static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
42{ 43{
44 skb_dst_force(skb);
43 q->gso_skb = skb; 45 q->gso_skb = skb;
44 q->qstats.requeues++; 46 q->qstats.requeues++;
45 q->q.qlen++; /* it's still part of the queue */ 47 q->q.qlen++; /* it's still part of the queue */
@@ -94,7 +96,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
94 * Another cpu is holding lock, requeue & delay xmits for 96 * Another cpu is holding lock, requeue & delay xmits for
95 * some time. 97 * some time.
96 */ 98 */
97 __get_cpu_var(netdev_rx_stat).cpu_collision++; 99 __this_cpu_inc(softnet_data.cpu_collision);
98 ret = dev_requeue_skb(skb, q); 100 ret = dev_requeue_skb(skb, q);
99 } 101 }
100 102
@@ -179,7 +181,7 @@ static inline int qdisc_restart(struct Qdisc *q)
179 skb = dequeue_skb(q); 181 skb = dequeue_skb(q);
180 if (unlikely(!skb)) 182 if (unlikely(!skb))
181 return 0; 183 return 0;
182 184 WARN_ON_ONCE(skb_dst_is_noref(skb));
183 root_lock = qdisc_lock(q); 185 root_lock = qdisc_lock(q);
184 dev = qdisc_dev(q); 186 dev = qdisc_dev(q);
185 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); 187 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
@@ -203,7 +205,7 @@ void __qdisc_run(struct Qdisc *q)
203 } 205 }
204 } 206 }
205 207
206 clear_bit(__QDISC_STATE_RUNNING, &q->state); 208 qdisc_run_end(q);
207} 209}
208 210
209unsigned long dev_trans_start(struct net_device *dev) 211unsigned long dev_trans_start(struct net_device *dev)
@@ -325,6 +327,24 @@ void netif_carrier_off(struct net_device *dev)
325} 327}
326EXPORT_SYMBOL(netif_carrier_off); 328EXPORT_SYMBOL(netif_carrier_off);
327 329
330/**
331 * netif_notify_peers - notify network peers about existence of @dev
332 * @dev: network device
333 *
334 * Generate traffic such that interested network peers are aware of
335 * @dev, such as by generating a gratuitous ARP. This may be used when
336 * a device wants to inform the rest of the network about some sort of
337 * reconfiguration such as a failover event or virtual machine
338 * migration.
339 */
340void netif_notify_peers(struct net_device *dev)
341{
342 rtnl_lock();
343 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
344 rtnl_unlock();
345}
346EXPORT_SYMBOL(netif_notify_peers);
347
328/* "NOOP" scheduler: the best scheduler, recommended for all interfaces 348/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
329 under all circumstances. It is difficult to invent anything faster or 349 under all circumstances. It is difficult to invent anything faster or
330 cheaper. 350 cheaper.
@@ -529,7 +549,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
529 unsigned int size; 549 unsigned int size;
530 int err = -ENOBUFS; 550 int err = -ENOBUFS;
531 551
532 /* ensure that the Qdisc and the private data are 32-byte aligned */ 552 /* ensure that the Qdisc and the private data are 64-byte aligned */
533 size = QDISC_ALIGN(sizeof(*sch)); 553 size = QDISC_ALIGN(sizeof(*sch));
534 size += ops->priv_size + (QDISC_ALIGNTO - 1); 554 size += ops->priv_size + (QDISC_ALIGNTO - 1);
535 555
@@ -541,6 +561,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
541 561
542 INIT_LIST_HEAD(&sch->list); 562 INIT_LIST_HEAD(&sch->list);
543 skb_queue_head_init(&sch->q); 563 skb_queue_head_init(&sch->q);
564 spin_lock_init(&sch->busylock);
544 sch->ops = ops; 565 sch->ops = ops;
545 sch->enqueue = ops->enqueue; 566 sch->enqueue = ops->enqueue;
546 sch->dequeue = ops->dequeue; 567 sch->dequeue = ops->dequeue;
@@ -591,6 +612,13 @@ void qdisc_reset(struct Qdisc *qdisc)
591} 612}
592EXPORT_SYMBOL(qdisc_reset); 613EXPORT_SYMBOL(qdisc_reset);
593 614
615static void qdisc_rcu_free(struct rcu_head *head)
616{
617 struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
618
619 kfree((char *) qdisc - qdisc->padded);
620}
621
594void qdisc_destroy(struct Qdisc *qdisc) 622void qdisc_destroy(struct Qdisc *qdisc)
595{ 623{
596 const struct Qdisc_ops *ops = qdisc->ops; 624 const struct Qdisc_ops *ops = qdisc->ops;
@@ -614,7 +642,11 @@ void qdisc_destroy(struct Qdisc *qdisc)
614 dev_put(qdisc_dev(qdisc)); 642 dev_put(qdisc_dev(qdisc));
615 643
616 kfree_skb(qdisc->gso_skb); 644 kfree_skb(qdisc->gso_skb);
617 kfree((char *) qdisc - qdisc->padded); 645 /*
646 * gen_estimator est_timer() might access qdisc->q.lock,
647 * wait a RCU grace period before freeing qdisc.
648 */
649 call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
618} 650}
619EXPORT_SYMBOL(qdisc_destroy); 651EXPORT_SYMBOL(qdisc_destroy);
620 652
@@ -766,7 +798,7 @@ static bool some_qdisc_is_busy(struct net_device *dev)
766 798
767 spin_lock_bh(root_lock); 799 spin_lock_bh(root_lock);
768 800
769 val = (test_bit(__QDISC_STATE_RUNNING, &q->state) || 801 val = (qdisc_is_running(q) ||
770 test_bit(__QDISC_STATE_SCHED, &q->state)); 802 test_bit(__QDISC_STATE_SCHED, &q->state));
771 803
772 spin_unlock_bh(root_lock); 804 spin_unlock_bh(root_lock);
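The qdisc_destroy() change above is a classic RCU deferred free: the gen_estimator timer may still dereference qdisc->q.lock concurrently, so the final kfree() is postponed by one grace period with call_rcu() instead of happening inline. The shape of the pattern, matching the hunk:

	static void qdisc_rcu_free(struct rcu_head *head)
	{
		struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);

		/* undo the alignment padding applied at allocation time */
		kfree((char *)qdisc - qdisc->padded);
	}

	/* in qdisc_destroy(), replacing the direct kfree(): */
	call_rcu(&qdisc->rcu_head, qdisc_rcu_free);

The rcu_head is embedded in struct Qdisc, so the destroy path needs no extra allocation.
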
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b38b39c60752..abd904be4287 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -617,7 +617,6 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
617 rtsc->y = y; 617 rtsc->y = y;
618 rtsc->dx = dx; 618 rtsc->dx = dx;
619 rtsc->dy = dy; 619 rtsc->dy = dy;
620 return;
621} 620}
622 621
623static void 622static void
@@ -1155,7 +1154,7 @@ static struct hfsc_class *
1155hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) 1154hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1156{ 1155{
1157 struct hfsc_sched *q = qdisc_priv(sch); 1156 struct hfsc_sched *q = qdisc_priv(sch);
1158 struct hfsc_class *cl; 1157 struct hfsc_class *head, *cl;
1159 struct tcf_result res; 1158 struct tcf_result res;
1160 struct tcf_proto *tcf; 1159 struct tcf_proto *tcf;
1161 int result; 1160 int result;
@@ -1166,6 +1165,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1166 return cl; 1165 return cl;
1167 1166
1168 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 1167 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
1168 head = &q->root;
1169 tcf = q->root.filter_list; 1169 tcf = q->root.filter_list;
1170 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { 1170 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
1171#ifdef CONFIG_NET_CLS_ACT 1171#ifdef CONFIG_NET_CLS_ACT
@@ -1180,6 +1180,8 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1180 if ((cl = (struct hfsc_class *)res.class) == NULL) { 1180 if ((cl = (struct hfsc_class *)res.class) == NULL) {
1181 if ((cl = hfsc_find_class(res.classid, sch)) == NULL) 1181 if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
1182 break; /* filter selected invalid classid */ 1182 break; /* filter selected invalid classid */
1183 if (cl->level >= head->level)
1184 break; /* filter may only point downwards */
1183 } 1185 }
1184 1186
1185 if (cl->level == 0) 1187 if (cl->level == 0)
@@ -1187,6 +1189,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1187 1189
1188 /* apply inner filter chain */ 1190 /* apply inner filter chain */
1189 tcf = cl->filter_list; 1191 tcf = cl->filter_list;
1192 head = cl;
1190 } 1193 }
1191 1194
1192 /* classification failed, try default class */ 1195 /* classification failed, try default class */
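The hfsc_classify() fix above prevents a misconfigured filter from creating a classification loop: head remembers the class whose filter chain produced the current result, and any classid the filter resolves must sit strictly below it in the hierarchy. A hedged sketch of the loop as it reads after the patch, with the CONFIG_NET_CLS_ACT handling and declarations abbreviated:

	head = &q->root;
	tcf = q->root.filter_list;
	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
		cl = (struct hfsc_class *)res.class;
		if (cl == NULL) {
			cl = hfsc_find_class(res.classid, sch);
			if (cl == NULL)
				break;	/* filter selected invalid classid */
			if (cl->level >= head->level)
				break;	/* filter may only point downwards */
		}
		if (cl->level == 0)
			return cl;	/* leaf class: classification done */

		tcf = cl->filter_list;	/* apply inner filter chain */
		head = cl;		/* descend before the next round */
	}
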
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 0b52b8de562c..4be8d04b262d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1550,7 +1550,6 @@ static const struct Qdisc_class_ops htb_class_ops = {
1550}; 1550};
1551 1551
1552static struct Qdisc_ops htb_qdisc_ops __read_mostly = { 1552static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1553 .next = NULL,
1554 .cl_ops = &htb_class_ops, 1553 .cl_ops = &htb_class_ops,
1555 .id = "htb", 1554 .id = "htb",
1556 .priv_size = sizeof(struct htb_sched), 1555 .priv_size = sizeof(struct htb_sched),
@@ -1561,7 +1560,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1561 .init = htb_init, 1560 .init = htb_init,
1562 .reset = htb_reset, 1561 .reset = htb_reset,
1563 .destroy = htb_destroy, 1562 .destroy = htb_destroy,
1564 .change = NULL /* htb_change */,
1565 .dump = htb_dump, 1563 .dump = htb_dump,
1566 .owner = THIS_MODULE, 1564 .owner = THIS_MODULE,
1567}; 1565};
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index a9e646bdb605..f10e34a68445 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -44,7 +44,6 @@ static void ingress_put(struct Qdisc *sch, unsigned long cl)
44 44
45static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) 45static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
46{ 46{
47 return;
48} 47}
49 48
50static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl) 49static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index b2aba3f5e6fa..fe91e50f9d98 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -174,7 +174,6 @@ static unsigned long mq_get(struct Qdisc *sch, u32 classid)
174 174
175static void mq_put(struct Qdisc *sch, unsigned long cl) 175static void mq_put(struct Qdisc *sch, unsigned long cl)
176{ 176{
177 return;
178} 177}
179 178
180static int mq_dump_class(struct Qdisc *sch, unsigned long cl, 179static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index c50876cd8704..6ae251279fc2 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -340,7 +340,6 @@ static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
340 340
341static void multiq_put(struct Qdisc *q, unsigned long cl) 341static void multiq_put(struct Qdisc *q, unsigned long cl)
342{ 342{
343 return;
344} 343}
345 344
346static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, 345static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 81672e0c1b25..0748fb1e3a49 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -303,7 +303,6 @@ static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 clas
303 303
304static void prio_put(struct Qdisc *q, unsigned long cl) 304static void prio_put(struct Qdisc *q, unsigned long cl)
305{ 305{
306 return;
307} 306}
308 307
309static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, 308static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 072cdf442f8e..8d42bb3ba540 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -303,7 +303,6 @@ static unsigned long red_get(struct Qdisc *sch, u32 classid)
303 303
304static void red_put(struct Qdisc *sch, unsigned long arg) 304static void red_put(struct Qdisc *sch, unsigned long arg)
305{ 305{
306 return;
307} 306}
308 307
309static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) 308static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index c5a9ac566007..534f33231c17 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -122,35 +122,46 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
122 switch (skb->protocol) { 122 switch (skb->protocol) {
123 case htons(ETH_P_IP): 123 case htons(ETH_P_IP):
124 { 124 {
125 const struct iphdr *iph = ip_hdr(skb); 125 const struct iphdr *iph;
126 h = iph->daddr; 126
127 h2 = iph->saddr ^ iph->protocol; 127 if (!pskb_network_may_pull(skb, sizeof(*iph)))
128 goto err;
129 iph = ip_hdr(skb);
130 h = (__force u32)iph->daddr;
131 h2 = (__force u32)iph->saddr ^ iph->protocol;
128 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 132 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
129 (iph->protocol == IPPROTO_TCP || 133 (iph->protocol == IPPROTO_TCP ||
130 iph->protocol == IPPROTO_UDP || 134 iph->protocol == IPPROTO_UDP ||
131 iph->protocol == IPPROTO_UDPLITE || 135 iph->protocol == IPPROTO_UDPLITE ||
132 iph->protocol == IPPROTO_SCTP || 136 iph->protocol == IPPROTO_SCTP ||
133 iph->protocol == IPPROTO_DCCP || 137 iph->protocol == IPPROTO_DCCP ||
134 iph->protocol == IPPROTO_ESP)) 138 iph->protocol == IPPROTO_ESP) &&
139 pskb_network_may_pull(skb, iph->ihl * 4 + 4))
135 h2 ^= *(((u32*)iph) + iph->ihl); 140 h2 ^= *(((u32*)iph) + iph->ihl);
136 break; 141 break;
137 } 142 }
138 case htons(ETH_P_IPV6): 143 case htons(ETH_P_IPV6):
139 { 144 {
140 struct ipv6hdr *iph = ipv6_hdr(skb); 145 struct ipv6hdr *iph;
141 h = iph->daddr.s6_addr32[3]; 146
142 h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr; 147 if (!pskb_network_may_pull(skb, sizeof(*iph)))
143 if (iph->nexthdr == IPPROTO_TCP || 148 goto err;
144 iph->nexthdr == IPPROTO_UDP || 149 iph = ipv6_hdr(skb);
145 iph->nexthdr == IPPROTO_UDPLITE || 150 h = (__force u32)iph->daddr.s6_addr32[3];
146 iph->nexthdr == IPPROTO_SCTP || 151 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
147 iph->nexthdr == IPPROTO_DCCP || 152 if ((iph->nexthdr == IPPROTO_TCP ||
148 iph->nexthdr == IPPROTO_ESP) 153 iph->nexthdr == IPPROTO_UDP ||
154 iph->nexthdr == IPPROTO_UDPLITE ||
155 iph->nexthdr == IPPROTO_SCTP ||
156 iph->nexthdr == IPPROTO_DCCP ||
157 iph->nexthdr == IPPROTO_ESP) &&
158 pskb_network_may_pull(skb, sizeof(*iph) + 4))
149 h2 ^= *(u32*)&iph[1]; 159 h2 ^= *(u32*)&iph[1];
150 break; 160 break;
151 } 161 }
152 default: 162 default:
153 h = (unsigned long)skb_dst(skb) ^ skb->protocol; 163err:
164 h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
154 h2 = (unsigned long)skb->sk; 165 h2 = (unsigned long)skb->sk;
155 } 166 }
156 167
@@ -502,6 +513,12 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
502 return 0; 513 return 0;
503} 514}
504 515
516static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
517 u32 classid)
518{
519 return 0;
520}
521
505static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl) 522static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
506{ 523{
507 struct sfq_sched_data *q = qdisc_priv(sch); 524 struct sfq_sched_data *q = qdisc_priv(sch);
@@ -556,6 +573,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
556static const struct Qdisc_class_ops sfq_class_ops = { 573static const struct Qdisc_class_ops sfq_class_ops = {
557 .get = sfq_get, 574 .get = sfq_get,
558 .tcf_chain = sfq_find_tcf, 575 .tcf_chain = sfq_find_tcf,
576 .bind_tcf = sfq_bind,
559 .dump = sfq_dump_class, 577 .dump = sfq_dump_class,
560 .dump_stats = sfq_dump_class_stats, 578 .dump_stats = sfq_dump_class_stats,
561 .walk = sfq_walk, 579 .walk = sfq_walk,
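The sfq_hash() hardening above follows the same rule as the rsvp and u32 changes: validate before you read. A consolidated sketch of the IPv4 branch, with the transport-protocol whitelist elided for brevity and the header length cached locally so the header can safely be re-read after the second pull:

	static u32 sfq_hash_ipv4(struct sk_buff *skb)
	{
		const struct iphdr *iph;
		u32 h, h2, ihl;

		if (!pskb_network_may_pull(skb, sizeof(*iph)))
			return 0;		/* the err: path in the original */
		iph = ip_hdr(skb);
		ihl = iph->ihl * 4;
		h  = (__force u32)iph->daddr;
		h2 = (__force u32)iph->saddr ^ iph->protocol;
		/* fold in the ports only for unfragmented packets whose
		 * transport header bytes are actually present */
		if (!(iph->frag_off & htons(IP_MF | IP_OFFSET)) &&
		    pskb_network_may_pull(skb, ihl + 4)) {
			iph = ip_hdr(skb);	/* data may have moved */
			h2 ^= *((const u32 *)iph + ihl / 4);
		}
		return h ^ h2;			/* final fold step omitted */
	}
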
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 8fb8107ab188..0991c640cd3e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -273,7 +273,11 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
273 if (max_size < 0) 273 if (max_size < 0)
274 goto done; 274 goto done;
275 275
276 if (qopt->limit > 0) { 276 if (q->qdisc != &noop_qdisc) {
277 err = fifo_set_limit(q->qdisc, qopt->limit);
278 if (err)
279 goto done;
280 } else if (qopt->limit > 0) {
277 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); 281 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
278 if (IS_ERR(child)) { 282 if (IS_ERR(child)) {
279 err = PTR_ERR(child); 283 err = PTR_ERR(child);
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 3415b6ce1c0a..807643bdcbac 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -449,6 +449,7 @@ static __init void teql_master_setup(struct net_device *dev)
449 dev->tx_queue_len = 100; 449 dev->tx_queue_len = 100;
450 dev->flags = IFF_NOARP; 450 dev->flags = IFF_NOARP;
451 dev->hard_header_len = LL_MAX_HEADER; 451 dev->hard_header_len = LL_MAX_HEADER;
452 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
452} 453}
453 454
454static LIST_HEAD(master_dev_list); 455static LIST_HEAD(master_dev_list);
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 58b3e882a187..126b014eb79b 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -37,6 +37,18 @@ menuconfig IP_SCTP
37 37
38if IP_SCTP 38if IP_SCTP
39 39
40config NET_SCTPPROBE
41 tristate "SCTP: Association probing"
42 depends on PROC_FS && KPROBES
43 ---help---
44 This module allows for capturing the changes to SCTP association
45 state in response to incoming packets. It is used for debugging
46 SCTP congestion control algorithms. If you don't understand
47 what was just said, you don't need it: say N.
48
49 To compile this code as a module, choose M here: the
50 module will be called sctp_probe.
51
40config SCTP_DBG_MSG 52config SCTP_DBG_MSG
41 bool "SCTP: Debug messages" 53 bool "SCTP: Debug messages"
42 help 54 help
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 6b794734380a..5c30b7a873df 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -3,6 +3,7 @@
3# 3#
4 4
5obj-$(CONFIG_IP_SCTP) += sctp.o 5obj-$(CONFIG_IP_SCTP) += sctp.o
6obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o
6 7
7sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ 8sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
8 protocol.o endpointola.o associola.o \ 9 protocol.o endpointola.o associola.o \
@@ -11,6 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
11 tsnmap.o bind_addr.o socket.o primitive.o \ 12 tsnmap.o bind_addr.o socket.o primitive.o \
12 output.o input.o debug.o ssnmap.o auth.o 13 output.o input.o debug.o ssnmap.o auth.o
13 14
15sctp_probe-y := probe.o
16
14sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o 17sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
15sctp-$(CONFIG_PROC_FS) += proc.o 18sctp-$(CONFIG_PROC_FS) += proc.o
16sctp-$(CONFIG_SYSCTL) += sysctl.o 19sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 99c93ee98ad9..0b85e5256434 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -87,9 +87,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
87 /* Retrieve the SCTP per socket area. */ 87 /* Retrieve the SCTP per socket area. */
88 sp = sctp_sk((struct sock *)sk); 88 sp = sctp_sk((struct sock *)sk);
89 89
90 /* Init all variables to a known value. */
91 memset(asoc, 0, sizeof(struct sctp_association));
92
93 /* Discarding const is appropriate here. */ 90 /* Discarding const is appropriate here. */
94 asoc->ep = (struct sctp_endpoint *)ep; 91 asoc->ep = (struct sctp_endpoint *)ep;
95 sctp_endpoint_hold(asoc->ep); 92 sctp_endpoint_hold(asoc->ep);
@@ -175,7 +172,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
175 asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = 172 asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] =
176 (unsigned long)sp->autoclose * HZ; 173 (unsigned long)sp->autoclose * HZ;
177 174
178 /* Initilizes the timers */ 175 /* Initializes the timers */
179 for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) 176 for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i)
180 setup_timer(&asoc->timers[i], sctp_timer_events[i], 177 setup_timer(&asoc->timers[i], sctp_timer_events[i],
181 (unsigned long)asoc); 178 (unsigned long)asoc);
@@ -762,7 +759,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
762 asoc->peer.retran_path = peer; 759 asoc->peer.retran_path = peer;
763 } 760 }
764 761
765 if (asoc->peer.active_path == asoc->peer.retran_path) { 762 if (asoc->peer.active_path == asoc->peer.retran_path &&
763 peer->state != SCTP_UNCONFIRMED) {
766 asoc->peer.retran_path = peer; 764 asoc->peer.retran_path = peer;
767 } 765 }
768 766
@@ -818,8 +816,6 @@ void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
818 if (t != primary) 816 if (t != primary)
819 sctp_assoc_rm_peer(asoc, t); 817 sctp_assoc_rm_peer(asoc, t);
820 } 818 }
821
822 return;
823} 819}
824 820
825/* Engage in transport control operations. 821/* Engage in transport control operations.
@@ -1320,12 +1316,13 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
1320 /* Keep track of the next transport in case 1316 /* Keep track of the next transport in case
1321 * we don't find any active transport. 1317 * we don't find any active transport.
1322 */ 1318 */
1323 if (!next) 1319 if (t->state != SCTP_UNCONFIRMED && !next)
1324 next = t; 1320 next = t;
1325 } 1321 }
1326 } 1322 }
1327 1323
1328 asoc->peer.retran_path = t; 1324 if (t)
1325 asoc->peer.retran_path = t;
1329 1326
1330 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" 1327 SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
1331 " %p addr: ", 1328 " %p addr: ",
@@ -1485,7 +1482,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len)
1485 if (asoc->rwnd >= len) { 1482 if (asoc->rwnd >= len) {
1486 asoc->rwnd -= len; 1483 asoc->rwnd -= len;
1487 if (over) { 1484 if (over) {
1488 asoc->rwnd_press = asoc->rwnd; 1485 asoc->rwnd_press += asoc->rwnd;
1489 asoc->rwnd = 0; 1486 asoc->rwnd = 0;
1490 } 1487 }
1491 } else { 1488 } else {
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3eab6db59a37..476caaf100ed 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -58,9 +58,9 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
58 msg->send_failed = 0; 58 msg->send_failed = 0;
59 msg->send_error = 0; 59 msg->send_error = 0;
60 msg->can_abandon = 0; 60 msg->can_abandon = 0;
61 msg->can_delay = 1;
61 msg->expires_at = 0; 62 msg->expires_at = 0;
62 INIT_LIST_HEAD(&msg->chunks); 63 INIT_LIST_HEAD(&msg->chunks);
63 msg->msg_size = 0;
64} 64}
65 65
66/* Allocate and initialize datamsg. */ 66/* Allocate and initialize datamsg. */
@@ -157,7 +157,6 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu
157{ 157{
158 sctp_datamsg_hold(msg); 158 sctp_datamsg_hold(msg);
159 chunk->msg = msg; 159 chunk->msg = msg;
160 msg->msg_size += chunk->skb->len;
161} 160}
162 161
163 162
@@ -247,6 +246,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
247 if (msg_len >= first_len) { 246 if (msg_len >= first_len) {
248 msg_len -= first_len; 247 msg_len -= first_len;
249 whole = 1; 248 whole = 1;
249 msg->can_delay = 0;
250 } 250 }
251 251
252 /* How many full sized? How many bytes leftover? */ 252 /* How many full sized? How many bytes leftover? */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 7ec09ba03a1c..e10acc01c75f 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -70,8 +70,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
70 struct sctp_shared_key *null_key; 70 struct sctp_shared_key *null_key;
71 int err; 71 int err;
72 72
73 memset(ep, 0, sizeof(struct sctp_endpoint));
74
75 ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); 73 ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
76 if (!ep->digest) 74 if (!ep->digest)
77 return NULL; 75 return NULL;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 2a570184e5a9..ea2192444ce6 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -440,11 +440,25 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
440{ 440{
441 SCTP_DEBUG_PRINTK("%s\n", __func__); 441 SCTP_DEBUG_PRINTK("%s\n", __func__);
442 442
443 sctp_do_sm(SCTP_EVENT_T_OTHER, 443 if (sock_owned_by_user(sk)) {
444 SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH), 444 if (timer_pending(&t->proto_unreach_timer))
445 asoc->state, asoc->ep, asoc, t, 445 return;
446 GFP_ATOMIC); 446 else {
447 if (!mod_timer(&t->proto_unreach_timer,
448 jiffies + (HZ/20)))
449 sctp_association_hold(asoc);
450 }
451
452 } else {
453 if (timer_pending(&t->proto_unreach_timer) &&
454 del_timer(&t->proto_unreach_timer))
455 sctp_association_put(asoc);
447 456
457 sctp_do_sm(SCTP_EVENT_T_OTHER,
458 SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
459 asoc->state, asoc->ep, asoc, t,
460 GFP_ATOMIC);
461 }
448} 462}
449 463
450/* Common lookup code for icmp/icmpv6 error handler. */ 464/* Common lookup code for icmp/icmpv6 error handler. */
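The sctp_icmp_proto_unreachable() change above is a lock-deferral pattern: the ICMP error arrives in softirq context, and if a process currently owns the socket lock the state machine must not run; instead a short timer (HZ/20, roughly 50 ms) is armed, with a reference pinning the association until the timer handler fires. A condensed sketch, logically equivalent to the hunk:

	if (sock_owned_by_user(sk)) {
		/* mod_timer() returns 0 when the timer was inactive, so the
		 * reference is taken exactly once per pending timer */
		if (!timer_pending(&t->proto_unreach_timer) &&
		    !mod_timer(&t->proto_unreach_timer, jiffies + HZ / 20))
			sctp_association_hold(asoc);
	} else {
		/* lock is free: cancel any pending deferral, drop its
		 * reference, and run sctp_do_sm() directly as shown above */
		if (del_timer(&t->proto_unreach_timer))
			sctp_association_put(asoc);
	}
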
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 9fb5d37c37ad..732689140fb8 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -232,7 +232,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
232 if (!(transport->param_flags & SPP_PMTUD_ENABLE)) 232 if (!(transport->param_flags & SPP_PMTUD_ENABLE))
233 skb->local_df = 1; 233 skb->local_df = 1;
234 234
235 return ip6_xmit(sk, skb, &fl, np->opt, 0); 235 return ip6_xmit(sk, skb, &fl, np->opt);
236} 236}
237 237
238/* Returns the dst cache entry for the given source and destination ip 238/* Returns the dst cache entry for the given source and destination ip
@@ -277,20 +277,7 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
277static inline int sctp_v6_addr_match_len(union sctp_addr *s1, 277static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
278 union sctp_addr *s2) 278 union sctp_addr *s2)
279{ 279{
280 struct in6_addr *a1 = &s1->v6.sin6_addr; 280 return ipv6_addr_diff(&s1->v6.sin6_addr, &s2->v6.sin6_addr);
281 struct in6_addr *a2 = &s2->v6.sin6_addr;
282 int i, j;
283
284 for (i = 0; i < 4 ; i++) {
285 __be32 a1xora2;
286
287 a1xora2 = a1->s6_addr32[i] ^ a2->s6_addr32[i];
288
289 if ((j = fls(ntohl(a1xora2))))
290 return (i * 32 + 32 - j);
291 }
292
293 return (i*32);
294} 281}
295 282
296/* Fills in the source address(saddr) based on the destination address(daddr) 283/* Fills in the source address(saddr) based on the destination address(daddr)
@@ -372,13 +359,13 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
372 } 359 }
373 360
374 read_lock_bh(&in6_dev->lock); 361 read_lock_bh(&in6_dev->lock);
375 for (ifp = in6_dev->addr_list; ifp; ifp = ifp->if_next) { 362 list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
376 /* Add the address to the local list. */ 363 /* Add the address to the local list. */
377 addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); 364 addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC);
378 if (addr) { 365 if (addr) {
379 addr->a.v6.sin6_family = AF_INET6; 366 addr->a.v6.sin6_family = AF_INET6;
380 addr->a.v6.sin6_port = 0; 367 addr->a.v6.sin6_port = 0;
381 addr->a.v6.sin6_addr = ifp->addr; 368 ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifp->addr);
382 addr->a.v6.sin6_scope_id = dev->ifindex; 369 addr->a.v6.sin6_scope_id = dev->ifindex;
383 addr->valid = 1; 370 addr->valid = 1;
384 INIT_LIST_HEAD(&addr->list); 371 INIT_LIST_HEAD(&addr->list);
@@ -419,7 +406,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
419{ 406{
420 addr->v6.sin6_family = AF_INET6; 407 addr->v6.sin6_family = AF_INET6;
421 addr->v6.sin6_port = 0; 408 addr->v6.sin6_port = 0;
422 addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr; 409 ipv6_addr_copy(&addr->v6.sin6_addr, &inet6_sk(sk)->rcv_saddr);
423} 410}
424 411
425/* Initialize sk->sk_rcv_saddr from sctp_addr. */ 412/* Initialize sk->sk_rcv_saddr from sctp_addr. */
@@ -432,7 +419,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
432 inet6_sk(sk)->rcv_saddr.s6_addr32[3] = 419 inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
433 addr->v4.sin_addr.s_addr; 420 addr->v4.sin_addr.s_addr;
434 } else { 421 } else {
435 inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr; 422 ipv6_addr_copy(&inet6_sk(sk)->rcv_saddr, &addr->v6.sin6_addr);
436 } 423 }
437} 424}
438 425
@@ -445,7 +432,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
445 inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff); 432 inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
446 inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr; 433 inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
447 } else { 434 } else {
448 inet6_sk(sk)->daddr = addr->v6.sin6_addr; 435 ipv6_addr_copy(&inet6_sk(sk)->daddr, &addr->v6.sin6_addr);
449 } 436 }
450} 437}
451 438
diff --git a/net/sctp/output.c b/net/sctp/output.c
index fad261d41ec2..a646681f5acd 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -429,24 +429,17 @@ int sctp_packet_transmit(struct sctp_packet *packet)
429 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { 429 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
430 list_del_init(&chunk->list); 430 list_del_init(&chunk->list);
431 if (sctp_chunk_is_data(chunk)) { 431 if (sctp_chunk_is_data(chunk)) {
432 /* 6.3.1 C4) When data is in flight and when allowed
433 * by rule C5, a new RTT measurement MUST be made each
434 * round trip. Furthermore, new RTT measurements
435 * SHOULD be made no more than once per round-trip
436 * for a given destination transport address.
437 */
432 438
433 if (!chunk->resent) { 439 if (!tp->rto_pending) {
434 440 chunk->rtt_in_progress = 1;
435 /* 6.3.1 C4) When data is in flight and when allowed 441 tp->rto_pending = 1;
436 * by rule C5, a new RTT measurement MUST be made each
437 * round trip. Furthermore, new RTT measurements
438 * SHOULD be made no more than once per round-trip
439 * for a given destination transport address.
440 */
441
442 if (!tp->rto_pending) {
443 chunk->rtt_in_progress = 1;
444 tp->rto_pending = 1;
445 }
446 } 442 }
447
448 chunk->resent = 1;
449
450 has_data = 1; 443 has_data = 1;
451 } 444 }
452 445
@@ -681,7 +674,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
681 * Don't delay large message writes that may have been 674 * Don't delay large message writes that may have been
682 * fragmented into small pieces. 675 * fragmented into small pieces.
683 */ 676 */
684 if ((len < max) && (chunk->msg->msg_size < max)) { 677 if ((len < max) && chunk->msg->can_delay) {
685 retval = SCTP_XMIT_NAGLE_DELAY; 678 retval = SCTP_XMIT_NAGLE_DELAY;
686 goto finish; 679 goto finish;
687 } 680 }
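Two simplifications land in this file. RTT sampling no longer consults the removed chunk->resent flag: a transport starts at most one measurement per round trip, gated solely on tp->rto_pending (RFC 2960 6.3.1 C4/C5). And the Nagle-style delay now keys on the new sctp_datamsg->can_delay bit, cleared once a message is fragmented per the first hunk of this section, rather than re-deriving it from msg_size. A hedged sketch of both predicates:

/* (a) begin an RTT sample only if none is pending on this transport */
if (sctp_chunk_is_data(chunk) && !tp->rto_pending) {
        chunk->rtt_in_progress = 1;     /* time until this chunk is acked */
        tp->rto_pending = 1;
}

/* (b) delay only small writes from messages that were never fragmented */
if (len < max && chunk->msg->can_delay)
        retval = SCTP_XMIT_NAGLE_DELAY;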
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index abfc0b8dee74..c04b2eb59186 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -62,7 +62,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
62 struct list_head *transmitted_queue, 62 struct list_head *transmitted_queue,
63 struct sctp_transport *transport, 63 struct sctp_transport *transport,
64 struct sctp_sackhdr *sack, 64 struct sctp_sackhdr *sack,
65 __u32 highest_new_tsn); 65 __u32 *highest_new_tsn);
66 66
67static void sctp_mark_missing(struct sctp_outq *q, 67static void sctp_mark_missing(struct sctp_outq *q,
68 struct list_head *transmitted_queue, 68 struct list_head *transmitted_queue,
@@ -80,7 +80,6 @@ static inline void sctp_outq_head_data(struct sctp_outq *q,
80{ 80{
81 list_add(&ch->list, &q->out_chunk_list); 81 list_add(&ch->list, &q->out_chunk_list);
82 q->out_qlen += ch->skb->len; 82 q->out_qlen += ch->skb->len;
83 return;
84} 83}
85 84
86/* Take data from the front of the queue. */ 85/* Take data from the front of the queue. */
@@ -103,7 +102,6 @@ static inline void sctp_outq_tail_data(struct sctp_outq *q,
103{ 102{
104 list_add_tail(&ch->list, &q->out_chunk_list); 103 list_add_tail(&ch->list, &q->out_chunk_list);
105 q->out_qlen += ch->skb->len; 104 q->out_qlen += ch->skb->len;
106 return;
107} 105}
108 106
109/* 107/*
@@ -308,7 +306,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
308 /* If it is data, queue it up, otherwise, send it 306 /* If it is data, queue it up, otherwise, send it
309 * immediately. 307 * immediately.
310 */ 308 */
311 if (SCTP_CID_DATA == chunk->chunk_hdr->type) { 309 if (sctp_chunk_is_data(chunk)) {
312 /* Is it OK to queue data chunks? */ 310 /* Is it OK to queue data chunks? */
313 /* From 9. Termination of Association 311 /* From 9. Termination of Association
314 * 312 *
@@ -598,11 +596,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
598 if (fast_rtx && !chunk->fast_retransmit) 596 if (fast_rtx && !chunk->fast_retransmit)
599 continue; 597 continue;
600 598
599redo:
601 /* Attempt to append this chunk to the packet. */ 600 /* Attempt to append this chunk to the packet. */
602 status = sctp_packet_append_chunk(pkt, chunk); 601 status = sctp_packet_append_chunk(pkt, chunk);
603 602
604 switch (status) { 603 switch (status) {
605 case SCTP_XMIT_PMTU_FULL: 604 case SCTP_XMIT_PMTU_FULL:
605 if (!pkt->has_data && !pkt->has_cookie_echo) {
606 /* If this packet did not contain DATA then
607 * retransmission did not happen, so do it
608 * again. We'll ignore the error here since
609 * control chunks are already freed so there
610 * is nothing we can do.
611 */
612 sctp_packet_transmit(pkt);
613 goto redo;
614 }
615
606 /* Send this packet. */ 616 /* Send this packet. */
607 error = sctp_packet_transmit(pkt); 617 error = sctp_packet_transmit(pkt);
608 618
@@ -647,14 +657,6 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
647 if (chunk->fast_retransmit == SCTP_NEED_FRTX) 657 if (chunk->fast_retransmit == SCTP_NEED_FRTX)
648 chunk->fast_retransmit = SCTP_DONT_FRTX; 658 chunk->fast_retransmit = SCTP_DONT_FRTX;
649 659
650 /* Force start T3-rtx timer when fast retransmitting
651 * the earliest outstanding TSN
652 */
653 if (!timer && fast_rtx &&
654 ntohl(chunk->subh.data_hdr->tsn) ==
655 asoc->ctsn_ack_point + 1)
656 timer = 2;
657
658 q->empty = 0; 660 q->empty = 0;
659 break; 661 break;
660 } 662 }
@@ -854,6 +856,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
854 if (status != SCTP_XMIT_OK) { 856 if (status != SCTP_XMIT_OK) {
855 /* put the chunk back */ 857 /* put the chunk back */
856 list_add(&chunk->list, &q->control_chunk_list); 858 list_add(&chunk->list, &q->control_chunk_list);
859 } else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) {
860 /* PR-SCTP C5) If a FORWARD TSN is sent, the
861 * sender MUST assure that at least one T3-rtx
862 * timer is running.
863 */
864 sctp_transport_reset_timers(transport);
857 } 865 }
858 break; 866 break;
859 867
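The new FWD_TSN branch implements PR-SCTP (RFC 3758) rule C5: whenever a FORWARD TSN is sent, at least one T3-rtx timer must be left running. With the old force parameter gone, the helper arms the timer only when idle. A hedged sketch (the heartbeat-timer update the real function also performs is omitted here; see the net/sctp/transport.c hunk below):

void sctp_transport_reset_timers(struct sctp_transport *t)
{
        /* RFC 2960 6.3.2 R1: start T3-rtx only if it is not running */
        if (!timer_pending(&t->T3_rtx_timer) &&
            !mod_timer(&t->T3_rtx_timer, jiffies + t->rto))
                sctp_transport_hold(t); /* timer holds a transport ref */
}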
@@ -906,8 +914,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
906 rtx_timeout, &start_timer); 914 rtx_timeout, &start_timer);
907 915
908 if (start_timer) 916 if (start_timer)
909 sctp_transport_reset_timers(transport, 917 sctp_transport_reset_timers(transport);
910 start_timer-1);
911 918
912 /* This can happen on COOKIE-ECHO resend. Only 919 /* This can happen on COOKIE-ECHO resend. Only
913 * one chunk can get bundled with a COOKIE-ECHO. 920 * one chunk can get bundled with a COOKIE-ECHO.
@@ -1040,7 +1047,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
1040 list_add_tail(&chunk->transmitted_list, 1047 list_add_tail(&chunk->transmitted_list,
1041 &transport->transmitted); 1048 &transport->transmitted);
1042 1049
1043 sctp_transport_reset_timers(transport, 0); 1050 sctp_transport_reset_timers(transport);
1044 1051
1045 q->empty = 0; 1052 q->empty = 0;
1046 1053
@@ -1100,32 +1107,6 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc,
1100 assoc->unack_data = unack_data; 1107 assoc->unack_data = unack_data;
1101} 1108}
1102 1109
1103/* Return the highest new tsn that is acknowledged by the given SACK chunk. */
1104static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack,
1105 struct sctp_association *asoc)
1106{
1107 struct sctp_transport *transport;
1108 struct sctp_chunk *chunk;
1109 __u32 highest_new_tsn, tsn;
1110 struct list_head *transport_list = &asoc->peer.transport_addr_list;
1111
1112 highest_new_tsn = ntohl(sack->cum_tsn_ack);
1113
1114 list_for_each_entry(transport, transport_list, transports) {
1115 list_for_each_entry(chunk, &transport->transmitted,
1116 transmitted_list) {
1117 tsn = ntohl(chunk->subh.data_hdr->tsn);
1118
1119 if (!chunk->tsn_gap_acked &&
1120 TSN_lt(highest_new_tsn, tsn) &&
1121 sctp_acked(sack, tsn))
1122 highest_new_tsn = tsn;
1123 }
1124 }
1125
1126 return highest_new_tsn;
1127}
1128
1129/* This is where we REALLY process a SACK. 1110/* This is where we REALLY process a SACK.
1130 * 1111 *
1131 * Process the SACK against the outqueue. Mostly, this just frees 1112 * Process the SACK against the outqueue. Mostly, this just frees
@@ -1145,6 +1126,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
1145 struct sctp_transport *primary = asoc->peer.primary_path; 1126 struct sctp_transport *primary = asoc->peer.primary_path;
1146 int count_of_newacks = 0; 1127 int count_of_newacks = 0;
1147 int gap_ack_blocks; 1128 int gap_ack_blocks;
1129 u8 accum_moved = 0;
1148 1130
1149 /* Grab the association's destination address list. */ 1131 /* Grab the association's destination address list. */
1150 transport_list = &asoc->peer.transport_addr_list; 1132 transport_list = &asoc->peer.transport_addr_list;
@@ -1193,18 +1175,15 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
1193 if (gap_ack_blocks) 1175 if (gap_ack_blocks)
1194 highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end); 1176 highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end);
1195 1177
1196 if (TSN_lt(asoc->highest_sacked, highest_tsn)) { 1178 if (TSN_lt(asoc->highest_sacked, highest_tsn))
1197 highest_new_tsn = highest_tsn;
1198 asoc->highest_sacked = highest_tsn; 1179 asoc->highest_sacked = highest_tsn;
1199 } else {
1200 highest_new_tsn = sctp_highest_new_tsn(sack, asoc);
1201 }
1202 1180
1181 highest_new_tsn = sack_ctsn;
1203 1182
1204 /* Run through the retransmit queue. Credit bytes received 1183 /* Run through the retransmit queue. Credit bytes received
1205 * and free those chunks that we can. 1184 * and free those chunks that we can.
1206 */ 1185 */
1207 sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn); 1186 sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn);
1208 1187
1209 /* Run through the transmitted queue. 1188 /* Run through the transmitted queue.
1210 * Credit bytes received and free those chunks which we can. 1189 * Credit bytes received and free those chunks which we can.
@@ -1213,7 +1192,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
1213 */ 1192 */
1214 list_for_each_entry(transport, transport_list, transports) { 1193 list_for_each_entry(transport, transport_list, transports) {
1215 sctp_check_transmitted(q, &transport->transmitted, 1194 sctp_check_transmitted(q, &transport->transmitted,
1216 transport, sack, highest_new_tsn); 1195 transport, sack, &highest_new_tsn);
1217 /* 1196 /*
1218 * SFR-CACC algorithm: 1197 * SFR-CACC algorithm:
1219 * C) Let count_of_newacks be the number of 1198 * C) Let count_of_newacks be the number of
@@ -1223,16 +1202,22 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
1223 count_of_newacks ++; 1202 count_of_newacks ++;
1224 } 1203 }
1225 1204
1205 /* Move the Cumulative TSN Ack Point if appropriate. */
1206 if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) {
1207 asoc->ctsn_ack_point = sack_ctsn;
1208 accum_moved = 1;
1209 }
1210
1226 if (gap_ack_blocks) { 1211 if (gap_ack_blocks) {
1212
1213 if (asoc->fast_recovery && accum_moved)
1214 highest_new_tsn = highest_tsn;
1215
1227 list_for_each_entry(transport, transport_list, transports) 1216 list_for_each_entry(transport, transport_list, transports)
1228 sctp_mark_missing(q, &transport->transmitted, transport, 1217 sctp_mark_missing(q, &transport->transmitted, transport,
1229 highest_new_tsn, count_of_newacks); 1218 highest_new_tsn, count_of_newacks);
1230 } 1219 }
1231 1220
1232 /* Move the Cumulative TSN Ack Point if appropriate. */
1233 if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn))
1234 asoc->ctsn_ack_point = sack_ctsn;
1235
1236 /* Update unack_data field in the assoc. */ 1221 /* Update unack_data field in the assoc. */
1237 sctp_sack_update_unack_data(asoc, sack); 1222 sctp_sack_update_unack_data(asoc, sack);
1238 1223
@@ -1315,7 +1300,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1315 struct list_head *transmitted_queue, 1300 struct list_head *transmitted_queue,
1316 struct sctp_transport *transport, 1301 struct sctp_transport *transport,
1317 struct sctp_sackhdr *sack, 1302 struct sctp_sackhdr *sack,
1318 __u32 highest_new_tsn_in_sack) 1303 __u32 *highest_new_tsn_in_sack)
1319{ 1304{
1320 struct list_head *lchunk; 1305 struct list_head *lchunk;
1321 struct sctp_chunk *tchunk; 1306 struct sctp_chunk *tchunk;
@@ -1387,7 +1372,6 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1387 * instance). 1372 * instance).
1388 */ 1373 */
1389 if (!tchunk->tsn_gap_acked && 1374 if (!tchunk->tsn_gap_acked &&
1390 !tchunk->resent &&
1391 tchunk->rtt_in_progress) { 1375 tchunk->rtt_in_progress) {
1392 tchunk->rtt_in_progress = 0; 1376 tchunk->rtt_in_progress = 0;
1393 rtt = jiffies - tchunk->sent_at; 1377 rtt = jiffies - tchunk->sent_at;
@@ -1404,6 +1388,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1404 */ 1388 */
1405 if (!tchunk->tsn_gap_acked) { 1389 if (!tchunk->tsn_gap_acked) {
1406 tchunk->tsn_gap_acked = 1; 1390 tchunk->tsn_gap_acked = 1;
1391 *highest_new_tsn_in_sack = tsn;
1407 bytes_acked += sctp_data_size(tchunk); 1392 bytes_acked += sctp_data_size(tchunk);
1408 if (!tchunk->transport) 1393 if (!tchunk->transport)
1409 migrate_bytes += sctp_data_size(tchunk); 1394 migrate_bytes += sctp_data_size(tchunk);
@@ -1677,7 +1662,8 @@ static void sctp_mark_missing(struct sctp_outq *q,
1677 struct sctp_chunk *chunk; 1662 struct sctp_chunk *chunk;
1678 __u32 tsn; 1663 __u32 tsn;
1679 char do_fast_retransmit = 0; 1664 char do_fast_retransmit = 0;
1680 struct sctp_transport *primary = q->asoc->peer.primary_path; 1665 struct sctp_association *asoc = q->asoc;
1666 struct sctp_transport *primary = asoc->peer.primary_path;
1681 1667
1682 list_for_each_entry(chunk, transmitted_queue, transmitted_list) { 1668 list_for_each_entry(chunk, transmitted_queue, transmitted_list) {
1683 1669
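The SACK path loses its dedicated pre-scan: the removed sctp_highest_new_tsn() walked every transmitted list once just to find the highest newly acked TSN, and sctp_check_transmitted() now reports it through its new __u32 * out-parameter during the walk it already performs. When the cumulative ack advances during fast recovery and gap blocks are present, highest_new_tsn falls back to the highest sacked TSN, matching the HTNA handling of RFC 4960 7.2.4. A hedged sketch of the accumulation:

/* the caller seeds the running value with the cumulative TSN ack */
__u32 highest_new_tsn = sack_ctsn;

/* inside sctp_check_transmitted(), per newly gap-acked chunk: */
if (!tchunk->tsn_gap_acked) {
        tchunk->tsn_gap_acked = 1;
        *highest_new_tsn_in_sack = tsn;
        bytes_acked += sctp_data_size(tchunk);
}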
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
new file mode 100644
index 000000000000..db3a42b8b349
--- /dev/null
+++ b/net/sctp/probe.c
@@ -0,0 +1,214 @@
1/*
2 * sctp_probe - Observe the SCTP flow with kprobes.
3 *
4 * The idea for this came from Werner Almesberger's umlsim
5 * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
6 *
7 * Modified for SCTP from Stephen Hemminger's code
8 * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25#include <linux/kernel.h>
26#include <linux/kprobes.h>
27#include <linux/socket.h>
28#include <linux/sctp.h>
29#include <linux/proc_fs.h>
30#include <linux/vmalloc.h>
31#include <linux/module.h>
32#include <linux/kfifo.h>
33#include <linux/time.h>
34#include <net/net_namespace.h>
35
36#include <net/sctp/sctp.h>
37#include <net/sctp/sm.h>
38
39MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>");
40MODULE_DESCRIPTION("SCTP snooper");
41MODULE_LICENSE("GPL");
42
43static int port __read_mostly = 0;
44MODULE_PARM_DESC(port, "Port to match (0=all)");
45module_param(port, int, 0);
46
47static int bufsize __read_mostly = 64 * 1024;
48MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
49module_param(bufsize, int, 0);
50
51static int full __read_mostly = 1;
52MODULE_PARM_DESC(full, "Full log (1=every ack packet received, 0=only cwnd changes)");
53module_param(full, int, 0);
54
55static const char procname[] = "sctpprobe";
56
57static struct {
58 struct kfifo fifo;
59 spinlock_t lock;
60 wait_queue_head_t wait;
61 struct timespec tstart;
62} sctpw;
63
64static void printl(const char *fmt, ...)
65{
66 va_list args;
67 int len;
68 char tbuf[256];
69
70 va_start(args, fmt);
71 len = vscnprintf(tbuf, sizeof(tbuf), fmt, args);
72 va_end(args);
73
74 kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
75 wake_up(&sctpw.wait);
76}
77
78static int sctpprobe_open(struct inode *inode, struct file *file)
79{
80 kfifo_reset(&sctpw.fifo);
81 getnstimeofday(&sctpw.tstart);
82
83 return 0;
84}
85
86static ssize_t sctpprobe_read(struct file *file, char __user *buf,
87 size_t len, loff_t *ppos)
88{
89 int error = 0, cnt = 0;
90 unsigned char *tbuf;
91
92 if (!buf)
93 return -EINVAL;
94
95 if (len == 0)
96 return 0;
97
98 tbuf = vmalloc(len);
99 if (!tbuf)
100 return -ENOMEM;
101
102 error = wait_event_interruptible(sctpw.wait,
103 kfifo_len(&sctpw.fifo) != 0);
104 if (error)
105 goto out_free;
106
107 cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
108 error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
109
110out_free:
111 vfree(tbuf);
112
113 return error ? error : cnt;
114}
115
116static const struct file_operations sctpprobe_fops = {
117 .owner = THIS_MODULE,
118 .open = sctpprobe_open,
119 .read = sctpprobe_read,
120};
121
122sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep,
123 const struct sctp_association *asoc,
124 const sctp_subtype_t type,
125 void *arg,
126 sctp_cmd_seq_t *commands)
127{
128 struct sctp_transport *sp;
129 static __u32 lcwnd = 0;
130 struct timespec now;
131
132 sp = asoc->peer.primary_path;
133
134 if ((full || sp->cwnd != lcwnd) &&
135 (!port || asoc->peer.port == port ||
136 ep->base.bind_addr.port == port)) {
137 lcwnd = sp->cwnd;
138
139 getnstimeofday(&now);
140 now = timespec_sub(now, sctpw.tstart);
141
142 printl("%lu.%06lu ", (unsigned long) now.tv_sec,
143 (unsigned long) now.tv_nsec / NSEC_PER_USEC);
144
145 printl("%p %5d %5d %5d %8d %5d ", asoc,
146 ep->base.bind_addr.port, asoc->peer.port,
147 asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data);
148
149 list_for_each_entry(sp, &asoc->peer.transport_addr_list,
150 transports) {
151 if (sp == asoc->peer.primary_path)
152 printl("*");
153
154 if (sp->ipaddr.sa.sa_family == AF_INET)
155 printl("%pI4 ", &sp->ipaddr.v4.sin_addr);
156 else
157 printl("%pI6 ", &sp->ipaddr.v6.sin6_addr);
158
159 printl("%2u %8u %8u %8u %8u %8u ",
160 sp->state, sp->cwnd, sp->ssthresh,
161 sp->flight_size, sp->partial_bytes_acked,
162 sp->pathmtu);
163 }
164 printl("\n");
165 }
166
167 jprobe_return();
168 return 0;
169}
170
171static struct jprobe sctp_recv_probe = {
172 .kp = {
173 .symbol_name = "sctp_sf_eat_sack_6_2",
174 },
175 .entry = jsctp_sf_eat_sack,
176};
177
178static __init int sctpprobe_init(void)
179{
180 int ret = -ENOMEM;
181
182 init_waitqueue_head(&sctpw.wait);
183 spin_lock_init(&sctpw.lock);
184 if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL))
185 return ret;
186
187 if (!proc_net_fops_create(&init_net, procname, S_IRUSR,
188 &sctpprobe_fops))
189 goto free_kfifo;
190
191 ret = register_jprobe(&sctp_recv_probe);
192 if (ret)
193 goto remove_proc;
194
195 pr_info("SCTP probe registered (port=%d)\n", port);
196
197 return 0;
198
199remove_proc:
200 proc_net_remove(&init_net, procname);
201free_kfifo:
202 kfifo_free(&sctpw.fifo);
203 return ret;
204}
205
206static __exit void sctpprobe_exit(void)
207{
208 kfifo_free(&sctpw.fifo);
209 proc_net_remove(&init_net, procname);
210 unregister_jprobe(&sctp_recv_probe);
211}
212
213module_init(sctpprobe_init);
214module_exit(sctpprobe_exit);
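The file above is entirely new: it attaches a jprobe to sctp_sf_eat_sack_6_2(), logs one line per received SACK (or only on cwnd changes when full=0) into a kfifo, and exposes the stream as /proc/net/sctpprobe. A hypothetical userspace consumer, purely illustrative:

/* Hypothetical reader for the probe output; blocks in sctpprobe_read()
 * until the kfifo has data.  Build as an ordinary userspace program.
 */
#include <stdio.h>

int main(void)
{
        char line[512];
        FILE *f = fopen("/proc/net/sctpprobe", "r");

        if (!f) {
                perror("sctpprobe");
                return 1;
        }
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);    /* timestamp, assoc, cwnd fields */
        fclose(f);
        return 0;
}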
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 784bcc9a979d..61aacfbbaa92 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -181,7 +181,6 @@ static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
181 181
182static void sctp_eps_seq_stop(struct seq_file *seq, void *v) 182static void sctp_eps_seq_stop(struct seq_file *seq, void *v)
183{ 183{
184 return;
185} 184}
186 185
187 186
@@ -286,7 +285,6 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
286 285
287static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) 286static void sctp_assocs_seq_stop(struct seq_file *seq, void *v)
288{ 287{
289 return;
290} 288}
291 289
292 290
@@ -409,7 +407,6 @@ static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos)
409 407
410static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v) 408static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v)
411{ 409{
412 return;
413} 410}
414 411
415static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) 412static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a56f98e82f92..5027b83f1cc0 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -474,19 +474,23 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
474 474
475 memset(&fl, 0x0, sizeof(struct flowi)); 475 memset(&fl, 0x0, sizeof(struct flowi));
476 fl.fl4_dst = daddr->v4.sin_addr.s_addr; 476 fl.fl4_dst = daddr->v4.sin_addr.s_addr;
477 fl.fl_ip_dport = daddr->v4.sin_port;
477 fl.proto = IPPROTO_SCTP; 478 fl.proto = IPPROTO_SCTP;
478 if (asoc) { 479 if (asoc) {
479 fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk); 480 fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk);
480 fl.oif = asoc->base.sk->sk_bound_dev_if; 481 fl.oif = asoc->base.sk->sk_bound_dev_if;
482 fl.fl_ip_sport = htons(asoc->base.bind_addr.port);
481 } 483 }
482 if (saddr) 484 if (saddr) {
483 fl.fl4_src = saddr->v4.sin_addr.s_addr; 485 fl.fl4_src = saddr->v4.sin_addr.s_addr;
486 fl.fl_ip_sport = saddr->v4.sin_port;
487 }
484 488
485 SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", 489 SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
486 __func__, &fl.fl4_dst, &fl.fl4_src); 490 __func__, &fl.fl4_dst, &fl.fl4_src);
487 491
488 if (!ip_route_output_key(&init_net, &rt, &fl)) { 492 if (!ip_route_output_key(&init_net, &rt, &fl)) {
489 dst = &rt->u.dst; 493 dst = &rt->dst;
490 } 494 }
491 495
492 /* If there is no association or if a source address is passed, no 496 /* If there is no association or if a source address is passed, no
@@ -528,8 +532,9 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
528 if ((laddr->state == SCTP_ADDR_SRC) && 532 if ((laddr->state == SCTP_ADDR_SRC) &&
529 (AF_INET == laddr->a.sa.sa_family)) { 533 (AF_INET == laddr->a.sa.sa_family)) {
530 fl.fl4_src = laddr->a.v4.sin_addr.s_addr; 534 fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
535 fl.fl_ip_sport = laddr->a.v4.sin_port;
531 if (!ip_route_output_key(&init_net, &rt, &fl)) { 536 if (!ip_route_output_key(&init_net, &rt, &fl)) {
532 dst = &rt->u.dst; 537 dst = &rt->dst;
533 goto out_unlock; 538 goto out_unlock;
534 } 539 }
535 } 540 }
@@ -854,7 +859,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
854 IP_PMTUDISC_DO : IP_PMTUDISC_DONT; 859 IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
855 860
856 SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); 861 SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
857 return ip_queue_xmit(skb, 0); 862 return ip_queue_xmit(skb);
858} 863}
859 864
860static struct sctp_af sctp_af_inet; 865static struct sctp_af sctp_af_inet;
@@ -997,7 +1002,8 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
997static inline int init_sctp_mibs(void) 1002static inline int init_sctp_mibs(void)
998{ 1003{
999 return snmp_mib_init((void __percpu **)sctp_statistics, 1004 return snmp_mib_init((void __percpu **)sctp_statistics,
1000 sizeof(struct sctp_mib)); 1005 sizeof(struct sctp_mib),
1006 __alignof__(struct sctp_mib));
1001} 1007}
1002 1008
1003static inline void cleanup_sctp_mibs(void) 1009static inline void cleanup_sctp_mibs(void)
@@ -1157,7 +1163,7 @@ SCTP_STATIC __init int sctp_init(void)
1157 /* Set the pressure threshold to be a fraction of global memory that 1163 /* Set the pressure threshold to be a fraction of global memory that
1158 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of 1164 * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
1159 * memory, with a floor of 128 pages. 1165 * memory, with a floor of 128 pages.
1160 * Note this initalizes the data in sctpv6_prot too 1166 * Note this initializes the data in sctpv6_prot too
1161 * Unabashedly stolen from tcp_init 1167 * Unabashedly stolen from tcp_init
1162 */ 1168 */
1163 nr_pages = totalram_pages - totalhigh_pages; 1169 nr_pages = totalram_pages - totalhigh_pages;
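The routing hunks widen the IPv4 flow key: sctp_v4_get_dst() now seeds struct flowi with the SCTP source and destination ports alongside the addresses, so ip_route_output_key() can honor port-sensitive policy routing and multipath hashing; rt->u.dst becoming rt->dst tracks the rtable layout change made elsewhere in this series. A hedged sketch of the fully populated key (field names as in the hunk):

struct flowi fl;

memset(&fl, 0, sizeof(fl));
fl.proto       = IPPROTO_SCTP;
fl.fl4_dst     = daddr->v4.sin_addr.s_addr;   /* peer address */
fl.fl_ip_dport = daddr->v4.sin_port;          /* peer port */
fl.fl4_src     = saddr->v4.sin_addr.s_addr;   /* chosen local address */
fl.fl_ip_sport = saddr->v4.sin_port;          /* local port */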
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 30c1767186b8..246f92924658 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -141,7 +141,7 @@ int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
141 len = sizeof(sctp_errhdr_t) + paylen; 141 len = sizeof(sctp_errhdr_t) + paylen;
142 err.length = htons(len); 142 err.length = htons(len);
143 143
144 if (skb_tailroom(chunk->skb) > len) 144 if (skb_tailroom(chunk->skb) < len)
145 return -ENOSPC; 145 return -ENOSPC;
146 chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, 146 chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
147 sizeof(sctp_errhdr_t), 147 sizeof(sctp_errhdr_t),
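The comparison flip above is the whole fix: the old guard returned -ENOSPC exactly when the skb did have room. A worked example:

/* With len = 16 and skb_tailroom(chunk->skb) = 32:
 *   before: if (32 > 16) return -ENOSPC;  rejected despite ample room
 *   after:  if (32 < 16) return -ENOSPC;  falls through and appends
 * The companion sctp_addto_chunk_fixed() change below uses >=, so an
 * exact fit (tailroom == len) is accepted as well.
 */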
@@ -445,10 +445,17 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
445 if (!retval) 445 if (!retval)
446 goto nomem_chunk; 446 goto nomem_chunk;
447 447
448 /* Per the advice in RFC 2960 6.4, send this reply to 448 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
449 * the source of the INIT packet. 449 *
450 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
451 * HEARTBEAT ACK, * etc.) to the same destination transport
452 * address from which it received the DATA or control chunk
453 * to which it is replying.
454 *
455 * [INIT ACK back to where the INIT came from.]
450 */ 456 */
451 retval->transport = chunk->transport; 457 retval->transport = chunk->transport;
458
452 retval->subh.init_hdr = 459 retval->subh.init_hdr =
453 sctp_addto_chunk(retval, sizeof(initack), &initack); 460 sctp_addto_chunk(retval, sizeof(initack), &initack);
454 retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v); 461 retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v);
@@ -487,18 +494,6 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
487 /* We need to remove the const qualifier at this point. */ 494 /* We need to remove the const qualifier at this point. */
488 retval->asoc = (struct sctp_association *) asoc; 495 retval->asoc = (struct sctp_association *) asoc;
489 496
490 /* RFC 2960 6.4 Multi-homed SCTP Endpoints
491 *
492 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
493 * HEARTBEAT ACK, * etc.) to the same destination transport
494 * address from which it received the DATA or control chunk
495 * to which it is replying.
496 *
497 * [INIT ACK back to where the INIT came from.]
498 */
499 if (chunk)
500 retval->transport = chunk->transport;
501
502nomem_chunk: 497nomem_chunk:
503 kfree(cookie); 498 kfree(cookie);
504nomem_cookie: 499nomem_cookie:
@@ -1254,7 +1249,6 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
1254 INIT_LIST_HEAD(&retval->list); 1249 INIT_LIST_HEAD(&retval->list);
1255 retval->skb = skb; 1250 retval->skb = skb;
1256 retval->asoc = (struct sctp_association *)asoc; 1251 retval->asoc = (struct sctp_association *)asoc;
1257 retval->resent = 0;
1258 retval->has_tsn = 0; 1252 retval->has_tsn = 0;
1259 retval->has_ssn = 0; 1253 retval->has_ssn = 0;
1260 retval->rtt_in_progress = 0; 1254 retval->rtt_in_progress = 0;
@@ -1421,7 +1415,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
1421void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk, 1415void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
1422 int len, const void *data) 1416 int len, const void *data)
1423{ 1417{
1424 if (skb_tailroom(chunk->skb) > len) 1418 if (skb_tailroom(chunk->skb) >= len)
1425 return sctp_addto_chunk(chunk, len, data); 1419 return sctp_addto_chunk(chunk, len, data);
1426 else 1420 else
1427 return NULL; 1421 return NULL;
@@ -1823,7 +1817,7 @@ malformed:
1823struct __sctp_missing { 1817struct __sctp_missing {
1824 __be32 num_missing; 1818 __be32 num_missing;
1825 __be16 type; 1819 __be16 type;
1826} __attribute__((packed)); 1820} __packed;
1827 1821
1828/* 1822/*
1829 * Report a missing mandatory parameter. 1823 * Report a missing mandatory parameter.
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index d5ae450b6f02..f5e5e27cac5e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -397,6 +397,41 @@ out_unlock:
397 sctp_transport_put(transport); 397 sctp_transport_put(transport);
398} 398}
399 399
400/* Handle the timeout of the ICMP protocol unreachable timer. Trigger
401 * the correct state machine transition that will close the association.
402 */
403void sctp_generate_proto_unreach_event(unsigned long data)
404{
405 struct sctp_transport *transport = (struct sctp_transport *) data;
406 struct sctp_association *asoc = transport->asoc;
407
408 sctp_bh_lock_sock(asoc->base.sk);
409 if (sock_owned_by_user(asoc->base.sk)) {
410 SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__);
411
412 /* Try again later. */
413 if (!mod_timer(&transport->proto_unreach_timer,
414 jiffies + (HZ/20)))
415 sctp_association_hold(asoc);
416 goto out_unlock;
417 }
418
419 /* Is this structure just waiting around for us to actually
420 * get destroyed?
421 */
422 if (asoc->base.dead)
423 goto out_unlock;
424
425 sctp_do_sm(SCTP_EVENT_T_OTHER,
426 SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
427 asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC);
428
429out_unlock:
430 sctp_bh_unlock_sock(asoc->base.sk);
431 sctp_association_put(asoc);
432}
433
434
400/* Inject a SACK Timeout event into the state machine. */ 435/* Inject a SACK Timeout event into the state machine. */
401static void sctp_generate_sack_event(unsigned long data) 436static void sctp_generate_sack_event(unsigned long data)
402{ 437{
@@ -697,11 +732,15 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
697{ 732{
698 struct sctp_transport *t; 733 struct sctp_transport *t;
699 734
700 t = sctp_assoc_choose_alter_transport(asoc, 735 if (chunk->transport)
736 t = chunk->transport;
737 else {
738 t = sctp_assoc_choose_alter_transport(asoc,
701 asoc->shutdown_last_sent_to); 739 asoc->shutdown_last_sent_to);
740 chunk->transport = t;
741 }
702 asoc->shutdown_last_sent_to = t; 742 asoc->shutdown_last_sent_to = t;
703 asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto; 743 asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
704 chunk->transport = t;
705} 744}
706 745
707/* Helper function to change the state of an association. */ 746/* Helper function to change the state of an association. */
@@ -853,8 +892,6 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
853 sctp_walk_fwdtsn(skip, chunk) { 892 sctp_walk_fwdtsn(skip, chunk) {
854 sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn)); 893 sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
855 } 894 }
856
857 return;
858} 895}
859 896
860/* Helper function to remove the association non-primary peer 897/* Helper function to remove the association non-primary peer
@@ -873,8 +910,6 @@ static void sctp_cmd_del_non_primary(struct sctp_association *asoc)
873 sctp_assoc_del_peer(asoc, &t->ipaddr); 910 sctp_assoc_del_peer(asoc, &t->ipaddr);
874 } 911 }
875 } 912 }
876
877 return;
878} 913}
879 914
880/* Helper function to set sk_err on a 1-1 style socket. */ 915/* Helper function to set sk_err on a 1-1 style socket. */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 44a1ab03a3f0..ca44917872d2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3720,9 +3720,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
3720 3720
3721 SCTP_DBG_OBJCNT_INC(sock); 3721 SCTP_DBG_OBJCNT_INC(sock);
3722 3722
3723 /* Set socket backlog limit. */
3724 sk->sk_backlog.limit = sysctl_sctp_rmem[1];
3725
3726 local_bh_disable(); 3723 local_bh_disable();
3727 percpu_counter_inc(&sctp_sockets_allocated); 3724 percpu_counter_inc(&sctp_sockets_allocated);
3728 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 3725 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -4387,7 +4384,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
4387 transports) { 4384 transports) {
4388 memcpy(&temp, &from->ipaddr, sizeof(temp)); 4385 memcpy(&temp, &from->ipaddr, sizeof(temp));
4389 sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); 4386 sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
4390 addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len; 4387 addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
4391 if (space_left < addrlen) 4388 if (space_left < addrlen)
4392 return -ENOMEM; 4389 return -ENOMEM;
4393 if (copy_to_user(to, &temp, addrlen)) 4390 if (copy_to_user(to, &temp, addrlen))
@@ -5436,6 +5433,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
5436 rover++; 5433 rover++;
5437 if ((rover < low) || (rover > high)) 5434 if ((rover < low) || (rover > high))
5438 rover = low; 5435 rover = low;
5436 if (inet_is_reserved_local_port(rover))
5437 continue;
5439 index = sctp_phashfn(rover); 5438 index = sctp_phashfn(rover);
5440 head = &sctp_port_hashtable[index]; 5439 head = &sctp_port_hashtable[index];
5441 sctp_spin_lock(&head->lock); 5440 sctp_spin_lock(&head->lock);
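The two added lines make SCTP's ephemeral-port search honor the ip_local_reserved_ports sysctl, as TCP and UDP already do. A hedged sketch of the walk (loop bookkeeping abbreviated):

do {
        rover++;
        if (rover < low || rover > high)
                rover = low;                     /* wrap within the range */
        if (inet_is_reserved_local_port(rover))
                continue;                        /* administratively reserved */
        index = sctp_phashfn(rover);
        head = &sctp_port_hashtable[index];      /* probe this hash bucket */
} while (--remaining > 0);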
@@ -5482,7 +5481,6 @@ pp_found:
5482 */ 5481 */
5483 int reuse = sk->sk_reuse; 5482 int reuse = sk->sk_reuse;
5484 struct sock *sk2; 5483 struct sock *sk2;
5485 struct hlist_node *node;
5486 5484
5487 SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); 5485 SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n");
5488 if (pp->fastreuse && sk->sk_reuse && 5486 if (pp->fastreuse && sk->sk_reuse &&
@@ -5703,7 +5701,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
5703 struct sctp_sock *sp = sctp_sk(sk); 5701 struct sctp_sock *sp = sctp_sk(sk);
5704 unsigned int mask; 5702 unsigned int mask;
5705 5703
5706 poll_wait(file, sk->sk_sleep, wait); 5704 poll_wait(file, sk_sleep(sk), wait);
5707 5705
5708 /* A TCP-style listening socket becomes readable when the accept queue 5706 /* A TCP-style listening socket becomes readable when the accept queue
5709 * is not empty. 5707 * is not empty.
@@ -5944,7 +5942,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p)
5944 int error; 5942 int error;
5945 DEFINE_WAIT(wait); 5943 DEFINE_WAIT(wait);
5946 5944
5947 prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 5945 prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
5948 5946
5949 /* Socket errors? */ 5947 /* Socket errors? */
5950 error = sock_error(sk); 5948 error = sock_error(sk);
@@ -5981,14 +5979,14 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p)
5981 sctp_lock_sock(sk); 5979 sctp_lock_sock(sk);
5982 5980
5983ready: 5981ready:
5984 finish_wait(sk->sk_sleep, &wait); 5982 finish_wait(sk_sleep(sk), &wait);
5985 return 0; 5983 return 0;
5986 5984
5987interrupted: 5985interrupted:
5988 error = sock_intr_errno(*timeo_p); 5986 error = sock_intr_errno(*timeo_p);
5989 5987
5990out: 5988out:
5991 finish_wait(sk->sk_sleep, &wait); 5989 finish_wait(sk_sleep(sk), &wait);
5992 *err = error; 5990 *err = error;
5993 return error; 5991 return error;
5994} 5992}
@@ -6062,14 +6060,14 @@ static void __sctp_write_space(struct sctp_association *asoc)
6062 wake_up_interruptible(&asoc->wait); 6060 wake_up_interruptible(&asoc->wait);
6063 6061
6064 if (sctp_writeable(sk)) { 6062 if (sctp_writeable(sk)) {
6065 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 6063 if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
6066 wake_up_interruptible(sk->sk_sleep); 6064 wake_up_interruptible(sk_sleep(sk));
6067 6065
6068 /* Note that we try to include the Async I/O support 6066 /* Note that we try to include the Async I/O support
6069 * here by modeling from the current TCP/UDP code. 6067 * here by modeling from the current TCP/UDP code.
6070 * We have not tested with it yet. 6068 * We have not tested with it yet.
6071 */ 6069 */
6072 if (sock->fasync_list && 6070 if (sock->wq->fasync_list &&
6073 !(sk->sk_shutdown & SEND_SHUTDOWN)) 6071 !(sk->sk_shutdown & SEND_SHUTDOWN))
6074 sock_wake_async(sock, 6072 sock_wake_async(sock,
6075 SOCK_WAKE_SPACE, POLL_OUT); 6073 SOCK_WAKE_SPACE, POLL_OUT);
@@ -6191,12 +6189,15 @@ do_nonblock:
6191 6189
6192void sctp_data_ready(struct sock *sk, int len) 6190void sctp_data_ready(struct sock *sk, int len)
6193{ 6191{
6194 read_lock_bh(&sk->sk_callback_lock); 6192 struct socket_wq *wq;
6195 if (sk_has_sleeper(sk)) 6193
6196 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | 6194 rcu_read_lock();
6195 wq = rcu_dereference(sk->sk_wq);
6196 if (wq_has_sleeper(wq))
6197 wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
6197 POLLRDNORM | POLLRDBAND); 6198 POLLRDNORM | POLLRDBAND);
6198 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 6199 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
6199 read_unlock_bh(&sk->sk_callback_lock); 6200 rcu_read_unlock();
6200} 6201}
6201 6202
6202/* If socket sndbuf has changed, wake up all per association waiters. */ 6203/* If socket sndbuf has changed, wake up all per association waiters. */
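sctp_data_ready() drops the sk_callback_lock read lock in favor of the RCU-protected struct socket_wq introduced in the net/socket.c hunks below; wq_has_sleeper() supplies the memory barrier that orders the sleeper check against the waiter's queueing. A hedged sketch of the lockless wakeup:

struct socket_wq *wq;

rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);   /* freed only after a grace period */
if (wq_has_sleeper(wq))            /* smp_mb + waitqueue_active */
        wake_up_interruptible(&wq->wait);
rcu_read_unlock();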
@@ -6307,7 +6308,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo)
6307 6308
6308 6309
6309 for (;;) { 6310 for (;;) {
6310 prepare_to_wait_exclusive(sk->sk_sleep, &wait, 6311 prepare_to_wait_exclusive(sk_sleep(sk), &wait,
6311 TASK_INTERRUPTIBLE); 6312 TASK_INTERRUPTIBLE);
6312 6313
6313 if (list_empty(&ep->asocs)) { 6314 if (list_empty(&ep->asocs)) {
@@ -6333,7 +6334,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo)
6333 break; 6334 break;
6334 } 6335 }
6335 6336
6336 finish_wait(sk->sk_sleep, &wait); 6337 finish_wait(sk_sleep(sk), &wait);
6337 6338
6338 return err; 6339 return err;
6339} 6340}
@@ -6343,7 +6344,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout)
6343 DEFINE_WAIT(wait); 6344 DEFINE_WAIT(wait);
6344 6345
6345 do { 6346 do {
6346 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 6347 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
6347 if (list_empty(&sctp_sk(sk)->ep->asocs)) 6348 if (list_empty(&sctp_sk(sk)->ep->asocs))
6348 break; 6349 break;
6349 sctp_release_sock(sk); 6350 sctp_release_sock(sk);
@@ -6351,7 +6352,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout)
6351 sctp_lock_sock(sk); 6352 sctp_lock_sock(sk);
6352 } while (!signal_pending(current) && timeout); 6353 } while (!signal_pending(current) && timeout);
6353 6354
6354 finish_wait(sk->sk_sleep, &wait); 6355 finish_wait(sk_sleep(sk), &wait);
6355} 6356}
6356 6357
6357static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk) 6358static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index be4d63d5a5cc..132046cb82fc 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -64,9 +64,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
64 /* Copy in the address. */ 64 /* Copy in the address. */
65 peer->ipaddr = *addr; 65 peer->ipaddr = *addr;
66 peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); 66 peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
67 peer->asoc = NULL;
68
69 peer->dst = NULL;
70 memset(&peer->saddr, 0, sizeof(union sctp_addr)); 67 memset(&peer->saddr, 0, sizeof(union sctp_addr));
71 68
72 /* From 6.3.1 RTO Calculation: 69 /* From 6.3.1 RTO Calculation:
@@ -76,52 +73,32 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
76 * parameter 'RTO.Initial'. 73 * parameter 'RTO.Initial'.
77 */ 74 */
78 peer->rto = msecs_to_jiffies(sctp_rto_initial); 75 peer->rto = msecs_to_jiffies(sctp_rto_initial);
79 peer->rtt = 0;
80 peer->rttvar = 0;
81 peer->srtt = 0;
82 peer->rto_pending = 0;
83 peer->hb_sent = 0;
84 peer->fast_recovery = 0;
85 76
86 peer->last_time_heard = jiffies; 77 peer->last_time_heard = jiffies;
87 peer->last_time_ecne_reduced = jiffies; 78 peer->last_time_ecne_reduced = jiffies;
88 79
89 peer->init_sent_count = 0;
90
91 peer->param_flags = SPP_HB_DISABLE | 80 peer->param_flags = SPP_HB_DISABLE |
92 SPP_PMTUD_ENABLE | 81 SPP_PMTUD_ENABLE |
93 SPP_SACKDELAY_ENABLE; 82 SPP_SACKDELAY_ENABLE;
94 peer->hbinterval = 0;
95 83
96 /* Initialize the default path max_retrans. */ 84 /* Initialize the default path max_retrans. */
97 peer->pathmaxrxt = sctp_max_retrans_path; 85 peer->pathmaxrxt = sctp_max_retrans_path;
98 peer->error_count = 0;
99 86
100 INIT_LIST_HEAD(&peer->transmitted); 87 INIT_LIST_HEAD(&peer->transmitted);
101 INIT_LIST_HEAD(&peer->send_ready); 88 INIT_LIST_HEAD(&peer->send_ready);
102 INIT_LIST_HEAD(&peer->transports); 89 INIT_LIST_HEAD(&peer->transports);
103 90
104 peer->T3_rtx_timer.expires = 0;
105 peer->hb_timer.expires = 0;
106
107 setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 91 setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
108 (unsigned long)peer); 92 (unsigned long)peer);
109 setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, 93 setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
110 (unsigned long)peer); 94 (unsigned long)peer);
95 setup_timer(&peer->proto_unreach_timer,
96 sctp_generate_proto_unreach_event, (unsigned long)peer);
111 97
112 /* Initialize the 64-bit random nonce sent with heartbeat. */ 98 /* Initialize the 64-bit random nonce sent with heartbeat. */
113 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); 99 get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
114 100
115 atomic_set(&peer->refcnt, 1); 101 atomic_set(&peer->refcnt, 1);
116 peer->dead = 0;
117
118 peer->malloced = 0;
119
120 /* Initialize the state information for SFR-CACC */
121 peer->cacc.changeover_active = 0;
122 peer->cacc.cycling_changeover = 0;
123 peer->cacc.next_tsn_at_change = 0;
124 peer->cacc.cacc_saw_newack = 0;
125 102
126 return peer; 103 return peer;
127} 104}
@@ -171,6 +148,10 @@ void sctp_transport_free(struct sctp_transport *transport)
171 del_timer(&transport->T3_rtx_timer)) 148 del_timer(&transport->T3_rtx_timer))
172 sctp_transport_put(transport); 149 sctp_transport_put(transport);
173 150
151 /* Delete the ICMP proto unreachable timer if it's active. */
152 if (timer_pending(&transport->proto_unreach_timer) &&
153 del_timer(&transport->proto_unreach_timer))
154 sctp_association_put(transport->asoc);
174 155
175 sctp_transport_put(transport); 156 sctp_transport_put(transport);
176} 157}
@@ -195,7 +176,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
195/* Start T3_rtx timer if it is not already running and update the heartbeat 176/* Start T3_rtx timer if it is not already running and update the heartbeat
196 * timer. This routine is called every time a DATA chunk is sent. 177 * timer. This routine is called every time a DATA chunk is sent.
197 */ 178 */
198void sctp_transport_reset_timers(struct sctp_transport *transport, int force) 179void sctp_transport_reset_timers(struct sctp_transport *transport)
199{ 180{
200 /* RFC 2960 6.3.2 Retransmission Timer Rules 181 /* RFC 2960 6.3.2 Retransmission Timer Rules
201 * 182 *
@@ -205,7 +186,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
205 * address. 186 * address.
206 */ 187 */
207 188
208 if (force || !timer_pending(&transport->T3_rtx_timer)) 189 if (!timer_pending(&transport->T3_rtx_timer))
209 if (!mod_timer(&transport->T3_rtx_timer, 190 if (!mod_timer(&transport->T3_rtx_timer,
210 jiffies + transport->rto)) 191 jiffies + transport->rto))
211 sctp_transport_hold(transport); 192 sctp_transport_hold(transport);
@@ -403,15 +384,16 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
403void sctp_transport_raise_cwnd(struct sctp_transport *transport, 384void sctp_transport_raise_cwnd(struct sctp_transport *transport,
404 __u32 sack_ctsn, __u32 bytes_acked) 385 __u32 sack_ctsn, __u32 bytes_acked)
405{ 386{
387 struct sctp_association *asoc = transport->asoc;
406 __u32 cwnd, ssthresh, flight_size, pba, pmtu; 388 __u32 cwnd, ssthresh, flight_size, pba, pmtu;
407 389
408 cwnd = transport->cwnd; 390 cwnd = transport->cwnd;
409 flight_size = transport->flight_size; 391 flight_size = transport->flight_size;
410 392
411 /* See if we need to exit Fast Recovery first */ 393 /* See if we need to exit Fast Recovery first */
412 if (transport->fast_recovery && 394 if (asoc->fast_recovery &&
413 TSN_lte(transport->fast_recovery_exit, sack_ctsn)) 395 TSN_lte(asoc->fast_recovery_exit, sack_ctsn))
414 transport->fast_recovery = 0; 396 asoc->fast_recovery = 0;
415 397
416 /* The appropriate cwnd increase algorithm is performed if, and only 398 /* The appropriate cwnd increase algorithm is performed if, and only
417 * if the cumulative TSN would have advanced and the congestion window is 399 * if the cumulative TSN would have advanced and the congestion window is
@@ -440,7 +422,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
440 * 2) the destination's path MTU. This upper bound protects 422 * 2) the destination's path MTU. This upper bound protects
441 * against the ACK-Splitting attack outlined in [SAVAGE99]. 423 * against the ACK-Splitting attack outlined in [SAVAGE99].
442 */ 424 */
443 if (transport->fast_recovery) 425 if (asoc->fast_recovery)
444 return; 426 return;
445 427
446 if (bytes_acked > pmtu) 428 if (bytes_acked > pmtu)
@@ -491,6 +473,8 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
491void sctp_transport_lower_cwnd(struct sctp_transport *transport, 473void sctp_transport_lower_cwnd(struct sctp_transport *transport,
492 sctp_lower_cwnd_t reason) 474 sctp_lower_cwnd_t reason)
493{ 475{
476 struct sctp_association *asoc = transport->asoc;
477
494 switch (reason) { 478 switch (reason) {
495 case SCTP_LOWER_CWND_T3_RTX: 479 case SCTP_LOWER_CWND_T3_RTX:
496 /* RFC 2960 Section 7.2.3, sctpimpguide 480 /* RFC 2960 Section 7.2.3, sctpimpguide
@@ -501,11 +485,11 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
501 * partial_bytes_acked = 0 485 * partial_bytes_acked = 0
502 */ 486 */
503 transport->ssthresh = max(transport->cwnd/2, 487 transport->ssthresh = max(transport->cwnd/2,
504 4*transport->asoc->pathmtu); 488 4*asoc->pathmtu);
505 transport->cwnd = transport->asoc->pathmtu; 489 transport->cwnd = asoc->pathmtu;
506 490
507 /* T3-rtx also clears fast recovery on the transport */ 491 /* T3-rtx also clears fast recovery */
508 transport->fast_recovery = 0; 492 asoc->fast_recovery = 0;
509 break; 493 break;
510 494
511 case SCTP_LOWER_CWND_FAST_RTX: 495 case SCTP_LOWER_CWND_FAST_RTX:
@@ -521,15 +505,15 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
521 * cwnd = ssthresh 505 * cwnd = ssthresh
522 * partial_bytes_acked = 0 506 * partial_bytes_acked = 0
523 */ 507 */
524 if (transport->fast_recovery) 508 if (asoc->fast_recovery)
525 return; 509 return;
526 510
527 /* Mark Fast recovery */ 511 /* Mark Fast recovery */
528 transport->fast_recovery = 1; 512 asoc->fast_recovery = 1;
529 transport->fast_recovery_exit = transport->asoc->next_tsn - 1; 513 asoc->fast_recovery_exit = asoc->next_tsn - 1;
530 514
531 transport->ssthresh = max(transport->cwnd/2, 515 transport->ssthresh = max(transport->cwnd/2,
532 4*transport->asoc->pathmtu); 516 4*asoc->pathmtu);
533 transport->cwnd = transport->ssthresh; 517 transport->cwnd = transport->ssthresh;
534 break; 518 break;
535 519
@@ -549,7 +533,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
549 if (time_after(jiffies, transport->last_time_ecne_reduced + 533 if (time_after(jiffies, transport->last_time_ecne_reduced +
550 transport->rtt)) { 534 transport->rtt)) {
551 transport->ssthresh = max(transport->cwnd/2, 535 transport->ssthresh = max(transport->cwnd/2,
552 4*transport->asoc->pathmtu); 536 4*asoc->pathmtu);
553 transport->cwnd = transport->ssthresh; 537 transport->cwnd = transport->ssthresh;
554 transport->last_time_ecne_reduced = jiffies; 538 transport->last_time_ecne_reduced = jiffies;
555 } 539 }
@@ -565,7 +549,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
565 * interval. 549 * interval.
566 */ 550 */
567 transport->cwnd = max(transport->cwnd/2, 551 transport->cwnd = max(transport->cwnd/2,
568 4*transport->asoc->pathmtu); 552 4*asoc->pathmtu);
569 break; 553 break;
570 } 554 }
571 555
@@ -650,7 +634,6 @@ void sctp_transport_reset(struct sctp_transport *t)
650 t->error_count = 0; 634 t->error_count = 0;
651 t->rto_pending = 0; 635 t->rto_pending = 0;
652 t->hb_sent = 0; 636 t->hb_sent = 0;
653 t->fast_recovery = 0;
654 637
655 /* Initialize the state information for SFR-CACC */ 638 /* Initialize the state information for SFR-CACC */
656 t->cacc.changeover_active = 0; 639 t->cacc.changeover_active = 0;
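Fast recovery stops being tracked per transport: RFC 4960 treats it as a property of the association's send path, so the flag and its exit TSN move to struct sctp_association, and every cwnd adjustment above reads them from there. A hedged sketch of entry:

/* Entering fast recovery, once per association (sketch): */
if (!asoc->fast_recovery) {
        asoc->fast_recovery      = 1;
        asoc->fast_recovery_exit = asoc->next_tsn - 1;
        /* exit when the cumulative ack passes fast_recovery_exit */
}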
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 3a448536f0b6..c7f7e49609cb 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -955,7 +955,6 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn)
955 * ordering and deliver them if needed. 955 * ordering and deliver them if needed.
956 */ 956 */
957 sctp_ulpq_reap_ordered(ulpq, sid); 957 sctp_ulpq_reap_ordered(ulpq, sid);
958 return;
959} 958}
960 959
961static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, 960static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq,
@@ -1064,7 +1063,6 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
1064 } 1063 }
1065 1064
1066 sk_mem_reclaim(asoc->base.sk); 1065 sk_mem_reclaim(asoc->base.sk);
1067 return;
1068} 1066}
1069 1067
1070 1068
diff --git a/net/socket.c b/net/socket.c
index 5e8d0af3c0e7..2270b941bcc7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -94,6 +94,7 @@
94 94
95#include <net/compat.h> 95#include <net/compat.h>
96#include <net/wext.h> 96#include <net/wext.h>
97#include <net/cls_cgroup.h>
97 98
98#include <net/sock.h> 99#include <net/sock.h>
99#include <linux/netfilter.h> 100#include <linux/netfilter.h>
@@ -123,7 +124,7 @@ static int sock_fasync(int fd, struct file *filp, int on);
123static ssize_t sock_sendpage(struct file *file, struct page *page, 124static ssize_t sock_sendpage(struct file *file, struct page *page,
124 int offset, size_t size, loff_t *ppos, int more); 125 int offset, size_t size, loff_t *ppos, int more);
125static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
126 struct pipe_inode_info *pipe, size_t len, 127 struct pipe_inode_info *pipe, size_t len,
127 unsigned int flags); 128 unsigned int flags);
128 129
129/* 130/*
@@ -161,7 +162,7 @@ static const struct net_proto_family *net_families[NPROTO] __read_mostly;
161 * Statistics counters of the socket lists 162 * Statistics counters of the socket lists
162 */ 163 */
163 164
164static DEFINE_PER_CPU(int, sockets_in_use) = 0; 165static DEFINE_PER_CPU(int, sockets_in_use);
165 166
166/* 167/*
167 * Support routines. 168 * Support routines.
@@ -169,15 +170,6 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0;
169 * divide and look after the messy bits. 170 * divide and look after the messy bits.
170 */ 171 */
171 172
172#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
173 16 for IP, 16 for IPX,
174 24 for IPv6,
175 about 80 for AX.25
176 must be at least one bigger than
177 the AF_UNIX size (see net/unix/af_unix.c
178 :unix_mkname()).
179 */
180
181/** 173/**
182 * move_addr_to_kernel - copy a socket address into kernel space 174 * move_addr_to_kernel - copy a socket address into kernel space
183 * @uaddr: Address in user space 175 * @uaddr: Address in user space
@@ -252,9 +244,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
252 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
253 if (!ei) 245 if (!ei)
254 return NULL; 246 return NULL;
255 init_waitqueue_head(&ei->socket.wait); 247 ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL);
248 if (!ei->socket.wq) {
249 kmem_cache_free(sock_inode_cachep, ei);
250 return NULL;
251 }
252 init_waitqueue_head(&ei->socket.wq->wait);
253 ei->socket.wq->fasync_list = NULL;
256 254
257 ei->socket.fasync_list = NULL;
258 ei->socket.state = SS_UNCONNECTED; 255 ei->socket.state = SS_UNCONNECTED;
259 ei->socket.flags = 0; 256 ei->socket.flags = 0;
260 ei->socket.ops = NULL; 257 ei->socket.ops = NULL;
@@ -264,10 +261,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
264 return &ei->vfs_inode; 261 return &ei->vfs_inode;
265} 262}
266 263
264
265static void wq_free_rcu(struct rcu_head *head)
266{
267 struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
268
269 kfree(wq);
270}
271
267static void sock_destroy_inode(struct inode *inode) 272static void sock_destroy_inode(struct inode *inode)
268{ 273{
269 kmem_cache_free(sock_inode_cachep, 274 struct socket_alloc *ei;
270 container_of(inode, struct socket_alloc, vfs_inode)); 275
276 ei = container_of(inode, struct socket_alloc, vfs_inode);
277 call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
278 kmem_cache_free(sock_inode_cachep, ei);
271} 279}
272 280
273static void init_once(void *foo) 281static void init_once(void *foo)
@@ -292,9 +300,9 @@ static int init_inodecache(void)
292} 300}
293 301
294static const struct super_operations sockfs_ops = { 302static const struct super_operations sockfs_ops = {
295 .alloc_inode = sock_alloc_inode, 303 .alloc_inode = sock_alloc_inode,
296 .destroy_inode =sock_destroy_inode, 304 .destroy_inode = sock_destroy_inode,
297 .statfs = simple_statfs, 305 .statfs = simple_statfs,
298}; 306};
299 307
300static int sockfs_get_sb(struct file_system_type *fs_type, 308static int sockfs_get_sb(struct file_system_type *fs_type,
@@ -394,6 +402,7 @@ int sock_map_fd(struct socket *sock, int flags)
394 402
395 return fd; 403 return fd;
396} 404}
405EXPORT_SYMBOL(sock_map_fd);
397 406
398static struct socket *sock_from_file(struct file *file, int *err) 407static struct socket *sock_from_file(struct file *file, int *err)
399{ 408{
@@ -405,7 +414,7 @@ static struct socket *sock_from_file(struct file *file, int *err)
405} 414}
406 415
407/** 416/**
408 * sockfd_lookup - Go from a file number to its socket slot 417 * sockfd_lookup - Go from a file number to its socket slot
409 * @fd: file handle 418 * @fd: file handle
410 * @err: pointer to an error code return 419 * @err: pointer to an error code return
411 * 420 *
@@ -433,6 +442,7 @@ struct socket *sockfd_lookup(int fd, int *err)
433 fput(file); 442 fput(file);
434 return sock; 443 return sock;
435} 444}
445EXPORT_SYMBOL(sockfd_lookup);
436 446
437static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) 447static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
438{ 448{
@@ -513,7 +523,7 @@ void sock_release(struct socket *sock)
513 module_put(owner); 523 module_put(owner);
514 } 524 }
515 525
516 if (sock->fasync_list) 526 if (sock->wq->fasync_list)
517 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 527 printk(KERN_ERR "sock_release: fasync list not empty!\n");
518 528
519 percpu_sub(sockets_in_use, 1); 529 percpu_sub(sockets_in_use, 1);
@@ -523,6 +533,7 @@ void sock_release(struct socket *sock)
523 } 533 }
524 sock->file = NULL; 534 sock->file = NULL;
525} 535}
536EXPORT_SYMBOL(sock_release);
526 537
527int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 538int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
528 union skb_shared_tx *shtx) 539 union skb_shared_tx *shtx)
@@ -542,6 +553,8 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
542 struct sock_iocb *si = kiocb_to_siocb(iocb); 553 struct sock_iocb *si = kiocb_to_siocb(iocb);
543 int err; 554 int err;
544 555
556 sock_update_classid(sock->sk);
557
545 si->sock = sock; 558 si->sock = sock;
546 si->scm = NULL; 559 si->scm = NULL;
547 si->msg = msg; 560 si->msg = msg;
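
sock_update_classid() is added at the top of each I/O entry point in this file (sendmsg, recvmsg, splice_read, and sendpage below). The helper itself is not part of this diff; a hedged sketch of the 2.6.35-era version in net/core/sock.c, for context:

	#ifdef CONFIG_CGROUPS
	void sock_update_classid(struct sock *sk)
	{
		u32 classid;

		rcu_read_lock();	/* 'current' cannot go away here */
		classid = task_cls_classid(current);
		rcu_read_unlock();
		/* tag the socket so its traffic hits the task's net_cls class */
		if (classid && classid != sk->sk_classid)
			sk->sk_classid = classid;
	}
	#endif
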
@@ -567,6 +580,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
567 ret = wait_on_sync_kiocb(&iocb); 580 ret = wait_on_sync_kiocb(&iocb);
568 return ret; 581 return ret;
569} 582}
583EXPORT_SYMBOL(sock_sendmsg);
570 584
571int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 585int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
572 struct kvec *vec, size_t num, size_t size) 586 struct kvec *vec, size_t num, size_t size)
@@ -585,6 +599,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
585 set_fs(oldfs); 599 set_fs(oldfs);
586 return result; 600 return result;
587} 601}
602EXPORT_SYMBOL(kernel_sendmsg);
588 603
589static int ktime2ts(ktime_t kt, struct timespec *ts) 604static int ktime2ts(ktime_t kt, struct timespec *ts)
590{ 605{
@@ -620,10 +635,9 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
620 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, 635 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
621 sizeof(tv), &tv); 636 sizeof(tv), &tv);
622 } else { 637 } else {
623 struct timespec ts; 638 skb_get_timestampns(skb, &ts[0]);
624 skb_get_timestampns(skb, &ts);
625 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, 639 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
626 sizeof(ts), &ts); 640 sizeof(ts[0]), &ts[0]);
627 } 641 }
628 } 642 }
629 643
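
ts becomes an array here because the SCM_TIMESTAMPING path later in this function fills three struct timespec values from the same buffer. On the userspace side that control message is consumed roughly like this (hedged sketch; index meanings per Documentation/networking/timestamping.txt):

	struct msghdr msg;	/* filled in by recvmsg() */
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_TIMESTAMPING) {
			struct timespec *stamp = (struct timespec *)CMSG_DATA(cmsg);
			/* stamp[0] software, stamp[1] hw converted, stamp[2] raw hw */
		}
	}
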
@@ -646,7 +660,6 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
646 put_cmsg(msg, SOL_SOCKET, 660 put_cmsg(msg, SOL_SOCKET,
647 SCM_TIMESTAMPING, sizeof(ts), &ts); 661 SCM_TIMESTAMPING, sizeof(ts), &ts);
648} 662}
649
650EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 663EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
651 664
652inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 665inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
@@ -656,19 +669,21 @@ inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff
656 sizeof(__u32), &skb->dropcount); 669 sizeof(__u32), &skb->dropcount);
657} 670}
658 671
659void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, 672void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
660 struct sk_buff *skb) 673 struct sk_buff *skb)
661{ 674{
662 sock_recv_timestamp(msg, sk, skb); 675 sock_recv_timestamp(msg, sk, skb);
663 sock_recv_drops(msg, sk, skb); 676 sock_recv_drops(msg, sk, skb);
664} 677}
665EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); 678EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
666 679
667static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, 680static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
668 struct msghdr *msg, size_t size, int flags) 681 struct msghdr *msg, size_t size, int flags)
669{ 682{
670 struct sock_iocb *si = kiocb_to_siocb(iocb); 683 struct sock_iocb *si = kiocb_to_siocb(iocb);
671 684
685 sock_update_classid(sock->sk);
686
672 si->sock = sock; 687 si->sock = sock;
673 si->scm = NULL; 688 si->scm = NULL;
674 si->msg = msg; 689 si->msg = msg;
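
The new leading underscores mark this as the slow path: a small inline wrapper in include/net/sock.h keeps the old sock_recv_ts_and_drops() name and skips the call entirely when no timestamping or dropcount option is active on the socket. Roughly:

	static inline void sock_recv_ts_and_drops(struct msghdr *msg,
						  struct sock *sk,
						  struct sk_buff *skb)
	{
		/* FLAGS_TS_OR_DROPS masks SOCK_RXQ_OVFL and the timestamp flags */
		if (sk->sk_flags & FLAGS_TS_OR_DROPS)
			__sock_recv_ts_and_drops(msg, sk, skb);
	}
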
@@ -700,6 +715,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
700 ret = wait_on_sync_kiocb(&iocb); 715 ret = wait_on_sync_kiocb(&iocb);
701 return ret; 716 return ret;
702} 717}
718EXPORT_SYMBOL(sock_recvmsg);
703 719
704static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, 720static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
705 size_t size, int flags) 721 size_t size, int flags)
@@ -732,6 +748,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
732 set_fs(oldfs); 748 set_fs(oldfs);
733 return result; 749 return result;
734} 750}
751EXPORT_SYMBOL(kernel_recvmsg);
735 752
736static void sock_aio_dtor(struct kiocb *iocb) 753static void sock_aio_dtor(struct kiocb *iocb)
737{ 754{
@@ -754,7 +771,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
754} 771}
755 772
756static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 773static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
757 struct pipe_inode_info *pipe, size_t len, 774 struct pipe_inode_info *pipe, size_t len,
758 unsigned int flags) 775 unsigned int flags)
759{ 776{
760 struct socket *sock = file->private_data; 777 struct socket *sock = file->private_data;
@@ -762,6 +779,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
762 if (unlikely(!sock->ops->splice_read)) 779 if (unlikely(!sock->ops->splice_read))
763 return -EINVAL; 780 return -EINVAL;
764 781
782 sock_update_classid(sock->sk);
783
765 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 784 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
766} 785}
767 786
@@ -865,7 +884,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
865 */ 884 */
866 885
867static DEFINE_MUTEX(br_ioctl_mutex); 886static DEFINE_MUTEX(br_ioctl_mutex);
868static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; 887static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
869 888
870void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) 889void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
871{ 890{
@@ -873,7 +892,6 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
873 br_ioctl_hook = hook; 892 br_ioctl_hook = hook;
874 mutex_unlock(&br_ioctl_mutex); 893 mutex_unlock(&br_ioctl_mutex);
875} 894}
876
877EXPORT_SYMBOL(brioctl_set); 895EXPORT_SYMBOL(brioctl_set);
878 896
879static DEFINE_MUTEX(vlan_ioctl_mutex); 897static DEFINE_MUTEX(vlan_ioctl_mutex);
@@ -885,7 +903,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
885 vlan_ioctl_hook = hook; 903 vlan_ioctl_hook = hook;
886 mutex_unlock(&vlan_ioctl_mutex); 904 mutex_unlock(&vlan_ioctl_mutex);
887} 905}
888
889EXPORT_SYMBOL(vlan_ioctl_set); 906EXPORT_SYMBOL(vlan_ioctl_set);
890 907
891static DEFINE_MUTEX(dlci_ioctl_mutex); 908static DEFINE_MUTEX(dlci_ioctl_mutex);
@@ -897,7 +914,6 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
897 dlci_ioctl_hook = hook; 914 dlci_ioctl_hook = hook;
898 mutex_unlock(&dlci_ioctl_mutex); 915 mutex_unlock(&dlci_ioctl_mutex);
899} 916}
900
901EXPORT_SYMBOL(dlci_ioctl_set); 917EXPORT_SYMBOL(dlci_ioctl_set);
902 918
903static long sock_do_ioctl(struct net *net, struct socket *sock, 919static long sock_do_ioctl(struct net *net, struct socket *sock,
@@ -1025,6 +1041,7 @@ out_release:
1025 sock = NULL; 1041 sock = NULL;
1026 goto out; 1042 goto out;
1027} 1043}
1044EXPORT_SYMBOL(sock_create_lite);
1028 1045
1029/* No kernel lock held - perfect */ 1046/* No kernel lock held - perfect */
1030static unsigned int sock_poll(struct file *file, poll_table *wait) 1047static unsigned int sock_poll(struct file *file, poll_table *wait)
@@ -1068,87 +1085,44 @@ static int sock_close(struct inode *inode, struct file *filp)
1068 * 1. fasync_list is modified only under process context socket lock 1085 * 1. fasync_list is modified only under process context socket lock
1069 * i.e. under semaphore. 1086 * i.e. under semaphore.
1070 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) 1087 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1071 * or under socket lock. 1088 * or under socket lock
1072 * 3. fasync_list can be used from softirq context, so that
1073 * modification under socket lock have to be enhanced with
1074 * write_lock_bh(&sk->sk_callback_lock).
1075 * --ANK (990710)
1076 */ 1089 */
1077 1090
1078static int sock_fasync(int fd, struct file *filp, int on) 1091static int sock_fasync(int fd, struct file *filp, int on)
1079{ 1092{
1080 struct fasync_struct *fa, *fna = NULL, **prev; 1093 struct socket *sock = filp->private_data;
1081 struct socket *sock; 1094 struct sock *sk = sock->sk;
1082 struct sock *sk;
1083
1084 if (on) {
1085 fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
1086 if (fna == NULL)
1087 return -ENOMEM;
1088 }
1089
1090 sock = filp->private_data;
1091 1095
1092 sk = sock->sk; 1096 if (sk == NULL)
1093 if (sk == NULL) {
1094 kfree(fna);
1095 return -EINVAL; 1097 return -EINVAL;
1096 }
1097 1098
1098 lock_sock(sk); 1099 lock_sock(sk);
1099 1100
1100 spin_lock(&filp->f_lock); 1101 fasync_helper(fd, filp, on, &sock->wq->fasync_list);
1101 if (on)
1102 filp->f_flags |= FASYNC;
1103 else
1104 filp->f_flags &= ~FASYNC;
1105 spin_unlock(&filp->f_lock);
1106
1107 prev = &(sock->fasync_list);
1108
1109 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
1110 if (fa->fa_file == filp)
1111 break;
1112
1113 if (on) {
1114 if (fa != NULL) {
1115 write_lock_bh(&sk->sk_callback_lock);
1116 fa->fa_fd = fd;
1117 write_unlock_bh(&sk->sk_callback_lock);
1118 1102
1119 kfree(fna); 1103 if (!sock->wq->fasync_list)
1120 goto out; 1104 sock_reset_flag(sk, SOCK_FASYNC);
1121 } 1105 else
1122 fna->fa_file = filp;
1123 fna->fa_fd = fd;
1124 fna->magic = FASYNC_MAGIC;
1125 fna->fa_next = sock->fasync_list;
1126 write_lock_bh(&sk->sk_callback_lock);
1127 sock->fasync_list = fna;
1128 sock_set_flag(sk, SOCK_FASYNC); 1106 sock_set_flag(sk, SOCK_FASYNC);
1129 write_unlock_bh(&sk->sk_callback_lock);
1130 } else {
1131 if (fa != NULL) {
1132 write_lock_bh(&sk->sk_callback_lock);
1133 *prev = fa->fa_next;
1134 if (!sock->fasync_list)
1135 sock_reset_flag(sk, SOCK_FASYNC);
1136 write_unlock_bh(&sk->sk_callback_lock);
1137 kfree(fa);
1138 }
1139 }
1140 1107
1141out: 1108 release_sock(sk);
1142 release_sock(sock->sk);
1143 return 0; 1109 return 0;
1144} 1110}
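
Most of the deleted lines were an open-coded duplicate of what fs/fcntl.c already provides. fasync_helper() does the list manipulation under its own locking, which is why rule 3 of the old comment (the write_lock_bh on sk_callback_lock) could go away. Its contract, for reference; note this caller ignores the return value, including a possible -ENOMEM:

	/* adds (on != 0) or removes (on == 0) filp on the list at *fapp;
	 * returns negative on allocation failure, positive if the list
	 * changed, 0 otherwise */
	int fasync_helper(int fd, struct file *filp, int on,
			  struct fasync_struct **fapp);
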
1145 1111
1146/* This function may be called only under socket lock or callback_lock */ 1112/* This function may be called only under socket lock or callback_lock or rcu_lock */
1147 1113
1148int sock_wake_async(struct socket *sock, int how, int band) 1114int sock_wake_async(struct socket *sock, int how, int band)
1149{ 1115{
1150 if (!sock || !sock->fasync_list) 1116 struct socket_wq *wq;
1117
1118 if (!sock)
1119 return -1;
1120 rcu_read_lock();
1121 wq = rcu_dereference(sock->wq);
1122 if (!wq || !wq->fasync_list) {
1123 rcu_read_unlock();
1151 return -1; 1124 return -1;
1125 }
1152 switch (how) { 1126 switch (how) {
1153 case SOCK_WAKE_WAITD: 1127 case SOCK_WAKE_WAITD:
1154 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 1128 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
@@ -1160,13 +1134,15 @@ int sock_wake_async(struct socket *sock, int how, int band)
1160 /* fall through */ 1134 /* fall through */
1161 case SOCK_WAKE_IO: 1135 case SOCK_WAKE_IO:
1162call_kill: 1136call_kill:
1163 __kill_fasync(sock->fasync_list, SIGIO, band); 1137 kill_fasync(&wq->fasync_list, SIGIO, band);
1164 break; 1138 break;
1165 case SOCK_WAKE_URG: 1139 case SOCK_WAKE_URG:
1166 __kill_fasync(sock->fasync_list, SIGURG, band); 1140 kill_fasync(&wq->fasync_list, SIGURG, band);
1167 } 1141 }
1142 rcu_read_unlock();
1168 return 0; 1143 return 0;
1169} 1144}
1145EXPORT_SYMBOL(sock_wake_async);
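
kill_fasync() takes the fasync lock itself, unlike the __kill_fasync() it replaces, so the rcu_read_lock() section only needs to pin wq. Protocols normally reach this function through a wrapper along these lines (hedged sketch, modelled on sk_wake_async() in include/net/sock.h):

	static inline void example_wake_async(struct sock *sk, int how, int band)
	{
		if (sock_flag(sk, SOCK_FASYNC))
			sock_wake_async(sk->sk_socket, how, band);
	}
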
1170 1146
1171static int __sock_create(struct net *net, int family, int type, int protocol, 1147static int __sock_create(struct net *net, int family, int type, int protocol,
1172 struct socket **res, int kern) 1148 struct socket **res, int kern)
@@ -1285,11 +1261,13 @@ int sock_create(int family, int type, int protocol, struct socket **res)
1285{ 1261{
1286 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); 1262 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1287} 1263}
1264EXPORT_SYMBOL(sock_create);
1288 1265
1289int sock_create_kern(int family, int type, int protocol, struct socket **res) 1266int sock_create_kern(int family, int type, int protocol, struct socket **res)
1290{ 1267{
1291 return __sock_create(&init_net, family, type, protocol, res, 1); 1268 return __sock_create(&init_net, family, type, protocol, res, 1);
1292} 1269}
1270EXPORT_SYMBOL(sock_create_kern);
1293 1271
1294SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) 1272SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1295{ 1273{
@@ -1494,7 +1472,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1494 goto out; 1472 goto out;
1495 1473
1496 err = -ENFILE; 1474 err = -ENFILE;
1497 if (!(newsock = sock_alloc())) 1475 newsock = sock_alloc();
1476 if (!newsock)
1498 goto out_put; 1477 goto out_put;
1499 1478
1500 newsock->type = sock->type; 1479 newsock->type = sock->type;
@@ -1881,8 +1860,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1881 if (MSG_CMSG_COMPAT & flags) { 1860 if (MSG_CMSG_COMPAT & flags) {
1882 if (get_compat_msghdr(&msg_sys, msg_compat)) 1861 if (get_compat_msghdr(&msg_sys, msg_compat))
1883 return -EFAULT; 1862 return -EFAULT;
1884 } 1863 } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1885 else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1886 return -EFAULT; 1864 return -EFAULT;
1887 1865
1888 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1866 sock = sockfd_lookup_light(fd, &err, &fput_needed);
@@ -1984,8 +1962,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1984 if (MSG_CMSG_COMPAT & flags) { 1962 if (MSG_CMSG_COMPAT & flags) {
1985 if (get_compat_msghdr(msg_sys, msg_compat)) 1963 if (get_compat_msghdr(msg_sys, msg_compat))
1986 return -EFAULT; 1964 return -EFAULT;
1987 } 1965 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1988 else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
1989 return -EFAULT; 1966 return -EFAULT;
1990 1967
1991 err = -EMSGSIZE; 1968 err = -EMSGSIZE;
@@ -2211,10 +2188,10 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2211/* Argument list sizes for sys_socketcall */ 2188/* Argument list sizes for sys_socketcall */
2212#define AL(x) ((x) * sizeof(unsigned long)) 2189#define AL(x) ((x) * sizeof(unsigned long))
2213static const unsigned char nargs[20] = { 2190static const unsigned char nargs[20] = {
2214 AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 2191 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
2215 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 2192 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
2216 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), 2193 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
2217 AL(4),AL(5) 2194 AL(4), AL(5)
2218}; 2195};
2219 2196
2220#undef AL 2197#undef AL
@@ -2360,6 +2337,7 @@ int sock_register(const struct net_proto_family *ops)
2360 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); 2337 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
2361 return err; 2338 return err;
2362} 2339}
2340EXPORT_SYMBOL(sock_register);
2363 2341
2364/** 2342/**
2365 * sock_unregister - remove a protocol handler 2343 * sock_unregister - remove a protocol handler
@@ -2386,6 +2364,7 @@ void sock_unregister(int family)
2386 2364
2387 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); 2365 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
2388} 2366}
2367EXPORT_SYMBOL(sock_unregister);
2389 2368
2390static int __init sock_init(void) 2369static int __init sock_init(void)
2391{ 2370{
@@ -2415,6 +2394,10 @@ static int __init sock_init(void)
2415 netfilter_init(); 2394 netfilter_init();
2416#endif 2395#endif
2417 2396
2397#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
2398 skb_timestamping_init();
2399#endif
2400
2418 return 0; 2401 return 0;
2419} 2402}
2420 2403
@@ -2510,13 +2493,13 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2510 ifc.ifc_req = NULL; 2493 ifc.ifc_req = NULL;
2511 uifc = compat_alloc_user_space(sizeof(struct ifconf)); 2494 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2512 } else { 2495 } else {
2513 size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * 2496 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2514 sizeof (struct ifreq); 2497 sizeof(struct ifreq);
2515 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); 2498 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2516 ifc.ifc_len = len; 2499 ifc.ifc_len = len;
2517 ifr = ifc.ifc_req = (void __user *)(uifc + 1); 2500 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2518 ifr32 = compat_ptr(ifc32.ifcbuf); 2501 ifr32 = compat_ptr(ifc32.ifcbuf);
2519 for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { 2502 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
2520 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) 2503 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
2521 return -EFAULT; 2504 return -EFAULT;
2522 ifr++; 2505 ifr++;
@@ -2536,9 +2519,9 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
2536 ifr = ifc.ifc_req; 2519 ifr = ifc.ifc_req;
2537 ifr32 = compat_ptr(ifc32.ifcbuf); 2520 ifr32 = compat_ptr(ifc32.ifcbuf);
2538 for (i = 0, j = 0; 2521 for (i = 0, j = 0;
2539 i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; 2522 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2540 i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { 2523 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2541 if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) 2524 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
2542 return -EFAULT; 2525 return -EFAULT;
2543 ifr32++; 2526 ifr32++;
2544 ifr++; 2527 ifr++;
@@ -2587,7 +2570,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
2587 compat_uptr_t uptr32; 2570 compat_uptr_t uptr32;
2588 struct ifreq __user *uifr; 2571 struct ifreq __user *uifr;
2589 2572
2590 uifr = compat_alloc_user_space(sizeof (*uifr)); 2573 uifr = compat_alloc_user_space(sizeof(*uifr));
2591 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) 2574 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2592 return -EFAULT; 2575 return -EFAULT;
2593 2576
@@ -2621,9 +2604,9 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
2621 return -EFAULT; 2604 return -EFAULT;
2622 2605
2623 old_fs = get_fs(); 2606 old_fs = get_fs();
2624 set_fs (KERNEL_DS); 2607 set_fs(KERNEL_DS);
2625 err = dev_ioctl(net, cmd, &kifr); 2608 err = dev_ioctl(net, cmd, &kifr);
2626 set_fs (old_fs); 2609 set_fs(old_fs);
2627 2610
2628 return err; 2611 return err;
2629 case SIOCBONDSLAVEINFOQUERY: 2612 case SIOCBONDSLAVEINFOQUERY:
@@ -2642,7 +2625,7 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
2642 return dev_ioctl(net, cmd, uifr); 2625 return dev_ioctl(net, cmd, uifr);
2643 default: 2626 default:
2644 return -EINVAL; 2627 return -EINVAL;
2645 }; 2628 }
2646} 2629}
2647 2630
2648static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, 2631static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
@@ -2730,9 +2713,9 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2730 return -EFAULT; 2713 return -EFAULT;
2731 2714
2732 old_fs = get_fs(); 2715 old_fs = get_fs();
2733 set_fs (KERNEL_DS); 2716 set_fs(KERNEL_DS);
2734 err = dev_ioctl(net, cmd, (void __user *)&ifr); 2717 err = dev_ioctl(net, cmd, (void __user *)&ifr);
2735 set_fs (old_fs); 2718 set_fs(old_fs);
2736 2719
2737 if (cmd == SIOCGIFMAP && !err) { 2720 if (cmd == SIOCGIFMAP && !err) {
2738 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); 2721 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
@@ -2754,7 +2737,7 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif
2754 compat_uptr_t uptr32; 2737 compat_uptr_t uptr32;
2755 struct ifreq __user *uifr; 2738 struct ifreq __user *uifr;
2756 2739
2757 uifr = compat_alloc_user_space(sizeof (*uifr)); 2740 uifr = compat_alloc_user_space(sizeof(*uifr));
2758 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) 2741 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2759 return -EFAULT; 2742 return -EFAULT;
2760 2743
@@ -2770,20 +2753,20 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif
2770} 2753}
2771 2754
2772struct rtentry32 { 2755struct rtentry32 {
2773 u32 rt_pad1; 2756 u32 rt_pad1;
2774 struct sockaddr rt_dst; /* target address */ 2757 struct sockaddr rt_dst; /* target address */
2775 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ 2758 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2776 struct sockaddr rt_genmask; /* target network mask (IP) */ 2759 struct sockaddr rt_genmask; /* target network mask (IP) */
2777 unsigned short rt_flags; 2760 unsigned short rt_flags;
2778 short rt_pad2; 2761 short rt_pad2;
2779 u32 rt_pad3; 2762 u32 rt_pad3;
2780 unsigned char rt_tos; 2763 unsigned char rt_tos;
2781 unsigned char rt_class; 2764 unsigned char rt_class;
2782 short rt_pad4; 2765 short rt_pad4;
2783 short rt_metric; /* +1 for binary compatibility! */ 2766 short rt_metric; /* +1 for binary compatibility! */
2784 /* char * */ u32 rt_dev; /* forcing the device at add */ 2767 /* char * */ u32 rt_dev; /* forcing the device at add */
2785 u32 rt_mtu; /* per route MTU/Window */ 2768 u32 rt_mtu; /* per route MTU/Window */
2786 u32 rt_window; /* Window clamping */ 2769 u32 rt_window; /* Window clamping */
2787 unsigned short rt_irtt; /* Initial RTT */ 2770 unsigned short rt_irtt; /* Initial RTT */
2788}; 2771};
2789 2772
@@ -2813,29 +2796,29 @@ static int routing_ioctl(struct net *net, struct socket *sock,
2813 2796
2814 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ 2797 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2815 struct in6_rtmsg32 __user *ur6 = argp; 2798 struct in6_rtmsg32 __user *ur6 = argp;
2816 ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), 2799 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
2817 3 * sizeof(struct in6_addr)); 2800 3 * sizeof(struct in6_addr));
2818 ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); 2801 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2819 ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); 2802 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2820 ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); 2803 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2821 ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); 2804 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
2822 ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); 2805 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
2823 ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); 2806 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
2824 ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); 2807 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
2825 2808
2826 r = (void *) &r6; 2809 r = (void *) &r6;
2827 } else { /* ipv4 */ 2810 } else { /* ipv4 */
2828 struct rtentry32 __user *ur4 = argp; 2811 struct rtentry32 __user *ur4 = argp;
2829 ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), 2812 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
2830 3 * sizeof(struct sockaddr)); 2813 3 * sizeof(struct sockaddr));
2831 ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); 2814 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
2832 ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); 2815 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
2833 ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); 2816 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
2834 ret |= __get_user (r4.rt_window, &(ur4->rt_window)); 2817 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
2835 ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); 2818 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
2836 ret |= __get_user (rtdev, &(ur4->rt_dev)); 2819 ret |= __get_user(rtdev, &(ur4->rt_dev));
2837 if (rtdev) { 2820 if (rtdev) {
2838 ret |= copy_from_user (devname, compat_ptr(rtdev), 15); 2821 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
2839 r4.rt_dev = devname; devname[15] = 0; 2822 r4.rt_dev = devname; devname[15] = 0;
2840 } else 2823 } else
2841 r4.rt_dev = NULL; 2824 r4.rt_dev = NULL;
@@ -2848,9 +2831,9 @@ static int routing_ioctl(struct net *net, struct socket *sock,
2848 goto out; 2831 goto out;
2849 } 2832 }
2850 2833
2851 set_fs (KERNEL_DS); 2834 set_fs(KERNEL_DS);
2852 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); 2835 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
2853 set_fs (old_fs); 2836 set_fs(old_fs);
2854 2837
2855out: 2838out:
2856 return ret; 2839 return ret;
@@ -3013,11 +2996,13 @@ int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3013{ 2996{
3014 return sock->ops->bind(sock, addr, addrlen); 2997 return sock->ops->bind(sock, addr, addrlen);
3015} 2998}
2999EXPORT_SYMBOL(kernel_bind);
3016 3000
3017int kernel_listen(struct socket *sock, int backlog) 3001int kernel_listen(struct socket *sock, int backlog)
3018{ 3002{
3019 return sock->ops->listen(sock, backlog); 3003 return sock->ops->listen(sock, backlog);
3020} 3004}
3005EXPORT_SYMBOL(kernel_listen);
3021 3006
3022int kernel_accept(struct socket *sock, struct socket **newsock, int flags) 3007int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3023{ 3008{
@@ -3042,24 +3027,28 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3042done: 3027done:
3043 return err; 3028 return err;
3044} 3029}
3030EXPORT_SYMBOL(kernel_accept);
3045 3031
3046int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, 3032int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
3047 int flags) 3033 int flags)
3048{ 3034{
3049 return sock->ops->connect(sock, addr, addrlen, flags); 3035 return sock->ops->connect(sock, addr, addrlen, flags);
3050} 3036}
3037EXPORT_SYMBOL(kernel_connect);
3051 3038
3052int kernel_getsockname(struct socket *sock, struct sockaddr *addr, 3039int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3053 int *addrlen) 3040 int *addrlen)
3054{ 3041{
3055 return sock->ops->getname(sock, addr, addrlen, 0); 3042 return sock->ops->getname(sock, addr, addrlen, 0);
3056} 3043}
3044EXPORT_SYMBOL(kernel_getsockname);
3057 3045
3058int kernel_getpeername(struct socket *sock, struct sockaddr *addr, 3046int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3059 int *addrlen) 3047 int *addrlen)
3060{ 3048{
3061 return sock->ops->getname(sock, addr, addrlen, 1); 3049 return sock->ops->getname(sock, addr, addrlen, 1);
3062} 3050}
3051EXPORT_SYMBOL(kernel_getpeername);
3063 3052
3064int kernel_getsockopt(struct socket *sock, int level, int optname, 3053int kernel_getsockopt(struct socket *sock, int level, int optname,
3065 char *optval, int *optlen) 3054 char *optval, int *optlen)
@@ -3076,6 +3065,7 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
3076 set_fs(oldfs); 3065 set_fs(oldfs);
3077 return err; 3066 return err;
3078} 3067}
3068EXPORT_SYMBOL(kernel_getsockopt);
3079 3069
3080int kernel_setsockopt(struct socket *sock, int level, int optname, 3070int kernel_setsockopt(struct socket *sock, int level, int optname,
3081 char *optval, unsigned int optlen) 3071 char *optval, unsigned int optlen)
@@ -3092,15 +3082,19 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
3092 set_fs(oldfs); 3082 set_fs(oldfs);
3093 return err; 3083 return err;
3094} 3084}
3085EXPORT_SYMBOL(kernel_setsockopt);
3095 3086
3096int kernel_sendpage(struct socket *sock, struct page *page, int offset, 3087int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3097 size_t size, int flags) 3088 size_t size, int flags)
3098{ 3089{
3090 sock_update_classid(sock->sk);
3091
3099 if (sock->ops->sendpage) 3092 if (sock->ops->sendpage)
3100 return sock->ops->sendpage(sock, page, offset, size, flags); 3093 return sock->ops->sendpage(sock, page, offset, size, flags);
3101 3094
3102 return sock_no_sendpage(sock, page, offset, size, flags); 3095 return sock_no_sendpage(sock, page, offset, size, flags);
3103} 3096}
3097EXPORT_SYMBOL(kernel_sendpage);
3104 3098
3105int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) 3099int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3106{ 3100{
@@ -3113,33 +3107,10 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3113 3107
3114 return err; 3108 return err;
3115} 3109}
3110EXPORT_SYMBOL(kernel_sock_ioctl);
3116 3111
3117int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) 3112int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3118{ 3113{
3119 return sock->ops->shutdown(sock, how); 3114 return sock->ops->shutdown(sock, how);
3120} 3115}
3121
3122EXPORT_SYMBOL(sock_create);
3123EXPORT_SYMBOL(sock_create_kern);
3124EXPORT_SYMBOL(sock_create_lite);
3125EXPORT_SYMBOL(sock_map_fd);
3126EXPORT_SYMBOL(sock_recvmsg);
3127EXPORT_SYMBOL(sock_register);
3128EXPORT_SYMBOL(sock_release);
3129EXPORT_SYMBOL(sock_sendmsg);
3130EXPORT_SYMBOL(sock_unregister);
3131EXPORT_SYMBOL(sock_wake_async);
3132EXPORT_SYMBOL(sockfd_lookup);
3133EXPORT_SYMBOL(kernel_sendmsg);
3134EXPORT_SYMBOL(kernel_recvmsg);
3135EXPORT_SYMBOL(kernel_bind);
3136EXPORT_SYMBOL(kernel_listen);
3137EXPORT_SYMBOL(kernel_accept);
3138EXPORT_SYMBOL(kernel_connect);
3139EXPORT_SYMBOL(kernel_getsockname);
3140EXPORT_SYMBOL(kernel_getpeername);
3141EXPORT_SYMBOL(kernel_getsockopt);
3142EXPORT_SYMBOL(kernel_setsockopt);
3143EXPORT_SYMBOL(kernel_sendpage);
3144EXPORT_SYMBOL(kernel_sock_ioctl);
3145EXPORT_SYMBOL(kernel_sock_shutdown); 3116EXPORT_SYMBOL(kernel_sock_shutdown);
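
This removed block is the counterpart of the EXPORT_SYMBOL() lines added throughout the earlier hunks: each export now sits directly after the function it exports instead of in one list at the end of the file, e.g.:

	int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
	{
		return sock->ops->shutdown(sock, how);
	}
	EXPORT_SYMBOL(kernel_sock_shutdown);
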
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 95afe79dd9d7..880d0de3f50f 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -19,6 +19,15 @@
19# define RPCDBG_FACILITY RPCDBG_AUTH 19# define RPCDBG_FACILITY RPCDBG_AUTH
20#endif 20#endif
21 21
22#define RPC_CREDCACHE_DEFAULT_HASHBITS (4)
23struct rpc_cred_cache {
24 struct hlist_head *hashtable;
25 unsigned int hashbits;
26 spinlock_t lock;
27};
28
29static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
30
22static DEFINE_SPINLOCK(rpc_authflavor_lock); 31static DEFINE_SPINLOCK(rpc_authflavor_lock);
23static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { 32static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
24 &authnull_ops, /* AUTH_NULL */ 33 &authnull_ops, /* AUTH_NULL */
@@ -29,6 +38,42 @@ static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
29static LIST_HEAD(cred_unused); 38static LIST_HEAD(cred_unused);
30static unsigned long number_cred_unused; 39static unsigned long number_cred_unused;
31 40
41#define MAX_HASHTABLE_BITS (10)
42static int param_set_hashtbl_sz(const char *val, struct kernel_param *kp)
43{
44 unsigned long num;
45 unsigned int nbits;
46 int ret;
47
48 if (!val)
49 goto out_inval;
50 ret = strict_strtoul(val, 0, &num);
51 if (ret == -EINVAL)
52 goto out_inval;
53 nbits = fls(num);
54 if (num > (1U << nbits))
55 nbits++;
56 if (nbits > MAX_HASHTABLE_BITS || nbits < 2)
57 goto out_inval;
58 *(unsigned int *)kp->arg = nbits;
59 return 0;
60out_inval:
61 return -EINVAL;
62}
63
64static int param_get_hashtbl_sz(char *buffer, struct kernel_param *kp)
65{
66 unsigned int nbits;
67
68 nbits = *(unsigned int *)kp->arg;
69 return sprintf(buffer, "%u", 1U << nbits);
70}
71
72#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
73
74module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644);
75MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size");
76
32static u32 77static u32
33pseudoflavor_to_flavor(u32 flavor) { 78pseudoflavor_to_flavor(u32 flavor) {
34 if (flavor >= RPC_AUTH_MAXFLAVOR) 79 if (flavor >= RPC_AUTH_MAXFLAVOR)
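
param_set_hashtbl_sz() stores the requested cache size as a power-of-two exponent. A worked example under the code above, assuming the parameter is set via sunrpc.auth_hashtable_size= on the kernel command line or through /sys/module/sunrpc/parameters/:

	/* auth_hashtable_size=500: fls(500) == 9 and 500 <= (1 << 9),
	 * so auth_hashbits = 9 and reading the parameter back reports
	 * 1 << 9 = 512 buckets. Requests rounding above
	 * MAX_HASHTABLE_BITS (10) or below 2 bits fail with -EINVAL. */
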
@@ -145,16 +190,23 @@ int
145rpcauth_init_credcache(struct rpc_auth *auth) 190rpcauth_init_credcache(struct rpc_auth *auth)
146{ 191{
147 struct rpc_cred_cache *new; 192 struct rpc_cred_cache *new;
148 int i; 193 unsigned int hashsize;
149 194
150 new = kmalloc(sizeof(*new), GFP_KERNEL); 195 new = kmalloc(sizeof(*new), GFP_KERNEL);
151 if (!new) 196 if (!new)
152 return -ENOMEM; 197 goto out_nocache;
153 for (i = 0; i < RPC_CREDCACHE_NR; i++) 198 new->hashbits = auth_hashbits;
154 INIT_HLIST_HEAD(&new->hashtable[i]); 199 hashsize = 1U << new->hashbits;
200 new->hashtable = kcalloc(hashsize, sizeof(new->hashtable[0]), GFP_KERNEL);
201 if (!new->hashtable)
202 goto out_nohashtbl;
155 spin_lock_init(&new->lock); 203 spin_lock_init(&new->lock);
156 auth->au_credcache = new; 204 auth->au_credcache = new;
157 return 0; 205 return 0;
206out_nohashtbl:
207 kfree(new);
208out_nocache:
209 return -ENOMEM;
158} 210}
159EXPORT_SYMBOL_GPL(rpcauth_init_credcache); 211EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
160 212
@@ -183,11 +235,12 @@ rpcauth_clear_credcache(struct rpc_cred_cache *cache)
183 LIST_HEAD(free); 235 LIST_HEAD(free);
184 struct hlist_head *head; 236 struct hlist_head *head;
185 struct rpc_cred *cred; 237 struct rpc_cred *cred;
238 unsigned int hashsize = 1U << cache->hashbits;
186 int i; 239 int i;
187 240
188 spin_lock(&rpc_credcache_lock); 241 spin_lock(&rpc_credcache_lock);
189 spin_lock(&cache->lock); 242 spin_lock(&cache->lock);
190 for (i = 0; i < RPC_CREDCACHE_NR; i++) { 243 for (i = 0; i < hashsize; i++) {
191 head = &cache->hashtable[i]; 244 head = &cache->hashtable[i];
192 while (!hlist_empty(head)) { 245 while (!hlist_empty(head)) {
193 cred = hlist_entry(head->first, struct rpc_cred, cr_hash); 246 cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
@@ -216,6 +269,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth)
216 if (cache) { 269 if (cache) {
217 auth->au_credcache = NULL; 270 auth->au_credcache = NULL;
218 rpcauth_clear_credcache(cache); 271 rpcauth_clear_credcache(cache);
272 kfree(cache->hashtable);
219 kfree(cache); 273 kfree(cache);
220 } 274 }
221} 275}
@@ -236,10 +290,15 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
236 290
237 list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { 291 list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
238 292
239 /* Enforce a 60 second garbage collection moratorium */ 293 if (nr_to_scan-- == 0)
294 break;
295 /*
296 * Enforce a 60 second garbage collection moratorium
297 * Note that the cred_unused list must be time-ordered.
298 */
240 if (time_in_range(cred->cr_expire, expired, jiffies) && 299 if (time_in_range(cred->cr_expire, expired, jiffies) &&
241 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) 300 test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
242 continue; 301 return 0;
243 302
244 list_del_init(&cred->cr_lru); 303 list_del_init(&cred->cr_lru);
245 number_cred_unused--; 304 number_cred_unused--;
@@ -252,29 +311,27 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
252 get_rpccred(cred); 311 get_rpccred(cred);
253 list_add_tail(&cred->cr_lru, free); 312 list_add_tail(&cred->cr_lru, free);
254 rpcauth_unhash_cred_locked(cred); 313 rpcauth_unhash_cred_locked(cred);
255 nr_to_scan--;
256 } 314 }
257 spin_unlock(cache_lock); 315 spin_unlock(cache_lock);
258 if (nr_to_scan == 0)
259 break;
260 } 316 }
261 return nr_to_scan; 317 return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
262} 318}
263 319
264/* 320/*
265 * Run memory cache shrinker. 321 * Run memory cache shrinker.
266 */ 322 */
267static int 323static int
268rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) 324rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
269{ 325{
270 LIST_HEAD(free); 326 LIST_HEAD(free);
271 int res; 327 int res;
272 328
329 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
330 return (nr_to_scan == 0) ? 0 : -1;
273 if (list_empty(&cred_unused)) 331 if (list_empty(&cred_unused))
274 return 0; 332 return 0;
275 spin_lock(&rpc_credcache_lock); 333 spin_lock(&rpc_credcache_lock);
276 nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan); 334 res = rpcauth_prune_expired(&free, nr_to_scan);
277 res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
278 spin_unlock(&rpc_credcache_lock); 335 spin_unlock(&rpc_credcache_lock);
279 rpcauth_destroy_credlist(&free); 336 rpcauth_destroy_credlist(&free);
280 return res; 337 return res;
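
Two behavioural changes in the hunks above are easy to miss: rpcauth_prune_expired() now stops after nr_to_scan entries and computes the pressure estimate itself, and the shrinker bails out of atomic reclaim (returning -1 asks the VM to come back later). The estimate is:

	/* res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
	 * with the default vfs_cache_pressure of 100 this is roughly one
	 * shrinker unit per unused credential, scaling linearly if the
	 * sysctl is raised. */
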
@@ -294,7 +351,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
294 *entry, *new; 351 *entry, *new;
295 unsigned int nr; 352 unsigned int nr;
296 353
297 nr = hash_long(acred->uid, RPC_CREDCACHE_HASHBITS); 354 nr = hash_long(acred->uid, cache->hashbits);
298 355
299 rcu_read_lock(); 356 rcu_read_lock();
300 hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { 357 hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) {
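
With the table sized at runtime, bucket selection hashes into cache->hashbits bits instead of the old RPC_CREDCACHE_HASHBITS constant, so the index always fits the kcalloc()ed table:

	unsigned int nr = hash_long(acred->uid, cache->hashbits);
	struct hlist_head *bucket = &cache->hashtable[nr];	/* nr < 1U << hashbits */
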
@@ -387,16 +444,16 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
387} 444}
388EXPORT_SYMBOL_GPL(rpcauth_init_cred); 445EXPORT_SYMBOL_GPL(rpcauth_init_cred);
389 446
390void 447struct rpc_cred *
391rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) 448rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags)
392{ 449{
393 task->tk_msg.rpc_cred = get_rpccred(cred);
394 dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, 450 dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
395 cred->cr_auth->au_ops->au_name, cred); 451 cred->cr_auth->au_ops->au_name, cred);
452 return get_rpccred(cred);
396} 453}
397EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); 454EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
398 455
399static void 456static struct rpc_cred *
400rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) 457rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
401{ 458{
402 struct rpc_auth *auth = task->tk_client->cl_auth; 459 struct rpc_auth *auth = task->tk_client->cl_auth;
@@ -404,45 +461,43 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags)
404 .uid = 0, 461 .uid = 0,
405 .gid = 0, 462 .gid = 0,
406 }; 463 };
407 struct rpc_cred *ret;
408 464
409 dprintk("RPC: %5u looking up %s cred\n", 465 dprintk("RPC: %5u looking up %s cred\n",
410 task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); 466 task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
411 ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); 467 return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
412 if (!IS_ERR(ret))
413 task->tk_msg.rpc_cred = ret;
414 else
415 task->tk_status = PTR_ERR(ret);
416} 468}
417 469
418static void 470static struct rpc_cred *
419rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags) 471rpcauth_bind_new_cred(struct rpc_task *task, int lookupflags)
420{ 472{
421 struct rpc_auth *auth = task->tk_client->cl_auth; 473 struct rpc_auth *auth = task->tk_client->cl_auth;
422 struct rpc_cred *ret;
423 474
424 dprintk("RPC: %5u looking up %s cred\n", 475 dprintk("RPC: %5u looking up %s cred\n",
425 task->tk_pid, auth->au_ops->au_name); 476 task->tk_pid, auth->au_ops->au_name);
426 ret = rpcauth_lookupcred(auth, lookupflags); 477 return rpcauth_lookupcred(auth, lookupflags);
427 if (!IS_ERR(ret))
428 task->tk_msg.rpc_cred = ret;
429 else
430 task->tk_status = PTR_ERR(ret);
431} 478}
432 479
433void 480static int
434rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) 481rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
435{ 482{
483 struct rpc_rqst *req = task->tk_rqstp;
484 struct rpc_cred *new;
436 int lookupflags = 0; 485 int lookupflags = 0;
437 486
438 if (flags & RPC_TASK_ASYNC) 487 if (flags & RPC_TASK_ASYNC)
439 lookupflags |= RPCAUTH_LOOKUP_NEW; 488 lookupflags |= RPCAUTH_LOOKUP_NEW;
440 if (cred != NULL) 489 if (cred != NULL)
441 cred->cr_ops->crbind(task, cred, lookupflags); 490 new = cred->cr_ops->crbind(task, cred, lookupflags);
442 else if (flags & RPC_TASK_ROOTCREDS) 491 else if (flags & RPC_TASK_ROOTCREDS)
443 rpcauth_bind_root_cred(task, lookupflags); 492 new = rpcauth_bind_root_cred(task, lookupflags);
444 else 493 else
445 rpcauth_bind_new_cred(task, lookupflags); 494 new = rpcauth_bind_new_cred(task, lookupflags);
495 if (IS_ERR(new))
496 return PTR_ERR(new);
497 if (req->rq_cred != NULL)
498 put_rpccred(req->rq_cred);
499 req->rq_cred = new;
500 return 0;
446} 501}
447 502
448void 503void
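
The theme of this auth.c rework: a bound credential now lives on the RPC request (rq_cred) rather than on the task, and the bind helpers return the cred instead of writing task state as a side effect. The crbind operation in struct rpc_credops takes, roughly, this shape after the change:

	struct rpc_cred *(*crbind)(struct rpc_task *task,
				   struct rpc_cred *cred,
				   int lookupflags);	/* cred or ERR_PTR() */
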
@@ -481,22 +536,10 @@ out_nodestroy:
481} 536}
482EXPORT_SYMBOL_GPL(put_rpccred); 537EXPORT_SYMBOL_GPL(put_rpccred);
483 538
484void
485rpcauth_unbindcred(struct rpc_task *task)
486{
487 struct rpc_cred *cred = task->tk_msg.rpc_cred;
488
489 dprintk("RPC: %5u releasing %s cred %p\n",
490 task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
491
492 put_rpccred(cred);
493 task->tk_msg.rpc_cred = NULL;
494}
495
496__be32 * 539__be32 *
497rpcauth_marshcred(struct rpc_task *task, __be32 *p) 540rpcauth_marshcred(struct rpc_task *task, __be32 *p)
498{ 541{
499 struct rpc_cred *cred = task->tk_msg.rpc_cred; 542 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
500 543
501 dprintk("RPC: %5u marshaling %s cred %p\n", 544 dprintk("RPC: %5u marshaling %s cred %p\n",
502 task->tk_pid, cred->cr_auth->au_ops->au_name, cred); 545 task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
@@ -507,7 +550,7 @@ rpcauth_marshcred(struct rpc_task *task, __be32 *p)
507__be32 * 550__be32 *
508rpcauth_checkverf(struct rpc_task *task, __be32 *p) 551rpcauth_checkverf(struct rpc_task *task, __be32 *p)
509{ 552{
510 struct rpc_cred *cred = task->tk_msg.rpc_cred; 553 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
511 554
512 dprintk("RPC: %5u validating %s cred %p\n", 555 dprintk("RPC: %5u validating %s cred %p\n",
513 task->tk_pid, cred->cr_auth->au_ops->au_name, cred); 556 task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
@@ -519,7 +562,7 @@ int
519rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, 562rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
520 __be32 *data, void *obj) 563 __be32 *data, void *obj)
521{ 564{
522 struct rpc_cred *cred = task->tk_msg.rpc_cred; 565 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
523 566
524 dprintk("RPC: %5u using %s cred %p to wrap rpc data\n", 567 dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
525 task->tk_pid, cred->cr_ops->cr_name, cred); 568 task->tk_pid, cred->cr_ops->cr_name, cred);
@@ -533,7 +576,7 @@ int
533rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, 576rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
534 __be32 *data, void *obj) 577 __be32 *data, void *obj)
535{ 578{
536 struct rpc_cred *cred = task->tk_msg.rpc_cred; 579 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
537 580
538 dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n", 581 dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
539 task->tk_pid, cred->cr_ops->cr_name, cred); 582 task->tk_pid, cred->cr_ops->cr_name, cred);
@@ -547,13 +590,21 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
547int 590int
548rpcauth_refreshcred(struct rpc_task *task) 591rpcauth_refreshcred(struct rpc_task *task)
549{ 592{
550 struct rpc_cred *cred = task->tk_msg.rpc_cred; 593 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
551 int err; 594 int err;
552 595
596 cred = task->tk_rqstp->rq_cred;
597 if (cred == NULL) {
598 err = rpcauth_bindcred(task, task->tk_msg.rpc_cred, task->tk_flags);
599 if (err < 0)
600 goto out;
601 cred = task->tk_rqstp->rq_cred;
602 };
553 dprintk("RPC: %5u refreshing %s cred %p\n", 603 dprintk("RPC: %5u refreshing %s cred %p\n",
554 task->tk_pid, cred->cr_auth->au_ops->au_name, cred); 604 task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
555 605
556 err = cred->cr_ops->crrefresh(task); 606 err = cred->cr_ops->crrefresh(task);
607out:
557 if (err < 0) 608 if (err < 0)
558 task->tk_status = err; 609 task->tk_status = err;
559 return err; 610 return err;
@@ -562,7 +613,7 @@ rpcauth_refreshcred(struct rpc_task *task)
562void 613void
563rpcauth_invalcred(struct rpc_task *task) 614rpcauth_invalcred(struct rpc_task *task)
564{ 615{
565 struct rpc_cred *cred = task->tk_msg.rpc_cred; 616 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
566 617
567 dprintk("RPC: %5u invalidating %s cred %p\n", 618 dprintk("RPC: %5u invalidating %s cred %p\n",
568 task->tk_pid, cred->cr_auth->au_ops->au_name, cred); 619 task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
@@ -573,7 +624,7 @@ rpcauth_invalcred(struct rpc_task *task)
573int 624int
574rpcauth_uptodatecred(struct rpc_task *task) 625rpcauth_uptodatecred(struct rpc_task *task)
575{ 626{
576 struct rpc_cred *cred = task->tk_msg.rpc_cred; 627 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
577 628
578 return cred == NULL || 629 return cred == NULL ||
579 test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; 630 test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
@@ -584,14 +635,27 @@ static struct shrinker rpc_cred_shrinker = {
584 .seeks = DEFAULT_SEEKS, 635 .seeks = DEFAULT_SEEKS,
585}; 636};
586 637
587void __init rpcauth_init_module(void) 638int __init rpcauth_init_module(void)
588{ 639{
589 rpc_init_authunix(); 640 int err;
590 rpc_init_generic_auth(); 641
642 err = rpc_init_authunix();
643 if (err < 0)
644 goto out1;
645 err = rpc_init_generic_auth();
646 if (err < 0)
647 goto out2;
591 register_shrinker(&rpc_cred_shrinker); 648 register_shrinker(&rpc_cred_shrinker);
649 return 0;
650out2:
651 rpc_destroy_authunix();
652out1:
653 return err;
592} 654}
593 655
594void __exit rpcauth_remove_module(void) 656void __exit rpcauth_remove_module(void)
595{ 657{
658 rpc_destroy_authunix();
659 rpc_destroy_generic_auth();
596 unregister_shrinker(&rpc_cred_shrinker); 660 unregister_shrinker(&rpc_cred_shrinker);
597} 661}
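
rpcauth_init_module() can now fail, since both the generic and the unix flavours allocate cred caches, so its caller has to check the result. Presumably, in net/sunrpc/sunrpc_syms.c (hedged sketch, not part of this diff):

	err = rpcauth_init_module();
	if (err)
		goto cleanup;	/* unwind the earlier sunrpc init steps */
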
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 8f623b0f03dd..43162bb3b78f 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -27,7 +27,6 @@ struct generic_cred {
27}; 27};
28 28
29static struct rpc_auth generic_auth; 29static struct rpc_auth generic_auth;
30static struct rpc_cred_cache generic_cred_cache;
31static const struct rpc_credops generic_credops; 30static const struct rpc_credops generic_credops;
32 31
33/* 32/*
@@ -55,18 +54,13 @@ struct rpc_cred *rpc_lookup_machine_cred(void)
55} 54}
56EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); 55EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
57 56
58static void 57static struct rpc_cred *generic_bind_cred(struct rpc_task *task,
59generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred, int lookupflags) 58 struct rpc_cred *cred, int lookupflags)
60{ 59{
61 struct rpc_auth *auth = task->tk_client->cl_auth; 60 struct rpc_auth *auth = task->tk_client->cl_auth;
62 struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; 61 struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
63 struct rpc_cred *ret;
64 62
65 ret = auth->au_ops->lookup_cred(auth, acred, lookupflags); 63 return auth->au_ops->lookup_cred(auth, acred, lookupflags);
66 if (!IS_ERR(ret))
67 task->tk_msg.rpc_cred = ret;
68 else
69 task->tk_status = PTR_ERR(ret);
70} 64}
71 65
72/* 66/*
@@ -159,20 +153,16 @@ out_nomatch:
159 return 0; 153 return 0;
160} 154}
161 155
162void __init rpc_init_generic_auth(void) 156int __init rpc_init_generic_auth(void)
163{ 157{
164 spin_lock_init(&generic_cred_cache.lock); 158 return rpcauth_init_credcache(&generic_auth);
165} 159}
166 160
167void __exit rpc_destroy_generic_auth(void) 161void __exit rpc_destroy_generic_auth(void)
168{ 162{
169 rpcauth_clear_credcache(&generic_cred_cache); 163 rpcauth_destroy_credcache(&generic_auth);
170} 164}
171 165
172static struct rpc_cred_cache generic_cred_cache = {
173 {{ NULL, },},
174};
175
176static const struct rpc_authops generic_auth_ops = { 166static const struct rpc_authops generic_auth_ops = {
177 .owner = THIS_MODULE, 167 .owner = THIS_MODULE,
178 .au_name = "Generic", 168 .au_name = "Generic",
@@ -183,7 +173,6 @@ static const struct rpc_authops generic_auth_ops = {
183static struct rpc_auth generic_auth = { 173static struct rpc_auth generic_auth = {
184 .au_ops = &generic_auth_ops, 174 .au_ops = &generic_auth_ops,
185 .au_count = ATOMIC_INIT(0), 175 .au_count = ATOMIC_INIT(0),
186 .au_credcache = &generic_cred_cache,
187}; 176};
188 177
189static const struct rpc_credops generic_credops = { 178static const struct rpc_credops generic_credops = {
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 4de8bcf26fa7..74a231735f67 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o 10obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
11 11
12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ 12rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o 13 gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
14 14
15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o 15obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
16 16
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index c389ccf6437d..dcfc66bab2bb 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -57,11 +57,14 @@ static const struct rpc_authops authgss_ops;
57static const struct rpc_credops gss_credops; 57static const struct rpc_credops gss_credops;
58static const struct rpc_credops gss_nullops; 58static const struct rpc_credops gss_nullops;
59 59
60#define GSS_RETRY_EXPIRED 5
61static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
62
60#ifdef RPC_DEBUG 63#ifdef RPC_DEBUG
61# define RPCDBG_FACILITY RPCDBG_AUTH 64# define RPCDBG_FACILITY RPCDBG_AUTH
62#endif 65#endif
63 66
64#define GSS_CRED_SLACK 1024 67#define GSS_CRED_SLACK (RPC_MAX_AUTH_SIZE * 2)
65/* length of a krb5 verifier (48), plus data added before arguments when 68/* length of a krb5 verifier (48), plus data added before arguments when
66 * using integrity (two 4-byte integers): */ 69 * using integrity (two 4-byte integers): */
67#define GSS_VERF_SLACK 100 70#define GSS_VERF_SLACK 100
@@ -229,7 +232,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
229 p = ERR_PTR(-EFAULT); 232 p = ERR_PTR(-EFAULT);
230 goto err; 233 goto err;
231 } 234 }
232 ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx); 235 ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS);
233 if (ret < 0) { 236 if (ret < 0) {
234 p = ERR_PTR(ret); 237 p = ERR_PTR(ret);
235 goto err; 238 goto err;
@@ -350,21 +353,35 @@ gss_unhash_msg(struct gss_upcall_msg *gss_msg)
350} 353}
351 354
352static void 355static void
356gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg)
357{
358 switch (gss_msg->msg.errno) {
359 case 0:
360 if (gss_msg->ctx == NULL)
361 break;
362 clear_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
363 gss_cred_set_ctx(&gss_cred->gc_base, gss_msg->ctx);
364 break;
365 case -EKEYEXPIRED:
366 set_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
367 }
368 gss_cred->gc_upcall_timestamp = jiffies;
369 gss_cred->gc_upcall = NULL;
370 rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
371}
372
373static void
353gss_upcall_callback(struct rpc_task *task) 374gss_upcall_callback(struct rpc_task *task)
354{ 375{
355 struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, 376 struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred,
356 struct gss_cred, gc_base); 377 struct gss_cred, gc_base);
357 struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; 378 struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
358 struct inode *inode = &gss_msg->inode->vfs_inode; 379 struct inode *inode = &gss_msg->inode->vfs_inode;
359 380
360 spin_lock(&inode->i_lock); 381 spin_lock(&inode->i_lock);
361 if (gss_msg->ctx) 382 gss_handle_downcall_result(gss_cred, gss_msg);
362 gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
363 else
364 task->tk_status = gss_msg->msg.errno;
365 gss_cred->gc_upcall = NULL;
366 rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
367 spin_unlock(&inode->i_lock); 383 spin_unlock(&inode->i_lock);
384 task->tk_status = gss_msg->msg.errno;
368 gss_release_msg(gss_msg); 385 gss_release_msg(gss_msg);
369} 386}
370 387
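
gss_handle_downcall_result() centralises what gss_upcall_callback() and gss_refresh_upcall() used to duplicate, and adds the negative-caching hook: a downcall that fails with -EKEYEXPIRED flags the cred and records the time. The effect, per gss_cred_is_negative_entry() near the end of this section:

	/* now in (gc_upcall_timestamp,
	 *         gc_upcall_timestamp + gss_expired_cred_retry_delay * HZ)
	 *   => gss_refresh() returns -EKEYEXPIRED immediately,
	 *      with no new upcall to gssd for those ~5 seconds. */
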
@@ -377,11 +394,12 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
377static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, 394static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
378 struct rpc_clnt *clnt, int machine_cred) 395 struct rpc_clnt *clnt, int machine_cred)
379{ 396{
397 struct gss_api_mech *mech = gss_msg->auth->mech;
380 char *p = gss_msg->databuf; 398 char *p = gss_msg->databuf;
381 int len = 0; 399 int len = 0;
382 400
383 gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ", 401 gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ",
384 gss_msg->auth->mech->gm_name, 402 mech->gm_name,
385 gss_msg->uid); 403 gss_msg->uid);
386 p += gss_msg->msg.len; 404 p += gss_msg->msg.len;
387 if (clnt->cl_principal) { 405 if (clnt->cl_principal) {
@@ -398,6 +416,11 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
398 p += len; 416 p += len;
399 gss_msg->msg.len += len; 417 gss_msg->msg.len += len;
400 } 418 }
419 if (mech->gm_upcall_enctypes) {
420 len = sprintf(p, mech->gm_upcall_enctypes);
421 p += len;
422 gss_msg->msg.len += len;
423 }
401 len = sprintf(p, "\n"); 424 len = sprintf(p, "\n");
402 gss_msg->msg.len += len; 425 gss_msg->msg.len += len;
403 426
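
With a mechanism that sets gm_upcall_enctypes, the text upcall to gssd gains one more field; the resulting line looks roughly like this (the enctype list is defined by the krb5 mech, shown purely as an illustration):

	/* mech=krb5 uid=500 enctypes=18,17,16,23,3,1,2 \n
	 * gm_upcall_enctypes is handed to sprintf() as the format string
	 * itself, so the mech-supplied text must not contain conversion
	 * specifiers. */
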
@@ -479,7 +502,7 @@ static void warn_gssd(void)
479static inline int 502static inline int
480gss_refresh_upcall(struct rpc_task *task) 503gss_refresh_upcall(struct rpc_task *task)
481{ 504{
482 struct rpc_cred *cred = task->tk_msg.rpc_cred; 505 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
483 struct gss_auth *gss_auth = container_of(cred->cr_auth, 506 struct gss_auth *gss_auth = container_of(cred->cr_auth,
484 struct gss_auth, rpc_auth); 507 struct gss_auth, rpc_auth);
485 struct gss_cred *gss_cred = container_of(cred, 508 struct gss_cred *gss_cred = container_of(cred,
@@ -507,18 +530,16 @@ gss_refresh_upcall(struct rpc_task *task)
507 spin_lock(&inode->i_lock); 530 spin_lock(&inode->i_lock);
508 if (gss_cred->gc_upcall != NULL) 531 if (gss_cred->gc_upcall != NULL)
509 rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); 532 rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
510 else if (gss_msg->ctx != NULL) { 533 else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
511 gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
512 gss_cred->gc_upcall = NULL;
513 rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
514 } else if (gss_msg->msg.errno >= 0) {
515 task->tk_timeout = 0; 534 task->tk_timeout = 0;
516 gss_cred->gc_upcall = gss_msg; 535 gss_cred->gc_upcall = gss_msg;
517 /* gss_upcall_callback will release the reference to gss_upcall_msg */ 536 /* gss_upcall_callback will release the reference to gss_upcall_msg */
518 atomic_inc(&gss_msg->count); 537 atomic_inc(&gss_msg->count);
519 rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback); 538 rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback);
520 } else 539 } else {
540 gss_handle_downcall_result(gss_cred, gss_msg);
521 err = gss_msg->msg.errno; 541 err = gss_msg->msg.errno;
542 }
522 spin_unlock(&inode->i_lock); 543 spin_unlock(&inode->i_lock);
523 gss_release_msg(gss_msg); 544 gss_release_msg(gss_msg);
524out: 545out:
@@ -907,6 +928,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx)
907{ 928{
908 dprintk("RPC: gss_free_ctx\n"); 929 dprintk("RPC: gss_free_ctx\n");
909 930
931 gss_delete_sec_context(&ctx->gc_gss_ctx);
910 kfree(ctx->gc_wire_ctx.data); 932 kfree(ctx->gc_wire_ctx.data);
911 kfree(ctx); 933 kfree(ctx);
912} 934}
@@ -921,13 +943,7 @@ gss_free_ctx_callback(struct rcu_head *head)
921static void 943static void
922gss_free_ctx(struct gss_cl_ctx *ctx) 944gss_free_ctx(struct gss_cl_ctx *ctx)
923{ 945{
924 struct gss_ctx *gc_gss_ctx;
925
926 gc_gss_ctx = rcu_dereference(ctx->gc_gss_ctx);
927 rcu_assign_pointer(ctx->gc_gss_ctx, NULL);
928 call_rcu(&ctx->gc_rcu, gss_free_ctx_callback); 946 call_rcu(&ctx->gc_rcu, gss_free_ctx_callback);
929 if (gc_gss_ctx)
930 gss_delete_sec_context(&gc_gss_ctx);
931} 947}
932 948
933static void 949static void
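Moving gss_delete_sec_context() into gss_do_free_ctx(), which runs from the RCU callback, guarantees the mechanism context outlives any reader that fetched gc_gss_ctx under rcu_read_lock(); the open-coded variant removed below tore it down immediately. A minimal sketch of the deferred-free pattern, with hypothetical names:

    #include <linux/rcupdate.h>
    #include <linux/slab.h>
    #include <linux/sunrpc/gss_api.h>

    struct obj {
    	struct rcu_head rcu;
    	struct gss_ctx *mech_ctx;
    };

    static void obj_free_rcu(struct rcu_head *head)
    {
    	struct obj *o = container_of(head, struct obj, rcu);

    	/* grace period has elapsed: no reader still sees o */
    	gss_delete_sec_context(&o->mech_ctx);
    	kfree(o);
    }

    static void obj_release(struct obj *o)
    {
    	call_rcu(&o->rcu, obj_free_rcu);
    }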
@@ -1043,12 +1059,12 @@ out:
1043static __be32 * 1059static __be32 *
1044gss_marshal(struct rpc_task *task, __be32 *p) 1060gss_marshal(struct rpc_task *task, __be32 *p)
1045{ 1061{
1046 struct rpc_cred *cred = task->tk_msg.rpc_cred; 1062 struct rpc_rqst *req = task->tk_rqstp;
1063 struct rpc_cred *cred = req->rq_cred;
1047 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, 1064 struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
1048 gc_base); 1065 gc_base);
1049 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); 1066 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
1050 __be32 *cred_len; 1067 __be32 *cred_len;
1051 struct rpc_rqst *req = task->tk_rqstp;
1052 u32 maj_stat = 0; 1068 u32 maj_stat = 0;
1053 struct xdr_netobj mic; 1069 struct xdr_netobj mic;
1054 struct kvec iov; 1070 struct kvec iov;
@@ -1098,7 +1114,7 @@ out_put_ctx:
1098 1114
1099static int gss_renew_cred(struct rpc_task *task) 1115static int gss_renew_cred(struct rpc_task *task)
1100{ 1116{
1101 struct rpc_cred *oldcred = task->tk_msg.rpc_cred; 1117 struct rpc_cred *oldcred = task->tk_rqstp->rq_cred;
1102 struct gss_cred *gss_cred = container_of(oldcred, 1118 struct gss_cred *gss_cred = container_of(oldcred,
1103 struct gss_cred, 1119 struct gss_cred,
1104 gc_base); 1120 gc_base);
@@ -1112,26 +1128,46 @@ static int gss_renew_cred(struct rpc_task *task)
1112 new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); 1128 new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW);
1113 if (IS_ERR(new)) 1129 if (IS_ERR(new))
1114 return PTR_ERR(new); 1130 return PTR_ERR(new);
1115 task->tk_msg.rpc_cred = new; 1131 task->tk_rqstp->rq_cred = new;
1116 put_rpccred(oldcred); 1132 put_rpccred(oldcred);
1117 return 0; 1133 return 0;
1118} 1134}
1119 1135
1136static int gss_cred_is_negative_entry(struct rpc_cred *cred)
1137{
1138 if (test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags)) {
1139 unsigned long now = jiffies;
1140 unsigned long begin, expire;
1141 struct gss_cred *gss_cred;
1142
1143 gss_cred = container_of(cred, struct gss_cred, gc_base);
1144 begin = gss_cred->gc_upcall_timestamp;
1145 expire = begin + gss_expired_cred_retry_delay * HZ;
1146
1147 if (time_in_range_open(now, begin, expire))
1148 return 1;
1149 }
1150 return 0;
1151}
1152
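gss_cred_is_negative_entry() is the read side of a small negative cache: a failed upcall flags the credential RPCAUTH_CRED_NEGATIVE and stamps gc_upcall_timestamp, and for the next expired_cred_retry_delay seconds gss_refresh() fails fast with -EKEYEXPIRED rather than re-issuing the upcall. A self-contained restatement of the window test (the helper name is hypothetical; time_in_range_open() is wraparound-safe and true iff begin <= now < expire):

    #include <linux/jiffies.h>
    #include <linux/types.h>

    static bool negative_window_active(unsigned long stamp,
    				   unsigned int delay_secs)
    {
    	unsigned long begin = stamp;	/* set when the upcall failed */
    	unsigned long expire = begin + delay_secs * HZ;

    	return time_in_range_open(jiffies, begin, expire);
    }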
1120/* 1153/*
1121* Refresh credentials. XXX - finish 1154* Refresh credentials. XXX - finish
1122*/ 1155*/
1123static int 1156static int
1124gss_refresh(struct rpc_task *task) 1157gss_refresh(struct rpc_task *task)
1125{ 1158{
1126 struct rpc_cred *cred = task->tk_msg.rpc_cred; 1159 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
1127 int ret = 0; 1160 int ret = 0;
1128 1161
1162 if (gss_cred_is_negative_entry(cred))
1163 return -EKEYEXPIRED;
1164
1129 if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && 1165 if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
1130 !test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) { 1166 !test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
1131 ret = gss_renew_cred(task); 1167 ret = gss_renew_cred(task);
1132 if (ret < 0) 1168 if (ret < 0)
1133 goto out; 1169 goto out;
1134 cred = task->tk_msg.rpc_cred; 1170 cred = task->tk_rqstp->rq_cred;
1135 } 1171 }
1136 1172
1137 if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) 1173 if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags))
@@ -1150,7 +1186,7 @@ gss_refresh_null(struct rpc_task *task)
1150static __be32 * 1186static __be32 *
1151gss_validate(struct rpc_task *task, __be32 *p) 1187gss_validate(struct rpc_task *task, __be32 *p)
1152{ 1188{
1153 struct rpc_cred *cred = task->tk_msg.rpc_cred; 1189 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
1154 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); 1190 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
1155 __be32 seq; 1191 __be32 seq;
1156 struct kvec iov; 1192 struct kvec iov;
@@ -1316,15 +1352,21 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
1316 inpages = snd_buf->pages + first; 1352 inpages = snd_buf->pages + first;
1317 snd_buf->pages = rqstp->rq_enc_pages; 1353 snd_buf->pages = rqstp->rq_enc_pages;
1318 snd_buf->page_base -= first << PAGE_CACHE_SHIFT; 1354 snd_buf->page_base -= first << PAGE_CACHE_SHIFT;
1319 /* Give the tail its own page, in case we need extra space in the 1355 /*
1320 * head when wrapping: */ 1356 * Give the tail its own page, in case we need extra space in the
1357 * head when wrapping:
1358 *
1359 * call_allocate() allocates twice the slack space required
1360 * by the authentication flavor to rq_callsize.
1361 * For GSS, slack is GSS_CRED_SLACK.
1362 */
1321 if (snd_buf->page_len || snd_buf->tail[0].iov_len) { 1363 if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
1322 tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); 1364 tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
1323 memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); 1365 memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
1324 snd_buf->tail[0].iov_base = tmp; 1366 snd_buf->tail[0].iov_base = tmp;
1325 } 1367 }
1326 maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); 1368 maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
1327 /* RPC_SLACK_SPACE should prevent this ever happening: */ 1369 /* slack space should prevent this ever happening: */
1328 BUG_ON(snd_buf->len > snd_buf->buflen); 1370 BUG_ON(snd_buf->len > snd_buf->buflen);
1329 status = -EIO; 1371 status = -EIO;
1330 /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was 1372 /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
@@ -1353,7 +1395,7 @@ static int
1353gss_wrap_req(struct rpc_task *task, 1395gss_wrap_req(struct rpc_task *task,
1354 kxdrproc_t encode, void *rqstp, __be32 *p, void *obj) 1396 kxdrproc_t encode, void *rqstp, __be32 *p, void *obj)
1355{ 1397{
1356 struct rpc_cred *cred = task->tk_msg.rpc_cred; 1398 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
1357 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, 1399 struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
1358 gc_base); 1400 gc_base);
1359 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); 1401 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
@@ -1456,7 +1498,7 @@ static int
1456gss_unwrap_resp(struct rpc_task *task, 1498gss_unwrap_resp(struct rpc_task *task,
1457 kxdrproc_t decode, void *rqstp, __be32 *p, void *obj) 1499 kxdrproc_t decode, void *rqstp, __be32 *p, void *obj)
1458{ 1500{
1459 struct rpc_cred *cred = task->tk_msg.rpc_cred; 1501 struct rpc_cred *cred = task->tk_rqstp->rq_cred;
1460 struct gss_cred *gss_cred = container_of(cred, struct gss_cred, 1502 struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
1461 gc_base); 1503 gc_base);
1462 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); 1504 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
@@ -1573,5 +1615,11 @@ static void __exit exit_rpcsec_gss(void)
1573} 1615}
1574 1616
1575MODULE_LICENSE("GPL"); 1617MODULE_LICENSE("GPL");
1618module_param_named(expired_cred_retry_delay,
1619 gss_expired_cred_retry_delay,
1620 uint, 0644);
1621MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
1622 "the RPC engine retries an expired credential");
1623
1576module_init(init_rpcsec_gss) 1624module_init(init_rpcsec_gss)
1577module_exit(exit_rpcsec_gss) 1625module_exit(exit_rpcsec_gss)
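Since the parameter is registered with mode 0644 it can also be tuned on a live system: by module_param_named() convention it would surface as /sys/module/auth_rpcgss/parameters/expired_cred_retry_delay (the module name is assumed from this directory's Makefile, which is not part of this diff), so writing, say, 10 to that file would widen the retry window to ten seconds.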
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index e9b636176687..75ee993ea057 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * linux/net/sunrpc/gss_krb5_crypto.c 2 * linux/net/sunrpc/gss_krb5_crypto.c
3 * 3 *
4 * Copyright (c) 2000 The Regents of the University of Michigan. 4 * Copyright (c) 2000-2008 The Regents of the University of Michigan.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Andy Adamson <andros@umich.edu> 7 * Andy Adamson <andros@umich.edu>
@@ -41,6 +41,7 @@
41#include <linux/crypto.h> 41#include <linux/crypto.h>
42#include <linux/highmem.h> 42#include <linux/highmem.h>
43#include <linux/pagemap.h> 43#include <linux/pagemap.h>
44#include <linux/random.h>
44#include <linux/sunrpc/gss_krb5.h> 45#include <linux/sunrpc/gss_krb5.h>
45#include <linux/sunrpc/xdr.h> 46#include <linux/sunrpc/xdr.h>
46 47
@@ -58,13 +59,13 @@ krb5_encrypt(
58{ 59{
59 u32 ret = -EINVAL; 60 u32 ret = -EINVAL;
60 struct scatterlist sg[1]; 61 struct scatterlist sg[1];
61 u8 local_iv[16] = {0}; 62 u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
62 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; 63 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
63 64
64 if (length % crypto_blkcipher_blocksize(tfm) != 0) 65 if (length % crypto_blkcipher_blocksize(tfm) != 0)
65 goto out; 66 goto out;
66 67
67 if (crypto_blkcipher_ivsize(tfm) > 16) { 68 if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
68 dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n", 69 dprintk("RPC: gss_k5encrypt: tfm iv size too large %d\n",
69 crypto_blkcipher_ivsize(tfm)); 70 crypto_blkcipher_ivsize(tfm));
70 goto out; 71 goto out;
@@ -92,13 +93,13 @@ krb5_decrypt(
92{ 93{
93 u32 ret = -EINVAL; 94 u32 ret = -EINVAL;
94 struct scatterlist sg[1]; 95 struct scatterlist sg[1];
95 u8 local_iv[16] = {0}; 96 u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
96 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv }; 97 struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
97 98
98 if (length % crypto_blkcipher_blocksize(tfm) != 0) 99 if (length % crypto_blkcipher_blocksize(tfm) != 0)
99 goto out; 100 goto out;
100 101
101 if (crypto_blkcipher_ivsize(tfm) > 16) { 102 if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
102 dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n", 103 dprintk("RPC: gss_k5decrypt: tfm iv size too large %d\n",
103 crypto_blkcipher_ivsize(tfm)); 104 crypto_blkcipher_ivsize(tfm));
104 goto out; 105 goto out;
@@ -123,21 +124,155 @@ checksummer(struct scatterlist *sg, void *data)
123 return crypto_hash_update(desc, sg, sg->length); 124 return crypto_hash_update(desc, sg, sg->length);
124} 125}
125 126
126/* checksum the plaintext data and hdrlen bytes of the token header */ 127static int
127s32 128arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4])
128make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body, 129{
129 int body_offset, struct xdr_netobj *cksum) 130 unsigned int ms_usage;
131
132 switch (usage) {
133 case KG_USAGE_SIGN:
134 ms_usage = 15;
135 break;
136 case KG_USAGE_SEAL:
137 ms_usage = 13;
138 break;
139 default:
 140 return EINVAL;
141 }
142 salt[0] = (ms_usage >> 0) & 0xff;
143 salt[1] = (ms_usage >> 8) & 0xff;
144 salt[2] = (ms_usage >> 16) & 0xff;
145 salt[3] = (ms_usage >> 24) & 0xff;
146
147 return 0;
148}
149
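A worked value for the salt mapping above, written as a caller of the helper (KG_USAGE_SIGN selects ms_usage 15, stored little-endian):

    u8 salt[4];

    if (arcfour_hmac_md5_usage_to_salt(KG_USAGE_SIGN, salt) == 0) {
    	/* salt == { 0x0f, 0x00, 0x00, 0x00 } */
    }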
150static u32
151make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
152 struct xdr_buf *body, int body_offset, u8 *cksumkey,
153 unsigned int usage, struct xdr_netobj *cksumout)
130{ 154{
131 struct hash_desc desc; /* XXX add to ctx? */ 155 struct hash_desc desc;
132 struct scatterlist sg[1]; 156 struct scatterlist sg[1];
133 int err; 157 int err;
158 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
159 u8 rc4salt[4];
160 struct crypto_hash *md5;
161 struct crypto_hash *hmac_md5;
162
163 if (cksumkey == NULL)
164 return GSS_S_FAILURE;
165
166 if (cksumout->len < kctx->gk5e->cksumlength) {
167 dprintk("%s: checksum buffer length, %u, too small for %s\n",
168 __func__, cksumout->len, kctx->gk5e->name);
169 return GSS_S_FAILURE;
170 }
171
172 if (arcfour_hmac_md5_usage_to_salt(usage, rc4salt)) {
173 dprintk("%s: invalid usage value %u\n", __func__, usage);
174 return GSS_S_FAILURE;
175 }
176
177 md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
178 if (IS_ERR(md5))
179 return GSS_S_FAILURE;
180
181 hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
182 CRYPTO_ALG_ASYNC);
183 if (IS_ERR(hmac_md5)) {
184 crypto_free_hash(md5);
185 return GSS_S_FAILURE;
186 }
187
188 desc.tfm = md5;
189 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
190
191 err = crypto_hash_init(&desc);
192 if (err)
193 goto out;
194 sg_init_one(sg, rc4salt, 4);
195 err = crypto_hash_update(&desc, sg, 4);
196 if (err)
197 goto out;
198
199 sg_init_one(sg, header, hdrlen);
200 err = crypto_hash_update(&desc, sg, hdrlen);
201 if (err)
202 goto out;
203 err = xdr_process_buf(body, body_offset, body->len - body_offset,
204 checksummer, &desc);
205 if (err)
206 goto out;
207 err = crypto_hash_final(&desc, checksumdata);
208 if (err)
209 goto out;
210
211 desc.tfm = hmac_md5;
212 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
213
214 err = crypto_hash_init(&desc);
215 if (err)
216 goto out;
217 err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
218 if (err)
219 goto out;
220
221 sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5));
222 err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5),
223 checksumdata);
224 if (err)
225 goto out;
226
227 memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
228 cksumout->len = kctx->gk5e->cksumlength;
229out:
230 crypto_free_hash(md5);
231 crypto_free_hash(hmac_md5);
232 return err ? GSS_S_FAILURE : 0;
233}
234
235/*
236 * checksum the plaintext data and hdrlen bytes of the token header
237 * The checksum is performed over the first 8 bytes of the
238 * gss token header and then over the data body
239 */
240u32
241make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
242 struct xdr_buf *body, int body_offset, u8 *cksumkey,
243 unsigned int usage, struct xdr_netobj *cksumout)
244{
245 struct hash_desc desc;
246 struct scatterlist sg[1];
247 int err;
248 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
249 unsigned int checksumlen;
250
251 if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR)
252 return make_checksum_hmac_md5(kctx, header, hdrlen,
253 body, body_offset,
254 cksumkey, usage, cksumout);
255
256 if (cksumout->len < kctx->gk5e->cksumlength) {
257 dprintk("%s: checksum buffer length, %u, too small for %s\n",
258 __func__, cksumout->len, kctx->gk5e->name);
259 return GSS_S_FAILURE;
260 }
134 261
135 desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC); 262 desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
136 if (IS_ERR(desc.tfm)) 263 if (IS_ERR(desc.tfm))
137 return GSS_S_FAILURE; 264 return GSS_S_FAILURE;
138 cksum->len = crypto_hash_digestsize(desc.tfm);
139 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP; 265 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
140 266
267 checksumlen = crypto_hash_digestsize(desc.tfm);
268
269 if (cksumkey != NULL) {
270 err = crypto_hash_setkey(desc.tfm, cksumkey,
271 kctx->gk5e->keylength);
272 if (err)
273 goto out;
274 }
275
141 err = crypto_hash_init(&desc); 276 err = crypto_hash_init(&desc);
142 if (err) 277 if (err)
143 goto out; 278 goto out;
@@ -149,15 +284,109 @@ make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body,
149 checksummer, &desc); 284 checksummer, &desc);
150 if (err) 285 if (err)
151 goto out; 286 goto out;
152 err = crypto_hash_final(&desc, cksum->data); 287 err = crypto_hash_final(&desc, checksumdata);
288 if (err)
289 goto out;
153 290
291 switch (kctx->gk5e->ctype) {
292 case CKSUMTYPE_RSA_MD5:
293 err = kctx->gk5e->encrypt(kctx->seq, NULL, checksumdata,
294 checksumdata, checksumlen);
295 if (err)
296 goto out;
297 memcpy(cksumout->data,
298 checksumdata + checksumlen - kctx->gk5e->cksumlength,
299 kctx->gk5e->cksumlength);
300 break;
301 case CKSUMTYPE_HMAC_SHA1_DES3:
302 memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
303 break;
304 default:
305 BUG();
306 break;
307 }
308 cksumout->len = kctx->gk5e->cksumlength;
309out:
310 crypto_free_hash(desc.tfm);
311 return err ? GSS_S_FAILURE : 0;
312}
313
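Two details of the switch above are easy to miss: for CKSUMTYPE_RSA_MD5 (the des path) the MD5 digest is first DES-encrypted with kctx->seq and only the trailing cksumlength (8) bytes are kept, while CKSUMTYPE_HMAC_SHA1_DES3 keeps the leading cksumlength (20) bytes, i.e. the full keyed SHA-1.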
314/*
315 * checksum the plaintext data and hdrlen bytes of the token header
316 * Per rfc4121, sec. 4.2.4, the checksum is performed over the data
317 * body then over the first 16 octets of the MIC token
318 * Inclusion of the header data in the calculation of the
319 * checksum is optional.
320 */
321u32
322make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
323 struct xdr_buf *body, int body_offset, u8 *cksumkey,
324 unsigned int usage, struct xdr_netobj *cksumout)
325{
326 struct hash_desc desc;
327 struct scatterlist sg[1];
328 int err;
329 u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
330 unsigned int checksumlen;
331
332 if (kctx->gk5e->keyed_cksum == 0) {
333 dprintk("%s: expected keyed hash for %s\n",
334 __func__, kctx->gk5e->name);
335 return GSS_S_FAILURE;
336 }
337 if (cksumkey == NULL) {
338 dprintk("%s: no key supplied for %s\n",
339 __func__, kctx->gk5e->name);
340 return GSS_S_FAILURE;
341 }
342
343 desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
344 CRYPTO_ALG_ASYNC);
345 if (IS_ERR(desc.tfm))
346 return GSS_S_FAILURE;
347 checksumlen = crypto_hash_digestsize(desc.tfm);
348 desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
349
350 err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength);
351 if (err)
352 goto out;
353
354 err = crypto_hash_init(&desc);
355 if (err)
356 goto out;
357 err = xdr_process_buf(body, body_offset, body->len - body_offset,
358 checksummer, &desc);
359 if (err)
360 goto out;
361 if (header != NULL) {
362 sg_init_one(sg, header, hdrlen);
363 err = crypto_hash_update(&desc, sg, hdrlen);
364 if (err)
365 goto out;
366 }
367 err = crypto_hash_final(&desc, checksumdata);
368 if (err)
369 goto out;
370
371 cksumout->len = kctx->gk5e->cksumlength;
372
373 switch (kctx->gk5e->ctype) {
374 case CKSUMTYPE_HMAC_SHA1_96_AES128:
375 case CKSUMTYPE_HMAC_SHA1_96_AES256:
376 /* note that this truncates the hash */
377 memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
378 break;
379 default:
380 BUG();
381 break;
382 }
154out: 383out:
155 crypto_free_hash(desc.tfm); 384 crypto_free_hash(desc.tfm);
156 return err ? GSS_S_FAILURE : 0; 385 return err ? GSS_S_FAILURE : 0;
157} 386}
158 387
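Note the ordering flip relative to make_checksum() above: per the rfc4121 comment, v2 hashes the data body first and the token header (when one is supplied) last, and for the AES checksum types the 20-byte hmac(sha1) output is truncated to the 12-byte cksumlength carried in the token.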
159struct encryptor_desc { 388struct encryptor_desc {
160 u8 iv[8]; /* XXX hard-coded blocksize */ 389 u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
161 struct blkcipher_desc desc; 390 struct blkcipher_desc desc;
162 int pos; 391 int pos;
163 struct xdr_buf *outbuf; 392 struct xdr_buf *outbuf;
@@ -198,7 +427,7 @@ encryptor(struct scatterlist *sg, void *data)
198 desc->fraglen += sg->length; 427 desc->fraglen += sg->length;
199 desc->pos += sg->length; 428 desc->pos += sg->length;
200 429
201 fraglen = thislen & 7; /* XXX hardcoded blocksize */ 430 fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
202 thislen -= fraglen; 431 thislen -= fraglen;
203 432
204 if (thislen == 0) 433 if (thislen == 0)
@@ -256,7 +485,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
256} 485}
257 486
258struct decryptor_desc { 487struct decryptor_desc {
259 u8 iv[8]; /* XXX hard-coded blocksize */ 488 u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
260 struct blkcipher_desc desc; 489 struct blkcipher_desc desc;
261 struct scatterlist frags[4]; 490 struct scatterlist frags[4];
262 int fragno; 491 int fragno;
@@ -278,7 +507,7 @@ decryptor(struct scatterlist *sg, void *data)
278 desc->fragno++; 507 desc->fragno++;
279 desc->fraglen += sg->length; 508 desc->fraglen += sg->length;
280 509
281 fraglen = thislen & 7; /* XXX hardcoded blocksize */ 510 fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
282 thislen -= fraglen; 511 thislen -= fraglen;
283 512
284 if (thislen == 0) 513 if (thislen == 0)
@@ -325,3 +554,437 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
325 554
326 return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc); 555 return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
327} 556}
557
558/*
559 * This function makes the assumption that it was ultimately called
560 * from gss_wrap().
561 *
562 * The client auth_gss code moves any existing tail data into a
563 * separate page before calling gss_wrap.
564 * The server svcauth_gss code ensures that both the head and the
565 * tail have slack space of RPC_MAX_AUTH_SIZE before calling gss_wrap.
566 *
567 * Even with that guarantee, this function may be called more than
568 * once in the processing of gss_wrap(). The best we can do is
569 * verify at compile-time (see GSS_KRB5_SLACK_CHECK) that the
570 * largest expected shift will fit within RPC_MAX_AUTH_SIZE.
571 * At run-time we can verify that a single invocation of this
 572 * function doesn't attempt to use more than RPC_MAX_AUTH_SIZE.
573 */
574
575int
576xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
577{
578 u8 *p;
579
580 if (shiftlen == 0)
581 return 0;
582
583 BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
584 BUG_ON(shiftlen > RPC_MAX_AUTH_SIZE);
585
586 p = buf->head[0].iov_base + base;
587
588 memmove(p + shiftlen, p, buf->head[0].iov_len - base);
589
590 buf->head[0].iov_len += shiftlen;
591 buf->len += shiftlen;
592
593 return 0;
594}
595
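A user-space toy of the head shift, to make the memmove() arithmetic concrete (buffer contents and offsets are invented for the example):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
    	char head[32] = "HDRpayload";
    	unsigned int base = 3, len = 10, shiftlen = 4;

    	/* open a shiftlen-byte hole at offset base ... */
    	memmove(head + base + shiftlen, head + base, len - base);
    	/* ... and drop a stand-in confounder into it */
    	memset(head + base, '?', shiftlen);
    	printf("%.*s\n", (int)(len + shiftlen), head);	/* HDR????payload */
    	return 0;
    }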
596static u32
597gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
598 u32 offset, u8 *iv, struct page **pages, int encrypt)
599{
600 u32 ret;
601 struct scatterlist sg[1];
602 struct blkcipher_desc desc = { .tfm = cipher, .info = iv };
603 u8 data[crypto_blkcipher_blocksize(cipher) * 2];
604 struct page **save_pages;
605 u32 len = buf->len - offset;
606
607 BUG_ON(len > crypto_blkcipher_blocksize(cipher) * 2);
608
609 /*
610 * For encryption, we want to read from the cleartext
611 * page cache pages, and write the encrypted data to
612 * the supplied xdr_buf pages.
613 */
614 save_pages = buf->pages;
615 if (encrypt)
616 buf->pages = pages;
617
618 ret = read_bytes_from_xdr_buf(buf, offset, data, len);
619 buf->pages = save_pages;
620 if (ret)
621 goto out;
622
623 sg_init_one(sg, data, len);
624
625 if (encrypt)
626 ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
627 else
628 ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len);
629
630 if (ret)
631 goto out;
632
633 ret = write_bytes_to_xdr_buf(buf, offset, data, len);
634
635out:
636 return ret;
637}
638
639u32
640gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
641 struct xdr_buf *buf, int ec, struct page **pages)
642{
643 u32 err;
644 struct xdr_netobj hmac;
645 u8 *cksumkey;
646 u8 *ecptr;
647 struct crypto_blkcipher *cipher, *aux_cipher;
648 int blocksize;
649 struct page **save_pages;
650 int nblocks, nbytes;
651 struct encryptor_desc desc;
652 u32 cbcbytes;
653 unsigned int usage;
654
655 if (kctx->initiate) {
656 cipher = kctx->initiator_enc;
657 aux_cipher = kctx->initiator_enc_aux;
658 cksumkey = kctx->initiator_integ;
659 usage = KG_USAGE_INITIATOR_SEAL;
660 } else {
661 cipher = kctx->acceptor_enc;
662 aux_cipher = kctx->acceptor_enc_aux;
663 cksumkey = kctx->acceptor_integ;
664 usage = KG_USAGE_ACCEPTOR_SEAL;
665 }
666 blocksize = crypto_blkcipher_blocksize(cipher);
667
668 /* hide the gss token header and insert the confounder */
669 offset += GSS_KRB5_TOK_HDR_LEN;
670 if (xdr_extend_head(buf, offset, kctx->gk5e->conflen))
671 return GSS_S_FAILURE;
672 gss_krb5_make_confounder(buf->head[0].iov_base + offset, kctx->gk5e->conflen);
673 offset -= GSS_KRB5_TOK_HDR_LEN;
674
675 if (buf->tail[0].iov_base != NULL) {
676 ecptr = buf->tail[0].iov_base + buf->tail[0].iov_len;
677 } else {
678 buf->tail[0].iov_base = buf->head[0].iov_base
679 + buf->head[0].iov_len;
680 buf->tail[0].iov_len = 0;
681 ecptr = buf->tail[0].iov_base;
682 }
683
684 memset(ecptr, 'X', ec);
685 buf->tail[0].iov_len += ec;
686 buf->len += ec;
687
688 /* copy plaintext gss token header after filler (if any) */
689 memcpy(ecptr + ec, buf->head[0].iov_base + offset,
690 GSS_KRB5_TOK_HDR_LEN);
691 buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
692 buf->len += GSS_KRB5_TOK_HDR_LEN;
693
694 /* Do the HMAC */
695 hmac.len = GSS_KRB5_MAX_CKSUM_LEN;
696 hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len;
697
698 /*
699 * When we are called, pages points to the real page cache
700 * data -- which we can't go and encrypt! buf->pages points
701 * to scratch pages which we are going to send off to the
702 * client/server. Swap in the plaintext pages to calculate
703 * the hmac.
704 */
705 save_pages = buf->pages;
706 buf->pages = pages;
707
708 err = make_checksum_v2(kctx, NULL, 0, buf,
709 offset + GSS_KRB5_TOK_HDR_LEN,
710 cksumkey, usage, &hmac);
711 buf->pages = save_pages;
712 if (err)
713 return GSS_S_FAILURE;
714
715 nbytes = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
716 nblocks = (nbytes + blocksize - 1) / blocksize;
717 cbcbytes = 0;
718 if (nblocks > 2)
719 cbcbytes = (nblocks - 2) * blocksize;
720
721 memset(desc.iv, 0, sizeof(desc.iv));
722
723 if (cbcbytes) {
724 desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
725 desc.fragno = 0;
726 desc.fraglen = 0;
727 desc.pages = pages;
728 desc.outbuf = buf;
729 desc.desc.info = desc.iv;
730 desc.desc.flags = 0;
731 desc.desc.tfm = aux_cipher;
732
733 sg_init_table(desc.infrags, 4);
734 sg_init_table(desc.outfrags, 4);
735
736 err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
737 cbcbytes, encryptor, &desc);
738 if (err)
739 goto out_err;
740 }
741
742 /* Make sure IV carries forward from any CBC results. */
743 err = gss_krb5_cts_crypt(cipher, buf,
744 offset + GSS_KRB5_TOK_HDR_LEN + cbcbytes,
745 desc.iv, pages, 1);
746 if (err) {
747 err = GSS_S_FAILURE;
748 goto out_err;
749 }
750
751 /* Now update buf to account for HMAC */
752 buf->tail[0].iov_len += kctx->gk5e->cksumlength;
753 buf->len += kctx->gk5e->cksumlength;
754
755out_err:
756 if (err)
757 err = GSS_S_FAILURE;
758 return err;
759}
760
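The bulk/tail split above (mirrored in gss_krb5_aes_decrypt() below) deserves a worked example: everything except the last two blocks goes through the plain cbc(aes) auxiliary cipher, and gss_krb5_cts_crypt() handles only the final, possibly partial, two blocks with the IV carried forward:

    #include <stdio.h>

    int main(void)
    {
    	int blocksize = 16, nbytes = 100;	/* AES, sample length */
    	int nblocks = (nbytes + blocksize - 1) / blocksize;	/* 7 */
    	int cbcbytes = nblocks > 2 ? (nblocks - 2) * blocksize : 0;

    	/* prints cbc=80 cts=20 */
    	printf("cbc=%d cts=%d\n", cbcbytes, nbytes - cbcbytes);
    	return 0;
    }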
761u32
762gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
763 u32 *headskip, u32 *tailskip)
764{
765 struct xdr_buf subbuf;
766 u32 ret = 0;
767 u8 *cksum_key;
768 struct crypto_blkcipher *cipher, *aux_cipher;
769 struct xdr_netobj our_hmac_obj;
770 u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
771 u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
772 int nblocks, blocksize, cbcbytes;
773 struct decryptor_desc desc;
774 unsigned int usage;
775
776 if (kctx->initiate) {
777 cipher = kctx->acceptor_enc;
778 aux_cipher = kctx->acceptor_enc_aux;
779 cksum_key = kctx->acceptor_integ;
780 usage = KG_USAGE_ACCEPTOR_SEAL;
781 } else {
782 cipher = kctx->initiator_enc;
783 aux_cipher = kctx->initiator_enc_aux;
784 cksum_key = kctx->initiator_integ;
785 usage = KG_USAGE_INITIATOR_SEAL;
786 }
787 blocksize = crypto_blkcipher_blocksize(cipher);
788
789
790 /* create a segment skipping the header and leaving out the checksum */
791 xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
792 (buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
793 kctx->gk5e->cksumlength));
794
795 nblocks = (subbuf.len + blocksize - 1) / blocksize;
796
797 cbcbytes = 0;
798 if (nblocks > 2)
799 cbcbytes = (nblocks - 2) * blocksize;
800
801 memset(desc.iv, 0, sizeof(desc.iv));
802
803 if (cbcbytes) {
804 desc.fragno = 0;
805 desc.fraglen = 0;
806 desc.desc.info = desc.iv;
807 desc.desc.flags = 0;
808 desc.desc.tfm = aux_cipher;
809
810 sg_init_table(desc.frags, 4);
811
812 ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
813 if (ret)
814 goto out_err;
815 }
816
817 /* Make sure IV carries forward from any CBC results. */
818 ret = gss_krb5_cts_crypt(cipher, &subbuf, cbcbytes, desc.iv, NULL, 0);
819 if (ret)
820 goto out_err;
821
822
823 /* Calculate our hmac over the plaintext data */
824 our_hmac_obj.len = sizeof(our_hmac);
825 our_hmac_obj.data = our_hmac;
826
827 ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0,
828 cksum_key, usage, &our_hmac_obj);
829 if (ret)
830 goto out_err;
831
832 /* Get the packet's hmac value */
833 ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
834 pkt_hmac, kctx->gk5e->cksumlength);
835 if (ret)
836 goto out_err;
837
838 if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
839 ret = GSS_S_BAD_SIG;
840 goto out_err;
841 }
842 *headskip = kctx->gk5e->conflen;
843 *tailskip = kctx->gk5e->cksumlength;
844out_err:
845 if (ret && ret != GSS_S_BAD_SIG)
846 ret = GSS_S_FAILURE;
847 return ret;
848}
849
850/*
851 * Compute Kseq given the initial session key and the checksum.
852 * Set the key of the given cipher.
853 */
854int
855krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
856 unsigned char *cksum)
857{
858 struct crypto_hash *hmac;
859 struct hash_desc desc;
860 struct scatterlist sg[1];
861 u8 Kseq[GSS_KRB5_MAX_KEYLEN];
862 u32 zeroconstant = 0;
863 int err;
864
865 dprintk("%s: entered\n", __func__);
866
867 hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
868 if (IS_ERR(hmac)) {
869 dprintk("%s: error %ld, allocating hash '%s'\n",
870 __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
871 return PTR_ERR(hmac);
872 }
873
874 desc.tfm = hmac;
875 desc.flags = 0;
876
877 err = crypto_hash_init(&desc);
878 if (err)
879 goto out_err;
880
881 /* Compute intermediate Kseq from session key */
882 err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
883 if (err)
884 goto out_err;
885
886 sg_init_table(sg, 1);
887 sg_set_buf(sg, &zeroconstant, 4);
888
889 err = crypto_hash_digest(&desc, sg, 4, Kseq);
890 if (err)
891 goto out_err;
892
893 /* Compute final Kseq from the checksum and intermediate Kseq */
894 err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength);
895 if (err)
896 goto out_err;
897
898 sg_set_buf(sg, cksum, 8);
899
900 err = crypto_hash_digest(&desc, sg, 8, Kseq);
901 if (err)
902 goto out_err;
903
904 err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
905 if (err)
906 goto out_err;
907
908 err = 0;
909
910out_err:
911 crypto_free_hash(hmac);
912 dprintk("%s: returning %d\n", __func__, err);
913 return err;
914}
915
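In formula form, writing HMAC-MD5(key, data), the routine above computes Kseq' = HMAC-MD5(Ksess, 00 00 00 00) and then Kseq = HMAC-MD5(Kseq', cksum[0..7]): a four-byte zero constant feeds the first digest, the first eight bytes of the token checksum feed the second, and the result is loaded into the sequence-number cipher.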
916/*
917 * Compute Kcrypt given the initial session key and the plaintext seqnum.
918 * Set the key of cipher kctx->enc.
919 */
920int
921krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
922 s32 seqnum)
923{
924 struct crypto_hash *hmac;
925 struct hash_desc desc;
926 struct scatterlist sg[1];
927 u8 Kcrypt[GSS_KRB5_MAX_KEYLEN];
928 u8 zeroconstant[4] = {0};
929 u8 seqnumarray[4];
930 int err, i;
931
932 dprintk("%s: entered, seqnum %u\n", __func__, seqnum);
933
934 hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
935 if (IS_ERR(hmac)) {
936 dprintk("%s: error %ld, allocating hash '%s'\n",
937 __func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
938 return PTR_ERR(hmac);
939 }
940
941 desc.tfm = hmac;
942 desc.flags = 0;
943
944 err = crypto_hash_init(&desc);
945 if (err)
946 goto out_err;
947
948 /* Compute intermediate Kcrypt from session key */
949 for (i = 0; i < kctx->gk5e->keylength; i++)
950 Kcrypt[i] = kctx->Ksess[i] ^ 0xf0;
951
952 err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
953 if (err)
954 goto out_err;
955
956 sg_init_table(sg, 1);
957 sg_set_buf(sg, zeroconstant, 4);
958
959 err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
960 if (err)
961 goto out_err;
962
963 /* Compute final Kcrypt from the seqnum and intermediate Kcrypt */
964 err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
965 if (err)
966 goto out_err;
967
968 seqnumarray[0] = (unsigned char) ((seqnum >> 24) & 0xff);
969 seqnumarray[1] = (unsigned char) ((seqnum >> 16) & 0xff);
970 seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff);
971 seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff);
972
973 sg_set_buf(sg, seqnumarray, 4);
974
975 err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
976 if (err)
977 goto out_err;
978
979 err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
980 if (err)
981 goto out_err;
982
983 err = 0;
984
985out_err:
986 crypto_free_hash(hmac);
987 dprintk("%s: returning %d\n", __func__, err);
988 return err;
989}
990
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
new file mode 100644
index 000000000000..76e42e6be755
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -0,0 +1,336 @@
1/*
2 * COPYRIGHT (c) 2008
3 * The Regents of the University of Michigan
4 * ALL RIGHTS RESERVED
5 *
6 * Permission is granted to use, copy, create derivative works
7 * and redistribute this software and such derivative works
8 * for any purpose, so long as the name of The University of
9 * Michigan is not used in any advertising or publicity
10 * pertaining to the use of distribution of this software
11 * without specific, written prior authorization. If the
12 * above copyright notice or any other identification of the
13 * University of Michigan is included in any copy of any
14 * portion of this software, then the disclaimer below must
15 * also be included.
16 *
17 * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
18 * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
19 * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
20 * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
21 * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
22 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
23 * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
24 * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
25 * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
26 * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
27 * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGES.
29 */
30
31/*
32 * Copyright (C) 1998 by the FundsXpress, INC.
33 *
34 * All rights reserved.
35 *
36 * Export of this software from the United States of America may require
37 * a specific license from the United States Government. It is the
38 * responsibility of any person or organization contemplating export to
39 * obtain such a license before exporting.
40 *
41 * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
42 * distribute this software and its documentation for any purpose and
43 * without fee is hereby granted, provided that the above copyright
44 * notice appear in all copies and that both that copyright notice and
45 * this permission notice appear in supporting documentation, and that
46 * the name of FundsXpress. not be used in advertising or publicity pertaining
47 * to distribution of the software without specific, written prior
48 * permission. FundsXpress makes no representations about the suitability of
49 * this software for any purpose. It is provided "as is" without express
50 * or implied warranty.
51 *
52 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
53 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
54 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
55 */
56
57#include <linux/err.h>
58#include <linux/types.h>
59#include <linux/crypto.h>
60#include <linux/sunrpc/gss_krb5.h>
61#include <linux/sunrpc/xdr.h>
62
63#ifdef RPC_DEBUG
64# define RPCDBG_FACILITY RPCDBG_AUTH
65#endif
66
67/*
68 * This is the n-fold function as described in rfc3961, sec 5.1
69 * Taken from MIT Kerberos and modified.
70 */
71
72static void krb5_nfold(u32 inbits, const u8 *in,
73 u32 outbits, u8 *out)
74{
75 int a, b, c, lcm;
76 int byte, i, msbit;
77
78 /* the code below is more readable if I make these bytes
79 instead of bits */
80
81 inbits >>= 3;
82 outbits >>= 3;
83
84 /* first compute lcm(n,k) */
85
86 a = outbits;
87 b = inbits;
88
89 while (b != 0) {
90 c = b;
91 b = a%b;
92 a = c;
93 }
94
95 lcm = outbits*inbits/a;
96
97 /* now do the real work */
98
99 memset(out, 0, outbits);
100 byte = 0;
101
102 /* this will end up cycling through k lcm(k,n)/k times, which
103 is correct */
104 for (i = lcm-1; i >= 0; i--) {
105 /* compute the msbit in k which gets added into this byte */
106 msbit = (
107 /* first, start with the msbit in the first,
108 * unrotated byte */
109 ((inbits << 3) - 1)
110 /* then, for each byte, shift to the right
111 * for each repetition */
112 + (((inbits << 3) + 13) * (i/inbits))
113 /* last, pick out the correct byte within
114 * that shifted repetition */
115 + ((inbits - (i % inbits)) << 3)
116 ) % (inbits << 3);
117
118 /* pull out the byte value itself */
119 byte += (((in[((inbits - 1) - (msbit >> 3)) % inbits] << 8)|
120 (in[((inbits) - (msbit >> 3)) % inbits]))
121 >> ((msbit & 7) + 1)) & 0xff;
122
123 /* do the addition */
124 byte += out[i % outbits];
125 out[i % outbits] = byte & 0xff;
126
127 /* keep around the carry bit, if any */
128 byte >>= 8;
129
130 }
131
132 /* if there's a carry bit left over, add it back in */
133 if (byte) {
134 for (i = outbits - 1; i >= 0; i--) {
135 /* do the addition */
136 byte += out[i];
137 out[i] = byte & 0xff;
138
139 /* keep around the carry bit, if any */
140 byte >>= 8;
141 }
142 }
143}
144
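The Euclid loop at the top of krb5_nfold() is plain gcd(outbits, inbits), and lcm = outbits*inbits/gcd sets the iteration count of the fold. Isolated as a hypothetical helper (byte-count arguments, i.e. after the >>= 3 above):

    /* e.g. folding a 5-byte constant into an 8-byte DES block:
     * gcd(8, 5) = 1, so the main loop runs lcm(8, 5) = 40 times. */
    static int nfold_lcm(int outbytes, int inbytes)
    {
    	int a = outbytes, b = inbytes, c;

    	while (b != 0) {
    		c = b;
    		b = a % b;
    		a = c;
    	}
    	return outbytes * inbytes / a;
    }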
145/*
146 * This is the DK (derive_key) function as described in rfc3961, sec 5.1
147 * Taken from MIT Kerberos and modified.
148 */
149
150u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
151 const struct xdr_netobj *inkey,
152 struct xdr_netobj *outkey,
153 const struct xdr_netobj *in_constant,
154 gfp_t gfp_mask)
155{
156 size_t blocksize, keybytes, keylength, n;
157 unsigned char *inblockdata, *outblockdata, *rawkey;
158 struct xdr_netobj inblock, outblock;
159 struct crypto_blkcipher *cipher;
160 u32 ret = EINVAL;
161
162 blocksize = gk5e->blocksize;
163 keybytes = gk5e->keybytes;
164 keylength = gk5e->keylength;
165
166 if ((inkey->len != keylength) || (outkey->len != keylength))
167 goto err_return;
168
169 cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0,
170 CRYPTO_ALG_ASYNC);
171 if (IS_ERR(cipher))
172 goto err_return;
173 if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len))
174 goto err_return;
175
176 /* allocate and set up buffers */
177
178 ret = ENOMEM;
179 inblockdata = kmalloc(blocksize, gfp_mask);
180 if (inblockdata == NULL)
181 goto err_free_cipher;
182
183 outblockdata = kmalloc(blocksize, gfp_mask);
184 if (outblockdata == NULL)
185 goto err_free_in;
186
187 rawkey = kmalloc(keybytes, gfp_mask);
188 if (rawkey == NULL)
189 goto err_free_out;
190
191 inblock.data = (char *) inblockdata;
192 inblock.len = blocksize;
193
194 outblock.data = (char *) outblockdata;
195 outblock.len = blocksize;
196
197 /* initialize the input block */
198
199 if (in_constant->len == inblock.len) {
200 memcpy(inblock.data, in_constant->data, inblock.len);
201 } else {
202 krb5_nfold(in_constant->len * 8, in_constant->data,
203 inblock.len * 8, inblock.data);
204 }
205
206 /* loop encrypting the blocks until enough key bytes are generated */
207
208 n = 0;
209 while (n < keybytes) {
210 (*(gk5e->encrypt))(cipher, NULL, inblock.data,
211 outblock.data, inblock.len);
212
213 if ((keybytes - n) <= outblock.len) {
214 memcpy(rawkey + n, outblock.data, (keybytes - n));
215 break;
216 }
217
218 memcpy(rawkey + n, outblock.data, outblock.len);
219 memcpy(inblock.data, outblock.data, outblock.len);
220 n += outblock.len;
221 }
222
223 /* postprocess the key */
224
225 inblock.data = (char *) rawkey;
226 inblock.len = keybytes;
227
228 BUG_ON(gk5e->mk_key == NULL);
229 ret = (*(gk5e->mk_key))(gk5e, &inblock, outkey);
230 if (ret) {
231 dprintk("%s: got %d from mk_key function for '%s'\n",
232 __func__, ret, gk5e->encrypt_name);
233 goto err_free_raw;
234 }
235
236 /* clean memory, free resources and exit */
237
238 ret = 0;
239
240err_free_raw:
241 memset(rawkey, 0, keybytes);
242 kfree(rawkey);
243err_free_out:
244 memset(outblockdata, 0, blocksize);
245 kfree(outblockdata);
246err_free_in:
247 memset(inblockdata, 0, blocksize);
248 kfree(inblockdata);
249err_free_cipher:
250 crypto_free_blkcipher(cipher);
251err_return:
252 return ret;
253}
254
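This is the DK construction of rfc3961, sec. 5.1: DK(key, constant) = random-to-key(E(key, n-fold(constant)) || E(key, block1) || ...), each ciphertext block being fed back as the next input until keybytes have accumulated, with gk5e->mk_key supplying the enctype-specific random-to-key step (parity fixup for des3 below, a plain copy for aes).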
255#define smask(step) ((1<<step)-1)
256#define pstep(x, step) (((x)&smask(step))^(((x)>>step)&smask(step)))
257#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1)
258
259static void mit_des_fixup_key_parity(u8 key[8])
260{
261 int i;
262 for (i = 0; i < 8; i++) {
263 key[i] &= 0xfe;
264 key[i] |= 1^parity_char(key[i]);
265 }
266}
267
268/*
269 * This is the des3 key derivation postprocess function
270 */
271u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
272 struct xdr_netobj *randombits,
273 struct xdr_netobj *key)
274{
275 int i;
276 u32 ret = EINVAL;
277
278 if (key->len != 24) {
279 dprintk("%s: key->len is %d\n", __func__, key->len);
280 goto err_out;
281 }
282 if (randombits->len != 21) {
283 dprintk("%s: randombits->len is %d\n",
284 __func__, randombits->len);
285 goto err_out;
286 }
287
288 /* take the seven bytes, move them around into the top 7 bits of the
289 8 key bytes, then compute the parity bits. Do this three times. */
290
291 for (i = 0; i < 3; i++) {
292 memcpy(key->data + i*8, randombits->data + i*7, 7);
293 key->data[i*8+7] = (((key->data[i*8]&1)<<1) |
294 ((key->data[i*8+1]&1)<<2) |
295 ((key->data[i*8+2]&1)<<3) |
296 ((key->data[i*8+3]&1)<<4) |
297 ((key->data[i*8+4]&1)<<5) |
298 ((key->data[i*8+5]&1)<<6) |
299 ((key->data[i*8+6]&1)<<7));
300
301 mit_des_fixup_key_parity(key->data + i*8);
302 }
303 ret = 0;
304err_out:
305 return ret;
306}
307
308/*
309 * This is the aes key derivation postprocess function
310 */
311u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
312 struct xdr_netobj *randombits,
313 struct xdr_netobj *key)
314{
315 u32 ret = EINVAL;
316
317 if (key->len != 16 && key->len != 32) {
318 dprintk("%s: key->len is %d\n", __func__, key->len);
319 goto err_out;
320 }
321 if (randombits->len != 16 && randombits->len != 32) {
322 dprintk("%s: randombits->len is %d\n",
323 __func__, randombits->len);
324 goto err_out;
325 }
326 if (randombits->len != key->len) {
327 dprintk("%s: randombits->len is %d, key->len is %d\n",
328 __func__, randombits->len, key->len);
329 goto err_out;
330 }
331 memcpy(key->data, randombits->data, key->len);
332 ret = 0;
333err_out:
334 return ret;
335}
336
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 2deb0ed72ff4..032644610524 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * linux/net/sunrpc/gss_krb5_mech.c 2 * linux/net/sunrpc/gss_krb5_mech.c
3 * 3 *
4 * Copyright (c) 2001 The Regents of the University of Michigan. 4 * Copyright (c) 2001-2008 The Regents of the University of Michigan.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Andy Adamson <andros@umich.edu> 7 * Andy Adamson <andros@umich.edu>
@@ -48,6 +48,143 @@
48# define RPCDBG_FACILITY RPCDBG_AUTH 48# define RPCDBG_FACILITY RPCDBG_AUTH
49#endif 49#endif
50 50
51static struct gss_api_mech gss_kerberos_mech; /* forward declaration */
52
53static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
54 /*
55 * DES (All DES enctypes are mapped to the same gss functionality)
56 */
57 {
58 .etype = ENCTYPE_DES_CBC_RAW,
59 .ctype = CKSUMTYPE_RSA_MD5,
60 .name = "des-cbc-crc",
61 .encrypt_name = "cbc(des)",
62 .cksum_name = "md5",
63 .encrypt = krb5_encrypt,
64 .decrypt = krb5_decrypt,
65 .mk_key = NULL,
66 .signalg = SGN_ALG_DES_MAC_MD5,
67 .sealalg = SEAL_ALG_DES,
68 .keybytes = 7,
69 .keylength = 8,
70 .blocksize = 8,
71 .conflen = 8,
72 .cksumlength = 8,
73 .keyed_cksum = 0,
74 },
75 /*
76 * RC4-HMAC
77 */
78 {
79 .etype = ENCTYPE_ARCFOUR_HMAC,
80 .ctype = CKSUMTYPE_HMAC_MD5_ARCFOUR,
81 .name = "rc4-hmac",
82 .encrypt_name = "ecb(arc4)",
83 .cksum_name = "hmac(md5)",
84 .encrypt = krb5_encrypt,
85 .decrypt = krb5_decrypt,
86 .mk_key = NULL,
87 .signalg = SGN_ALG_HMAC_MD5,
88 .sealalg = SEAL_ALG_MICROSOFT_RC4,
89 .keybytes = 16,
90 .keylength = 16,
91 .blocksize = 1,
92 .conflen = 8,
93 .cksumlength = 8,
94 .keyed_cksum = 1,
95 },
96 /*
97 * 3DES
98 */
99 {
100 .etype = ENCTYPE_DES3_CBC_RAW,
101 .ctype = CKSUMTYPE_HMAC_SHA1_DES3,
102 .name = "des3-hmac-sha1",
103 .encrypt_name = "cbc(des3_ede)",
104 .cksum_name = "hmac(sha1)",
105 .encrypt = krb5_encrypt,
106 .decrypt = krb5_decrypt,
107 .mk_key = gss_krb5_des3_make_key,
108 .signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
109 .sealalg = SEAL_ALG_DES3KD,
110 .keybytes = 21,
111 .keylength = 24,
112 .blocksize = 8,
113 .conflen = 8,
114 .cksumlength = 20,
115 .keyed_cksum = 1,
116 },
117 /*
118 * AES128
119 */
120 {
121 .etype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
122 .ctype = CKSUMTYPE_HMAC_SHA1_96_AES128,
123 .name = "aes128-cts",
124 .encrypt_name = "cts(cbc(aes))",
125 .cksum_name = "hmac(sha1)",
126 .encrypt = krb5_encrypt,
127 .decrypt = krb5_decrypt,
128 .mk_key = gss_krb5_aes_make_key,
129 .encrypt_v2 = gss_krb5_aes_encrypt,
130 .decrypt_v2 = gss_krb5_aes_decrypt,
131 .signalg = -1,
132 .sealalg = -1,
133 .keybytes = 16,
134 .keylength = 16,
135 .blocksize = 16,
136 .conflen = 16,
137 .cksumlength = 12,
138 .keyed_cksum = 1,
139 },
140 /*
141 * AES256
142 */
143 {
144 .etype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
145 .ctype = CKSUMTYPE_HMAC_SHA1_96_AES256,
146 .name = "aes256-cts",
147 .encrypt_name = "cts(cbc(aes))",
148 .cksum_name = "hmac(sha1)",
149 .encrypt = krb5_encrypt,
150 .decrypt = krb5_decrypt,
151 .mk_key = gss_krb5_aes_make_key,
152 .encrypt_v2 = gss_krb5_aes_encrypt,
153 .decrypt_v2 = gss_krb5_aes_decrypt,
154 .signalg = -1,
155 .sealalg = -1,
156 .keybytes = 32,
157 .keylength = 32,
158 .blocksize = 16,
159 .conflen = 16,
160 .cksumlength = 12,
161 .keyed_cksum = 1,
162 },
163};
164
165static const int num_supported_enctypes =
166 ARRAY_SIZE(supported_gss_krb5_enctypes);
167
168static int
169supported_gss_krb5_enctype(int etype)
170{
171 int i;
172 for (i = 0; i < num_supported_enctypes; i++)
173 if (supported_gss_krb5_enctypes[i].etype == etype)
174 return 1;
175 return 0;
176}
177
178static const struct gss_krb5_enctype *
179get_gss_krb5_enctype(int etype)
180{
181 int i;
182 for (i = 0; i < num_supported_enctypes; i++)
183 if (supported_gss_krb5_enctypes[i].etype == etype)
184 return &supported_gss_krb5_enctypes[i];
185 return NULL;
186}
187
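The aes and rc4 etype values (17, 18 and 23) match the IANA Kerberos enctype registry, while the raw DES and DES3 entries stand in for their whole families; get_key() below folds ENCTYPE_DES_CBC_CRC/MD4/MD5 onto ENCTYPE_DES_CBC_RAW before this table is consulted. A hypothetical lookup showing how the table is consumed:

    const struct gss_krb5_enctype *gk5e =
    		get_gss_krb5_enctype(ENCTYPE_AES256_CTS_HMAC_SHA1_96);

    if (gk5e != NULL)
    	printk(KERN_INFO "etype %d -> %s / %s\n", gk5e->etype,
    	       gk5e->encrypt_name, gk5e->cksum_name);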
51static const void * 188static const void *
52simple_get_bytes(const void *p, const void *end, void *res, int len) 189simple_get_bytes(const void *p, const void *end, void *res, int len)
53{ 190{
@@ -78,35 +215,45 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
78} 215}
79 216
80static inline const void * 217static inline const void *
81get_key(const void *p, const void *end, struct crypto_blkcipher **res) 218get_key(const void *p, const void *end,
219 struct krb5_ctx *ctx, struct crypto_blkcipher **res)
82{ 220{
83 struct xdr_netobj key; 221 struct xdr_netobj key;
84 int alg; 222 int alg;
85 char *alg_name;
86 223
87 p = simple_get_bytes(p, end, &alg, sizeof(alg)); 224 p = simple_get_bytes(p, end, &alg, sizeof(alg));
88 if (IS_ERR(p)) 225 if (IS_ERR(p))
89 goto out_err; 226 goto out_err;
227
228 switch (alg) {
229 case ENCTYPE_DES_CBC_CRC:
230 case ENCTYPE_DES_CBC_MD4:
231 case ENCTYPE_DES_CBC_MD5:
232 /* Map all these key types to ENCTYPE_DES_CBC_RAW */
233 alg = ENCTYPE_DES_CBC_RAW;
234 break;
235 }
236
237 if (!supported_gss_krb5_enctype(alg)) {
238 printk(KERN_WARNING "gss_kerberos_mech: unsupported "
239 "encryption key algorithm %d\n", alg);
240 goto out_err;
241 }
90 p = simple_get_netobj(p, end, &key); 242 p = simple_get_netobj(p, end, &key);
91 if (IS_ERR(p)) 243 if (IS_ERR(p))
92 goto out_err; 244 goto out_err;
93 245
94 switch (alg) { 246 *res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
95 case ENCTYPE_DES_CBC_RAW: 247 CRYPTO_ALG_ASYNC);
96 alg_name = "cbc(des)";
97 break;
98 default:
99 printk("gss_kerberos_mech: unsupported algorithm %d\n", alg);
100 goto out_err_free_key;
101 }
102 *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
103 if (IS_ERR(*res)) { 248 if (IS_ERR(*res)) {
104 printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name); 249 printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
250 "crypto algorithm %s\n", ctx->gk5e->encrypt_name);
105 *res = NULL; 251 *res = NULL;
106 goto out_err_free_key; 252 goto out_err_free_key;
107 } 253 }
108 if (crypto_blkcipher_setkey(*res, key.data, key.len)) { 254 if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
109 printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name); 255 printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
256 "crypto algorithm %s\n", ctx->gk5e->encrypt_name);
110 goto out_err_free_tfm; 257 goto out_err_free_tfm;
111 } 258 }
112 259
@@ -123,56 +270,55 @@ out_err:
123} 270}
124 271
125static int 272static int
126gss_import_sec_context_kerberos(const void *p, 273gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
127 size_t len,
128 struct gss_ctx *ctx_id)
129{ 274{
130 const void *end = (const void *)((const char *)p + len);
131 struct krb5_ctx *ctx;
132 int tmp; 275 int tmp;
133 276
134 if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) {
135 p = ERR_PTR(-ENOMEM);
136 goto out_err;
137 }
138
139 p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); 277 p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
140 if (IS_ERR(p)) 278 if (IS_ERR(p))
141 goto out_err_free_ctx; 279 goto out_err;
280
281 /* Old format supports only DES! Any other enctype uses new format */
282 ctx->enctype = ENCTYPE_DES_CBC_RAW;
283
284 ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
285 if (ctx->gk5e == NULL)
286 goto out_err;
287
142 /* The downcall format was designed before we completely understood 288 /* The downcall format was designed before we completely understood
143 * the uses of the context fields; so it includes some stuff we 289 * the uses of the context fields; so it includes some stuff we
144 * just give some minimal sanity-checking, and some we ignore 290 * just give some minimal sanity-checking, and some we ignore
145 * completely (like the next twenty bytes): */ 291 * completely (like the next twenty bytes): */
146 if (unlikely(p + 20 > end || p + 20 < p)) 292 if (unlikely(p + 20 > end || p + 20 < p))
147 goto out_err_free_ctx; 293 goto out_err;
148 p += 20; 294 p += 20;
149 p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); 295 p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
150 if (IS_ERR(p)) 296 if (IS_ERR(p))
151 goto out_err_free_ctx; 297 goto out_err;
152 if (tmp != SGN_ALG_DES_MAC_MD5) { 298 if (tmp != SGN_ALG_DES_MAC_MD5) {
153 p = ERR_PTR(-ENOSYS); 299 p = ERR_PTR(-ENOSYS);
154 goto out_err_free_ctx; 300 goto out_err;
155 } 301 }
156 p = simple_get_bytes(p, end, &tmp, sizeof(tmp)); 302 p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
157 if (IS_ERR(p)) 303 if (IS_ERR(p))
158 goto out_err_free_ctx; 304 goto out_err;
159 if (tmp != SEAL_ALG_DES) { 305 if (tmp != SEAL_ALG_DES) {
160 p = ERR_PTR(-ENOSYS); 306 p = ERR_PTR(-ENOSYS);
161 goto out_err_free_ctx; 307 goto out_err;
162 } 308 }
163 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); 309 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
164 if (IS_ERR(p)) 310 if (IS_ERR(p))
165 goto out_err_free_ctx; 311 goto out_err;
166 p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send)); 312 p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
167 if (IS_ERR(p)) 313 if (IS_ERR(p))
168 goto out_err_free_ctx; 314 goto out_err;
169 p = simple_get_netobj(p, end, &ctx->mech_used); 315 p = simple_get_netobj(p, end, &ctx->mech_used);
170 if (IS_ERR(p)) 316 if (IS_ERR(p))
171 goto out_err_free_ctx; 317 goto out_err;
172 p = get_key(p, end, &ctx->enc); 318 p = get_key(p, end, ctx, &ctx->enc);
173 if (IS_ERR(p)) 319 if (IS_ERR(p))
174 goto out_err_free_mech; 320 goto out_err_free_mech;
175 p = get_key(p, end, &ctx->seq); 321 p = get_key(p, end, ctx, &ctx->seq);
176 if (IS_ERR(p)) 322 if (IS_ERR(p))
177 goto out_err_free_key1; 323 goto out_err_free_key1;
178 if (p != end) { 324 if (p != end) {
@@ -180,9 +326,6 @@ gss_import_sec_context_kerberos(const void *p,
180 goto out_err_free_key2; 326 goto out_err_free_key2;
181 } 327 }
182 328
183 ctx_id->internal_ctx_id = ctx;
184
185 dprintk("RPC: Successfully imported new context.\n");
186 return 0; 329 return 0;
187 330
188out_err_free_key2: 331out_err_free_key2:
@@ -191,18 +334,378 @@ out_err_free_key1:
191 crypto_free_blkcipher(ctx->enc); 334 crypto_free_blkcipher(ctx->enc);
192out_err_free_mech: 335out_err_free_mech:
193 kfree(ctx->mech_used.data); 336 kfree(ctx->mech_used.data);
194out_err_free_ctx:
195 kfree(ctx);
196out_err: 337out_err:
197 return PTR_ERR(p); 338 return PTR_ERR(p);
198} 339}
199 340
341struct crypto_blkcipher *
342context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
343{
344 struct crypto_blkcipher *cp;
345
346 cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC);
347 if (IS_ERR(cp)) {
348 dprintk("gss_kerberos_mech: unable to initialize "
349 "crypto algorithm %s\n", cname);
350 return NULL;
351 }
352 if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) {
353 dprintk("gss_kerberos_mech: error setting key for "
354 "crypto algorithm %s\n", cname);
355 crypto_free_blkcipher(cp);
356 return NULL;
357 }
358 return cp;
359}
360
361static inline void
362set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
363{
364 cdata[0] = (usage>>24)&0xff;
365 cdata[1] = (usage>>16)&0xff;
366 cdata[2] = (usage>>8)&0xff;
367 cdata[3] = usage&0xff;
368 cdata[4] = seed;
369}
370
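The five-byte constant assembled by set_cdata() is the rfc4121 usage/seed pair handed to krb5_derive_key(): the big-endian usage number followed by one seed byte. A worked value, assuming KG_USAGE_INITIATOR_SEAL and KEY_USAGE_SEED_ENCRYPTION carry the rfc4121/rfc3961 constants 24 and 0xAA:

    u8 cdata[GSS_KRB5_K5CLENGTH];

    set_cdata(cdata, 24, 0xAA);
    /* cdata == { 0x00, 0x00, 0x00, 0x18, 0xaa } */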
371static int
372context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
373{
374 struct xdr_netobj c, keyin, keyout;
375 u8 cdata[GSS_KRB5_K5CLENGTH];
376 u32 err;
377
378 c.len = GSS_KRB5_K5CLENGTH;
379 c.data = cdata;
380
381 keyin.data = ctx->Ksess;
382 keyin.len = ctx->gk5e->keylength;
383 keyout.len = ctx->gk5e->keylength;
384
385 /* seq uses the raw key */
386 ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
387 ctx->Ksess);
388 if (ctx->seq == NULL)
389 goto out_err;
390
391 ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
392 ctx->Ksess);
393 if (ctx->enc == NULL)
394 goto out_free_seq;
395
396 /* derive cksum */
397 set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
398 keyout.data = ctx->cksum;
399 err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
400 if (err) {
401 dprintk("%s: Error %d deriving cksum key\n",
402 __func__, err);
403 goto out_free_enc;
404 }
405
406 return 0;
407
408out_free_enc:
409 crypto_free_blkcipher(ctx->enc);
410out_free_seq:
411 crypto_free_blkcipher(ctx->seq);
412out_err:
413 return -EINVAL;
414}
415
416/*
417 * Note that RC4 depends on deriving keys using the sequence
418 * number or the checksum of a token. Therefore, the final keys
419 * cannot be calculated until the token is being constructed!
420 */
421static int
422context_derive_keys_rc4(struct krb5_ctx *ctx)
423{
424 struct crypto_hash *hmac;
425 char sigkeyconstant[] = "signaturekey";
426 int slen = strlen(sigkeyconstant) + 1; /* include null terminator */
427 struct hash_desc desc;
428 struct scatterlist sg[1];
429 int err;
430
431 dprintk("RPC: %s: entered\n", __func__);
432 /*
433 * derive cksum (aka Ksign) key
434 */
435 hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
436 if (IS_ERR(hmac)) {
437 dprintk("%s: error %ld allocating hash '%s'\n",
438 __func__, PTR_ERR(hmac), ctx->gk5e->cksum_name);
439 err = PTR_ERR(hmac);
440 goto out_err;
441 }
442
443 err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
444 if (err)
445 goto out_err_free_hmac;
446
447 sg_init_table(sg, 1);
448 sg_set_buf(sg, sigkeyconstant, slen);
449
450 desc.tfm = hmac;
451 desc.flags = 0;
452
453 err = crypto_hash_init(&desc);
454 if (err)
455 goto out_err_free_hmac;
456
457 err = crypto_hash_digest(&desc, sg, slen, ctx->cksum);
458 if (err)
459 goto out_err_free_hmac;
460 /*
461 * allocate hash, and blkciphers for data and seqnum encryption
462 */
463 ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
464 CRYPTO_ALG_ASYNC);
465 if (IS_ERR(ctx->enc)) {
466 err = PTR_ERR(ctx->enc);
467 goto out_err_free_hmac;
468 }
469
470 ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
471 CRYPTO_ALG_ASYNC);
472 if (IS_ERR(ctx->seq)) {
473 crypto_free_blkcipher(ctx->enc);
474 err = PTR_ERR(ctx->seq);
475 goto out_err_free_hmac;
476 }
477
478 dprintk("RPC: %s: returning success\n", __func__);
479
480 err = 0;
481
482out_err_free_hmac:
483 crypto_free_hash(hmac);
484out_err:
485 dprintk("RPC: %s: returning %d\n", __func__, err);
486 return err;
487}
488
489static int
490context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
491{
492 struct xdr_netobj c, keyin, keyout;
493 u8 cdata[GSS_KRB5_K5CLENGTH];
494 u32 err;
495
496 c.len = GSS_KRB5_K5CLENGTH;
497 c.data = cdata;
498
499 keyin.data = ctx->Ksess;
500 keyin.len = ctx->gk5e->keylength;
501 keyout.len = ctx->gk5e->keylength;
502
503 /* initiator seal encryption */
504 set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
505 keyout.data = ctx->initiator_seal;
506 err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
507 if (err) {
508 dprintk("%s: Error %d deriving initiator_seal key\n",
509 __func__, err);
510 goto out_err;
511 }
512 ctx->initiator_enc = context_v2_alloc_cipher(ctx,
513 ctx->gk5e->encrypt_name,
514 ctx->initiator_seal);
515 if (ctx->initiator_enc == NULL)
516 goto out_err;
517
518 /* acceptor seal encryption */
519 set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
520 keyout.data = ctx->acceptor_seal;
521 err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
522 if (err) {
523 dprintk("%s: Error %d deriving acceptor_seal key\n",
524 __func__, err);
525 goto out_free_initiator_enc;
526 }
527 ctx->acceptor_enc = context_v2_alloc_cipher(ctx,
528 ctx->gk5e->encrypt_name,
529 ctx->acceptor_seal);
530 if (ctx->acceptor_enc == NULL)
531 goto out_free_initiator_enc;
532
533 /* initiator sign checksum */
534 set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
535 keyout.data = ctx->initiator_sign;
536 err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
537 if (err) {
538 dprintk("%s: Error %d deriving initiator_sign key\n",
539 __func__, err);
540 goto out_free_acceptor_enc;
541 }
542
543 /* acceptor sign checksum */
544 set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
545 keyout.data = ctx->acceptor_sign;
546 err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
547 if (err) {
548 dprintk("%s: Error %d deriving acceptor_sign key\n",
549 __func__, err);
550 goto out_free_acceptor_enc;
551 }
552
553 /* initiator seal integrity */
554 set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
555 keyout.data = ctx->initiator_integ;
556 err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
557 if (err) {
558 dprintk("%s: Error %d deriving initiator_integ key\n",
559 __func__, err);
560 goto out_free_acceptor_enc;
561 }
562
563 /* acceptor seal integrity */
564 set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
565 keyout.data = ctx->acceptor_integ;
566 err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
567 if (err) {
568 dprintk("%s: Error %d deriving acceptor_integ key\n",
569 __func__, err);
570 goto out_free_acceptor_enc;
571 }
572
573 switch (ctx->enctype) {
574 case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
575 case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
576 ctx->initiator_enc_aux =
577 context_v2_alloc_cipher(ctx, "cbc(aes)",
578 ctx->initiator_seal);
579 if (ctx->initiator_enc_aux == NULL)
580 goto out_free_acceptor_enc;
581 ctx->acceptor_enc_aux =
582 context_v2_alloc_cipher(ctx, "cbc(aes)",
583 ctx->acceptor_seal);
584 if (ctx->acceptor_enc_aux == NULL) {
585 crypto_free_blkcipher(ctx->initiator_enc_aux);
586 goto out_free_acceptor_enc;
587 }
588 }
589
590 return 0;
591
592out_free_acceptor_enc:
593 crypto_free_blkcipher(ctx->acceptor_enc);
594out_free_initiator_enc:
595 crypto_free_blkcipher(ctx->initiator_enc);
596out_err:
597 return -EINVAL;
598}
599
600static int
601gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
602 gfp_t gfp_mask)
603{
604 int keylen;
605
606 p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
607 if (IS_ERR(p))
608 goto out_err;
609 ctx->initiate = ctx->flags & KRB5_CTX_FLAG_INITIATOR;
610
611 p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
612 if (IS_ERR(p))
613 goto out_err;
614 p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
615 if (IS_ERR(p))
616 goto out_err;
617 /* set seq_send for use by "older" enctypes */
618 ctx->seq_send = ctx->seq_send64;
619 if (ctx->seq_send64 != ctx->seq_send) {
620 dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
621 (long unsigned)ctx->seq_send64, ctx->seq_send);
622 goto out_err;
623 }
624 p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
625 if (IS_ERR(p))
626 goto out_err;
627 /* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
628 if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
629 ctx->enctype = ENCTYPE_DES3_CBC_RAW;
630 ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
631 if (ctx->gk5e == NULL) {
632 dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
633 ctx->enctype);
634 p = ERR_PTR(-EINVAL);
635 goto out_err;
636 }
637 keylen = ctx->gk5e->keylength;
638
639 p = simple_get_bytes(p, end, ctx->Ksess, keylen);
640 if (IS_ERR(p))
641 goto out_err;
642
643 if (p != end) {
644 p = ERR_PTR(-EINVAL);
645 goto out_err;
646 }
647
648 ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data,
649 gss_kerberos_mech.gm_oid.len, gfp_mask);
650 if (unlikely(ctx->mech_used.data == NULL)) {
651 p = ERR_PTR(-ENOMEM);
652 goto out_err;
653 }
654 ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
655
656 switch (ctx->enctype) {
657 case ENCTYPE_DES3_CBC_RAW:
658 return context_derive_keys_des3(ctx, gfp_mask);
659 case ENCTYPE_ARCFOUR_HMAC:
660 return context_derive_keys_rc4(ctx);
661 case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
662 case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
663 return context_derive_keys_new(ctx, gfp_mask);
664 default:
665 return -EINVAL;
666 }
667
668out_err:
669 return PTR_ERR(p);
670}
671
672static int
673gss_import_sec_context_kerberos(const void *p, size_t len,
674 struct gss_ctx *ctx_id,
675 gfp_t gfp_mask)
676{
677 const void *end = (const void *)((const char *)p + len);
678 struct krb5_ctx *ctx;
679 int ret;
680
681 ctx = kzalloc(sizeof(*ctx), gfp_mask);
682 if (ctx == NULL)
683 return -ENOMEM;
684
685 if (len == 85)
686 ret = gss_import_v1_context(p, end, ctx);
687 else
688 ret = gss_import_v2_context(p, end, ctx, gfp_mask);
689
690 if (ret == 0)
691 ctx_id->internal_ctx_id = ctx;
692 else
693 kfree(ctx);
694
695 dprintk("RPC: %s: returning %d\n", __func__, ret);
696 return ret;
697}
698
200static void 699static void
201gss_delete_sec_context_kerberos(void *internal_ctx) { 700gss_delete_sec_context_kerberos(void *internal_ctx) {
202 struct krb5_ctx *kctx = internal_ctx; 701 struct krb5_ctx *kctx = internal_ctx;
203 702
204 crypto_free_blkcipher(kctx->seq); 703 crypto_free_blkcipher(kctx->seq);
205 crypto_free_blkcipher(kctx->enc); 704 crypto_free_blkcipher(kctx->enc);
705 crypto_free_blkcipher(kctx->acceptor_enc);
706 crypto_free_blkcipher(kctx->initiator_enc);
707 crypto_free_blkcipher(kctx->acceptor_enc_aux);
708 crypto_free_blkcipher(kctx->initiator_enc_aux);
206 kfree(kctx->mech_used.data); 709 kfree(kctx->mech_used.data);
207 kfree(kctx); 710 kfree(kctx);
208} 711}
@@ -241,6 +744,7 @@ static struct gss_api_mech gss_kerberos_mech = {
 	.gm_ops = &gss_kerberos_ops,
 	.gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
 	.gm_pfs = gss_kerberos_pfs,
+	.gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ",
 };
 
 static int __init init_kerberos_module(void)
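For readers skimming the derivation hunks above: the "constant" fed to krb5_derive_key() is just five bytes, the 32-bit key usage number in network byte order plus a one-byte seed that selects the checksum, encryption, or integrity key. A minimal standalone sketch (userspace C, not kernel code; the seed values and the usage number 24 for KG_USAGE_INITIATOR_SEAL are assumptions taken to match the kernel's gss_krb5.h and RFC 4121):

#include <stdio.h>
#include <stdint.h>

/* assumed to match the kernel's KEY_USAGE_SEED_* values */
#define KEY_USAGE_SEED_CHECKSUM   0x99
#define KEY_USAGE_SEED_ENCRYPTION 0xaa
#define KEY_USAGE_SEED_INTEGRITY  0x55

/* same byte layout as the kernel's set_cdata() */
static void set_cdata(uint8_t cdata[5], uint32_t usage, uint8_t seed)
{
	cdata[0] = (usage >> 24) & 0xff;
	cdata[1] = (usage >> 16) & 0xff;
	cdata[2] = (usage >> 8) & 0xff;
	cdata[3] = usage & 0xff;
	cdata[4] = seed;
}

int main(void)
{
	uint8_t c[5];
	int i;

	/* 24 is assumed to be KG_USAGE_INITIATOR_SEAL, per RFC 4121 */
	set_cdata(c, 24, KEY_USAGE_SEED_ENCRYPTION);
	for (i = 0; i < 5; i++)
		printf("%02x ", c[i]);
	printf("\n");	/* prints: 00 00 00 18 aa */
	return 0;
}

That five-byte prefix is what the AES enctypes then expand into an actual subkey via the RFC 3961 DK step.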
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 88fe6e75ed7e..d7941eab7796 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -3,7 +3,7 @@
  *
  * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c
  *
- * Copyright (c) 2000 The Regents of the University of Michigan.
+ * Copyright (c) 2000-2008 The Regents of the University of Michigan.
  * All rights reserved.
  *
  * Andy Adamson <andros@umich.edu>
@@ -70,53 +70,154 @@
 
 DEFINE_SPINLOCK(krb5_seq_lock);
 
-u32
-gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
-		struct xdr_netobj *token)
-{
-	struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
-	char cksumdata[16];
-	struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
-	unsigned char *ptr, *msg_start;
-	s32 now;
-	u32 seq_send;
-
-	dprintk("RPC: gss_krb5_seal\n");
-	BUG_ON(ctx == NULL);
-
-	now = get_seconds();
-
-	token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8);
-
-	ptr = token->data;
-	g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr);
-
-	/* ptr now at header described in rfc 1964, section 1.2.1: */
-	ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff);
-	ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff);
-
-	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8;
-
-	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
-	memset(ptr + 4, 0xff, 4);
-
-	if (make_checksum("md5", ptr, 8, text, 0, &md5cksum))
-		return GSS_S_FAILURE;
-
-	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
-			 md5cksum.data, md5cksum.len))
-		return GSS_S_FAILURE;
-
-	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
-
-	spin_lock(&krb5_seq_lock);
-	seq_send = ctx->seq_send++;
-	spin_unlock(&krb5_seq_lock);
-
-	if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
-			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN,
-			      ptr + 8))
-		return GSS_S_FAILURE;
-
-	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
-}
+static char *
+setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
+{
+	__be16 *ptr, *krb5_hdr;
+	int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
+
+	token->len = g_token_size(&ctx->mech_used, body_size);
+
+	ptr = (__be16 *)token->data;
+	g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
+
+	/* ptr now at start of header described in rfc 1964, section 1.2.1: */
+	krb5_hdr = ptr;
+	*ptr++ = KG_TOK_MIC_MSG;
+	*ptr++ = cpu_to_le16(ctx->gk5e->signalg);
+	*ptr++ = SEAL_ALG_NONE;
+	*ptr++ = 0xffff;
+
+	return (char *)krb5_hdr;
+}
+
+static void *
+setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
+{
+	__be16 *ptr, *krb5_hdr;
+	u8 *p, flags = 0x00;
+
+	if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
+		flags |= 0x01;
+	if (ctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY)
+		flags |= 0x04;
+
+	/* Per rfc 4121, sec 4.2.6.1, there is no header,
+	 * just start the token */
+	krb5_hdr = ptr = (__be16 *)token->data;
+
+	*ptr++ = KG2_TOK_MIC;
+	p = (u8 *)ptr;
+	*p++ = flags;
+	*p++ = 0xff;
+	ptr = (__be16 *)p;
+	*ptr++ = 0xffff;
+	*ptr++ = 0xffff;
+
+	token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
+	return krb5_hdr;
+}
+
+static u32
+gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
+		struct xdr_netobj *token)
+{
+	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
+				      .data = cksumdata};
+	void *ptr;
+	s32 now;
+	u32 seq_send;
+	u8 *cksumkey;
+
+	dprintk("RPC: %s\n", __func__);
+	BUG_ON(ctx == NULL);
+
+	now = get_seconds();
+
+	ptr = setup_token(ctx, token);
+
+	if (ctx->gk5e->keyed_cksum)
+		cksumkey = ctx->cksum;
+	else
+		cksumkey = NULL;
+
+	if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
+			  KG_USAGE_SIGN, &md5cksum))
+		return GSS_S_FAILURE;
+
+	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
+
+	spin_lock(&krb5_seq_lock);
+	seq_send = ctx->seq_send++;
+	spin_unlock(&krb5_seq_lock);
+
+	if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
+			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
+		return GSS_S_FAILURE;
+
+	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+}
+
+u32
+gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
+		struct xdr_netobj *token)
+{
+	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj cksumobj = { .len = sizeof(cksumdata),
+				       .data = cksumdata};
+	void *krb5_hdr;
+	s32 now;
+	u64 seq_send;
+	u8 *cksumkey;
+	unsigned int cksum_usage;
+
+	dprintk("RPC: %s\n", __func__);
+
+	krb5_hdr = setup_token_v2(ctx, token);
+
+	/* Set up the sequence number. Now 64-bits in clear
+	 * text and w/o direction indicator */
+	spin_lock(&krb5_seq_lock);
+	seq_send = ctx->seq_send64++;
+	spin_unlock(&krb5_seq_lock);
+	*((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
+
+	if (ctx->initiate) {
+		cksumkey = ctx->initiator_sign;
+		cksum_usage = KG_USAGE_INITIATOR_SIGN;
+	} else {
+		cksumkey = ctx->acceptor_sign;
+		cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
+	}
+
+	if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
+			     text, 0, cksumkey, cksum_usage, &cksumobj))
+		return GSS_S_FAILURE;
+
+	memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len);
+
+	now = get_seconds();
+
+	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+}
+
+u32
+gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
+		     struct xdr_netobj *token)
+{
+	struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
+
+	switch (ctx->enctype) {
+	default:
+		BUG();
+	case ENCTYPE_DES_CBC_RAW:
+	case ENCTYPE_DES3_CBC_RAW:
+	case ENCTYPE_ARCFOUR_HMAC:
+		return gss_get_mic_v1(ctx, text, token);
+	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
+	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
+		return gss_get_mic_v2(ctx, text, token);
+	}
+}
+
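The v2 MIC path above writes a bare 16-byte RFC 4121 token header rather than the RFC 1964 wrapper. A standalone sketch of that layout (userspace C; the TOK_ID value 0x0404 and the flag bits follow RFC 4121 sec 4.2.6.1 and are assumed to match the kernel's KG2_* constants):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* assumed flag values, per RFC 4121 */
#define FLAG_SENT_BY_ACCEPTOR 0x01
#define FLAG_SEALED           0x02
#define FLAG_ACCEPTOR_SUBKEY  0x04

/* TOK_ID 0x04 0x04, flags, five 0xff filler octets,
 * then the 64-bit big-endian sequence number at offset 8 */
static void build_mic_hdr(uint8_t hdr[16], uint8_t flags, uint64_t seq)
{
	int i;

	hdr[0] = 0x04;
	hdr[1] = 0x04;
	hdr[2] = flags;
	memset(hdr + 3, 0xff, 5);
	for (i = 0; i < 8; i++)		/* big-endian, as in the RFC */
		hdr[8 + i] = (seq >> (56 - 8 * i)) & 0xff;
}

int main(void)
{
	uint8_t hdr[16];
	int i;

	build_mic_hdr(hdr, FLAG_ACCEPTOR_SUBKEY, 1);
	for (i = 0; i < 16; i++)
		printf("%02x%s", hdr[i], i == 15 ? "\n" : " ");
	return 0;
}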
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 6331cd6866ec..415c013ba382 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -39,14 +39,51 @@
 # define RPCDBG_FACILITY RPCDBG_AUTH
 #endif
 
+static s32
+krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
+		      unsigned char *cksum, unsigned char *buf)
+{
+	struct crypto_blkcipher *cipher;
+	unsigned char plain[8];
+	s32 code;
+
+	dprintk("RPC: %s:\n", __func__);
+	cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
+					CRYPTO_ALG_ASYNC);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);
+	plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
+	plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
+	plain[3] = (unsigned char) ((seqnum >> 0) & 0xff);
+	plain[4] = direction;
+	plain[5] = direction;
+	plain[6] = direction;
+	plain[7] = direction;
+
+	code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
+	if (code)
+		goto out;
+
+	code = krb5_encrypt(cipher, cksum, plain, buf, 8);
+out:
+	crypto_free_blkcipher(cipher);
+	return code;
+}
 s32
-krb5_make_seq_num(struct crypto_blkcipher *key,
+krb5_make_seq_num(struct krb5_ctx *kctx,
+		struct crypto_blkcipher *key,
 		int direction,
 		u32 seqnum,
 		unsigned char *cksum, unsigned char *buf)
 {
 	unsigned char plain[8];
 
+	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
+		return krb5_make_rc4_seq_num(kctx, direction, seqnum,
+					     cksum, buf);
+
 	plain[0] = (unsigned char) (seqnum & 0xff);
 	plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
 	plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
@@ -60,17 +97,59 @@ krb5_make_seq_num(struct crypto_blkcipher *key,
 	return krb5_encrypt(key, cksum, plain, buf, 8);
 }
 
+static s32
+krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
+		     unsigned char *buf, int *direction, s32 *seqnum)
+{
+	struct crypto_blkcipher *cipher;
+	unsigned char plain[8];
+	s32 code;
+
+	dprintk("RPC: %s:\n", __func__);
+	cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
+					CRYPTO_ALG_ASYNC);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
+	if (code)
+		goto out;
+
+	code = krb5_decrypt(cipher, cksum, buf, plain, 8);
+	if (code)
+		goto out;
+
+	if ((plain[4] != plain[5]) || (plain[4] != plain[6])
+	    || (plain[4] != plain[7])) {
+		code = (s32)KG_BAD_SEQ;
+		goto out;
+	}
+
+	*direction = plain[4];
+
+	*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
+		   (plain[2] << 8) | (plain[3]));
+out:
+	crypto_free_blkcipher(cipher);
+	return code;
+}
+
 s32
-krb5_get_seq_num(struct crypto_blkcipher *key,
+krb5_get_seq_num(struct krb5_ctx *kctx,
 	       unsigned char *cksum,
 	       unsigned char *buf,
 	       int *direction, u32 *seqnum)
 {
 	s32 code;
 	unsigned char plain[8];
+	struct crypto_blkcipher *key = kctx->seq;
 
 	dprintk("RPC: krb5_get_seq_num:\n");
 
+	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
+		return krb5_get_rc4_seq_num(kctx, cksum, buf,
+					    direction, seqnum);
+
 	if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
 		return code;
 
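The two sequence-number plaintexts built in this file differ only in byte order and are easy to confuse. A standalone sketch (userspace C) of both 8-byte layouts; as in the kernel code, the direction octet (0x00 for the initiator, 0xff for the acceptor) is repeated in bytes 4 through 7:

#include <stdio.h>
#include <stdint.h>

/* original DES path stores the seqnum least-significant byte first;
 * the arcfour-hmac path added above stores it most-significant first */
static void fill_plain(uint8_t plain[8], uint32_t seq, int dir, int msb_first)
{
	int i;

	for (i = 0; i < 4; i++)
		plain[i] = (seq >> (msb_first ? 8 * (3 - i) : 8 * i)) & 0xff;
	for (i = 4; i < 8; i++)
		plain[i] = dir;
}

int main(void)
{
	uint8_t p[8];
	int i;

	fill_plain(p, 0x01020304, 0xff, 0);	/* DES-style layout */
	for (i = 0; i < 8; i++)
		printf("%02x ", p[i]);
	printf("\n");				/* 04 03 02 01 ff ff ff ff */

	fill_plain(p, 0x01020304, 0xff, 1);	/* RC4-style layout */
	for (i = 0; i < 8; i++)
		printf("%02x ", p[i]);
	printf("\n");				/* 01 02 03 04 ff ff ff ff */
	return 0;
}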
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index ce6c247edad0..6cd930f3678f 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -3,7 +3,7 @@
  *
  * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c
  *
- * Copyright (c) 2000 The Regents of the University of Michigan.
+ * Copyright (c) 2000-2008 The Regents of the University of Michigan.
  * All rights reserved.
  *
  * Andy Adamson <andros@umich.edu>
@@ -70,20 +70,21 @@
 /* read_token is a mic token, and message_buffer is the data that the mic was
  * supposedly taken over. */
 
-u32
-gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
+static u32
+gss_verify_mic_v1(struct krb5_ctx *ctx,
 		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
 {
-	struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
 	int signalg;
 	int sealalg;
-	char cksumdata[16];
-	struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
+	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
+				      .data = cksumdata};
 	s32 now;
 	int direction;
 	u32 seqnum;
 	unsigned char *ptr = (unsigned char *)read_token->data;
 	int bodysize;
+	u8 *cksumkey;
 
 	dprintk("RPC: krb5_read_token\n");
 
@@ -98,7 +99,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 	/* XXX sanity-check bodysize?? */
 
 	signalg = ptr[2] + (ptr[3] << 8);
-	if (signalg != SGN_ALG_DES_MAC_MD5)
+	if (signalg != ctx->gk5e->signalg)
 		return GSS_S_DEFECTIVE_TOKEN;
 
 	sealalg = ptr[4] + (ptr[5] << 8);
@@ -108,13 +109,17 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum))
-		return GSS_S_FAILURE;
+	if (ctx->gk5e->keyed_cksum)
+		cksumkey = ctx->cksum;
+	else
+		cksumkey = NULL;
 
-	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
+	if (make_checksum(ctx, ptr, 8, message_buffer, 0,
+			  cksumkey, KG_USAGE_SIGN, &md5cksum))
 		return GSS_S_FAILURE;
 
-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
+	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
+		   ctx->gk5e->cksumlength))
 		return GSS_S_BAD_SIG;
 
 	/* it got through unscathed. Make sure the context is unexpired */
@@ -126,7 +131,8 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 
 	/* do sequencing checks */
 
-	if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum))
+	if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
+			     &direction, &seqnum))
 		return GSS_S_FAILURE;
 
 	if ((ctx->initiate && direction != 0xff) ||
@@ -135,3 +141,86 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 
 	return GSS_S_COMPLETE;
 }
+
+static u32
+gss_verify_mic_v2(struct krb5_ctx *ctx,
+		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
+{
+	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj cksumobj = {.len = sizeof(cksumdata),
+				      .data = cksumdata};
+	s32 now;
+	u64 seqnum;
+	u8 *ptr = read_token->data;
+	u8 *cksumkey;
+	u8 flags;
+	int i;
+	unsigned int cksum_usage;
+
+	dprintk("RPC: %s\n", __func__);
+
+	if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
+		return GSS_S_DEFECTIVE_TOKEN;
+
+	flags = ptr[2];
+	if ((!ctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
+	    (ctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
+		return GSS_S_BAD_SIG;
+
+	if (flags & KG2_TOKEN_FLAG_SEALED) {
+		dprintk("%s: token has unexpected sealed flag\n", __func__);
+		return GSS_S_FAILURE;
+	}
+
+	for (i = 3; i < 8; i++)
+		if (ptr[i] != 0xff)
+			return GSS_S_DEFECTIVE_TOKEN;
+
+	if (ctx->initiate) {
+		cksumkey = ctx->acceptor_sign;
+		cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
+	} else {
+		cksumkey = ctx->initiator_sign;
+		cksum_usage = KG_USAGE_INITIATOR_SIGN;
+	}
+
+	if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0,
+			     cksumkey, cksum_usage, &cksumobj))
+		return GSS_S_FAILURE;
+
+	if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN,
+		   ctx->gk5e->cksumlength))
+		return GSS_S_BAD_SIG;
+
+	/* it got through unscathed. Make sure the context is unexpired */
+	now = get_seconds();
+	if (now > ctx->endtime)
+		return GSS_S_CONTEXT_EXPIRED;
+
+	/* do sequencing checks */
+
+	seqnum = be64_to_cpup((__be64 *)ptr + 8);
+
+	return GSS_S_COMPLETE;
+}
+
+u32
+gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
+			struct xdr_buf *message_buffer,
+			struct xdr_netobj *read_token)
+{
+	struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
+
+	switch (ctx->enctype) {
+	default:
+		BUG();
+	case ENCTYPE_DES_CBC_RAW:
+	case ENCTYPE_DES3_CBC_RAW:
+	case ENCTYPE_ARCFOUR_HMAC:
+		return gss_verify_mic_v1(ctx, message_buffer, read_token);
+	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
+	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
+		return gss_verify_mic_v2(ctx, message_buffer, read_token);
+	}
+}
+
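The flag check in gss_verify_mic_v2() reduces to one rule: the SENT_BY_ACCEPTOR bit must be the opposite of our own role. A standalone sketch (userspace C; the 0x01 flag value is an assumption matching the kernel's KG2_TOKEN_FLAG_SENTBYACCEPTOR):

#include <stdio.h>

#define KG2_TOKEN_FLAG_SENTBYACCEPTOR 0x01	/* assumed value */

/* accept only tokens coming from the peer, not reflected ones */
static int direction_ok(int we_initiated, unsigned char flags)
{
	int from_acceptor = !!(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR);

	return we_initiated == from_acceptor;
}

int main(void)
{
	printf("%d\n", direction_ok(1, KG2_TOKEN_FLAG_SENTBYACCEPTOR)); /* 1 */
	printf("%d\n", direction_ok(1, 0));                             /* 0 */
	printf("%d\n", direction_ok(0, 0));                             /* 1 */
	return 0;
}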
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index a6e905637e03..2763e3e48db4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -1,3 +1,33 @@
+/*
+ * COPYRIGHT (c) 2008
+ * The Regents of the University of Michigan
+ * ALL RIGHTS RESERVED
+ *
+ * Permission is granted to use, copy, create derivative works
+ * and redistribute this software and such derivative works
+ * for any purpose, so long as the name of The University of
+ * Michigan is not used in any advertising or publicity
+ * pertaining to the use of distribution of this software
+ * without specific, written prior authorization. If the
+ * above copyright notice or any other identification of the
+ * University of Michigan is included in any copy of any
+ * portion of this software, then the disclaimer below must
+ * also be included.
+ *
+ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
+ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
+ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
+ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
+ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
+ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
+ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGES.
+ */
+
 #include <linux/types.h>
 #include <linux/jiffies.h>
 #include <linux/sunrpc/gss_krb5.h>
@@ -12,10 +42,7 @@
 static inline int
 gss_krb5_padding(int blocksize, int length)
 {
-	/* Most of the code is block-size independent but currently we
-	 * use only 8: */
-	BUG_ON(blocksize != 8);
-	return 8 - (length & 7);
+	return blocksize - (length % blocksize);
 }
 
 static inline void
@@ -86,8 +113,8 @@ out:
 	return 0;
 }
 
-static void
-make_confounder(char *p, u32 conflen)
+void
+gss_krb5_make_confounder(char *p, u32 conflen)
 {
 	static u64 i = 0;
 	u64 *q = (u64 *)p;
@@ -127,69 +154,73 @@ make_confounder(char *p, u32 conflen)
 
 /* XXX factor out common code with seal/unseal. */
 
-u32
-gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
+static u32
+gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 		struct xdr_buf *buf, struct page **pages)
 {
-	struct krb5_ctx *kctx = ctx->internal_ctx_id;
-	char cksumdata[16];
-	struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
+	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
+				      .data = cksumdata};
 	int blocksize = 0, plainlen;
 	unsigned char *ptr, *msg_start;
 	s32 now;
 	int headlen;
 	struct page **tmp_pages;
 	u32 seq_send;
+	u8 *cksumkey;
+	u32 conflen = kctx->gk5e->conflen;
 
-	dprintk("RPC: gss_wrap_kerberos\n");
+	dprintk("RPC: %s\n", __func__);
 
 	now = get_seconds();
 
 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
 	gss_krb5_add_padding(buf, offset, blocksize);
 	BUG_ON((buf->len - offset) % blocksize);
-	plainlen = blocksize + buf->len - offset;
+	plainlen = conflen + buf->len - offset;
 
-	headlen = g_token_size(&kctx->mech_used, 24 + plainlen) -
-						(buf->len - offset);
+	headlen = g_token_size(&kctx->mech_used,
+		GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
+		(buf->len - offset);
 
 	ptr = buf->head[0].iov_base + offset;
 	/* shift data to make room for header. */
+	xdr_extend_head(buf, offset, headlen);
+
 	/* XXX Would be cleverer to encrypt while copying. */
-	/* XXX bounds checking, slack, etc. */
-	memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset);
-	buf->head[0].iov_len += headlen;
-	buf->len += headlen;
 	BUG_ON((buf->len - offset - headlen) % blocksize);
 
 	g_make_token_header(&kctx->mech_used,
-			    GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr);
+			    GSS_KRB5_TOK_HDR_LEN +
+			    kctx->gk5e->cksumlength + plainlen, &ptr);
 
 
 	/* ptr now at header described in rfc 1964, section 1.2.1: */
 	ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
 	ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
 
-	msg_start = ptr + 24;
+	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
 
-	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
+	*(__be16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
 	memset(ptr + 4, 0xff, 4);
-	*(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES);
+	*(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
 
-	make_confounder(msg_start, blocksize);
+	gss_krb5_make_confounder(msg_start, conflen);
+
+	if (kctx->gk5e->keyed_cksum)
+		cksumkey = kctx->cksum;
+	else
+		cksumkey = NULL;
 
 	/* XXXJBF: UGH!: */
 	tmp_pages = buf->pages;
 	buf->pages = pages;
-	if (make_checksum("md5", ptr, 8, buf,
-				offset + headlen - blocksize, &md5cksum))
+	if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
+			  cksumkey, KG_USAGE_SEAL, &md5cksum))
 		return GSS_S_FAILURE;
 	buf->pages = tmp_pages;
 
-	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
-			 md5cksum.data, md5cksum.len))
-		return GSS_S_FAILURE;
-	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
+	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
 	spin_lock(&krb5_seq_lock);
 	seq_send = kctx->seq_send++;
@@ -197,25 +228,42 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 
 	/* XXX would probably be more efficient to compute checksum
 	 * and encrypt at the same time: */
-	if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
+	if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
 			       seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
 		return GSS_S_FAILURE;
 
-	if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
-				pages))
-		return GSS_S_FAILURE;
+	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
+		struct crypto_blkcipher *cipher;
+		int err;
+		cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
+						CRYPTO_ALG_ASYNC);
+		if (IS_ERR(cipher))
+			return GSS_S_FAILURE;
+
+		krb5_rc4_setup_enc_key(kctx, cipher, seq_send);
+
+		err = gss_encrypt_xdr_buf(cipher, buf,
+					  offset + headlen - conflen, pages);
+		crypto_free_blkcipher(cipher);
+		if (err)
+			return GSS_S_FAILURE;
+	} else {
+		if (gss_encrypt_xdr_buf(kctx->enc, buf,
+					offset + headlen - conflen, pages))
+			return GSS_S_FAILURE;
+	}
 
 	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
 }
 
-u32
-gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
+static u32
+gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 {
-	struct krb5_ctx *kctx = ctx->internal_ctx_id;
 	int signalg;
 	int sealalg;
-	char cksumdata[16];
-	struct xdr_netobj md5cksum = {.len = 0, .data = cksumdata};
+	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
+				      .data = cksumdata};
 	s32 now;
 	int direction;
 	s32 seqnum;
@@ -224,6 +272,9 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
 	void *data_start, *orig_start;
 	int data_len;
 	int blocksize;
+	u32 conflen = kctx->gk5e->conflen;
+	int crypt_offset;
+	u8 *cksumkey;
 
 	dprintk("RPC: gss_unwrap_kerberos\n");
 
@@ -241,29 +292,65 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
 	/* get the sign and seal algorithms */
 
 	signalg = ptr[2] + (ptr[3] << 8);
-	if (signalg != SGN_ALG_DES_MAC_MD5)
+	if (signalg != kctx->gk5e->signalg)
 		return GSS_S_DEFECTIVE_TOKEN;
 
 	sealalg = ptr[4] + (ptr[5] << 8);
-	if (sealalg != SEAL_ALG_DES)
+	if (sealalg != kctx->gk5e->sealalg)
 		return GSS_S_DEFECTIVE_TOKEN;
 
 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if (gss_decrypt_xdr_buf(kctx->enc, buf,
-			ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base))
-		return GSS_S_DEFECTIVE_TOKEN;
+	/*
+	 * Data starts after token header and checksum. ptr points
+	 * to the beginning of the token header
+	 */
+	crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
+					(unsigned char *)buf->head[0].iov_base;
+
+	/*
+	 * Need plaintext seqnum to derive encryption key for arcfour-hmac
+	 */
+	if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
+			     ptr + 8, &direction, &seqnum))
+		return GSS_S_BAD_SIG;
 
-	if (make_checksum("md5", ptr, 8, buf,
-			ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
-		return GSS_S_FAILURE;
+	if ((kctx->initiate && direction != 0xff) ||
+	    (!kctx->initiate && direction != 0))
+		return GSS_S_BAD_SIG;
+
+	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
+		struct crypto_blkcipher *cipher;
+		int err;
+
+		cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
+						CRYPTO_ALG_ASYNC);
+		if (IS_ERR(cipher))
+			return GSS_S_FAILURE;
+
+		krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
 
-	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
-			 md5cksum.data, md5cksum.len))
+		err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
+		crypto_free_blkcipher(cipher);
+		if (err)
+			return GSS_S_DEFECTIVE_TOKEN;
+	} else {
+		if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
+			return GSS_S_DEFECTIVE_TOKEN;
+	}
+
+	if (kctx->gk5e->keyed_cksum)
+		cksumkey = kctx->cksum;
+	else
+		cksumkey = NULL;
+
+	if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
+			  cksumkey, KG_USAGE_SEAL, &md5cksum))
 		return GSS_S_FAILURE;
 
-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
+	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
+		   kctx->gk5e->cksumlength))
 		return GSS_S_BAD_SIG;
 
 	/* it got through unscathed. Make sure the context is unexpired */
@@ -275,19 +362,12 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
 
 	/* do sequencing checks */
 
-	if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
-			     &direction, &seqnum))
-		return GSS_S_BAD_SIG;
-
-	if ((kctx->initiate && direction != 0xff) ||
-	    (!kctx->initiate && direction != 0))
-		return GSS_S_BAD_SIG;
-
 	/* Copy the data back to the right position. XXX: Would probably be
 	 * better to copy and encrypt at the same time. */
 
 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
-	data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize;
+	data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
+					conflen;
 	orig_start = buf->head[0].iov_base + offset;
 	data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
 	memmove(orig_start, data_start, data_len);
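The v1 unwrap hunks above replace hard-coded 8s with per-enctype lengths, so the token geometry is worth spelling out: [8-byte header | checksum | confounder | data]. A standalone sketch (userspace C) of the two offsets derived from it, using 8-byte checksum and confounder lengths as an assumed example profile:

#include <stdio.h>

#define GSS_KRB5_TOK_HDR_LEN 8

int main(void)
{
	int cksumlength = 8;	/* assumed profile, e.g. DES_MAC_MD5 */
	int conflen = 8;	/* one cipher block of confounder */

	/* where decryption starts: right after header + checksum */
	int crypt_offset = GSS_KRB5_TOK_HDR_LEN + cksumlength;
	/* where the caller's data starts: after the confounder too */
	int data_start = crypt_offset + conflen;

	printf("crypt_offset = +%d, data_start = +%d\n",
	       crypt_offset, data_start);
	return 0;
}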
@@ -299,3 +379,209 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
 
 	return GSS_S_COMPLETE;
 }
+
+/*
+ * We cannot currently handle tokens with rotated data. We need a
+ * generalized routine to rotate the data in place. It is anticipated
+ * that we won't encounter rotated data in the general case.
+ */
+static u32
+rotate_left(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u16 rrc)
+{
+	unsigned int realrrc = rrc % (buf->len - offset - GSS_KRB5_TOK_HDR_LEN);
+
+	if (realrrc == 0)
+		return 0;
+
+	dprintk("%s: cannot process token with rotated data: "
+		"rrc %u, realrrc %u\n", __func__, rrc, realrrc);
+	return 1;
+}
+
+static u32
+gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
+		     struct xdr_buf *buf, struct page **pages)
+{
+	int blocksize;
+	u8 *ptr, *plainhdr;
+	s32 now;
+	u8 flags = 0x00;
+	__be16 *be16ptr, ec = 0;
+	__be64 *be64ptr;
+	u32 err;
+
+	dprintk("RPC: %s\n", __func__);
+
+	if (kctx->gk5e->encrypt_v2 == NULL)
+		return GSS_S_FAILURE;
+
+	/* make room for gss token header */
+	if (xdr_extend_head(buf, offset, GSS_KRB5_TOK_HDR_LEN))
+		return GSS_S_FAILURE;
+
+	/* construct gss token header */
+	ptr = plainhdr = buf->head[0].iov_base + offset;
+	*ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff);
+	*ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff);
+
+	if ((kctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
+		flags |= KG2_TOKEN_FLAG_SENTBYACCEPTOR;
+	if ((kctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY) != 0)
+		flags |= KG2_TOKEN_FLAG_ACCEPTORSUBKEY;
+	/* We always do confidentiality in wrap tokens */
+	flags |= KG2_TOKEN_FLAG_SEALED;
+
+	*ptr++ = flags;
+	*ptr++ = 0xff;
+	be16ptr = (__be16 *)ptr;
+
+	blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc);
+	*be16ptr++ = cpu_to_be16(ec);
+	/* "inner" token header always uses 0 for RRC */
+	*be16ptr++ = cpu_to_be16(0);
+
+	be64ptr = (__be64 *)be16ptr;
+	spin_lock(&krb5_seq_lock);
+	*be64ptr = cpu_to_be64(kctx->seq_send64++);
+	spin_unlock(&krb5_seq_lock);
+
+	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, ec, pages);
+	if (err)
+		return err;
+
+	now = get_seconds();
+	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+}
+
+static u32
+gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+{
+	s32 now;
+	u64 seqnum;
+	u8 *ptr;
+	u8 flags = 0x00;
+	u16 ec, rrc;
+	int err;
+	u32 headskip, tailskip;
+	u8 decrypted_hdr[GSS_KRB5_TOK_HDR_LEN];
+	unsigned int movelen;
+
+
+	dprintk("RPC: %s\n", __func__);
+
+	if (kctx->gk5e->decrypt_v2 == NULL)
+		return GSS_S_FAILURE;
+
+	ptr = buf->head[0].iov_base + offset;
+
+	if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_WRAP)
+		return GSS_S_DEFECTIVE_TOKEN;
+
+	flags = ptr[2];
+	if ((!kctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
+	    (kctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
+		return GSS_S_BAD_SIG;
+
+	if ((flags & KG2_TOKEN_FLAG_SEALED) == 0) {
+		dprintk("%s: token missing expected sealed flag\n", __func__);
+		return GSS_S_DEFECTIVE_TOKEN;
+	}
+
+	if (ptr[3] != 0xff)
+		return GSS_S_DEFECTIVE_TOKEN;
+
+	ec = be16_to_cpup((__be16 *)(ptr + 4));
+	rrc = be16_to_cpup((__be16 *)(ptr + 6));
+
+	seqnum = be64_to_cpup((__be64 *)(ptr + 8));
+
+	if (rrc != 0) {
+		err = rotate_left(kctx, offset, buf, rrc);
+		if (err)
+			return GSS_S_FAILURE;
+	}
+
+	err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
+					&headskip, &tailskip);
+	if (err)
+		return GSS_S_FAILURE;
+
+	/*
+	 * Retrieve the decrypted gss token header and verify
+	 * it against the original
+	 */
+	err = read_bytes_from_xdr_buf(buf,
+				buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
+				decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
+	if (err) {
+		dprintk("%s: error %u getting decrypted_hdr\n", __func__, err);
+		return GSS_S_FAILURE;
+	}
+	if (memcmp(ptr, decrypted_hdr, 6)
+	    || memcmp(ptr + 8, decrypted_hdr + 8, 8)) {
+		dprintk("%s: token hdr, plaintext hdr mismatch!\n", __func__);
+		return GSS_S_FAILURE;
+	}
+
+	/* do sequencing checks */
+
+	/* it got through unscathed. Make sure the context is unexpired */
+	now = get_seconds();
+	if (now > kctx->endtime)
+		return GSS_S_CONTEXT_EXPIRED;
+
+	/*
+	 * Move the head data back to the right position in xdr_buf.
+	 * We ignore any "ec" data since it might be in the head or
+	 * the tail, and we really don't need to deal with it.
+	 * Note that buf->head[0].iov_len may indicate the available
+	 * head buffer space rather than that actually occupied.
+	 */
+	movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
+	movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
+	BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+	       buf->head[0].iov_len);
+	memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
+	buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
+	buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
+
+	return GSS_S_COMPLETE;
+}
+
+u32
+gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
+		  struct xdr_buf *buf, struct page **pages)
+{
+	struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+	switch (kctx->enctype) {
+	default:
+		BUG();
+	case ENCTYPE_DES_CBC_RAW:
+	case ENCTYPE_DES3_CBC_RAW:
+	case ENCTYPE_ARCFOUR_HMAC:
+		return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
+	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
+	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
+		return gss_wrap_kerberos_v2(kctx, offset, buf, pages);
+	}
+}
+
+u32
+gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
+{
+	struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+	switch (kctx->enctype) {
+	default:
+		BUG();
+	case ENCTYPE_DES_CBC_RAW:
+	case ENCTYPE_DES3_CBC_RAW:
+	case ENCTYPE_ARCFOUR_HMAC:
+		return gss_unwrap_kerberos_v1(kctx, offset, buf);
+	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
+	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
+		return gss_unwrap_kerberos_v2(kctx, offset, buf);
+	}
+}
+
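gss_wrap_kerberos_v2() emits the RFC 4121 wrap-token header directly. A standalone sketch of that 16-byte layout (userspace C; TOK_ID 0x0504, the sealed flag 0x02, and the big-endian EC/RRC/sequence fields follow RFC 4121 sec 4.2.6.2 and are assumed to match the kernel's KG2_* constants):

#include <stdio.h>
#include <stdint.h>

static void put16(uint8_t *p, uint16_t v)
{
	p[0] = v >> 8;
	p[1] = v & 0xff;
}

static void put64(uint8_t *p, uint64_t v)
{
	int i;

	for (i = 0; i < 8; i++)
		p[i] = (v >> (56 - 8 * i)) & 0xff;
}

/* TOK_ID, flags, 0xff filler, EC (extra count), RRC (right rotation
 * count, always 0 in the "inner" header built above), 64-bit seqnum */
static void build_wrap_hdr(uint8_t hdr[16], uint8_t flags,
			   uint16_t ec, uint16_t rrc, uint64_t seq)
{
	hdr[0] = 0x05;
	hdr[1] = 0x04;
	hdr[2] = flags;
	hdr[3] = 0xff;
	put16(hdr + 4, ec);
	put16(hdr + 6, rrc);
	put64(hdr + 8, seq);
}

int main(void)
{
	uint8_t hdr[16];
	int i;

	build_wrap_hdr(hdr, 0x02 /* sealed */, 0, 0, 42);
	for (i = 0; i < 16; i++)
		printf("%02x%s", hdr[i], i == 15 ? "\n" : " ");
	return 0;
}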
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 76e4c6f4ac3c..2689de39dc78 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -249,14 +249,15 @@ EXPORT_SYMBOL_GPL(gss_mech_put);
 int
 gss_import_sec_context(const void *input_token, size_t bufsize,
 		       struct gss_api_mech *mech,
-		       struct gss_ctx **ctx_id)
+		       struct gss_ctx **ctx_id,
+		       gfp_t gfp_mask)
 {
-	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL)))
+	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
 		return -ENOMEM;
 	(*ctx_id)->mech_type = gss_mech_get(mech);
 
 	return mech->gm_ops
-		->gss_import_sec_context(input_token, bufsize, *ctx_id);
+		->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask);
 }
 
 /* gss_get_mic: compute a mic over message and return mic_token. */
@@ -285,6 +286,20 @@ gss_verify_mic(struct gss_ctx *context_handle,
 			  mic_token);
 }
 
+/*
+ * This function is called from both the client and server code.
+ * Each makes guarantees about how much "slack" space is available
+ * for the underlying function in "buf"'s head and tail while
+ * performing the wrap.
+ *
+ * The client and server code allocate RPC_MAX_AUTH_SIZE extra
+ * space in both the head and tail which is available for use by
+ * the wrap function.
+ *
+ * Underlying functions should verify they do not use more than
+ * RPC_MAX_AUTH_SIZE of extra space in either the head or tail
+ * when performing the wrap.
+ */
 u32
 gss_wrap(struct gss_ctx *ctx_id,
 	 int offset,
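The mechanical change in this hunk is an allocation-context plumbing pattern: the caller now picks GFP_KERNEL or GFP_NOFS and the mechanism threads it down to kzalloc() instead of hard-coding one. A standalone sketch of the same pattern with userspace stand-ins (the names and constants below are illustrative, not the kernel's):

#include <stdio.h>
#include <stdlib.h>

typedef unsigned int gfp_t;
#define GFP_KERNEL 0x01		/* stand-in values, not the real flags */
#define GFP_NOFS   0x02

/* userspace stand-in for kzalloc(size, gfp_mask) */
static void *alloc_ctx(size_t size, gfp_t gfp_mask)
{
	printf("allocating %zu bytes with mask %#x\n", size, gfp_mask);
	return calloc(1, size);
}

/* the caller's allocation context is threaded all the way down */
static int import_sec_context(gfp_t gfp_mask, void **ctx)
{
	*ctx = alloc_ctx(64, gfp_mask);
	return *ctx ? 0 : -1;
}

int main(void)
{
	void *ctx;

	/* server side (svcauth_gss.c) may sleep freely: GFP_KERNEL */
	if (import_sec_context(GFP_KERNEL, &ctx) == 0)
		free(ctx);
	/* client side under writeback must avoid fs recursion: GFP_NOFS */
	if (import_sec_context(GFP_NOFS, &ctx) == 0)
		free(ctx);
	return 0;
}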
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 035e1dd6af1b..dc3f1f5ed865 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -84,13 +84,14 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
 
 static int
 gss_import_sec_context_spkm3(const void *p, size_t len,
-			     struct gss_ctx *ctx_id)
+			     struct gss_ctx *ctx_id,
+			     gfp_t gfp_mask)
 {
 	const void *end = (const void *)((const char *)p + len);
 	struct spkm3_ctx *ctx;
 	int version;
 
-	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))
+	if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
 		goto out_err;
 
 	p = simple_get_bytes(p, end, &version, sizeof(version));
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index 3308157436d2..a99825d7caa0 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -223,7 +223,7 @@ spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **ck
 
 	/* only support SPKM_MIC_TOK */
 	if((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
-		dprintk("RPC: ERROR unsupported SPKM3 token \n");
+		dprintk("RPC: ERROR unsupported SPKM3 token\n");
 		goto out;
 	}
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index b81e790ef9f4..cc385b3a59c2 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -494,7 +494,7 @@ static int rsc_parse(struct cache_detail *cd,
 		len = qword_get(&mesg, buf, mlen);
 		if (len < 0)
 			goto out;
-		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
+		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL);
 		if (status)
 			goto out;
 
@@ -1315,6 +1315,14 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
 	inpages = resbuf->pages;
 	/* XXX: Would be better to write some xdr helper functions for
 	 * nfs{2,3,4}xdr.c that place the data right, instead of copying: */
+
+	/*
+	 * If there is currently tail data, make sure there is
+	 * room for the head, tail, and 2 * RPC_MAX_AUTH_SIZE in
+	 * the page, and move the current tail data such that
+	 * there is RPC_MAX_AUTH_SIZE slack space available in
+	 * both the head and tail.
+	 */
 	if (resbuf->tail[0].iov_base) {
 		BUG_ON(resbuf->tail[0].iov_base >= resbuf->head[0].iov_base
 							+ PAGE_SIZE);
@@ -1327,6 +1335,13 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
 						resbuf->tail[0].iov_len);
 		resbuf->tail[0].iov_base += RPC_MAX_AUTH_SIZE;
 	}
+	/*
+	 * If there is no current tail data, make sure there is
+	 * room for the head data, and 2 * RPC_MAX_AUTH_SIZE in the
+	 * allotted page, and set up tail information such that there
+	 * is RPC_MAX_AUTH_SIZE slack space available in both the
+	 * head and tail.
+	 */
 	if (resbuf->tail[0].iov_base == NULL) {
 		if (resbuf->head[0].iov_len + 2*RPC_MAX_AUTH_SIZE > PAGE_SIZE)
 			return -ENOMEM;
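The new comments describe a space budget: head, tail, and two RPC_MAX_AUTH_SIZE slack regions must all fit in the response page before a privacy wrap can proceed. A standalone sketch of that check (userspace C; RPC_MAX_AUTH_SIZE is 400 in the kernel headers, taken here as an assumption):

#include <stdio.h>

#define PAGE_SIZE         4096
#define RPC_MAX_AUTH_SIZE 400	/* assumed to match the kernel value */

/* the wrap function may consume up to RPC_MAX_AUTH_SIZE in the head
 * and the same again in the tail, so both must fit alongside the data */
static int room_for_wrap(int head_len, int tail_len)
{
	return head_len + tail_len + 2 * RPC_MAX_AUTH_SIZE <= PAGE_SIZE;
}

int main(void)
{
	printf("%d\n", room_for_wrap(1024, 64));	/* 1: fits */
	printf("%d\n", room_for_wrap(3400, 64));	/* 0: no slack left */
	return 0;
}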
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 1db618f56ecb..a5c36c01707b 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -75,7 +75,7 @@ nul_marshal(struct rpc_task *task, __be32 *p)
 static int
 nul_refresh(struct rpc_task *task)
 {
-	set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+	set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
 	return 0;
 }
 
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index aac2f8b4ee21..4cb70dc6e7ad 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -29,7 +29,6 @@ struct unx_cred {
 #endif
 
 static struct rpc_auth unix_auth;
-static struct rpc_cred_cache unix_cred_cache;
 static const struct rpc_credops unix_credops;
 
 static struct rpc_auth *
@@ -141,7 +140,7 @@ static __be32 *
 unx_marshal(struct rpc_task *task, __be32 *p)
 {
 	struct rpc_clnt *clnt = task->tk_client;
-	struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base);
+	struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base);
 	__be32 *base, *hold;
 	int i;
 
@@ -174,7 +173,7 @@ unx_marshal(struct rpc_task *task, __be32 *p)
 static int
 unx_refresh(struct rpc_task *task)
 {
-	set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+	set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags);
 	return 0;
 }
 
@@ -197,15 +196,20 @@ unx_validate(struct rpc_task *task, __be32 *p)
197 printk("RPC: giant verf size: %u\n", size); 196 printk("RPC: giant verf size: %u\n", size);
198 return NULL; 197 return NULL;
199 } 198 }
200 task->tk_msg.rpc_cred->cr_auth->au_rslack = (size >> 2) + 2; 199 task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
201 p += (size >> 2); 200 p += (size >> 2);
202 201
203 return p; 202 return p;
204} 203}
205 204
206void __init rpc_init_authunix(void) 205int __init rpc_init_authunix(void)
207{ 206{
208 spin_lock_init(&unix_cred_cache.lock); 207 return rpcauth_init_credcache(&unix_auth);
208}
209
210void rpc_destroy_authunix(void)
211{
212 rpcauth_destroy_credcache(&unix_auth);
209} 213}
210 214
211const struct rpc_authops authunix_ops = { 215const struct rpc_authops authunix_ops = {
@@ -219,17 +223,12 @@ const struct rpc_authops authunix_ops = {
 };
 
 static
-struct rpc_cred_cache unix_cred_cache = {
-};
-
-static
 struct rpc_auth unix_auth = {
 	.au_cslack = UNX_WRITESLACK,
 	.au_rslack = 2, /* assume AUTH_NULL verf */
 	.au_ops = &authunix_ops,
 	.au_flavor = RPC_AUTH_UNIX,
 	.au_count = ATOMIC_INIT(0),
-	.au_credcache = &unix_cred_cache,
 };
 
 static
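With the private unix_cred_cache gone, rpc_init_authunix() now returns an error from the shared credcache constructor, so its caller has to be able to unwind. A standalone sketch of the init/teardown pairing with userspace stand-ins (the function bodies are placeholders, not the kernel implementation):

#include <stdio.h>

/* stand-ins: the real ones allocate and free a shared credential cache */
static int rpcauth_init_credcache(void) { return 0; }	/* may fail: -ENOMEM */
static void rpcauth_destroy_credcache(void) { }

static int rpc_init_authunix(void) { return rpcauth_init_credcache(); }
static void rpc_destroy_authunix(void) { rpcauth_destroy_credcache(); }

int main(void)
{
	int err = rpc_init_authunix();

	if (err) {
		fprintf(stderr, "authunix init failed: %d\n", err);
		return 1;
	}
	/* ... use the cache ... */
	rpc_destroy_authunix();
	return 0;
}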
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index f0c05d3311c1..7dcfe0cc3500 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -60,7 +60,7 @@ int bc_send(struct rpc_rqst *req)
 		rpc_put_task(task);
 	}
 	return ret;
-	dprintk("RPC: bc_send ret= %d \n", ret);
+	dprintk("RPC: bc_send ret= %d\n", ret);
 }
 
 #endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 39bddba53ba1..2b06410e584e 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -28,6 +28,7 @@
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
+#include <linux/smp_lock.h>
 #include <asm/ioctls.h>
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/cache.h>
@@ -49,11 +50,17 @@ static void cache_init(struct cache_head *h)
 	h->last_refresh = now;
 }
 
+static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
+{
+	return (h->expiry_time < get_seconds()) ||
+	       (detail->flush_time > h->last_refresh);
+}
+
 struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 				       struct cache_head *key, int hash)
 {
 	struct cache_head **head, **hp;
-	struct cache_head *new = NULL;
+	struct cache_head *new = NULL, *freeme = NULL;
 
 	head = &detail->hash_table[hash];
 
@@ -62,6 +69,9 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
 		struct cache_head *tmp = *hp;
 		if (detail->match(tmp, key)) {
+			if (cache_is_expired(detail, tmp))
+				/* This entry is expired, we will discard it. */
+				break;
 			cache_get(tmp);
 			read_unlock(&detail->hash_lock);
 			return tmp;
@@ -86,6 +96,13 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 	for (hp=head; *hp != NULL ; hp = &(*hp)->next) {
 		struct cache_head *tmp = *hp;
 		if (detail->match(tmp, key)) {
+			if (cache_is_expired(detail, tmp)) {
+				*hp = tmp->next;
+				tmp->next = NULL;
+				detail->entries --;
+				freeme = tmp;
+				break;
+			}
 			cache_get(tmp);
 			write_unlock(&detail->hash_lock);
 			cache_put(new, detail);
@@ -98,6 +115,8 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 	cache_get(new);
 	write_unlock(&detail->hash_lock);
 
+	if (freeme)
+		cache_put(freeme, detail);
 	return new;
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
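The cache_is_expired() helper introduced above folds two conditions that were previously scattered through the lookup and cleaning paths. A standalone sketch of the same test (userspace C, with simplified struct stand-ins):

#include <stdio.h>
#include <time.h>

struct cache_head_s   { time_t expiry_time, last_refresh; };
struct cache_detail_s { time_t flush_time; };

/* dead if its own expiry passed, or if the whole cache was
 * flushed after this entry was last refreshed */
static int cache_is_expired(const struct cache_detail_s *d,
			    const struct cache_head_s *h, time_t now)
{
	return h->expiry_time < now || d->flush_time > h->last_refresh;
}

int main(void)
{
	time_t now = time(NULL);
	struct cache_detail_s d = { .flush_time = now - 100 };
	struct cache_head_s live = { now + 60, now - 10 };
	struct cache_head_s flushed = { now + 60, now - 200 };

	printf("%d\n", cache_is_expired(&d, &live, now));	/* 0 */
	printf("%d\n", cache_is_expired(&d, &flushed, now));	/* 1 */
	return 0;
}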
@@ -183,10 +202,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h)
 
 static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h)
 {
-	if (!test_bit(CACHE_VALID, &h->flags) ||
-	    h->expiry_time < get_seconds())
-		return -EAGAIN;
-	else if (detail->flush_time > h->last_refresh)
+	if (!test_bit(CACHE_VALID, &h->flags))
 		return -EAGAIN;
 	else {
 		/* entry is valid */
@@ -303,7 +319,7 @@ static struct cache_detail *current_detail;
 static int current_index;
 
 static void do_cache_clean(struct work_struct *work);
-static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean);
+static struct delayed_work cache_cleaner;
 
 static void sunrpc_init_cache_detail(struct cache_detail *cd)
 {
@@ -397,31 +413,27 @@ static int cache_clean(void)
 	/* Ok, now to clean this strand */
 
 	cp = & current_detail->hash_table[current_index];
-	ch = *cp;
-	for (; ch; cp= & ch->next, ch= *cp) {
+	for (ch = *cp ; ch ; cp = & ch->next, ch = *cp) {
 		if (current_detail->nextcheck > ch->expiry_time)
 			current_detail->nextcheck = ch->expiry_time+1;
-		if (ch->expiry_time >= get_seconds() &&
-		    ch->last_refresh >= current_detail->flush_time)
+		if (!cache_is_expired(current_detail, ch))
 			continue;
-		if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
-			cache_dequeue(current_detail, ch);
 
-		if (atomic_read(&ch->ref.refcount) == 1)
-			break;
-	}
-	if (ch) {
 		*cp = ch->next;
 		ch->next = NULL;
 		current_detail->entries--;
 		rv = 1;
+		break;
 	}
+
 	write_unlock(&current_detail->hash_lock);
 	d = current_detail;
 	if (!ch)
 		current_index ++;
 	spin_unlock(&cache_list_lock);
 	if (ch) {
+		if (test_and_clear_bit(CACHE_PENDING, &ch->flags))
+			cache_dequeue(current_detail, ch);
 		cache_revisit_request(ch);
 		cache_put(ch, d);
 	}
@@ -1233,8 +1245,10 @@ static int content_open(struct inode *inode, struct file *file,
 	if (!cd || !try_module_get(cd->owner))
 		return -EACCES;
 	han = __seq_open_private(file, &cache_content_op, sizeof(*han));
-	if (han == NULL)
+	if (han == NULL) {
+		module_put(cd->owner);
 		return -ENOMEM;
+	}
 
 	han->cd = cd;
 	return 0;
@@ -1331,12 +1345,18 @@ static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait)
 	return cache_poll(filp, wait, cd);
 }
 
-static int cache_ioctl_procfs(struct inode *inode, struct file *filp,
-			      unsigned int cmd, unsigned long arg)
+static long cache_ioctl_procfs(struct file *filp,
+			       unsigned int cmd, unsigned long arg)
 {
+	long ret;
+	struct inode *inode = filp->f_path.dentry->d_inode;
 	struct cache_detail *cd = PDE(inode)->data;
 
-	return cache_ioctl(inode, filp, cmd, arg, cd);
+	lock_kernel();
+	ret = cache_ioctl(inode, filp, cmd, arg, cd);
+	unlock_kernel();
+
+	return ret;
 }
 
 static int cache_open_procfs(struct inode *inode, struct file *filp)
@@ -1359,7 +1379,7 @@ static const struct file_operations cache_file_operations_procfs = {
 	.read = cache_read_procfs,
 	.write = cache_write_procfs,
 	.poll = cache_poll_procfs,
-	.ioctl = cache_ioctl_procfs, /* for FIONREAD */
+	.unlocked_ioctl = cache_ioctl_procfs, /* for FIONREAD */
 	.open = cache_open_procfs,
 	.release = cache_release_procfs,
 };
@@ -1483,6 +1503,11 @@ static int create_cache_proc_entries(struct cache_detail *cd)
 }
 #endif
 
+void __init cache_initialize(void)
+{
+	INIT_DELAYED_WORK_DEFERRABLE(&cache_cleaner, do_cache_clean);
+}
+
 int cache_register(struct cache_detail *cd)
 {
 	int ret;
@@ -1525,12 +1550,18 @@ static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait)
 	return cache_poll(filp, wait, cd);
 }
 
-static int cache_ioctl_pipefs(struct inode *inode, struct file *filp,
-			      unsigned int cmd, unsigned long arg)
+static long cache_ioctl_pipefs(struct file *filp,
+			       unsigned int cmd, unsigned long arg)
 {
+	struct inode *inode = filp->f_dentry->d_inode;
 	struct cache_detail *cd = RPC_I(inode)->private;
+	long ret;
+
+	lock_kernel();
+	ret = cache_ioctl(inode, filp, cmd, arg, cd);
+	unlock_kernel();
 
-	return cache_ioctl(inode, filp, cmd, arg, cd);
+	return ret;
 }
 
 static int cache_open_pipefs(struct inode *inode, struct file *filp)
@@ -1553,7 +1584,7 @@ const struct file_operations cache_file_operations_pipefs = {
 	.read = cache_read_pipefs,
 	.write = cache_write_pipefs,
 	.poll = cache_poll_pipefs,
-	.ioctl = cache_ioctl_pipefs, /* for FIONREAD */
+	.unlocked_ioctl = cache_ioctl_pipefs, /* for FIONREAD */
 	.open = cache_open_pipefs,
 	.release = cache_release_pipefs,
 };
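The cache.c hunks above move cache_cleaner from a compile-time DECLARE_DELAYED_WORK to a plain struct delayed_work that cache_initialize() sets up at boot with INIT_DELAYED_WORK_DEFERRABLE, making the periodic cleaner deferrable so it no longer forces an idle CPU awake. A minimal sketch of that pattern in isolation; my_cache_clean, my_cleaner, my_cache_init, and the 30-second period are all hypothetical:

    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    static void my_cache_clean(struct work_struct *work);
    static struct delayed_work my_cleaner;

    void __init my_cache_init(void)
    {
    	/* deferrable: may fire late on an idle CPU, fine for cleaning */
    	INIT_DELAYED_WORK_DEFERRABLE(&my_cleaner, my_cache_clean);
    	schedule_delayed_work(&my_cleaner, 30 * HZ);
    }

    static void my_cache_clean(struct work_struct *work)
    {
    	/* drop expired entries here, then re-arm the timer */
    	schedule_delayed_work(&my_cleaner, 30 * HZ);
    }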
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 19c9983d5360..2388d83b68ff 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -414,6 +414,35 @@ out_no_clnt:
 EXPORT_SYMBOL_GPL(rpc_clone_client);
 
 /*
+ * Kill all tasks for the given client.
+ * XXX: kill their descendants as well?
+ */
+void rpc_killall_tasks(struct rpc_clnt *clnt)
+{
+	struct rpc_task *rovr;
+
+
+	if (list_empty(&clnt->cl_tasks))
+		return;
+	dprintk("RPC: killing all tasks for client %p\n", clnt);
+	/*
+	 * Spin lock all_tasks to prevent changes...
+	 */
+	spin_lock(&clnt->cl_lock);
+	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
+		if (!RPC_IS_ACTIVATED(rovr))
+			continue;
+		if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
+			rovr->tk_flags |= RPC_TASK_KILLED;
+			rpc_exit(rovr, -EIO);
+			rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr);
+		}
+	}
+	spin_unlock(&clnt->cl_lock);
+}
+EXPORT_SYMBOL_GPL(rpc_killall_tasks);
+
+/*
  * Properly shut down an RPC client, terminating all outstanding
  * requests.
  */
@@ -538,6 +567,49 @@ out:
 }
 EXPORT_SYMBOL_GPL(rpc_bind_new_program);
 
+void rpc_task_release_client(struct rpc_task *task)
+{
+	struct rpc_clnt *clnt = task->tk_client;
+
+	if (clnt != NULL) {
+		/* Remove from client task list */
+		spin_lock(&clnt->cl_lock);
+		list_del(&task->tk_task);
+		spin_unlock(&clnt->cl_lock);
+		task->tk_client = NULL;
+
+		rpc_release_client(clnt);
+	}
+}
+
+static
+void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
+{
+	if (clnt != NULL) {
+		rpc_task_release_client(task);
+		task->tk_client = clnt;
+		kref_get(&clnt->cl_kref);
+		if (clnt->cl_softrtry)
+			task->tk_flags |= RPC_TASK_SOFT;
+		/* Add to the client's list of all tasks */
+		spin_lock(&clnt->cl_lock);
+		list_add_tail(&task->tk_task, &clnt->cl_tasks);
+		spin_unlock(&clnt->cl_lock);
+	}
+}
+
+static void
+rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
+{
+	if (msg != NULL) {
+		task->tk_msg.rpc_proc = msg->rpc_proc;
+		task->tk_msg.rpc_argp = msg->rpc_argp;
+		task->tk_msg.rpc_resp = msg->rpc_resp;
+		if (msg->rpc_cred != NULL)
+			task->tk_msg.rpc_cred = get_rpccred(msg->rpc_cred);
+	}
+}
+
 /*
  * Default callback for async RPC calls
  */
@@ -556,26 +628,28 @@ static const struct rpc_call_ops rpc_default_ops = {
  */
 struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 {
-	struct rpc_task *task, *ret;
+	struct rpc_task *task;
 
 	task = rpc_new_task(task_setup_data);
-	if (task == NULL) {
-		rpc_release_calldata(task_setup_data->callback_ops,
-				task_setup_data->callback_data);
-		ret = ERR_PTR(-ENOMEM);
+	if (IS_ERR(task))
 		goto out;
-	}
+
+	rpc_task_set_client(task, task_setup_data->rpc_client);
+	rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
 
 	if (task->tk_status != 0) {
-		ret = ERR_PTR(task->tk_status);
+		int ret = task->tk_status;
 		rpc_put_task(task);
-		goto out;
+		return ERR_PTR(ret);
 	}
+
+	if (task->tk_action == NULL)
+		rpc_call_start(task);
+
 	atomic_inc(&task->tk_count);
 	rpc_execute(task);
-	ret = task;
 out:
-	return ret;
+	return task;
 }
 EXPORT_SYMBOL_GPL(rpc_run_task);
 
@@ -657,9 +731,8 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
 	 * Create an rpc_task to send the data
 	 */
 	task = rpc_new_task(&task_setup_data);
-	if (!task) {
+	if (IS_ERR(task)) {
 		xprt_free_bc_request(req);
-		task = ERR_PTR(-ENOMEM);
 		goto out;
 	}
 	task->tk_rqstp = req;
@@ -767,12 +840,13 @@ EXPORT_SYMBOL_GPL(rpc_force_rebind);
  * Restart an (async) RPC call from the call_prepare state.
  * Usually called from within the exit handler.
  */
-void
+int
 rpc_restart_call_prepare(struct rpc_task *task)
 {
 	if (RPC_ASSASSINATED(task))
-		return;
+		return 0;
 	task->tk_action = rpc_prepare_task;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
 
@@ -780,13 +854,13 @@ EXPORT_SYMBOL_GPL(rpc_restart_call_prepare);
  * Restart an (async) RPC call. Usually called from within the
  * exit handler.
  */
-void
+int
 rpc_restart_call(struct rpc_task *task)
 {
 	if (RPC_ASSASSINATED(task))
-		return;
-
+		return 0;
 	task->tk_action = call_start;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(rpc_restart_call);
 
@@ -835,11 +909,6 @@ call_reserve(struct rpc_task *task)
 {
 	dprint_status(task);
 
-	if (!rpcauth_uptodatecred(task)) {
-		task->tk_action = call_refresh;
-		return;
-	}
-
 	task->tk_status = 0;
 	task->tk_action = call_reserveresult;
 	xprt_reserve(task);
@@ -903,7 +972,7 @@ call_reserveresult(struct rpc_task *task)
 static void
 call_allocate(struct rpc_task *task)
 {
-	unsigned int slack = task->tk_msg.rpc_cred->cr_auth->au_cslack;
+	unsigned int slack = task->tk_client->cl_auth->au_cslack;
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
@@ -911,7 +980,7 @@ call_allocate(struct rpc_task *task)
 	dprint_status(task);
 
 	task->tk_status = 0;
-	task->tk_action = call_bind;
+	task->tk_action = call_refresh;
 
 	if (req->rq_buffer)
 		return;
@@ -948,6 +1017,47 @@ call_allocate(struct rpc_task *task)
 	rpc_exit(task, -ERESTARTSYS);
 }
 
+/*
+ * 2a. Bind and/or refresh the credentials
+ */
+static void
+call_refresh(struct rpc_task *task)
+{
+	dprint_status(task);
+
+	task->tk_action = call_refreshresult;
+	task->tk_status = 0;
+	task->tk_client->cl_stats->rpcauthrefresh++;
+	rpcauth_refreshcred(task);
+}
+
+/*
+ * 2b. Process the results of a credential refresh
+ */
+static void
+call_refreshresult(struct rpc_task *task)
+{
+	int status = task->tk_status;
+
+	dprint_status(task);
+
+	task->tk_status = 0;
+	task->tk_action = call_bind;
+	if (status >= 0 && rpcauth_uptodatecred(task))
+		return;
+	switch (status) {
+	case -EACCES:
+		rpc_exit(task, -EACCES);
+		return;
+	case -ENOMEM:
+		rpc_exit(task, -ENOMEM);
+		return;
+	case -ETIMEDOUT:
+		rpc_delay(task, 3*HZ);
+	}
+	task->tk_action = call_refresh;
+}
+
 static inline int
 rpc_task_need_encode(struct rpc_task *task)
 {
@@ -1483,44 +1593,6 @@ out_retry:
 	}
 }
 
-/*
- * 8. Refresh the credentials if rejected by the server
- */
-static void
-call_refresh(struct rpc_task *task)
-{
-	dprint_status(task);
-
-	task->tk_action = call_refreshresult;
-	task->tk_status = 0;
-	task->tk_client->cl_stats->rpcauthrefresh++;
-	rpcauth_refreshcred(task);
-}
-
-/*
- * 8a. Process the results of a credential refresh
- */
-static void
-call_refreshresult(struct rpc_task *task)
-{
-	int status = task->tk_status;
-
-	dprint_status(task);
-
-	task->tk_status = 0;
-	task->tk_action = call_reserve;
-	if (status >= 0 && rpcauth_uptodatecred(task))
-		return;
-	if (status == -EACCES) {
-		rpc_exit(task, -EACCES);
-		return;
-	}
-	task->tk_action = call_refresh;
-	if (status != -ETIMEDOUT)
-		rpc_delay(task, 3*HZ);
-	return;
-}
-
 static __be32 *
 rpc_encode_header(struct rpc_task *task)
 {
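With rpc_new_task() now returning ERR_PTR() instead of NULL (see the rpc_run_task and rpc_run_bc_task hunks above), callers switch from NULL checks to the IS_ERR/PTR_ERR convention. A sketch of the same convention with a hypothetical allocator, not sunrpc code:

    #include <linux/err.h>
    #include <linux/slab.h>

    struct widget { int id; };	/* hypothetical */

    static struct widget *widget_new(gfp_t gfp)
    {
    	struct widget *w = kzalloc(sizeof(*w), gfp);

    	if (w == NULL)
    		return ERR_PTR(-ENOMEM);	/* errno travels in the pointer */
    	return w;
    }

    static int widget_use(void)
    {
    	struct widget *w = widget_new(GFP_KERNEL);

    	if (IS_ERR(w))
    		return PTR_ERR(w);	/* recover the errno, no NULL check */
    	kfree(w);
    	return 0;
    }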
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 20e30c6f8355..95ccbcf45d3e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -27,6 +27,7 @@
 #include <linux/workqueue.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/sunrpc/cache.h>
+#include <linux/smp_lock.h>
 
 static struct vfsmount *rpc_mount __read_mostly;
 static int rpc_mount_count;
@@ -309,8 +310,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
 }
 
 static int
-rpc_pipe_ioctl(struct inode *ino, struct file *filp,
-		unsigned int cmd, unsigned long arg)
+rpc_pipe_ioctl_unlocked(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
 	int len;
@@ -331,13 +331,25 @@ rpc_pipe_ioctl(struct inode *ino, struct file *filp,
 	}
 }
 
+static long
+rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	long ret;
+
+	lock_kernel();
+	ret = rpc_pipe_ioctl_unlocked(filp, cmd, arg);
+	unlock_kernel();
+
+	return ret;
+}
+
 static const struct file_operations rpc_pipe_fops = {
 	.owner = THIS_MODULE,
 	.llseek = no_llseek,
 	.read = rpc_pipe_read,
 	.write = rpc_pipe_write,
 	.poll = rpc_pipe_poll,
-	.ioctl = rpc_pipe_ioctl,
+	.unlocked_ioctl = rpc_pipe_ioctl,
 	.open = rpc_pipe_open,
 	.release = rpc_pipe_release,
 };
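Both cache.c and rpc_pipe.c above apply the same .ioctl to .unlocked_ioctl conversion: the handler loses the explicit inode argument (it is recovered from the file), and the BKL is taken inside the handler so behaviour is unchanged until a later audit can drop lock_kernel() entirely. A sketch of the wrapper shape with hypothetical names:

    #include <linux/fs.h>
    #include <linux/smp_lock.h>

    static long my_ioctl_unlocked(struct inode *inode, struct file *filp,
    			      unsigned int cmd, unsigned long arg)
    {
    	return 0;	/* stand-in for the old .ioctl body */
    }

    static long my_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
    {
    	struct inode *inode = filp->f_path.dentry->d_inode;
    	long ret;

    	lock_kernel();	/* preserve the old .ioctl locking for now */
    	ret = my_ioctl_unlocked(inode, filp, cmd, arg);
    	unlock_kernel();
    	return ret;
    }

    static const struct file_operations my_fops = {
    	.unlocked_ioctl	= my_ioctl,	/* was .ioctl */
    };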
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 121105355f60..dac219a56ae1 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -783,7 +783,7 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
 	port = ntohl(*p);
 	dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid,
 			task->tk_msg.rpc_proc->p_name, port);
-	if (unlikely(port > USHORT_MAX))
+	if (unlikely(port > USHRT_MAX))
 		return -EIO;
 
 	rpcb->r_port = port;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index aae6907fd546..cace6049e4a5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -25,7 +25,6 @@
 
 #ifdef RPC_DEBUG
 #define RPCDBG_FACILITY RPCDBG_SCHED
-#define RPC_TASK_MAGIC_ID 0xf00baa
 #endif
 
 /*
@@ -237,7 +236,6 @@ static void rpc_task_set_debuginfo(struct rpc_task *task)
 {
 	static atomic_t rpc_pid;
 
-	task->tk_magic = RPC_TASK_MAGIC_ID;
 	task->tk_pid = atomic_inc_return(&rpc_pid);
 }
 #else
@@ -248,17 +246,8 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task)
 
 static void rpc_set_active(struct rpc_task *task)
 {
-	struct rpc_clnt *clnt;
-	if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0)
-		return;
 	rpc_task_set_debuginfo(task);
-	/* Add to global list of all tasks */
-	clnt = task->tk_client;
-	if (clnt != NULL) {
-		spin_lock(&clnt->cl_lock);
-		list_add_tail(&task->tk_task, &clnt->cl_tasks);
-		spin_unlock(&clnt->cl_lock);
-	}
+	set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
 }
 
 /*
@@ -321,11 +310,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
321 dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", 310 dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
322 task->tk_pid, rpc_qname(q), jiffies); 311 task->tk_pid, rpc_qname(q), jiffies);
323 312
324 if (!RPC_IS_ASYNC(task) && !RPC_IS_ACTIVATED(task)) {
325 printk(KERN_ERR "RPC: Inactive synchronous task put to sleep!\n");
326 return;
327 }
328
329 __rpc_add_wait_queue(q, task); 313 __rpc_add_wait_queue(q, task);
330 314
331 BUG_ON(task->tk_callback != NULL); 315 BUG_ON(task->tk_callback != NULL);
@@ -336,8 +320,8 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 				rpc_action action)
 {
-	/* Mark the task as being activated if so needed */
-	rpc_set_active(task);
+	/* We shouldn't ever put an inactive task to sleep */
+	BUG_ON(!RPC_IS_ACTIVATED(task));
 
 	/*
 	 * Protect the queue operations.
@@ -360,9 +344,6 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task
 	dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n",
 			task->tk_pid, jiffies);
 
-#ifdef RPC_DEBUG
-	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
-#endif
 	/* Has the task been executed yet? If not, we cannot wake it up! */
 	if (!RPC_IS_ACTIVATED(task)) {
 		printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
@@ -411,14 +392,6 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task
 EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
 
 /*
- * Wake up the specified task
- */
-static void rpc_wake_up_task(struct rpc_task *task)
-{
-	rpc_wake_up_queued_task(task->tk_waitqueue, task);
-}
-
-/*
  * Wake up the next task on a priority queue.
  */
 static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queue)
@@ -605,7 +578,15 @@ void rpc_exit_task(struct rpc_task *task)
 		}
 	}
 }
-EXPORT_SYMBOL_GPL(rpc_exit_task);
+
+void rpc_exit(struct rpc_task *task, int status)
+{
+	task->tk_status = status;
+	task->tk_action = rpc_exit_task;
+	if (RPC_IS_QUEUED(task))
+		rpc_wake_up_queued_task(task->tk_waitqueue, task);
+}
+EXPORT_SYMBOL_GPL(rpc_exit);
 
 void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
 {
@@ -695,7 +676,6 @@ static void __rpc_execute(struct rpc_task *task)
 			dprintk("RPC: %5u got signal\n", task->tk_pid);
 			task->tk_flags |= RPC_TASK_KILLED;
 			rpc_exit(task, -ERESTARTSYS);
-			rpc_wake_up_task(task);
 		}
 		rpc_set_running(task);
 		dprintk("RPC: %5u sync task resuming\n", task->tk_pid);
@@ -719,8 +699,9 @@ static void __rpc_execute(struct rpc_task *task)
 void rpc_execute(struct rpc_task *task)
 {
 	rpc_set_active(task);
-	rpc_set_running(task);
-	__rpc_execute(task);
+	rpc_make_runnable(task);
+	if (!RPC_IS_ASYNC(task))
+		__rpc_execute(task);
 }
 
 static void rpc_async_schedule(struct work_struct *work)
@@ -813,28 +794,11 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
 	/* Initialize workqueue for async tasks */
 	task->tk_workqueue = task_setup_data->workqueue;
 
-	task->tk_client = task_setup_data->rpc_client;
-	if (task->tk_client != NULL) {
-		kref_get(&task->tk_client->cl_kref);
-		if (task->tk_client->cl_softrtry)
-			task->tk_flags |= RPC_TASK_SOFT;
-	}
-
 	if (task->tk_ops->rpc_call_prepare != NULL)
 		task->tk_action = rpc_prepare_task;
 
-	if (task_setup_data->rpc_message != NULL) {
-		task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc;
-		task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp;
-		task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp;
-		/* Bind the user cred */
-		rpcauth_bindcred(task, task_setup_data->rpc_message->rpc_cred, task_setup_data->flags);
-		if (task->tk_action == NULL)
-			rpc_call_start(task);
-	}
-
 	/* starting timestamp */
-	task->tk_start = jiffies;
+	task->tk_start = ktime_get();
 
 	dprintk("RPC: new task initialized, procpid %u\n",
 				task_pid_nr(current));
@@ -856,16 +820,23 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
 
 	if (task == NULL) {
 		task = rpc_alloc_task();
-		if (task == NULL)
-			goto out;
+		if (task == NULL) {
+			rpc_release_calldata(setup_data->callback_ops,
+					setup_data->callback_data);
+			return ERR_PTR(-ENOMEM);
+		}
 		flags = RPC_TASK_DYNAMIC;
 	}
 
 	rpc_init_task(task, setup_data);
+	if (task->tk_status < 0) {
+		int err = task->tk_status;
+		rpc_put_task(task);
+		return ERR_PTR(err);
+	}
 
 	task->tk_flags |= flags;
 	dprintk("RPC: allocated task %p\n", task);
-out:
 	return task;
 }
 
@@ -894,11 +865,8 @@ void rpc_put_task(struct rpc_task *task)
 	if (task->tk_rqstp)
 		xprt_release(task);
 	if (task->tk_msg.rpc_cred)
-		rpcauth_unbindcred(task);
-	if (task->tk_client) {
-		rpc_release_client(task->tk_client);
-		task->tk_client = NULL;
-	}
+		put_rpccred(task->tk_msg.rpc_cred);
+	rpc_task_release_client(task);
 	if (task->tk_workqueue != NULL) {
 		INIT_WORK(&task->u.tk_work, rpc_async_release);
 		queue_work(task->tk_workqueue, &task->u.tk_work);
@@ -909,58 +877,16 @@ EXPORT_SYMBOL_GPL(rpc_put_task);
 
 static void rpc_release_task(struct rpc_task *task)
 {
-#ifdef RPC_DEBUG
-	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
-#endif
 	dprintk("RPC: %5u release task\n", task->tk_pid);
 
-	if (!list_empty(&task->tk_task)) {
-		struct rpc_clnt *clnt = task->tk_client;
-		/* Remove from client task list */
-		spin_lock(&clnt->cl_lock);
-		list_del(&task->tk_task);
-		spin_unlock(&clnt->cl_lock);
-	}
 	BUG_ON (RPC_IS_QUEUED(task));
 
-#ifdef RPC_DEBUG
-	task->tk_magic = 0;
-#endif
 	/* Wake up anyone who is waiting for task completion */
 	rpc_mark_complete_task(task);
 
 	rpc_put_task(task);
 }
 
-/*
- * Kill all tasks for the given client.
- * XXX: kill their descendants as well?
- */
-void rpc_killall_tasks(struct rpc_clnt *clnt)
-{
-	struct rpc_task *rovr;
-
-
-	if (list_empty(&clnt->cl_tasks))
-		return;
-	dprintk("RPC: killing all tasks for client %p\n", clnt);
-	/*
-	 * Spin lock all_tasks to prevent changes...
-	 */
-	spin_lock(&clnt->cl_lock);
-	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
-		if (! RPC_IS_ACTIVATED(rovr))
-			continue;
-		if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
-			rovr->tk_flags |= RPC_TASK_KILLED;
-			rpc_exit(rovr, -EIO);
-			rpc_wake_up_task(rovr);
-		}
-	}
-	spin_unlock(&clnt->cl_lock);
-}
-EXPORT_SYMBOL_GPL(rpc_killall_tasks);
-
 int rpciod_up(void)
 {
 	return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
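After this rework rpc_execute() only runs the state machine inline for synchronous tasks; an async task is handed to rpciod via rpc_make_runnable() and completes through its rpc_call_ops. A hedged sketch of the resulting calling convention for a synchronous caller; my_sync_call is hypothetical:

    #include <linux/err.h>
    #include <linux/sunrpc/sched.h>

    static int my_sync_call(const struct rpc_task_setup *setup)
    {
    	struct rpc_task *task = rpc_run_task(setup);
    	int status;

    	if (IS_ERR(task))
    		return PTR_ERR(task);
    	status = task->tk_status;	/* final: a sync task ran to completion */
    	rpc_put_task(task);
    	return status;
    }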
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 5785d2037f45..ea1046f3f9a3 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -144,7 +144,7 @@ void rpc_count_iostats(struct rpc_task *task)
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_iostats *stats;
 	struct rpc_iostats *op_metrics;
-	long rtt, execute, queue;
+	ktime_t delta;
 
 	if (!task->tk_client || !task->tk_client->cl_metrics || !req)
 		return;
@@ -156,23 +156,16 @@ void rpc_count_iostats(struct rpc_task *task)
 	op_metrics->om_ntrans += req->rq_ntrans;
 	op_metrics->om_timeouts += task->tk_timeouts;
 
-	op_metrics->om_bytes_sent += task->tk_bytes_sent;
+	op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
 	op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
 
-	queue = (long)req->rq_xtime - task->tk_start;
-	if (queue < 0)
-		queue = -queue;
-	op_metrics->om_queue += queue;
+	delta = ktime_sub(req->rq_xtime, task->tk_start);
+	op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
 
-	rtt = task->tk_rtt;
-	if (rtt < 0)
-		rtt = -rtt;
-	op_metrics->om_rtt += rtt;
+	op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
 
-	execute = (long)jiffies - task->tk_start;
-	if (execute < 0)
-		execute = -execute;
-	op_metrics->om_execute += execute;
+	delta = ktime_sub(ktime_get(), task->tk_start);
+	op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
 }
 
 static void _print_name(struct seq_file *seq, unsigned int op,
@@ -186,8 +179,6 @@ static void _print_name(struct seq_file *seq, unsigned int op,
 		seq_printf(seq, "\t%12u: ", op);
 }
 
-#define MILLISECS_PER_JIFFY (1000 / HZ)
-
 void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
 {
 	struct rpc_iostats *stats = clnt->cl_metrics;
@@ -214,9 +205,9 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
 			   metrics->om_timeouts,
 			   metrics->om_bytes_sent,
 			   metrics->om_bytes_recv,
-			   metrics->om_queue * MILLISECS_PER_JIFFY,
-			   metrics->om_rtt * MILLISECS_PER_JIFFY,
-			   metrics->om_execute * MILLISECS_PER_JIFFY);
+			   ktime_to_ms(metrics->om_queue),
+			   ktime_to_ms(metrics->om_rtt),
+			   ktime_to_ms(metrics->om_execute));
 	}
 }
 EXPORT_SYMBOL_GPL(rpc_print_iostats);
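The jiffies deltas with manual sign fix-ups above become straight ktime_t arithmetic. A sketch of the accumulate-and-report shape with a hypothetical metrics struct:

    #include <linux/ktime.h>

    struct my_metrics {
    	ktime_t total;	/* accumulated wall-clock time */
    };

    static void my_metrics_add(struct my_metrics *m, ktime_t start)
    {
    	ktime_t delta = ktime_sub(ktime_get(), start);

    	m->total = ktime_add(m->total, delta);
    }

    static s64 my_metrics_ms(const struct my_metrics *m)
    {
    	return ktime_to_ms(m->total);	/* report in milliseconds */
    }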
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index f438347d817b..c0d085013a2b 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -33,21 +33,27 @@ init_sunrpc(void)
 	if (err)
 		goto out;
 	err = rpc_init_mempool();
-	if (err) {
-		unregister_rpc_pipefs();
-		goto out;
-	}
+	if (err)
+		goto out2;
+	err = rpcauth_init_module();
+	if (err)
+		goto out3;
 #ifdef RPC_DEBUG
 	rpc_register_sysctl();
 #endif
 #ifdef CONFIG_PROC_FS
 	rpc_proc_init();
 #endif
+	cache_initialize();
 	cache_register(&ip_map_cache);
 	cache_register(&unix_gid_cache);
 	svc_init_xprt_sock(); /* svc sock transport */
 	init_socket_xprt(); /* clnt sock transport */
-	rpcauth_init_module();
+	return 0;
+out3:
+	rpc_destroy_mempool();
+out2:
+	unregister_rpc_pipefs();
 out:
 	return err;
 }
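init_sunrpc() now unwinds earlier setup steps in reverse order instead of undoing one step inline. The same ladder in miniature; setup_a/b/c and their teardowns are placeholders:

    static int setup_a(void) { return 0; }
    static void teardown_a(void) { }
    static int setup_b(void) { return 0; }
    static void teardown_b(void) { }
    static int setup_c(void) { return 0; }

    static int __init my_init(void)
    {
    	int err;

    	err = setup_a();
    	if (err)
    		goto out;
    	err = setup_b();
    	if (err)
    		goto out_a;
    	err = setup_c();
    	if (err)
    		goto out_b;
    	return 0;
    out_b:
    	teardown_b();	/* undo in reverse order of setup */
    out_a:
    	teardown_a();
    out:
    	return err;
    }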
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 061b2e0f9118..cbc084939dd8 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -744,8 +744,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		if (rqstp->rq_deferred) {
 			svc_xprt_received(xprt);
 			len = svc_deferred_recv(rqstp);
-		} else
+		} else {
 			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
+			svc_xprt_received(xprt);
+		}
 		dprintk("svc: got len=%d\n", len);
 	}
 
@@ -893,12 +895,12 @@ void svc_delete_xprt(struct svc_xprt *xprt)
 	 */
 	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
 		serv->sv_tmpcnt--;
+	spin_unlock_bh(&serv->sv_lock);
 
 	while ((dr = svc_deferred_dequeue(xprt)) != NULL)
 		kfree(dr);
 
 	svc_xprt_put(xprt);
-	spin_unlock_bh(&serv->sv_lock);
 }
 
 void svc_close_xprt(struct svc_xprt *xprt)
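svc_delete_xprt() above now drops sv_lock as soon as the list bookkeeping is done, so the deferred-request kfree() loop and the final svc_xprt_put() run unlocked. The shape of that fix, with hypothetical types standing in for the svc structures:

    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct my_xprt { struct list_head list; };
    struct my_serv { spinlock_t lock; };

    /* placeholders for svc_deferred_dequeue()/svc_xprt_put() */
    extern void *my_deferred_dequeue(struct my_xprt *xprt);
    extern void my_xprt_put(struct my_xprt *xprt);

    static void my_delete(struct my_serv *serv, struct my_xprt *xprt)
    {
    	void *dr;

    	spin_lock_bh(&serv->lock);
    	list_del_init(&xprt->list);	/* bookkeeping only under the lock */
    	spin_unlock_bh(&serv->lock);

    	while ((dr = my_deferred_dequeue(xprt)) != NULL)
    		kfree(dr);		/* safe: lock already released */
    	my_xprt_put(xprt);		/* may free xprt itself */
    }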
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a29f259204e6..7e534dd09077 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -150,7 +150,6 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 		}
 		break;
 	}
-	return;
 }
 
 /*
@@ -419,8 +418,8 @@ static void svc_udp_data_ready(struct sock *sk, int count)
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 }
 
 /*
@@ -436,10 +435,10 @@ static void svc_write_space(struct sock *sk)
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) {
 		dprintk("RPC svc_write_space: someone sleeping on %p\n",
 		       svsk);
-		wake_up_interruptible(sk->sk_sleep);
+		wake_up_interruptible(sk_sleep(sk));
 	}
 }
 
@@ -547,7 +546,6 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 			dprintk("svc: recvfrom returned error %d\n", -err);
 			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		}
-		svc_xprt_received(&svsk->sk_xprt);
 		return -EAGAIN;
 	}
 	len = svc_addr_len(svc_addr(rqstp));
@@ -562,11 +560,6 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 	svsk->sk_sk->sk_stamp = skb->tstamp;
 	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
 
-	/*
-	 * Maybe more packets - kick another thread ASAP.
-	 */
-	svc_xprt_received(&svsk->sk_xprt);
-
 	len = skb->len - sizeof(struct udphdr);
 	rqstp->rq_arg.len = len;
 
@@ -757,8 +750,8 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 		printk("svc: socket %p: no user data\n", sk);
 	}
 
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_all(sk->sk_sleep);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible_all(sk_sleep(sk));
 }
 
 /*
@@ -777,8 +770,8 @@ static void svc_tcp_state_change(struct sock *sk)
 		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_all(sk->sk_sleep);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible_all(sk_sleep(sk));
 }
 
 static void svc_tcp_data_ready(struct sock *sk, int count)
@@ -791,8 +784,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count)
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 }
 
 /*
@@ -917,7 +910,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 		if (len < want) {
 			dprintk("svc: short recvfrom while reading record "
 				"length (%d of %d)\n", len, want);
-			svc_xprt_received(&svsk->sk_xprt);
 			goto err_again; /* record header not complete */
 		}
 
@@ -953,7 +945,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 	if (len < svsk->sk_reclen) {
 		dprintk("svc: incomplete TCP record (%d of %d)\n",
 			len, svsk->sk_reclen);
-		svc_xprt_received(&svsk->sk_xprt);
 		goto err_again; /* record not complete */
 	}
 	len = svsk->sk_reclen;
@@ -961,14 +952,11 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
 
 	return len;
 error:
-	if (len == -EAGAIN) {
+	if (len == -EAGAIN)
 		dprintk("RPC: TCP recv_record got EAGAIN\n");
-		svc_xprt_received(&svsk->sk_xprt);
-	}
 	return len;
 err_delete:
 	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
-	svc_xprt_received(&svsk->sk_xprt);
 err_again:
 	return -EAGAIN;
 }
@@ -1110,7 +1098,6 @@ out:
 	svsk->sk_tcplen = 0;
 
 	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
-	svc_xprt_received(&svsk->sk_xprt);
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpcnt++;
 
@@ -1119,7 +1106,6 @@ out:
 err_again:
 	if (len == -EAGAIN) {
 		dprintk("RPC: TCP recvfrom got EAGAIN\n");
-		svc_xprt_received(&svsk->sk_xprt);
 		return len;
 	}
 error:
@@ -1494,8 +1480,8 @@ static void svc_sock_detach(struct svc_xprt *xprt)
 	sk->sk_data_ready = svsk->sk_odata;
 	sk->sk_write_space = svsk->sk_owspace;
 
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 }
 
 /*
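Every sk->sk_sleep dereference in svcsock.c becomes the sk_sleep() accessor, which keeps these wakeups working when the wait queue moves behind the socket. The idiom, extracted; my_sock_wake is hypothetical:

    #include <net/sock.h>

    static void my_sock_wake(struct sock *sk)
    {
    	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
    		wake_up_interruptible(sk_sleep(sk));
    }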
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2763fde88499..a1f82a87d34d 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -762,6 +762,7 @@ int write_bytes_to_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, un
 	__write_bytes_to_xdr_buf(&subbuf, obj, len);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(write_bytes_to_xdr_buf);
 
 int
 xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 42f09ade0044..970fb00f388c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -43,6 +43,7 @@
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
 #include <linux/net.h>
+#include <linux/ktime.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/metrics.h>
@@ -62,7 +63,6 @@
  * Local functions
  */
 static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
-static inline void do_xprt_reserve(struct rpc_task *);
 static void xprt_connect_status(struct rpc_task *task);
 static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
 
@@ -166,7 +166,6 @@ EXPORT_SYMBOL_GPL(xprt_unregister_transport);
 int xprt_load_transport(const char *transport_name)
 {
 	struct xprt_class *t;
-	char module_name[sizeof t->name + 5];
 	int result;
 
 	result = 0;
@@ -178,9 +177,7 @@ int xprt_load_transport(const char *transport_name)
 		}
 	}
 	spin_unlock(&xprt_list_lock);
-	strcpy(module_name, "xprt");
-	strncat(module_name, transport_name, sizeof t->name);
-	result = request_module(module_name);
+	result = request_module("xprt%s", transport_name);
 out:
 	return result;
 }
@@ -711,12 +708,16 @@ void xprt_connect(struct rpc_task *task)
 		if (task->tk_rqstp)
 			task->tk_rqstp->rq_bytes_sent = 0;
 
-		task->tk_timeout = xprt->connect_timeout;
+		task->tk_timeout = task->tk_rqstp->rq_timeout;
 		rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
+
+		if (test_bit(XPRT_CLOSING, &xprt->state))
+			return;
+		if (xprt_test_and_set_connecting(xprt))
+			return;
 		xprt->stat.connect_start = jiffies;
 		xprt->ops->connect(task);
 	}
-	return;
 }
 
 static void xprt_connect_status(struct rpc_task *task)
@@ -771,25 +772,19 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 }
 EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
 
-/**
- * xprt_update_rtt - update an RPC client's RTT state after receiving a reply
- * @task: RPC request that recently completed
- *
- */
-void xprt_update_rtt(struct rpc_task *task)
+static void xprt_update_rtt(struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_rtt *rtt = task->tk_client->cl_rtt;
 	unsigned timer = task->tk_msg.rpc_proc->p_timer;
+	long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt));
 
 	if (timer) {
 		if (req->rq_ntrans == 1)
-			rpc_update_rtt(rtt, timer,
-					(long)jiffies - req->rq_xtime);
+			rpc_update_rtt(rtt, timer, m);
 		rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
 	}
 }
-EXPORT_SYMBOL_GPL(xprt_update_rtt);
 
 /**
  * xprt_complete_rqst - called when reply processing is complete
@@ -807,7 +802,9 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
 			task->tk_pid, ntohl(req->rq_xid), copied);
 
 	xprt->stat.recvs++;
-	task->tk_rtt = (long)jiffies - req->rq_xtime;
+	req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
+	if (xprt->ops->timer != NULL)
+		xprt_update_rtt(task);
 
 	list_del_init(&req->rq_list);
 	req->rq_private_buf.len = copied;
@@ -906,7 +903,7 @@ void xprt_transmit(struct rpc_task *task)
 		return;
 
 	req->rq_connect_cookie = xprt->connect_cookie;
-	req->rq_xtime = jiffies;
+	req->rq_xtime = ktime_get();
 	status = xprt->ops->send_request(task);
 	if (status != 0) {
 		task->tk_status = status;
@@ -935,7 +932,7 @@ void xprt_transmit(struct rpc_task *task)
 	spin_unlock_bh(&xprt->transport_lock);
 }
 
-static inline void do_xprt_reserve(struct rpc_task *task)
+static void xprt_alloc_slot(struct rpc_task *task)
 {
 	struct rpc_xprt *xprt = task->tk_xprt;
 
@@ -955,6 +952,16 @@ static inline void do_xprt_reserve(struct rpc_task *task)
 	rpc_sleep_on(&xprt->backlog, task, NULL);
 }
 
+static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	memset(req, 0, sizeof(*req)); /* mark unused */
+
+	spin_lock(&xprt->reserve_lock);
+	list_add(&req->rq_list, &xprt->free);
+	rpc_wake_up_next(&xprt->backlog);
+	spin_unlock(&xprt->reserve_lock);
+}
+
 /**
  * xprt_reserve - allocate an RPC request slot
  * @task: RPC task requesting a slot allocation
@@ -968,13 +975,13 @@ void xprt_reserve(struct rpc_task *task)
 
 	task->tk_status = -EIO;
 	spin_lock(&xprt->reserve_lock);
-	do_xprt_reserve(task);
+	xprt_alloc_slot(task);
 	spin_unlock(&xprt->reserve_lock);
 }
 
 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
 {
-	return xprt->xid++;
+	return (__force __be32)xprt->xid++;
 }
 
 static inline void xprt_init_xid(struct rpc_xprt *xprt)
@@ -1006,14 +1013,10 @@ void xprt_release(struct rpc_task *task)
 {
 	struct rpc_xprt *xprt;
 	struct rpc_rqst *req;
-	int is_bc_request;
 
 	if (!(req = task->tk_rqstp))
 		return;
 
-	/* Preallocated backchannel request? */
-	is_bc_request = bc_prealloc(req);
-
 	xprt = req->rq_xprt;
 	rpc_count_iostats(task);
 	spin_lock_bh(&xprt->transport_lock);
@@ -1027,21 +1030,18 @@ void xprt_release(struct rpc_task *task)
 		mod_timer(&xprt->timer,
 				xprt->last_used + xprt->idle_timeout);
 	spin_unlock_bh(&xprt->transport_lock);
-	if (!bc_prealloc(req))
+	if (req->rq_buffer)
 		xprt->ops->buf_free(req->rq_buffer);
+	if (req->rq_cred != NULL)
+		put_rpccred(req->rq_cred);
 	task->tk_rqstp = NULL;
 	if (req->rq_release_snd_buf)
 		req->rq_release_snd_buf(req);
 
 	dprintk("RPC: %5u release request %p\n", task->tk_pid, req);
-	if (likely(!is_bc_request)) {
-		memset(req, 0, sizeof(*req)); /* mark unused */
-
-		spin_lock(&xprt->reserve_lock);
-		list_add(&req->rq_list, &xprt->free);
-		rpc_wake_up_next(&xprt->backlog);
-		spin_unlock(&xprt->reserve_lock);
-	} else
+	if (likely(!bc_prealloc(req)))
+		xprt_free_slot(xprt, req);
+	else
 		xprt_free_bc_request(req);
 }
 
@@ -1131,6 +1131,7 @@ static void xprt_destroy(struct kref *kref)
 	rpc_destroy_wait_queue(&xprt->sending);
 	rpc_destroy_wait_queue(&xprt->resend);
 	rpc_destroy_wait_queue(&xprt->backlog);
+	cancel_work_sync(&xprt->task_cleanup);
 	/*
 	 * Tear down transport state and free the rpc_xprt
 	 */
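xprt_load_transport() above drops its fixed-size module_name buffer because request_module() accepts a printf-style format. A sketch; the "mymod-%s" prefix is hypothetical:

    #include <linux/kmod.h>

    static int my_load(const char *name)
    {
    	/* no strcpy/strncat into a stack buffer needed */
    	return request_module("mymod-%s", name);
    }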
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f92e37eb413c..0194de814933 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -566,7 +566,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 	       ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
 	       rqstp->rq_arg.head[0].iov_len);
 
-	svc_xprt_received(rqstp->rq_xprt);
 	return ret;
 }
 
@@ -665,7 +664,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		rqstp->rq_arg.head[0].iov_len);
 	rqstp->rq_prot = IPPROTO_MAX;
 	svc_xprt_copy_addrs(rqstp, xprt);
-	svc_xprt_received(xprt);
 	return ret;
 
  close_out:
@@ -678,6 +676,5 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	 */
 	set_bit(XPT_CLOSE, &xprt->xpt_flags);
 defer:
-	svc_xprt_received(xprt);
 	return 0;
 }
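These RDMA hunks, like the svcsock.c ones earlier, delete per-transport svc_xprt_received() calls: the generic svc_recv() path (see the svc_xprt.c hunk above) now signals the transport after xpo_recvfrom() returns. A sketch of how a receive method simplifies; my_recvfrom is hypothetical:

    #include <linux/sunrpc/svc.h>

    static int my_recvfrom(struct svc_rqst *rqstp)
    {
    	int len = 0;

    	/* ... pull one request into rqstp->rq_arg, set len ... */
    	return len;	/* no svc_xprt_received() here any more */
    }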
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 187257b1d880..a85e866a77f7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -305,7 +305,6 @@ xprt_setup_rdma(struct xprt_create *args)
 	/* 60 second timeout, no retries */
 	xprt->timeout = &xprt_rdma_default_timeout;
 	xprt->bind_timeout = (60U * HZ);
-	xprt->connect_timeout = (60U * HZ);
 	xprt->reestablish_timeout = (5U * HZ);
 	xprt->idle_timeout = (5U * 60 * HZ);
 
@@ -449,21 +448,19 @@ xprt_rdma_connect(struct rpc_task *task)
 	struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt;
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 
-	if (!xprt_test_and_set_connecting(xprt)) {
-		if (r_xprt->rx_ep.rep_connected != 0) {
-			/* Reconnect */
-			schedule_delayed_work(&r_xprt->rdma_connect,
-				xprt->reestablish_timeout);
-			xprt->reestablish_timeout <<= 1;
-			if (xprt->reestablish_timeout > (30 * HZ))
-				xprt->reestablish_timeout = (30 * HZ);
-			else if (xprt->reestablish_timeout < (5 * HZ))
-				xprt->reestablish_timeout = (5 * HZ);
-		} else {
-			schedule_delayed_work(&r_xprt->rdma_connect, 0);
-			if (!RPC_IS_ASYNC(task))
-				flush_scheduled_work();
-		}
+	if (r_xprt->rx_ep.rep_connected != 0) {
+		/* Reconnect */
+		schedule_delayed_work(&r_xprt->rdma_connect,
+			xprt->reestablish_timeout);
+		xprt->reestablish_timeout <<= 1;
+		if (xprt->reestablish_timeout > (30 * HZ))
+			xprt->reestablish_timeout = (30 * HZ);
+		else if (xprt->reestablish_timeout < (5 * HZ))
+			xprt->reestablish_timeout = (5 * HZ);
+	} else {
+		schedule_delayed_work(&r_xprt->rdma_connect, 0);
+		if (!RPC_IS_ASYNC(task))
+			flush_scheduled_work();
 	}
 }
 
@@ -677,7 +674,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
 		goto drop_connection;
 
-	task->tk_bytes_sent += rqst->rq_snd_buf.len;
+	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
 	rqst->rq_bytes_sent = 0;
 	return 0;
 
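xprt_rdma_connect() loses its xprt_test_and_set_connecting() guard because xprt_connect() in net/sunrpc/xprt.c (above) now serialises connect attempts for every transport. The underlying idiom is an atomic test-and-set gate; all names here are hypothetical:

    #include <linux/bitops.h>

    #define MY_CONNECTING	0

    struct my_conn { unsigned long state; };

    static void my_connect(struct my_conn *c)
    {
    	if (test_and_set_bit(MY_CONNECTING, &c->state))
    		return;		/* another connector already running */
    	/* ... start the connection attempt ... */
    	clear_bit(MY_CONNECTING, &c->state);
    }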
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9847c30b5001..7ca65c7005ea 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -138,20 +138,6 @@ static ctl_table sunrpc_table[] = {
 #endif
 
 /*
- * Time out for an RPC UDP socket connect. UDP socket connects are
- * synchronous, but we set a timeout anyway in case of resource
- * exhaustion on the local host.
- */
-#define XS_UDP_CONN_TO (5U * HZ)
-
-/*
- * Wait duration for an RPC TCP connection to be established. Solaris
- * NFS over TCP uses 60 seconds, for example, which is in line with how
- * long a server takes to reboot.
- */
-#define XS_TCP_CONN_TO (60U * HZ)
-
-/*
  * Wait duration for a reply from the RPC portmapper.
  */
 #define XS_BIND_TO (60U * HZ)
@@ -224,7 +210,8 @@ struct sock_xprt {
 	 * State of TCP reply receive
 	 */
 	__be32	tcp_fraghdr,
-		tcp_xid;
+		tcp_xid,
+		tcp_calldir;
 
 	u32	tcp_offset,
 		tcp_reclen;
@@ -542,7 +529,7 @@ static int xs_udp_send_request(struct rpc_task *task)
 			xdr->len - req->rq_bytes_sent, status);
 
 	if (status >= 0) {
-		task->tk_bytes_sent += status;
+		req->rq_xmit_bytes_sent += status;
 		if (status >= req->rq_slen)
 			return 0;
 		/* Still some bytes left; set up for a retry later. */
@@ -638,7 +625,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 		/* If we've sent the entire packet, immediately
 		 * reset the count of bytes sent. */
 		req->rq_bytes_sent += status;
-		task->tk_bytes_sent += status;
+		req->rq_xmit_bytes_sent += status;
 		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
 			req->rq_bytes_sent = 0;
 			return 0;
@@ -858,7 +845,6 @@ static void xs_udp_data_ready(struct sock *sk, int len)
 	dst_confirm(skb_dst(skb));
 
 	xprt_adjust_cwnd(task, copied);
-	xprt_update_rtt(task);
 	xprt_complete_rqst(task, copied);
 
  out_unlock:
@@ -942,7 +928,7 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
 {
 	size_t len, used;
 	u32 offset;
-	__be32 calldir;
+	char *p;
 
 	/*
 	 * We want transport->tcp_offset to be 8 at the end of this routine
@@ -951,26 +937,33 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
951 * transport->tcp_offset is 4 (after having already read the xid). 937 * transport->tcp_offset is 4 (after having already read the xid).
952 */ 938 */
953 offset = transport->tcp_offset - sizeof(transport->tcp_xid); 939 offset = transport->tcp_offset - sizeof(transport->tcp_xid);
954 len = sizeof(calldir) - offset; 940 len = sizeof(transport->tcp_calldir) - offset;
955 dprintk("RPC: reading CALL/REPLY flag (%Zu bytes)\n", len); 941 dprintk("RPC: reading CALL/REPLY flag (%Zu bytes)\n", len);
956 used = xdr_skb_read_bits(desc, &calldir, len); 942 p = ((char *) &transport->tcp_calldir) + offset;
943 used = xdr_skb_read_bits(desc, p, len);
957 transport->tcp_offset += used; 944 transport->tcp_offset += used;
958 if (used != len) 945 if (used != len)
959 return; 946 return;
960 transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR; 947 transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR;
961 transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
962 transport->tcp_flags |= TCP_RCV_COPY_DATA;
963 /* 948 /*
964 * We don't yet have the XDR buffer, so we will write the calldir 949 * We don't yet have the XDR buffer, so we will write the calldir
965 * out after we get the buffer from the 'struct rpc_rqst' 950 * out after we get the buffer from the 'struct rpc_rqst'
966 */ 951 */
967 if (ntohl(calldir) == RPC_REPLY) 952 switch (ntohl(transport->tcp_calldir)) {
953 case RPC_REPLY:
954 transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
955 transport->tcp_flags |= TCP_RCV_COPY_DATA;
968 transport->tcp_flags |= TCP_RPC_REPLY; 956 transport->tcp_flags |= TCP_RPC_REPLY;
969 else 957 break;
958 case RPC_CALL:
959 transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
960 transport->tcp_flags |= TCP_RCV_COPY_DATA;
970 transport->tcp_flags &= ~TCP_RPC_REPLY; 961 transport->tcp_flags &= ~TCP_RPC_REPLY;
971 dprintk("RPC: reading %s CALL/REPLY flag %08x\n", 962 break;
972 (transport->tcp_flags & TCP_RPC_REPLY) ? 963 default:
973 "reply for" : "request with", calldir); 964 dprintk("RPC: invalid request message type\n");
965 xprt_force_disconnect(&transport->xprt);
966 }
974 xs_tcp_check_fraghdr(transport); 967 xs_tcp_check_fraghdr(transport);
975} 968}
976 969
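The rework above lets xs_tcp_read_calldir() resume after a short read: the four calldir bytes now accumulate in transport->tcp_calldir instead of a stack variable, so a field split across TCP segments is reassembled correctly. A self-contained sketch of the resumable-read technique; the struct and the read function here are illustrative stand-ins, not the sunrpc API:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <string.h>

    struct conn_state {
    	uint32_t calldir_be;   /* big-endian field, built up across reads */
    	size_t   calldir_off;  /* how many of its 4 bytes we already have */
    };

    /* Consume up to 'avail' bytes; return how many were used.  The caller
     * re-invokes this with the next segment until the field is complete. */
    static size_t read_calldir(struct conn_state *c,
    			   const unsigned char *data, size_t avail)
    {
    	size_t want = sizeof(c->calldir_be) - c->calldir_off;
    	size_t take = avail < want ? avail : want;

    	memcpy((unsigned char *)&c->calldir_be + c->calldir_off, data, take);
    	c->calldir_off += take;
    	if (c->calldir_off == sizeof(c->calldir_be)) {
    		uint32_t calldir = ntohl(c->calldir_be);
    		/* dispatch on calldir (RPC_CALL / RPC_REPLY) here */
    		(void)calldir;
    	}
    	return take;
    }

Because the partial value lives in the connection state, re-entering the function with the next segment continues at the saved offset; the stack-variable version it replaces could assemble the field incorrectly when the read was split.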
@@ -990,12 +983,10 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
990 /* 983 /*
991 * Save the RPC direction in the XDR buffer 984 * Save the RPC direction in the XDR buffer
992 */ 985 */
993 __be32 calldir = transport->tcp_flags & TCP_RPC_REPLY ?
994 htonl(RPC_REPLY) : 0;
995
996 memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied, 986 memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied,
997 &calldir, sizeof(calldir)); 987 &transport->tcp_calldir,
998 transport->tcp_copied += sizeof(calldir); 988 sizeof(transport->tcp_calldir));
989 transport->tcp_copied += sizeof(transport->tcp_calldir);
999 transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; 990 transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR;
1000 } 991 }
1001 992
@@ -1050,8 +1041,6 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
1050 if (transport->tcp_flags & TCP_RCV_LAST_FRAG) 1041 if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
1051 transport->tcp_flags &= ~TCP_RCV_COPY_DATA; 1042 transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
1052 } 1043 }
1053
1054 return;
1055} 1044}
1056 1045
1057/* 1046/*
@@ -2016,9 +2005,6 @@ static void xs_connect(struct rpc_task *task)
2016 struct rpc_xprt *xprt = task->tk_xprt; 2005 struct rpc_xprt *xprt = task->tk_xprt;
2017 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2006 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
2018 2007
2019 if (xprt_test_and_set_connecting(xprt))
2020 return;
2021
2022 if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { 2008 if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) {
2023 dprintk("RPC: xs_connect delayed xprt %p for %lu " 2009 dprintk("RPC: xs_connect delayed xprt %p for %lu "
2024 "seconds\n", 2010 "seconds\n",
@@ -2038,16 +2024,6 @@ static void xs_connect(struct rpc_task *task)
2038 } 2024 }
2039} 2025}
2040 2026
2041static void xs_tcp_connect(struct rpc_task *task)
2042{
2043 struct rpc_xprt *xprt = task->tk_xprt;
2044
2045 /* Exit if we need to wait for socket shutdown to complete */
2046 if (test_bit(XPRT_CLOSING, &xprt->state))
2047 return;
2048 xs_connect(task);
2049}
2050
2051/** 2027/**
2052 * xs_udp_print_stats - display UDP socket-specific stats 2028 * xs_udp_print_stats - display UDP socket-specific stats
2053 * @xprt: rpc_xprt struct containing statistics 2029 * @xprt: rpc_xprt struct containing statistics
@@ -2210,7 +2186,6 @@ static int bc_send_request(struct rpc_task *task)
2210 2186
2211static void bc_close(struct rpc_xprt *xprt) 2187static void bc_close(struct rpc_xprt *xprt)
2212{ 2188{
2213 return;
2214} 2189}
2215 2190
2216/* 2191/*
@@ -2220,7 +2195,6 @@ static void bc_close(struct rpc_xprt *xprt)
2220 2195
2221static void bc_destroy(struct rpc_xprt *xprt) 2196static void bc_destroy(struct rpc_xprt *xprt)
2222{ 2197{
2223 return;
2224} 2198}
2225 2199
2226static struct rpc_xprt_ops xs_udp_ops = { 2200static struct rpc_xprt_ops xs_udp_ops = {
@@ -2246,7 +2220,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
2246 .release_xprt = xs_tcp_release_xprt, 2220 .release_xprt = xs_tcp_release_xprt,
2247 .rpcbind = rpcb_getport_async, 2221 .rpcbind = rpcb_getport_async,
2248 .set_port = xs_set_port, 2222 .set_port = xs_set_port,
2249 .connect = xs_tcp_connect, 2223 .connect = xs_connect,
2250 .buf_alloc = rpc_malloc, 2224 .buf_alloc = rpc_malloc,
2251 .buf_free = rpc_free, 2225 .buf_free = rpc_free,
2252 .send_request = xs_tcp_send_request, 2226 .send_request = xs_tcp_send_request,
@@ -2325,6 +2299,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2325 struct sockaddr *addr = args->dstaddr; 2299 struct sockaddr *addr = args->dstaddr;
2326 struct rpc_xprt *xprt; 2300 struct rpc_xprt *xprt;
2327 struct sock_xprt *transport; 2301 struct sock_xprt *transport;
2302 struct rpc_xprt *ret;
2328 2303
2329 xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); 2304 xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
2330 if (IS_ERR(xprt)) 2305 if (IS_ERR(xprt))
@@ -2337,7 +2312,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2337 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); 2312 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
2338 2313
2339 xprt->bind_timeout = XS_BIND_TO; 2314 xprt->bind_timeout = XS_BIND_TO;
2340 xprt->connect_timeout = XS_UDP_CONN_TO;
2341 xprt->reestablish_timeout = XS_UDP_REEST_TO; 2315 xprt->reestablish_timeout = XS_UDP_REEST_TO;
2342 xprt->idle_timeout = XS_IDLE_DISC_TO; 2316 xprt->idle_timeout = XS_IDLE_DISC_TO;
2343 2317
@@ -2363,8 +2337,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2363 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); 2337 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
2364 break; 2338 break;
2365 default: 2339 default:
2366 kfree(xprt); 2340 ret = ERR_PTR(-EAFNOSUPPORT);
2367 return ERR_PTR(-EAFNOSUPPORT); 2341 goto out_err;
2368 } 2342 }
2369 2343
2370 if (xprt_bound(xprt)) 2344 if (xprt_bound(xprt))
@@ -2379,10 +2353,11 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2379 2353
2380 if (try_module_get(THIS_MODULE)) 2354 if (try_module_get(THIS_MODULE))
2381 return xprt; 2355 return xprt;
2382 2356 ret = ERR_PTR(-EINVAL);
2357out_err:
2383 kfree(xprt->slot); 2358 kfree(xprt->slot);
2384 kfree(xprt); 2359 kfree(xprt);
2385 return ERR_PTR(-EINVAL); 2360 return ret;
2386} 2361}
2387 2362
2388static const struct rpc_timeout xs_tcp_default_timeout = { 2363static const struct rpc_timeout xs_tcp_default_timeout = {
@@ -2401,6 +2376,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2401 struct sockaddr *addr = args->dstaddr; 2376 struct sockaddr *addr = args->dstaddr;
2402 struct rpc_xprt *xprt; 2377 struct rpc_xprt *xprt;
2403 struct sock_xprt *transport; 2378 struct sock_xprt *transport;
2379 struct rpc_xprt *ret;
2404 2380
2405 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2381 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2406 if (IS_ERR(xprt)) 2382 if (IS_ERR(xprt))
@@ -2412,7 +2388,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2412 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; 2388 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
2413 2389
2414 xprt->bind_timeout = XS_BIND_TO; 2390 xprt->bind_timeout = XS_BIND_TO;
2415 xprt->connect_timeout = XS_TCP_CONN_TO;
2416 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 2391 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
2417 xprt->idle_timeout = XS_IDLE_DISC_TO; 2392 xprt->idle_timeout = XS_IDLE_DISC_TO;
2418 2393
@@ -2437,8 +2412,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2437 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); 2412 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
2438 break; 2413 break;
2439 default: 2414 default:
2440 kfree(xprt); 2415 ret = ERR_PTR(-EAFNOSUPPORT);
2441 return ERR_PTR(-EAFNOSUPPORT); 2416 goto out_err;
2442 } 2417 }
2443 2418
2444 if (xprt_bound(xprt)) 2419 if (xprt_bound(xprt))
@@ -2454,10 +2429,11 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2454 2429
2455 if (try_module_get(THIS_MODULE)) 2430 if (try_module_get(THIS_MODULE))
2456 return xprt; 2431 return xprt;
2457 2432 ret = ERR_PTR(-EINVAL);
2433out_err:
2458 kfree(xprt->slot); 2434 kfree(xprt->slot);
2459 kfree(xprt); 2435 kfree(xprt);
2460 return ERR_PTR(-EINVAL); 2436 return ret;
2461} 2437}
2462 2438
2463/** 2439/**
@@ -2471,9 +2447,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2471 struct rpc_xprt *xprt; 2447 struct rpc_xprt *xprt;
2472 struct sock_xprt *transport; 2448 struct sock_xprt *transport;
2473 struct svc_sock *bc_sock; 2449 struct svc_sock *bc_sock;
2474 2450 struct rpc_xprt *ret;
2475 if (!args->bc_xprt)
2476 ERR_PTR(-EINVAL);
2477 2451
2478 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2452 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
2479 if (IS_ERR(xprt)) 2453 if (IS_ERR(xprt))
@@ -2488,7 +2462,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2488 /* backchannel */ 2462 /* backchannel */
2489 xprt_set_bound(xprt); 2463 xprt_set_bound(xprt);
2490 xprt->bind_timeout = 0; 2464 xprt->bind_timeout = 0;
2491 xprt->connect_timeout = 0;
2492 xprt->reestablish_timeout = 0; 2465 xprt->reestablish_timeout = 0;
2493 xprt->idle_timeout = 0; 2466 xprt->idle_timeout = 0;
2494 2467
@@ -2514,8 +2487,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2514 RPCBIND_NETID_TCP6); 2487 RPCBIND_NETID_TCP6);
2515 break; 2488 break;
2516 default: 2489 default:
2517 kfree(xprt); 2490 ret = ERR_PTR(-EAFNOSUPPORT);
2518 return ERR_PTR(-EAFNOSUPPORT); 2491 goto out_err;
2519 } 2492 }
2520 2493
2521 if (xprt_bound(xprt)) 2494 if (xprt_bound(xprt))
@@ -2537,9 +2510,11 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2537 2510
2538 if (try_module_get(THIS_MODULE)) 2511 if (try_module_get(THIS_MODULE))
2539 return xprt; 2512 return xprt;
2513 ret = ERR_PTR(-EINVAL);
2514out_err:
2540 kfree(xprt->slot); 2515 kfree(xprt->slot);
2541 kfree(xprt); 2516 kfree(xprt);
2542 return ERR_PTR(-EINVAL); 2517 return ret;
2543} 2518}
2544 2519
2545static struct xprt_class xs_udp_transport = { 2520static struct xprt_class xs_udp_transport = {
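All three xs_setup_*() constructors in this file now route failures through a single out_err label that frees the slot table and the transport before returning an ERR_PTR value. A minimal userspace sketch of the idiom, with a stand-in ERR_PTR macro in place of the kernel's:

    #include <errno.h>
    #include <stdlib.h>

    /* Userspace stand-in for the kernel's ERR_PTR() helper. */
    #define ERR_PTR(err)  ((void *)(long)(err))

    struct fake_xprt { int *slot; };

    static struct fake_xprt *setup_xprt(int family_ok, int module_ok)
    {
    	struct fake_xprt *xprt;
    	struct fake_xprt *ret;

    	xprt = calloc(1, sizeof(*xprt));
    	if (!xprt)
    		return ERR_PTR(-ENOMEM);
    	xprt->slot = calloc(16, sizeof(int));

    	if (!family_ok) {
    		ret = ERR_PTR(-EAFNOSUPPORT);
    		goto out_err;
    	}
    	if (module_ok)
    		return xprt;
    	ret = ERR_PTR(-EINVAL);
    out_err:
    	free(xprt->slot);          /* single cleanup site for all failures */
    	free(xprt);
    	return ret;
    }

The single exit path is what the patch buys: the old code duplicated the kfree() pair in each error branch, and the removed xs_setup_bc_tcp() check even forgot its return statement.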
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 53196009160a..ca84212cfbfe 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -82,7 +82,6 @@ static int __net_init sysctl_net_init(struct net *net)
82static void __net_exit sysctl_net_exit(struct net *net) 82static void __net_exit sysctl_net_exit(struct net *net)
83{ 83{
84 WARN_ON(!list_empty(&net->sysctls.list)); 84 WARN_ON(!list_empty(&net->sysctls.list));
85 return;
86} 85}
87 86
88static struct pernet_operations sysctl_pernet_ops = { 87static struct pernet_operations sysctl_pernet_ops = {
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index e5207a11edf6..c048543ffbeb 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -92,3 +92,35 @@ int tipc_addr_node_valid(u32 addr)
92 return (tipc_addr_domain_valid(addr) && tipc_node(addr)); 92 return (tipc_addr_domain_valid(addr) && tipc_node(addr));
93} 93}
94 94
95int tipc_in_scope(u32 domain, u32 addr)
96{
97 if (!domain || (domain == addr))
98 return 1;
99 if (domain == (addr & 0xfffff000u)) /* domain <Z.C.0> */
100 return 1;
101 if (domain == (addr & 0xff000000u)) /* domain <Z.0.0> */
102 return 1;
103 return 0;
104}
105
106/**
107 * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
108 */
109
110int tipc_addr_scope(u32 domain)
111{
112 if (likely(!domain))
113 return TIPC_ZONE_SCOPE;
114 if (tipc_node(domain))
115 return TIPC_NODE_SCOPE;
116 if (tipc_cluster(domain))
117 return TIPC_CLUSTER_SCOPE;
118 return TIPC_ZONE_SCOPE;
119}
120
121char *tipc_addr_string_fill(char *string, u32 addr)
122{
123 snprintf(string, 16, "<%u.%u.%u>",
124 tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
125 return string;
126}
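tipc_in_scope(), now out of line in addr.c, matches a lookup domain against an address by masking off the node bits (domain <Z.C.0>) or the cluster and node bits (domain <Z.0.0>). A standalone check of those masks, assuming TIPC's 8/12/12-bit zone/cluster/node split:

    #include <assert.h>
    #include <stdint.h>

    /* TIPC packs <zone.cluster.node> into 8 + 12 + 12 bits. */
    static uint32_t tipc_addr(uint32_t z, uint32_t c, uint32_t n)
    {
    	return (z << 24) | (c << 12) | n;
    }

    static int in_scope(uint32_t domain, uint32_t addr)
    {
    	if (!domain || domain == addr)
    		return 1;
    	if (domain == (addr & 0xfffff000u))  /* domain <Z.C.0> */
    		return 1;
    	if (domain == (addr & 0xff000000u))  /* domain <Z.0.0> */
    		return 1;
    	return 0;
    }

    int main(void)
    {
    	uint32_t node = tipc_addr(1, 1, 5);

    	assert(in_scope(0, node));                   /* wildcard domain */
    	assert(in_scope(tipc_addr(1, 1, 0), node));  /* same cluster */
    	assert(in_scope(tipc_addr(1, 0, 0), node));  /* same zone */
    	assert(!in_scope(tipc_addr(2, 0, 0), node)); /* different zone */
    	return 0;
    }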
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 3ba67e6ce03e..c1cc5724d8cc 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -67,32 +67,6 @@ static inline int may_route(u32 addr)
67 return(addr ^ tipc_own_addr) >> 11; 67 return(addr ^ tipc_own_addr) >> 11;
68} 68}
69 69
70static inline int in_scope(u32 domain, u32 addr)
71{
72 if (!domain || (domain == addr))
73 return 1;
74 if (domain == (addr & 0xfffff000u)) /* domain <Z.C.0> */
75 return 1;
76 if (domain == (addr & 0xff000000u)) /* domain <Z.0.0> */
77 return 1;
78 return 0;
79}
80
81/**
82 * addr_scope - convert message lookup domain to equivalent 2-bit scope value
83 */
84
85static inline int addr_scope(u32 domain)
86{
87 if (likely(!domain))
88 return TIPC_ZONE_SCOPE;
89 if (tipc_node(domain))
90 return TIPC_NODE_SCOPE;
91 if (tipc_cluster(domain))
92 return TIPC_CLUSTER_SCOPE;
93 return TIPC_ZONE_SCOPE;
94}
95
96/** 70/**
97 * addr_domain - convert 2-bit scope value to equivalent message lookup domain 71 * addr_domain - convert 2-bit scope value to equivalent message lookup domain
98 * 72 *
@@ -110,14 +84,9 @@ static inline int addr_domain(int sc)
110 return tipc_addr(tipc_zone(tipc_own_addr), 0, 0); 84 return tipc_addr(tipc_zone(tipc_own_addr), 0, 0);
111} 85}
112 86
113static inline char *addr_string_fill(char *string, u32 addr)
114{
115 snprintf(string, 16, "<%u.%u.%u>",
116 tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
117 return string;
118}
119
120int tipc_addr_domain_valid(u32); 87int tipc_addr_domain_valid(u32);
121int tipc_addr_node_valid(u32 addr); 88int tipc_addr_node_valid(u32 addr);
122 89int tipc_in_scope(u32 domain, u32 addr);
90int tipc_addr_scope(u32 domain);
91char *tipc_addr_string_fill(char *string, u32 addr);
123#endif 92#endif
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a3bfd4064912..a008c6689305 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -119,7 +119,7 @@ static struct bclink *bclink = NULL;
119static struct link *bcl = NULL; 119static struct link *bcl = NULL;
120static DEFINE_SPINLOCK(bc_lock); 120static DEFINE_SPINLOCK(bc_lock);
121 121
122const char tipc_bclink_name[] = "multicast-link"; 122const char tipc_bclink_name[] = "broadcast-link";
123 123
124 124
125static u32 buf_seqno(struct sk_buff *buf) 125static u32 buf_seqno(struct sk_buff *buf)
@@ -275,7 +275,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr)
275 buf = buf_acquire(INT_H_SIZE); 275 buf = buf_acquire(INT_H_SIZE);
276 if (buf) { 276 if (buf) {
277 msg = buf_msg(buf); 277 msg = buf_msg(buf);
278 msg_init(msg, BCAST_PROTOCOL, STATE_MSG, 278 tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
279 INT_H_SIZE, n_ptr->addr); 279 INT_H_SIZE, n_ptr->addr);
280 msg_set_mc_netid(msg, tipc_net_id); 280 msg_set_mc_netid(msg, tipc_net_id);
281 msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in)); 281 msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in));
@@ -558,10 +558,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
558 struct tipc_bearer *unused1, 558 struct tipc_bearer *unused1,
559 struct tipc_media_addr *unused2) 559 struct tipc_media_addr *unused2)
560{ 560{
561 static int send_count = 0;
562
563 int bp_index; 561 int bp_index;
564 int swap_time;
565 562
566 /* Prepare buffer for broadcasting (if first time trying to send it) */ 563 /* Prepare buffer for broadcasting (if first time trying to send it) */
567 564
@@ -575,11 +572,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
575 msg_set_mc_netid(msg, tipc_net_id); 572 msg_set_mc_netid(msg, tipc_net_id);
576 } 573 }
577 574
578 /* Determine if bearer pairs should be swapped following this attempt */
579
580 if ((swap_time = (++send_count >= 10)))
581 send_count = 0;
582
583 /* Send buffer over bearers until all targets reached */ 575 /* Send buffer over bearers until all targets reached */
584 576
585 bcbearer->remains = tipc_cltr_bcast_nodes; 577 bcbearer->remains = tipc_cltr_bcast_nodes;
@@ -595,21 +587,22 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
595 if (bcbearer->remains_new.count == bcbearer->remains.count) 587 if (bcbearer->remains_new.count == bcbearer->remains.count)
596 continue; /* bearer pair doesn't add anything */ 588 continue; /* bearer pair doesn't add anything */
597 589
598 if (!p->publ.blocked && 590 if (p->publ.blocked ||
599 !p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) { 591 p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) {
600 if (swap_time && s && !s->publ.blocked) 592 /* unable to send on primary bearer */
601 goto swap; 593 if (!s || s->publ.blocked ||
602 else 594 s->media->send_msg(buf, &s->publ,
603 goto update; 595 &s->media->bcast_addr)) {
596 /* unable to send on either bearer */
597 continue;
598 }
599 }
600
601 if (s) {
602 bcbearer->bpairs[bp_index].primary = s;
603 bcbearer->bpairs[bp_index].secondary = p;
604 } 604 }
605 605
606 if (!s || s->publ.blocked ||
607 s->media->send_msg(buf, &s->publ, &s->media->bcast_addr))
608 continue; /* unable to send using bearer pair */
609swap:
610 bcbearer->bpairs[bp_index].primary = s;
611 bcbearer->bpairs[bp_index].secondary = p;
612update:
613 if (bcbearer->remains_new.count == 0) 606 if (bcbearer->remains_new.count == 0)
614 return 0; 607 return 0;
615 608
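The rewritten send loop above drops the old swap heuristic, which only considered swapping bearer pairs on every tenth send: it now falls back to the secondary bearer as soon as the primary fails, and whenever a secondary exists it swaps the pair so consecutive broadcasts alternate bearers. A compact sketch of that pattern; the bearer types and the send callback are stand-ins:

    struct bearer;
    struct sk_buff;

    struct bearer_pair {
    	struct bearer *primary;
    	struct bearer *secondary;
    };

    /* send() returns 0 on success.  Mirrors the loop body above: fall back
     * to the secondary when the primary fails, then swap the pair so the
     * next broadcast starts from the other bearer. */
    static int pair_send(struct bearer_pair *bp, struct sk_buff *buf,
    		     int (*send)(struct bearer *, struct sk_buff *))
    {
    	struct bearer *p = bp->primary, *s = bp->secondary;

    	if (send(p, buf) != 0) {
    		/* unable to send on primary bearer */
    		if (!s || send(s, buf) != 0)
    			return -1;	/* unable to send on either bearer */
    	}
    	if (s) {			/* alternate bearers between sends */
    		bp->primary = s;
    		bp->secondary = p;
    	}
    	return 0;
    }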
@@ -829,3 +822,113 @@ void tipc_bclink_stop(void)
829 spin_unlock_bh(&bc_lock); 822 spin_unlock_bh(&bc_lock);
830} 823}
831 824
825
826/**
827 * tipc_nmap_add - add a node to a node map
828 */
829
830void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
831{
832 int n = tipc_node(node);
833 int w = n / WSIZE;
834 u32 mask = (1 << (n % WSIZE));
835
836 if ((nm_ptr->map[w] & mask) == 0) {
837 nm_ptr->count++;
838 nm_ptr->map[w] |= mask;
839 }
840}
841
842/**
843 * tipc_nmap_remove - remove a node from a node map
844 */
845
846void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
847{
848 int n = tipc_node(node);
849 int w = n / WSIZE;
850 u32 mask = (1 << (n % WSIZE));
851
852 if ((nm_ptr->map[w] & mask) != 0) {
853 nm_ptr->map[w] &= ~mask;
854 nm_ptr->count--;
855 }
856}
857
858/**
859 * tipc_nmap_diff - find differences between node maps
860 * @nm_a: input node map A
861 * @nm_b: input node map B
862 * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
863 */
864
865void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
866 struct tipc_node_map *nm_diff)
867{
868 int stop = ARRAY_SIZE(nm_a->map);
869 int w;
870 int b;
871 u32 map;
872
873 memset(nm_diff, 0, sizeof(*nm_diff));
874 for (w = 0; w < stop; w++) {
875 map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]);
876 nm_diff->map[w] = map;
877 if (map != 0) {
878 for (b = 0 ; b < WSIZE; b++) {
879 if (map & (1 << b))
880 nm_diff->count++;
881 }
882 }
883 }
884}
885
886/**
887 * tipc_port_list_add - add a port to a port list, ensuring no duplicates
888 */
889
890void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
891{
892 struct port_list *item = pl_ptr;
893 int i;
894 int item_sz = PLSIZE;
895 int cnt = pl_ptr->count;
896
897 for (; ; cnt -= item_sz, item = item->next) {
898 if (cnt < PLSIZE)
899 item_sz = cnt;
900 for (i = 0; i < item_sz; i++)
901 if (item->ports[i] == port)
902 return;
903 if (i < PLSIZE) {
904 item->ports[i] = port;
905 pl_ptr->count++;
906 return;
907 }
908 if (!item->next) {
909 item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
910 if (!item->next) {
911 warn("Incomplete multicast delivery, no memory\n");
912 return;
913 }
914 item->next->next = NULL;
915 }
916 }
917}
918
919/**
920 * tipc_port_list_free - free dynamically created entries in port_list chain
921 *
922 */
923
924void tipc_port_list_free(struct port_list *pl_ptr)
925{
926 struct port_list *item;
927 struct port_list *next;
928
929 for (item = pl_ptr->next; item; item = next) {
930 next = item->next;
931 kfree(item);
932 }
933}
934
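The node map moved into bcast.c is a counted bitmap: map[] carries one bit per node and count caches the population, so emptiness and equality tests stay O(1). A standalone demo of the same structure; WSIZE and the map length here are illustrative:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define WSIZE 32			/* bits per map word (illustrative) */

    struct node_map {
    	uint32_t count;
    	uint32_t map[8];		/* room for 256 nodes */
    };

    static void nmap_add(struct node_map *nm, unsigned int n)
    {
    	uint32_t mask = 1u << (n % WSIZE);

    	if (!(nm->map[n / WSIZE] & mask)) {	/* only count new bits */
    		nm->map[n / WSIZE] |= mask;
    		nm->count++;
    	}
    }

    static void nmap_remove(struct node_map *nm, unsigned int n)
    {
    	uint32_t mask = 1u << (n % WSIZE);

    	if (nm->map[n / WSIZE] & mask) {
    		nm->map[n / WSIZE] &= ~mask;
    		nm->count--;
    	}
    }

    int main(void)
    {
    	struct node_map nm;

    	memset(&nm, 0, sizeof(nm));
    	nmap_add(&nm, 5);
    	nmap_add(&nm, 5);		/* duplicate: count unchanged */
    	nmap_add(&nm, 40);
    	assert(nm.count == 2);
    	nmap_remove(&nm, 5);
    	assert(nm.count == 1);
    	return 0;
    }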
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 4c1771e95c99..e8c2b81658c7 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -72,41 +72,11 @@ struct tipc_node;
72 72
73extern const char tipc_bclink_name[]; 73extern const char tipc_bclink_name[];
74 74
75void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node);
76void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
75 77
76/** 78/**
77 * nmap_add - add a node to a node map 79 * tipc_nmap_equal - test for equality of node maps
78 */
79
80static inline void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
81{
82 int n = tipc_node(node);
83 int w = n / WSIZE;
84 u32 mask = (1 << (n % WSIZE));
85
86 if ((nm_ptr->map[w] & mask) == 0) {
87 nm_ptr->count++;
88 nm_ptr->map[w] |= mask;
89 }
90}
91
92/**
93 * nmap_remove - remove a node from a node map
94 */
95
96static inline void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
97{
98 int n = tipc_node(node);
99 int w = n / WSIZE;
100 u32 mask = (1 << (n % WSIZE));
101
102 if ((nm_ptr->map[w] & mask) != 0) {
103 nm_ptr->map[w] &= ~mask;
104 nm_ptr->count--;
105 }
106}
107
108/**
109 * nmap_equal - test for equality of node maps
110 */ 80 */
111 81
112static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b) 82static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b)
@@ -114,84 +84,11 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m
114 return !memcmp(nm_a, nm_b, sizeof(*nm_a)); 84 return !memcmp(nm_a, nm_b, sizeof(*nm_a));
115} 85}
116 86
117/** 87void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
118 * nmap_diff - find differences between node maps 88 struct tipc_node_map *nm_diff);
119 * @nm_a: input node map A
120 * @nm_b: input node map B
121 * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
122 */
123
124static inline void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
125 struct tipc_node_map *nm_diff)
126{
127 int stop = ARRAY_SIZE(nm_a->map);
128 int w;
129 int b;
130 u32 map;
131
132 memset(nm_diff, 0, sizeof(*nm_diff));
133 for (w = 0; w < stop; w++) {
134 map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]);
135 nm_diff->map[w] = map;
136 if (map != 0) {
137 for (b = 0 ; b < WSIZE; b++) {
138 if (map & (1 << b))
139 nm_diff->count++;
140 }
141 }
142 }
143}
144
145/**
146 * port_list_add - add a port to a port list, ensuring no duplicates
147 */
148
149static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
150{
151 struct port_list *item = pl_ptr;
152 int i;
153 int item_sz = PLSIZE;
154 int cnt = pl_ptr->count;
155
156 for (; ; cnt -= item_sz, item = item->next) {
157 if (cnt < PLSIZE)
158 item_sz = cnt;
159 for (i = 0; i < item_sz; i++)
160 if (item->ports[i] == port)
161 return;
162 if (i < PLSIZE) {
163 item->ports[i] = port;
164 pl_ptr->count++;
165 return;
166 }
167 if (!item->next) {
168 item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
169 if (!item->next) {
170 warn("Incomplete multicast delivery, no memory\n");
171 return;
172 }
173 item->next->next = NULL;
174 }
175 }
176}
177
178/**
179 * port_list_free - free dynamically created entries in port_list chain
180 *
181 * Note: First item is on stack, so it doesn't need to be released
182 */
183
184static inline void tipc_port_list_free(struct port_list *pl_ptr)
185{
186 struct port_list *item;
187 struct port_list *next;
188
189 for (item = pl_ptr->next; item; item = next) {
190 next = item->next;
191 kfree(item);
192 }
193}
194 89
90void tipc_port_list_add(struct port_list *pl_ptr, u32 port);
91void tipc_port_list_free(struct port_list *pl_ptr);
195 92
196int tipc_bclink_init(void); 93int tipc_bclink_init(void);
197void tipc_bclink_stop(void); 94void tipc_bclink_stop(void);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 78091375ca12..52ae17b2583e 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -467,6 +467,18 @@ int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr)
467 return res; 467 return res;
468} 468}
469 469
470/**
471 * tipc_bearer_congested - determines if bearer is currently congested
472 */
473
474int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr)
475{
476 if (unlikely(b_ptr->publ.blocked))
477 return 1;
478 if (likely(list_empty(&b_ptr->cong_links)))
479 return 0;
480 return !tipc_bearer_resolve_congestion(b_ptr, l_ptr);
481}
470 482
471/** 483/**
472 * tipc_enable_bearer - enable bearer with the given name 484 * tipc_enable_bearer - enable bearer with the given name
@@ -493,7 +505,7 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority)
493 return -EINVAL; 505 return -EINVAL;
494 } 506 }
495 if (!tipc_addr_domain_valid(bcast_scope) || 507 if (!tipc_addr_domain_valid(bcast_scope) ||
496 !in_scope(bcast_scope, tipc_own_addr)) { 508 !tipc_in_scope(bcast_scope, tipc_own_addr)) {
497 warn("Bearer <%s> rejected, illegal broadcast scope\n", name); 509 warn("Bearer <%s> rejected, illegal broadcast scope\n", name);
498 return -EINVAL; 510 return -EINVAL;
499 } 511 }
@@ -571,7 +583,7 @@ restart:
571 spin_lock_init(&b_ptr->publ.lock); 583 spin_lock_init(&b_ptr->publ.lock);
572 write_unlock_bh(&tipc_net_lock); 584 write_unlock_bh(&tipc_net_lock);
573 info("Enabled bearer <%s>, discovery domain %s, priority %u\n", 585 info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
574 name, addr_string_fill(addr_string, bcast_scope), priority); 586 name, tipc_addr_string_fill(addr_string, bcast_scope), priority);
575 return 0; 587 return 0;
576failed: 588failed:
577 write_unlock_bh(&tipc_net_lock); 589 write_unlock_bh(&tipc_net_lock);
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 000228e93f9e..a850b389663e 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -125,6 +125,7 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest);
125void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr); 125void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr);
126struct bearer *tipc_bearer_find_interface(const char *if_name); 126struct bearer *tipc_bearer_find_interface(const char *if_name);
127int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr); 127int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr);
128int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr);
128int tipc_bearer_init(void); 129int tipc_bearer_init(void);
129void tipc_bearer_stop(void); 130void tipc_bearer_stop(void);
130void tipc_bearer_lock_push(struct bearer *b_ptr); 131void tipc_bearer_lock_push(struct bearer *b_ptr);
@@ -154,17 +155,4 @@ static inline int tipc_bearer_send(struct bearer *b_ptr, struct sk_buff *buf,
154 return !b_ptr->media->send_msg(buf, &b_ptr->publ, dest); 155 return !b_ptr->media->send_msg(buf, &b_ptr->publ, dest);
155} 156}
156 157
157/** 158#endif /* _TIPC_BEARER_H */
158 * tipc_bearer_congested - determines if bearer is currently congested
159 */
160
161static inline int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr)
162{
163 if (unlikely(b_ptr->publ.blocked))
164 return 1;
165 if (likely(list_empty(&b_ptr->cong_links)))
166 return 0;
167 return !tipc_bearer_resolve_congestion(b_ptr, l_ptr);
168}
169
170#endif
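This hunk repeats a refactor used throughout the series: a non-trivial helper stops being a static inline in the header and becomes a normal function, so only a prototype stays visible and a single definition is compiled. A schematic of the resulting shape, with a stand-in struct:

    /* In the header, only the type and a prototype remain visible: */
    struct bearer { int blocked; };
    int bearer_congested(struct bearer *b);

    /* In the .c file, one shared definition replaces the static inline
     * body that every includer used to instantiate separately: */
    int bearer_congested(struct bearer *b)
    {
    	return b->blocked;
    }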
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index a7eac00cd363..e68f705381bc 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -238,7 +238,7 @@ static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest)
238 if (buf) { 238 if (buf) {
239 msg = buf_msg(buf); 239 msg = buf_msg(buf);
240 memset((char *)msg, 0, size); 240 memset((char *)msg, 0, size);
241 msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest); 241 tipc_msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest);
242 } 242 }
243 return buf; 243 return buf;
244} 244}
diff --git a/net/tipc/config.c b/net/tipc/config.c
index ca3544d030c7..961d1b097146 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -56,9 +56,6 @@ struct subscr_data {
56struct manager { 56struct manager {
57 u32 user_ref; 57 u32 user_ref;
58 u32 port_ref; 58 u32 port_ref;
59 u32 subscr_ref;
60 u32 link_subscriptions;
61 struct list_head link_subscribers;
62}; 59};
63 60
64static struct manager mng = { 0}; 61static struct manager mng = { 0};
@@ -70,12 +67,6 @@ static int req_tlv_space; /* request message TLV area size */
70static int rep_headroom; /* reply message headroom to use */ 67static int rep_headroom; /* reply message headroom to use */
71 68
72 69
73void tipc_cfg_link_event(u32 addr, char *name, int up)
74{
75 /* TIPC DOESN'T HANDLE LINK EVENT SUBSCRIPTIONS AT THE MOMENT */
76}
77
78
79struct sk_buff *tipc_cfg_reply_alloc(int payload_size) 70struct sk_buff *tipc_cfg_reply_alloc(int payload_size)
80{ 71{
81 struct sk_buff *buf; 72 struct sk_buff *buf;
@@ -130,12 +121,24 @@ struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string)
130} 121}
131 122
132 123
133
134
135#if 0 124#if 0
136 125
137/* Now obsolete code for handling commands not yet implemented the new way */ 126/* Now obsolete code for handling commands not yet implemented the new way */
138 127
128/*
129 * Some of this code assumed that the manager structure contains two added
130 * fields:
131 * u32 link_subscriptions;
132 * struct list_head link_subscribers;
133 * which are currently not present. These fields may need to be re-introduced
134 * if and when support for link subscriptions is added.
135 */
136
137void tipc_cfg_link_event(u32 addr, char *name, int up)
138{
139 /* TIPC DOESN'T HANDLE LINK EVENT SUBSCRIPTIONS AT THE MOMENT */
140}
141
139int tipc_cfg_cmd(const struct tipc_cmd_msg * msg, 142int tipc_cfg_cmd(const struct tipc_cmd_msg * msg,
140 char *data, 143 char *data,
141 u32 sz, 144 u32 sz,
@@ -243,13 +246,48 @@ static void cfg_cmd_event(struct tipc_cmd_msg *msg,
243 default: 246 default:
244 rv = tipc_cfg_cmd(msg, data, sz, (u32 *)&msg_sect[1].iov_len, orig); 247 rv = tipc_cfg_cmd(msg, data, sz, (u32 *)&msg_sect[1].iov_len, orig);
245 } 248 }
246 exit: 249exit:
247 rmsg.result_len = htonl(msg_sect[1].iov_len); 250 rmsg.result_len = htonl(msg_sect[1].iov_len);
248 rmsg.retval = htonl(rv); 251 rmsg.retval = htonl(rv);
249 tipc_cfg_respond(msg_sect, 2u, orig); 252 tipc_cfg_respond(msg_sect, 2u, orig);
250} 253}
251#endif 254#endif
252 255
256#define MAX_STATS_INFO 2000
257
258static struct sk_buff *tipc_show_stats(void)
259{
260 struct sk_buff *buf;
261 struct tlv_desc *rep_tlv;
262 struct print_buf pb;
263 int str_len;
264 u32 value;
265
266 if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
267 return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
268
269 value = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
270 if (value != 0)
271 return tipc_cfg_reply_error_string("unsupported argument");
272
273 buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_STATS_INFO));
274 if (buf == NULL)
275 return NULL;
276
277 rep_tlv = (struct tlv_desc *)buf->data;
278 tipc_printbuf_init(&pb, (char *)TLV_DATA(rep_tlv), MAX_STATS_INFO);
279
280 tipc_printf(&pb, "TIPC version " TIPC_MOD_VER "\n");
281
282 /* Use additional tipc_printf()'s to return more info ... */
283
284 str_len = tipc_printbuf_validate(&pb);
285 skb_put(buf, TLV_SPACE(str_len));
286 TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
287
288 return buf;
289}
290
253static struct sk_buff *cfg_enable_bearer(void) 291static struct sk_buff *cfg_enable_bearer(void)
254{ 292{
255 struct tipc_bearer_config *args; 293 struct tipc_bearer_config *args;
@@ -533,6 +571,9 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
533 case TIPC_CMD_DUMP_LOG: 571 case TIPC_CMD_DUMP_LOG:
534 rep_tlv_buf = tipc_log_dump(); 572 rep_tlv_buf = tipc_log_dump();
535 break; 573 break;
574 case TIPC_CMD_SHOW_STATS:
575 rep_tlv_buf = tipc_show_stats();
576 break;
536 case TIPC_CMD_SET_LINK_TOL: 577 case TIPC_CMD_SET_LINK_TOL:
537 case TIPC_CMD_SET_LINK_PRI: 578 case TIPC_CMD_SET_LINK_PRI:
538 case TIPC_CMD_SET_LINK_WINDOW: 579 case TIPC_CMD_SET_LINK_WINDOW:
@@ -667,9 +708,6 @@ int tipc_cfg_init(void)
667 struct tipc_name_seq seq; 708 struct tipc_name_seq seq;
668 int res; 709 int res;
669 710
670 memset(&mng, 0, sizeof(mng));
671 INIT_LIST_HEAD(&mng.link_subscribers);
672
673 res = tipc_attach(&mng.user_ref, NULL, NULL); 711 res = tipc_attach(&mng.user_ref, NULL, NULL);
674 if (res) 712 if (res)
675 goto failed; 713 goto failed;
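The new tipc_show_stats() above follows TIPC's usual reply pattern: allocate a TLV-sized buffer, print bounded text into the payload through a print_buf, then patch the real length into the TLV header once the text is validated. A simplified userspace model of that sequence; the TLV layout here is illustrative, not TIPC's wire format:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct tlv {				/* illustrative TLV header */
    	uint16_t type;
    	uint16_t len;			/* header + payload, patched last */
    	char     data[];
    };

    static struct tlv *build_text_reply(uint16_t type, size_t max_payload)
    {
    	struct tlv *t = malloc(sizeof(*t) + max_payload);
    	int n;

    	if (!t)
    		return NULL;
    	/* Print into the payload with a hard bound, like print_buf. */
    	n = snprintf(t->data, max_payload, "TIPC version %s\n", "2.0.0");
    	t->type = type;
    	t->len = (uint16_t)(sizeof(*t) + n + 1);  /* patch real length */
    	return t;
    }

    int main(void)
    {
    	struct tlv *t = build_text_reply(0x100, 2000);

    	if (t) {
    		printf("tlv type=%u len=%u\n%s", t->type, t->len, t->data);
    		free(t);
    	}
    	return 0;
    }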
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 52c571fedbe0..696468117985 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -49,8 +49,6 @@
49#include "config.h" 49#include "config.h"
50 50
51 51
52#define TIPC_MOD_VER "1.6.4"
53
54#ifndef CONFIG_TIPC_ZONES 52#ifndef CONFIG_TIPC_ZONES
55#define CONFIG_TIPC_ZONES 3 53#define CONFIG_TIPC_ZONES 3
56#endif 54#endif
@@ -104,6 +102,30 @@ int tipc_get_mode(void)
104} 102}
105 103
106/** 104/**
105 * buf_acquire - creates a TIPC message buffer
106 * @size: message size (including TIPC header)
107 *
108 * Returns a new buffer with data pointers set to the specified size.
109 *
110 * NOTE: Headroom is reserved to allow prepending of a data link header.
111 * There may also be unrequested tailroom present at the buffer's end.
112 */
113
114struct sk_buff *buf_acquire(u32 size)
115{
116 struct sk_buff *skb;
117 unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
118
119 skb = alloc_skb_fclone(buf_size, GFP_ATOMIC);
120 if (skb) {
121 skb_reserve(skb, BUF_HEADROOM);
122 skb_put(skb, size);
123 skb->next = NULL;
124 }
125 return skb;
126}
127
128/**
107 * tipc_core_stop_net - shut down TIPC networking sub-systems 129 * tipc_core_stop_net - shut down TIPC networking sub-systems
108 */ 130 */
109 131
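buf_acquire(), now defined once in core.c, rounds the allocation up to a 4-byte boundary and reserves headroom so a data-link header can be prepended later without copying. A userspace model of the reserve-then-put arithmetic; the BUF_HEADROOM value is illustrative:

    #include <stdlib.h>

    #define BUF_HEADROOM 80			/* illustrative headroom */

    struct buf {
    	unsigned char *head;		/* start of allocation */
    	unsigned char *data;		/* start of message */
    	size_t len;			/* bytes of message data */
    };

    static struct buf *buf_acquire(size_t size)
    {
    	/* Round the total up to a 4-byte boundary, as the kernel code does. */
    	size_t total = (BUF_HEADROOM + size + 3) & ~(size_t)3;
    	struct buf *b = malloc(sizeof(*b));

    	if (!b || !(b->head = calloc(1, total))) {
    		free(b);
    		return NULL;
    	}
    	b->data = b->head + BUF_HEADROOM;  /* skb_reserve() equivalent */
    	b->len = size;                     /* skb_put() equivalent */
    	return b;
    }

The kernel version layers the same arithmetic on alloc_skb_fclone(); any slack from the rounding simply shows up as unrequested tailroom, as the comment in the function notes.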
diff --git a/net/tipc/core.h b/net/tipc/core.h
index c58a1d16563a..188799017abd 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -59,6 +59,9 @@
59#include <linux/slab.h> 59#include <linux/slab.h>
60#include <linux/vmalloc.h> 60#include <linux/vmalloc.h>
61 61
62
63#define TIPC_MOD_VER "2.0.0"
64
62/* 65/*
63 * TIPC sanity test macros 66 * TIPC sanity test macros
64 */ 67 */
@@ -325,29 +328,7 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
325 return (struct tipc_msg *)skb->data; 328 return (struct tipc_msg *)skb->data;
326} 329}
327 330
328/** 331extern struct sk_buff *buf_acquire(u32 size);
329 * buf_acquire - creates a TIPC message buffer
330 * @size: message size (including TIPC header)
331 *
332 * Returns a new buffer with data pointers set to the specified size.
333 *
334 * NOTE: Headroom is reserved to allow prepending of a data link header.
335 * There may also be unrequested tailroom present at the buffer's end.
336 */
337
338static inline struct sk_buff *buf_acquire(u32 size)
339{
340 struct sk_buff *skb;
341 unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
342
343 skb = alloc_skb_fclone(buf_size, GFP_ATOMIC);
344 if (skb) {
345 skb_reserve(skb, BUF_HEADROOM);
346 skb_put(skb, size);
347 skb->next = NULL;
348 }
349 return skb;
350}
351 332
352/** 333/**
353 * buf_discard - frees a TIPC message buffer 334 * buf_discard - frees a TIPC message buffer
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 74b7d1e28aec..fc1fcf5e6b53 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -120,7 +120,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
120 120
121 if (buf) { 121 if (buf) {
122 msg = buf_msg(buf); 122 msg = buf_msg(buf);
123 msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); 123 tipc_msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain);
124 msg_set_non_seq(msg, 1); 124 msg_set_non_seq(msg, 1);
125 msg_set_req_links(msg, req_links); 125 msg_set_req_links(msg, req_links);
126 msg_set_dest_domain(msg, dest_domain); 126 msg_set_dest_domain(msg, dest_domain);
@@ -144,7 +144,7 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
144 char media_addr_str[64]; 144 char media_addr_str[64];
145 struct print_buf pb; 145 struct print_buf pb;
146 146
147 addr_string_fill(node_addr_str, node_addr); 147 tipc_addr_string_fill(node_addr_str, node_addr);
148 tipc_printbuf_init(&pb, media_addr_str, sizeof(media_addr_str)); 148 tipc_printbuf_init(&pb, media_addr_str, sizeof(media_addr_str));
149 tipc_media_addr_printf(&pb, media_addr); 149 tipc_media_addr_printf(&pb, media_addr);
150 tipc_printbuf_validate(&pb); 150 tipc_printbuf_validate(&pb);
@@ -183,7 +183,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
183 disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr); 183 disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr);
184 return; 184 return;
185 } 185 }
186 if (!in_scope(dest, tipc_own_addr)) 186 if (!tipc_in_scope(dest, tipc_own_addr))
187 return; 187 return;
188 if (is_slave(tipc_own_addr) && is_slave(orig)) 188 if (is_slave(tipc_own_addr) && is_slave(orig))
189 return; 189 return;
@@ -224,7 +224,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
224 memcpy(addr, &media_addr, sizeof(*addr)); 224 memcpy(addr, &media_addr, sizeof(*addr));
225 tipc_link_reset(link); 225 tipc_link_reset(link);
226 } 226 }
227 link_fully_up = (link->state == WORKING_WORKING); 227 link_fully_up = link_working_working(link);
228 spin_unlock_bh(&n_ptr->lock); 228 spin_unlock_bh(&n_ptr->lock);
229 if ((type == DSC_RESP_MSG) || link_fully_up) 229 if ((type == DSC_RESP_MSG) || link_fully_up)
230 return; 230 return;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1a7e4665af80..a3616b99529b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -202,41 +202,6 @@ static unsigned int align(unsigned int i)
202 return (i + 3) & ~3u; 202 return (i + 3) & ~3u;
203} 203}
204 204
205static int link_working_working(struct link *l_ptr)
206{
207 return (l_ptr->state == WORKING_WORKING);
208}
209
210static int link_working_unknown(struct link *l_ptr)
211{
212 return (l_ptr->state == WORKING_UNKNOWN);
213}
214
215static int link_reset_unknown(struct link *l_ptr)
216{
217 return (l_ptr->state == RESET_UNKNOWN);
218}
219
220static int link_reset_reset(struct link *l_ptr)
221{
222 return (l_ptr->state == RESET_RESET);
223}
224
225static int link_blocked(struct link *l_ptr)
226{
227 return (l_ptr->exp_msg_count || l_ptr->blocked);
228}
229
230static int link_congested(struct link *l_ptr)
231{
232 return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]);
233}
234
235static u32 link_max_pkt(struct link *l_ptr)
236{
237 return l_ptr->max_pkt;
238}
239
240static void link_init_max_pkt(struct link *l_ptr) 205static void link_init_max_pkt(struct link *l_ptr)
241{ 206{
242 u32 max_pkt; 207 u32 max_pkt;
@@ -468,7 +433,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
468 433
469 l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; 434 l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg;
470 msg = l_ptr->pmsg; 435 msg = l_ptr->pmsg;
471 msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); 436 tipc_msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr);
472 msg_set_size(msg, sizeof(l_ptr->proto_msg)); 437 msg_set_size(msg, sizeof(l_ptr->proto_msg));
473 msg_set_session(msg, (tipc_random & 0xffff)); 438 msg_set_session(msg, (tipc_random & 0xffff));
474 msg_set_bearer_id(msg, b_ptr->identity); 439 msg_set_bearer_id(msg, b_ptr->identity);
@@ -561,9 +526,8 @@ static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
561 goto exit; 526 goto exit;
562 if (!list_empty(&p_ptr->wait_list)) 527 if (!list_empty(&p_ptr->wait_list))
563 goto exit; 528 goto exit;
564 p_ptr->congested_link = l_ptr;
565 p_ptr->publ.congested = 1; 529 p_ptr->publ.congested = 1;
566 p_ptr->waiting_pkts = 1 + ((sz - 1) / link_max_pkt(l_ptr)); 530 p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt);
567 list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports); 531 list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports);
568 l_ptr->stats.link_congs++; 532 l_ptr->stats.link_congs++;
569exit: 533exit:
@@ -592,7 +556,6 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all)
592 if (win <= 0) 556 if (win <= 0)
593 break; 557 break;
594 list_del_init(&p_ptr->wait_list); 558 list_del_init(&p_ptr->wait_list);
595 p_ptr->congested_link = NULL;
596 spin_lock_bh(p_ptr->publ.lock); 559 spin_lock_bh(p_ptr->publ.lock);
597 p_ptr->publ.congested = 0; 560 p_ptr->publ.congested = 0;
598 p_ptr->wakeup(&p_ptr->publ); 561 p_ptr->wakeup(&p_ptr->publ);
@@ -877,7 +840,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
877 case TIMEOUT_EVT: 840 case TIMEOUT_EVT:
878 dbg_link("TIM "); 841 dbg_link("TIM ");
879 if (l_ptr->next_in_no != l_ptr->checkpoint) { 842 if (l_ptr->next_in_no != l_ptr->checkpoint) {
880 dbg_link("-> WW \n"); 843 dbg_link("-> WW\n");
881 l_ptr->state = WORKING_WORKING; 844 l_ptr->state = WORKING_WORKING;
882 l_ptr->fsm_msg_cnt = 0; 845 l_ptr->fsm_msg_cnt = 0;
883 l_ptr->checkpoint = l_ptr->next_in_no; 846 l_ptr->checkpoint = l_ptr->next_in_no;
@@ -934,7 +897,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
934 link_set_timer(l_ptr, cont_intv); 897 link_set_timer(l_ptr, cont_intv);
935 break; 898 break;
936 case RESET_MSG: 899 case RESET_MSG:
937 dbg_link("RES \n"); 900 dbg_link("RES\n");
938 dbg_link(" -> RR\n"); 901 dbg_link(" -> RR\n");
939 l_ptr->state = RESET_RESET; 902 l_ptr->state = RESET_RESET;
940 l_ptr->fsm_msg_cnt = 0; 903 l_ptr->fsm_msg_cnt = 0;
@@ -947,7 +910,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
947 l_ptr->started = 1; 910 l_ptr->started = 1;
948 /* fall through */ 911 /* fall through */
949 case TIMEOUT_EVT: 912 case TIMEOUT_EVT:
950 dbg_link("TIM \n"); 913 dbg_link("TIM\n");
951 tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0); 914 tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0);
952 l_ptr->fsm_msg_cnt++; 915 l_ptr->fsm_msg_cnt++;
953 link_set_timer(l_ptr, cont_intv); 916 link_set_timer(l_ptr, cont_intv);
@@ -1017,7 +980,7 @@ static int link_bundle_buf(struct link *l_ptr,
1017 return 0; 980 return 0;
1018 if (skb_tailroom(bundler) < (pad + size)) 981 if (skb_tailroom(bundler) < (pad + size))
1019 return 0; 982 return 0;
1020 if (link_max_pkt(l_ptr) < (to_pos + size)) 983 if (l_ptr->max_pkt < (to_pos + size))
1021 return 0; 984 return 0;
1022 985
1023 skb_put(bundler, pad + size); 986 skb_put(bundler, pad + size);
@@ -1062,9 +1025,9 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
1062 u32 size = msg_size(msg); 1025 u32 size = msg_size(msg);
1063 u32 dsz = msg_data_sz(msg); 1026 u32 dsz = msg_data_sz(msg);
1064 u32 queue_size = l_ptr->out_queue_size; 1027 u32 queue_size = l_ptr->out_queue_size;
1065 u32 imp = msg_tot_importance(msg); 1028 u32 imp = tipc_msg_tot_importance(msg);
1066 u32 queue_limit = l_ptr->queue_limit[imp]; 1029 u32 queue_limit = l_ptr->queue_limit[imp];
1067 u32 max_packet = link_max_pkt(l_ptr); 1030 u32 max_packet = l_ptr->max_pkt;
1068 1031
1069 msg_set_prevnode(msg, tipc_own_addr); /* If routed message */ 1032 msg_set_prevnode(msg, tipc_own_addr); /* If routed message */
1070 1033
@@ -1127,7 +1090,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
1127 struct tipc_msg bundler_hdr; 1090 struct tipc_msg bundler_hdr;
1128 1091
1129 if (bundler) { 1092 if (bundler) {
1130 msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG, 1093 tipc_msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
1131 INT_H_SIZE, l_ptr->addr); 1094 INT_H_SIZE, l_ptr->addr);
1132 skb_copy_to_linear_data(bundler, &bundler_hdr, 1095 skb_copy_to_linear_data(bundler, &bundler_hdr,
1133 INT_H_SIZE); 1096 INT_H_SIZE);
@@ -1195,7 +1158,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
1195 int res = msg_data_sz(msg); 1158 int res = msg_data_sz(msg);
1196 1159
1197 if (likely(!link_congested(l_ptr))) { 1160 if (likely(!link_congested(l_ptr))) {
1198 if (likely(msg_size(msg) <= link_max_pkt(l_ptr))) { 1161 if (likely(msg_size(msg) <= l_ptr->max_pkt)) {
1199 if (likely(list_empty(&l_ptr->b_ptr->cong_links))) { 1162 if (likely(list_empty(&l_ptr->b_ptr->cong_links))) {
1200 link_add_to_outqueue(l_ptr, buf, msg); 1163 link_add_to_outqueue(l_ptr, buf, msg);
1201 if (likely(tipc_bearer_send(l_ptr->b_ptr, buf, 1164 if (likely(tipc_bearer_send(l_ptr->b_ptr, buf,
@@ -1212,7 +1175,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
1212 } 1175 }
1213 } 1176 }
1214 else 1177 else
1215 *used_max_pkt = link_max_pkt(l_ptr); 1178 *used_max_pkt = l_ptr->max_pkt;
1216 } 1179 }
1217 return tipc_link_send_buf(l_ptr, buf); /* All other cases */ 1180 return tipc_link_send_buf(l_ptr, buf); /* All other cases */
1218} 1181}
@@ -1280,7 +1243,7 @@ again:
1280 * (Must not hold any locks while building message.) 1243 * (Must not hold any locks while building message.)
1281 */ 1244 */
1282 1245
1283 res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt, 1246 res = tipc_msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt,
1284 !sender->user_port, &buf); 1247 !sender->user_port, &buf);
1285 1248
1286 read_lock_bh(&tipc_net_lock); 1249 read_lock_bh(&tipc_net_lock);
@@ -1319,7 +1282,7 @@ exit:
1319 * then re-try fast path or fragment the message 1282 * then re-try fast path or fragment the message
1320 */ 1283 */
1321 1284
1322 sender->publ.max_pkt = link_max_pkt(l_ptr); 1285 sender->publ.max_pkt = l_ptr->max_pkt;
1323 tipc_node_unlock(node); 1286 tipc_node_unlock(node);
1324 read_unlock_bh(&tipc_net_lock); 1287 read_unlock_bh(&tipc_net_lock);
1325 1288
@@ -1391,7 +1354,7 @@ again:
1391 /* Prepare reusable fragment header: */ 1354 /* Prepare reusable fragment header: */
1392 1355
1393 msg_dbg(hdr, ">FRAGMENTING>"); 1356 msg_dbg(hdr, ">FRAGMENTING>");
1394 msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, 1357 tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
1395 INT_H_SIZE, msg_destnode(hdr)); 1358 INT_H_SIZE, msg_destnode(hdr));
1396 msg_set_link_selector(&fragm_hdr, sender->publ.ref); 1359 msg_set_link_selector(&fragm_hdr, sender->publ.ref);
1397 msg_set_size(&fragm_hdr, max_pkt); 1360 msg_set_size(&fragm_hdr, max_pkt);
@@ -1482,8 +1445,8 @@ error:
1482 tipc_node_unlock(node); 1445 tipc_node_unlock(node);
1483 goto reject; 1446 goto reject;
1484 } 1447 }
1485 if (link_max_pkt(l_ptr) < max_pkt) { 1448 if (l_ptr->max_pkt < max_pkt) {
1486 sender->publ.max_pkt = link_max_pkt(l_ptr); 1449 sender->publ.max_pkt = l_ptr->max_pkt;
1487 tipc_node_unlock(node); 1450 tipc_node_unlock(node);
1488 for (; buf_chain; buf_chain = buf) { 1451 for (; buf_chain; buf_chain = buf) {
1489 buf = buf_chain->next; 1452 buf = buf_chain->next;
@@ -1553,7 +1516,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
1553 1516
1554 /* Continue retransmission now, if there is anything: */ 1517 /* Continue retransmission now, if there is anything: */
1555 1518
1556 if (r_q_size && buf && !skb_cloned(buf)) { 1519 if (r_q_size && buf) {
1557 msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1)); 1520 msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
1558 msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in); 1521 msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
1559 if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) { 1522 if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
@@ -1650,7 +1613,7 @@ static void link_reset_all(unsigned long addr)
1650 tipc_node_lock(n_ptr); 1613 tipc_node_lock(n_ptr);
1651 1614
1652 warn("Resetting all links to %s\n", 1615 warn("Resetting all links to %s\n",
1653 addr_string_fill(addr_string, n_ptr->addr)); 1616 tipc_addr_string_fill(addr_string, n_ptr->addr));
1654 1617
1655 for (i = 0; i < MAX_BEARERS; i++) { 1618 for (i = 0; i < MAX_BEARERS; i++) {
1656 if (n_ptr->links[i]) { 1619 if (n_ptr->links[i]) {
@@ -1692,7 +1655,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
1692 n_ptr = l_ptr->owner->next; 1655 n_ptr = l_ptr->owner->next;
1693 tipc_node_lock(n_ptr); 1656 tipc_node_lock(n_ptr);
1694 1657
1695 addr_string_fill(addr_string, n_ptr->addr); 1658 tipc_addr_string_fill(addr_string, n_ptr->addr);
1696 tipc_printf(TIPC_OUTPUT, "Multicast link info for %s\n", addr_string); 1659 tipc_printf(TIPC_OUTPUT, "Multicast link info for %s\n", addr_string);
1697 tipc_printf(TIPC_OUTPUT, "Supported: %d, ", n_ptr->bclink.supported); 1660 tipc_printf(TIPC_OUTPUT, "Supported: %d, ", n_ptr->bclink.supported);
1698 tipc_printf(TIPC_OUTPUT, "Acked: %u\n", n_ptr->bclink.acked); 1661 tipc_printf(TIPC_OUTPUT, "Acked: %u\n", n_ptr->bclink.acked);
@@ -1722,15 +1685,16 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
1722 dbg("Retransmitting %u in link %x\n", retransmits, l_ptr); 1685 dbg("Retransmitting %u in link %x\n", retransmits, l_ptr);
1723 1686
1724 if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { 1687 if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
1725 if (!skb_cloned(buf)) { 1688 if (l_ptr->retransm_queue_size == 0) {
1726 msg_dbg(msg, ">NO_RETR->BCONG>"); 1689 msg_dbg(msg, ">NO_RETR->BCONG>");
1727 dbg_print_link(l_ptr, " "); 1690 dbg_print_link(l_ptr, " ");
1728 l_ptr->retransm_queue_head = msg_seqno(msg); 1691 l_ptr->retransm_queue_head = msg_seqno(msg);
1729 l_ptr->retransm_queue_size = retransmits; 1692 l_ptr->retransm_queue_size = retransmits;
1730 return;
1731 } else { 1693 } else {
1732 /* Don't retransmit if driver already has the buffer */ 1694 err("Unexpected retransmit on link %s (qsize=%d)\n",
1695 l_ptr->name, l_ptr->retransm_queue_size);
1733 } 1696 }
1697 return;
1734 } else { 1698 } else {
1735 /* Detect repeated retransmit failures on uncongested bearer */ 1699 /* Detect repeated retransmit failures on uncongested bearer */
1736 1700
@@ -1745,7 +1709,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
1745 } 1709 }
1746 } 1710 }
1747 1711
1748 while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) { 1712 while (retransmits && (buf != l_ptr->next_out) && buf) {
1749 msg = buf_msg(buf); 1713 msg = buf_msg(buf);
1750 msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); 1714 msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
1751 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); 1715 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
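With the skb_cloned() probes removed above, tipc_link_retransmit() relies on explicit bookkeeping instead: when the bearer is congested it records the first sequence number and count to resend later, and treats an already-pending retransmission as an error rather than silently restarting it. A small sketch of that deferred-retransmit state; the field names mirror the patch, the surrounding types do not:

    #include <stdio.h>

    struct fake_link {
    	unsigned int retransm_queue_head;
    	unsigned int retransm_queue_size;
    };

    /* Called when the bearer is congested: remember what to resend later
     * instead of sending now.  A non-empty queue means a retransmission is
     * already pending, which the caller should never allow. */
    static void defer_retransmit(struct fake_link *l,
    			     unsigned int first_seqno, unsigned int count)
    {
    	if (l->retransm_queue_size == 0) {
    		l->retransm_queue_head = first_seqno;
    		l->retransm_queue_size = count;
    	} else {
    		fprintf(stderr, "unexpected retransmit (qsize=%u)\n",
    			l->retransm_queue_size);
    	}
    }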
@@ -2434,7 +2398,7 @@ void tipc_link_changeover(struct link *l_ptr)
2434 return; 2398 return;
2435 } 2399 }
2436 2400
2437 msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, 2401 tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
2438 ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); 2402 ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr);
2439 msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); 2403 msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
2440 msg_set_msgcnt(&tunnel_hdr, msgcount); 2404 msg_set_msgcnt(&tunnel_hdr, msgcount);
@@ -2489,7 +2453,7 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
2489 struct sk_buff *iter; 2453 struct sk_buff *iter;
2490 struct tipc_msg tunnel_hdr; 2454 struct tipc_msg tunnel_hdr;
2491 2455
2492 msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, 2456 tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
2493 DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); 2457 DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr);
2494 msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size); 2458 msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size);
2495 msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); 2459 msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
@@ -2680,7 +2644,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2680 u32 dsz = msg_data_sz(inmsg); 2644 u32 dsz = msg_data_sz(inmsg);
2681 unchar *crs = buf->data; 2645 unchar *crs = buf->data;
2682 u32 rest = insize; 2646 u32 rest = insize;
2683 u32 pack_sz = link_max_pkt(l_ptr); 2647 u32 pack_sz = l_ptr->max_pkt;
2684 u32 fragm_sz = pack_sz - INT_H_SIZE; 2648 u32 fragm_sz = pack_sz - INT_H_SIZE;
2685 u32 fragm_no = 1; 2649 u32 fragm_no = 1;
2686 u32 destaddr; 2650 u32 destaddr;
@@ -2695,7 +2659,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2695 2659
2696 /* Prepare reusable fragment header: */ 2660 /* Prepare reusable fragment header: */
2697 2661
2698 msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, 2662 tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
2699 INT_H_SIZE, destaddr); 2663 INT_H_SIZE, destaddr);
2700 msg_set_link_selector(&fragm_hdr, msg_link_selector(inmsg)); 2664 msg_set_link_selector(&fragm_hdr, msg_link_selector(inmsg));
2701 msg_set_long_msgno(&fragm_hdr, mod(l_ptr->long_msg_seq_no++)); 2665 msg_set_long_msgno(&fragm_hdr, mod(l_ptr->long_msg_seq_no++));
@@ -3126,7 +3090,7 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
3126 tipc_printf(&pb, "Link <%s>\n" 3090 tipc_printf(&pb, "Link <%s>\n"
3127 " %s MTU:%u Priority:%u Tolerance:%u ms" 3091 " %s MTU:%u Priority:%u Tolerance:%u ms"
3128 " Window:%u packets\n", 3092 " Window:%u packets\n",
3129 l_ptr->name, status, link_max_pkt(l_ptr), 3093 l_ptr->name, status, l_ptr->max_pkt,
3130 l_ptr->priority, l_ptr->tolerance, l_ptr->queue_limit[0]); 3094 l_ptr->priority, l_ptr->tolerance, l_ptr->queue_limit[0]);
3131 tipc_printf(&pb, " RX packets:%u fragments:%u/%u bundles:%u/%u\n", 3095 tipc_printf(&pb, " RX packets:%u fragments:%u/%u bundles:%u/%u\n",
3132 l_ptr->next_in_no - l_ptr->stats.recv_info, 3096 l_ptr->next_in_no - l_ptr->stats.recv_info,
@@ -3271,7 +3235,7 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
3271 tipc_node_lock(n_ptr); 3235 tipc_node_lock(n_ptr);
3272 l_ptr = n_ptr->active_links[selector & 1]; 3236 l_ptr = n_ptr->active_links[selector & 1];
3273 if (l_ptr) 3237 if (l_ptr)
3274 res = link_max_pkt(l_ptr); 3238 res = l_ptr->max_pkt;
3275 tipc_node_unlock(n_ptr); 3239 tipc_node_unlock(n_ptr);
3276 } 3240 }
3277 read_unlock_bh(&tipc_net_lock); 3241 read_unlock_bh(&tipc_net_lock);
@@ -3294,7 +3258,7 @@ static void link_dump_rec_queue(struct link *l_ptr)
3294 info("buffer %x invalid\n", crs); 3258 info("buffer %x invalid\n", crs);
3295 return; 3259 return;
3296 } 3260 }
3297 msg_dbg(buf_msg(crs), "In rec queue: \n"); 3261 msg_dbg(buf_msg(crs), "In rec queue:\n");
3298 crs = crs->next; 3262 crs = crs->next;
3299 } 3263 }
3300} 3264}
@@ -3329,9 +3293,7 @@ static void link_print(struct link *l_ptr, struct print_buf *buf,
3329 if (l_ptr->next_out) 3293 if (l_ptr->next_out)
3330 tipc_printf(buf, "%u..", 3294 tipc_printf(buf, "%u..",
3331 msg_seqno(buf_msg(l_ptr->next_out))); 3295 msg_seqno(buf_msg(l_ptr->next_out)));
3332 tipc_printf(buf, "%u]", 3296 tipc_printf(buf, "%u]", msg_seqno(buf_msg(l_ptr->last_out)));
3333 msg_seqno(buf_msg
3334 (l_ptr->last_out)), l_ptr->out_queue_size);
3335 if ((mod(msg_seqno(buf_msg(l_ptr->last_out)) - 3297 if ((mod(msg_seqno(buf_msg(l_ptr->last_out)) -
3336 msg_seqno(buf_msg(l_ptr->first_out))) 3298 msg_seqno(buf_msg(l_ptr->first_out)))
3337 != (l_ptr->out_queue_size - 1)) || 3299 != (l_ptr->out_queue_size - 1)) ||
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 6a51e38ad25c..2e5385c47d30 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -292,4 +292,39 @@ static inline u32 lesser(u32 left, u32 right)
 	return less_eq(left, right) ? left : right;
 }
 
+
+/*
+ * Link status checking routines
+ */
+
+static inline int link_working_working(struct link *l_ptr)
+{
+	return (l_ptr->state == WORKING_WORKING);
+}
+
+static inline int link_working_unknown(struct link *l_ptr)
+{
+	return (l_ptr->state == WORKING_UNKNOWN);
+}
+
+static inline int link_reset_unknown(struct link *l_ptr)
+{
+	return (l_ptr->state == RESET_UNKNOWN);
+}
+
+static inline int link_reset_reset(struct link *l_ptr)
+{
+	return (l_ptr->state == RESET_RESET);
+}
+
+static inline int link_blocked(struct link *l_ptr)
+{
+	return (l_ptr->exp_msg_count || l_ptr->blocked);
+}
+
+static inline int link_congested(struct link *l_ptr)
+{
+	return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]);
+}
+
 #endif
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 73dcd00d674e..381063817b41 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -40,6 +40,100 @@
 #include "msg.h"
 #include "bearer.h"
 
+u32 tipc_msg_tot_importance(struct tipc_msg *m)
+{
+	if (likely(msg_isdata(m))) {
+		if (likely(msg_orignode(m) == tipc_own_addr))
+			return msg_importance(m);
+		return msg_importance(m) + 4;
+	}
+	if ((msg_user(m) == MSG_FRAGMENTER) &&
+	    (msg_type(m) == FIRST_FRAGMENT))
+		return msg_importance(msg_get_wrapped(m));
+	return msg_importance(m);
+}
+
+
+void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
+		   u32 hsize, u32 destnode)
+{
+	memset(m, 0, hsize);
+	msg_set_version(m);
+	msg_set_user(m, user);
+	msg_set_hdr_sz(m, hsize);
+	msg_set_size(m, hsize);
+	msg_set_prevnode(m, tipc_own_addr);
+	msg_set_type(m, type);
+	if (!msg_short(m)) {
+		msg_set_orignode(m, tipc_own_addr);
+		msg_set_destnode(m, destnode);
+	}
+}
+
+/**
+ * tipc_msg_calc_data_size - determine total data size for message
+ */
+
+int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
+{
+	int dsz = 0;
+	int i;
+
+	for (i = 0; i < num_sect; i++)
+		dsz += msg_sect[i].iov_len;
+	return dsz;
+}
+
+/**
+ * tipc_msg_build - create message using specified header and data
+ *
+ * Note: Caller must not hold any locks in case copy_from_user() is interrupted!
+ *
+ * Returns message data size or errno
+ */
+
+int tipc_msg_build(struct tipc_msg *hdr,
+		   struct iovec const *msg_sect, u32 num_sect,
+		   int max_size, int usrmem, struct sk_buff** buf)
+{
+	int dsz, sz, hsz, pos, res, cnt;
+
+	dsz = tipc_msg_calc_data_size(msg_sect, num_sect);
+	if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) {
+		*buf = NULL;
+		return -EINVAL;
+	}
+
+	pos = hsz = msg_hdr_sz(hdr);
+	sz = hsz + dsz;
+	msg_set_size(hdr, sz);
+	if (unlikely(sz > max_size)) {
+		*buf = NULL;
+		return dsz;
+	}
+
+	*buf = buf_acquire(sz);
+	if (!(*buf))
+		return -ENOMEM;
+	skb_copy_to_linear_data(*buf, hdr, hsz);
+	for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
+		if (likely(usrmem))
+			res = !copy_from_user((*buf)->data + pos,
+					      msg_sect[cnt].iov_base,
+					      msg_sect[cnt].iov_len);
+		else
+			skb_copy_to_linear_data_offset(*buf, pos,
+						       msg_sect[cnt].iov_base,
+						       msg_sect[cnt].iov_len);
+		pos += msg_sect[cnt].iov_len;
+	}
+	if (likely(res))
+		return dsz;
+
+	buf_discard(*buf);
+	*buf = NULL;
+	return -EFAULT;
+}
 
 #ifdef CONFIG_TIPC_DEBUG
 
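The callers converted later in this diff (see port.c) all follow the same shape:
initialize a header with tipc_msg_init(), then let tipc_msg_build() size-check,
allocate and copy. A minimal sketch of that calling pattern, using only names
visible in this diff; importance and destnode stand in for caller-supplied
values, and error handling is abbreviated, so treat it as illustrative:

    struct sk_buff *buf;
    int res;

    tipc_msg_init(&sender->publ.phdr, importance, TIPC_NAMED_MSG,
                  LONG_H_SIZE, destnode);
    res = tipc_msg_build(&sender->publ.phdr, msg_sect, num_sect,
                         MAX_MSG_SIZE, !sender->user_port, &buf);
    if (!buf)
        return res;    /* -EINVAL, -ENOMEM, -EFAULT, or dsz if oversized */
    tipc_port_recv_msg(buf);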
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 7ee6ae238147..995d2da35b01 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -708,100 +708,13 @@ static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos)
 #define DSC_REQ_MSG 0
 #define DSC_RESP_MSG 1
 
-static inline u32 msg_tot_importance(struct tipc_msg *m)
-{
-	if (likely(msg_isdata(m))) {
-		if (likely(msg_orignode(m) == tipc_own_addr))
-			return msg_importance(m);
-		return msg_importance(m) + 4;
-	}
-	if ((msg_user(m) == MSG_FRAGMENTER) &&
-	    (msg_type(m) == FIRST_FRAGMENT))
-		return msg_importance(msg_get_wrapped(m));
-	return msg_importance(m);
-}
-
-
-static inline void msg_init(struct tipc_msg *m, u32 user, u32 type,
-			    u32 hsize, u32 destnode)
-{
-	memset(m, 0, hsize);
-	msg_set_version(m);
-	msg_set_user(m, user);
-	msg_set_hdr_sz(m, hsize);
-	msg_set_size(m, hsize);
-	msg_set_prevnode(m, tipc_own_addr);
-	msg_set_type(m, type);
-	if (!msg_short(m)) {
-		msg_set_orignode(m, tipc_own_addr);
-		msg_set_destnode(m, destnode);
-	}
-}
-
-/**
- * msg_calc_data_size - determine total data size for message
- */
-
-static inline int msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
-{
-	int dsz = 0;
-	int i;
-
-	for (i = 0; i < num_sect; i++)
-		dsz += msg_sect[i].iov_len;
-	return dsz;
-}
-
-/**
- * msg_build - create message using specified header and data
- *
- * Note: Caller must not hold any locks in case copy_from_user() is interrupted!
- *
- * Returns message data size or errno
- */
-
-static inline int msg_build(struct tipc_msg *hdr,
+u32 tipc_msg_tot_importance(struct tipc_msg *m);
+void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
+		   u32 hsize, u32 destnode);
+int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect);
+int tipc_msg_build(struct tipc_msg *hdr,
 		    struct iovec const *msg_sect, u32 num_sect,
-		    int max_size, int usrmem, struct sk_buff** buf)
-{
-	int dsz, sz, hsz, pos, res, cnt;
-
-	dsz = msg_calc_data_size(msg_sect, num_sect);
-	if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) {
-		*buf = NULL;
-		return -EINVAL;
-	}
-
-	pos = hsz = msg_hdr_sz(hdr);
-	sz = hsz + dsz;
-	msg_set_size(hdr, sz);
-	if (unlikely(sz > max_size)) {
-		*buf = NULL;
-		return dsz;
-	}
-
-	*buf = buf_acquire(sz);
-	if (!(*buf))
-		return -ENOMEM;
-	skb_copy_to_linear_data(*buf, hdr, hsz);
-	for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
-		if (likely(usrmem))
-			res = !copy_from_user((*buf)->data + pos,
-					      msg_sect[cnt].iov_base,
-					      msg_sect[cnt].iov_len);
-		else
-			skb_copy_to_linear_data_offset(*buf, pos,
-						       msg_sect[cnt].iov_base,
-						       msg_sect[cnt].iov_len);
-		pos += msg_sect[cnt].iov_len;
-	}
-	if (likely(res))
-		return dsz;
-
-	buf_discard(*buf);
-	*buf = NULL;
-	return -EFAULT;
-}
+		    int max_size, int usrmem, struct sk_buff** buf);
 
 static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
 {
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 10a69894e2fd..6ac3c543250b 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -103,7 +103,7 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
 
 	if (buf != NULL) {
 		msg = buf_msg(buf);
-		msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest);
+		tipc_msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest);
 		msg_set_size(msg, LONG_H_SIZE + size);
 	}
 	return buf;
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index acab41a48d67..8ba79620db3f 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -627,7 +627,7 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode)
 	struct name_seq *seq;
 	u32 ref;
 
-	if (!in_scope(*destnode, tipc_own_addr))
+	if (!tipc_in_scope(*destnode, tipc_own_addr))
 		return 0;
 
 	read_lock_bh(&tipc_nametbl_lock);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f25b1cdb64eb..f61b7694138b 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -116,7 +116,7 @@
 */
 
 DEFINE_RWLOCK(tipc_net_lock);
-struct _zone *tipc_zones[256] = { NULL, };
+static struct _zone *tipc_zones[256] = { NULL, };
 struct network tipc_net = { tipc_zones };
 
 struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
@@ -219,7 +219,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
 
 	/* Handle message for this node */
 	dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg);
-	if (in_scope(dnode, tipc_own_addr)) {
+	if (tipc_in_scope(dnode, tipc_own_addr)) {
 		if (msg_isdata(msg)) {
 			if (msg_mcast(msg))
 				tipc_port_recv_mcast(buf, NULL);
@@ -277,7 +277,7 @@ int tipc_net_start(u32 addr)
 
 	info("Started in network mode\n");
 	info("Own node address %s, network identity %u\n",
-	     addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
+	     tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
 	return 0;
 }
 
@@ -291,6 +291,6 @@ void tipc_net_stop(void)
 	tipc_bclink_stop();
 	net_stop();
 	write_unlock_bh(&tipc_net_lock);
-	info("Left network mode \n");
+	info("Left network mode\n");
 }
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2c24e7d6d950..b634942caba5 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -268,7 +268,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 
 	if (n_ptr->link_cnt >= 2) {
 		err("Attempt to create third link to %s\n",
-		    addr_string_fill(addr_string, n_ptr->addr));
+		    tipc_addr_string_fill(addr_string, n_ptr->addr));
 		return NULL;
 	}
 
@@ -278,9 +278,9 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 			n_ptr->link_cnt++;
 			return n_ptr;
 		}
-		err("Attempt to establish second link on <%s> to %s \n",
+		err("Attempt to establish second link on <%s> to %s\n",
 		    l_ptr->b_ptr->publ.name,
-		    addr_string_fill(addr_string, l_ptr->addr));
+		    tipc_addr_string_fill(addr_string, l_ptr->addr));
 	}
 	return NULL;
 }
@@ -439,7 +439,7 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 		return;
 
 	info("Lost contact with %s\n",
-	     addr_string_fill(addr_string, n_ptr->addr));
+	     tipc_addr_string_fill(addr_string, n_ptr->addr));
 
 	/* Abort link changeover */
 	for (i = 0; i < MAX_BEARERS; i++) {
@@ -602,7 +602,7 @@ u32 tipc_available_nodes(const u32 domain)
 
 	read_lock_bh(&tipc_net_lock);
 	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
-		if (!in_scope(domain, n_ptr->addr))
+		if (!tipc_in_scope(domain, n_ptr->addr))
 			continue;
 		if (tipc_node_is_up(n_ptr))
 			cnt++;
@@ -651,7 +651,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 	/* Add TLVs for all nodes in scope */
 
 	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
-		if (!in_scope(domain, n_ptr->addr))
+		if (!tipc_in_scope(domain, n_ptr->addr))
 			continue;
 		node_info.addr = htonl(n_ptr->addr);
 		node_info.up = htonl(tipc_node_is_up(n_ptr));
@@ -711,7 +711,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
 		u32 i;
 
-		if (!in_scope(domain, n_ptr->addr))
+		if (!tipc_in_scope(domain, n_ptr->addr))
 			continue;
 		tipc_node_lock(n_ptr);
 		for (i = 0; i < MAX_BEARERS; i++) {
diff --git a/net/tipc/port.c b/net/tipc/port.c
index e70d27ea6578..0737680e9266 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -116,7 +116,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 domain,
 	msg_set_namelower(hdr, seq->lower);
 	msg_set_nameupper(hdr, seq->upper);
 	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
-	res = msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE,
+	res = tipc_msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE,
 			!oport->user_port, &buf);
 	if (unlikely(!buf))
 		return res;
@@ -241,13 +241,12 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
 	p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
 	p_ptr->publ.ref = ref;
 	msg = &p_ptr->publ.phdr;
-	msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0);
+	tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0);
 	msg_set_origport(msg, ref);
 	p_ptr->last_in_seqno = 41;
 	p_ptr->sent = 1;
 	INIT_LIST_HEAD(&p_ptr->wait_list);
 	INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
-	p_ptr->congested_link = NULL;
 	p_ptr->dispatcher = dispatcher;
 	p_ptr->wakeup = wakeup;
 	p_ptr->user_port = NULL;
@@ -396,7 +395,7 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
 	buf = buf_acquire(LONG_H_SIZE);
 	if (buf) {
 		msg = buf_msg(buf);
-		msg_init(msg, usr, type, LONG_H_SIZE, destnode);
+		tipc_msg_init(msg, usr, type, LONG_H_SIZE, destnode);
 		msg_set_errcode(msg, err);
 		msg_set_destport(msg, destport);
 		msg_set_origport(msg, origport);
@@ -440,7 +439,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 		return data_sz;
 	}
 	rmsg = buf_msg(rbuf);
-	msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg));
+	tipc_msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg));
 	msg_set_errcode(rmsg, err);
 	msg_set_destport(rmsg, msg_origport(msg));
 	msg_set_origport(rmsg, msg_destport(msg));
@@ -481,7 +480,7 @@ int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
 	struct sk_buff *buf;
 	int res;
 
-	res = msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE,
+	res = tipc_msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE,
 			!p_ptr->user_port, &buf);
 	if (!buf)
 		return res;
@@ -1344,7 +1343,7 @@ int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
 	struct sk_buff *buf;
 	int res;
 
-	res = msg_build(&sender->publ.phdr, msg_sect, num_sect,
+	res = tipc_msg_build(&sender->publ.phdr, msg_sect, num_sect,
 			MAX_MSG_SIZE, !sender->user_port, &buf);
 	if (likely(buf))
 		tipc_port_recv_msg(buf);
@@ -1384,7 +1383,7 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
 	if (port_unreliable(p_ptr)) {
 		p_ptr->publ.congested = 0;
 		/* Just calculate msg length and return */
-		return msg_calc_data_size(msg_sect, num_sect);
+		return tipc_msg_calc_data_size(msg_sect, num_sect);
 	}
 	return -ELINKCONG;
 }
@@ -1453,7 +1452,7 @@ int tipc_forward2name(u32 ref,
 	struct port *p_ptr;
 	struct tipc_msg *msg;
 	u32 destnode = domain;
-	u32 destport = 0;
+	u32 destport;
 	int res;
 
 	p_ptr = tipc_port_deref(ref);
@@ -1467,7 +1466,7 @@ int tipc_forward2name(u32 ref,
 	msg_set_hdr_sz(msg, LONG_H_SIZE);
 	msg_set_nametype(msg, name->type);
 	msg_set_nameinst(msg, name->instance);
-	msg_set_lookup_scope(msg, addr_scope(domain));
+	msg_set_lookup_scope(msg, tipc_addr_scope(domain));
 	if (importance <= TIPC_CRITICAL_IMPORTANCE)
 		msg_set_importance(msg,importance);
 	destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
@@ -1484,7 +1483,7 @@ int tipc_forward2name(u32 ref,
 			return res;
 		if (port_unreliable(p_ptr)) {
 			/* Just calculate msg length and return */
-			return msg_calc_data_size(msg_sect, num_sect);
+			return tipc_msg_calc_data_size(msg_sect, num_sect);
 		}
 		return -ELINKCONG;
 	}
@@ -1525,7 +1524,7 @@ int tipc_forward_buf2name(u32 ref,
 	struct port *p_ptr;
 	struct tipc_msg *msg;
 	u32 destnode = domain;
-	u32 destport = 0;
+	u32 destport;
 	int res;
 
 	p_ptr = (struct port *)tipc_ref_deref(ref);
@@ -1540,7 +1539,7 @@ int tipc_forward_buf2name(u32 ref,
 	msg_set_origport(msg, orig->ref);
 	msg_set_nametype(msg, name->type);
 	msg_set_nameinst(msg, name->instance);
-	msg_set_lookup_scope(msg, addr_scope(domain));
+	msg_set_lookup_scope(msg, tipc_addr_scope(domain));
 	msg_set_hdr_sz(msg, LONG_H_SIZE);
 	msg_set_size(msg, LONG_H_SIZE + dsz);
 	destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
@@ -1620,7 +1619,7 @@ int tipc_forward2port(u32 ref,
 		return res;
 	if (port_unreliable(p_ptr)) {
 		/* Just calculate msg length and return */
-		return msg_calc_data_size(msg_sect, num_sect);
+		return tipc_msg_calc_data_size(msg_sect, num_sect);
 	}
 	return -ELINKCONG;
 }
diff --git a/net/tipc/port.h b/net/tipc/port.h
index ff31ee4a1dc3..8d1652aab298 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -75,7 +75,6 @@ struct user_port {
  * @wakeup: ptr to routine to call when port is no longer congested
  * @user_port: ptr to user port associated with port (if any)
  * @wait_list: adjacent ports in list of ports waiting on link congestion
- * @congested_link: ptr to congested link port is waiting on
  * @waiting_pkts:
  * @sent:
  * @acked:
@@ -95,7 +94,6 @@ struct port {
 	void (*wakeup)(struct tipc_port *);
 	struct user_port *user_port;
 	struct list_head wait_list;
-	struct link *congested_link;
 	u32 waiting_pkts;
 	u32 sent;
 	u32 acked;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index cfb20b80b3a1..66e889ba48fd 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -446,7 +446,7 @@ static unsigned int poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	u32 mask;
 
-	poll_wait(file, sk->sk_sleep, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
 	    (sock->state == SS_UNCONNECTED) ||
@@ -591,7 +591,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 			break;
 		}
 		release_sock(sk);
-		res = wait_event_interruptible(*sk->sk_sleep,
+		res = wait_event_interruptible(*sk_sleep(sk),
 					       !tport->congested);
 		lock_sock(sk);
 		if (res)
@@ -650,7 +650,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
 			break;
 		}
 		release_sock(sk);
-		res = wait_event_interruptible(*sk->sk_sleep,
+		res = wait_event_interruptible(*sk_sleep(sk),
 			(!tport->congested || !tport->connected));
 		lock_sock(sk);
 		if (res)
@@ -931,7 +931,7 @@ restart:
 			goto exit;
 		}
 		release_sock(sk);
-		res = wait_event_interruptible(*sk->sk_sleep,
+		res = wait_event_interruptible(*sk_sleep(sk),
 			(!skb_queue_empty(&sk->sk_receive_queue) ||
 			 (sock->state == SS_DISCONNECTING)));
 		lock_sock(sk);
@@ -1064,7 +1064,7 @@ restart:
 			goto exit;
 		}
 		release_sock(sk);
-		res = wait_event_interruptible(*sk->sk_sleep,
+		res = wait_event_interruptible(*sk_sleep(sk),
 			(!skb_queue_empty(&sk->sk_receive_queue) ||
 			 (sock->state == SS_DISCONNECTING)));
 		lock_sock(sk);
@@ -1271,8 +1271,8 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
 		tipc_disconnect_port(tipc_sk_port(sk));
 	}
 
-	if (waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 	return TIPC_OK;
 }
 
@@ -1343,8 +1343,8 @@ static void wakeupdispatch(struct tipc_port *tport)
 {
 	struct sock *sk = (struct sock *)tport->usr_handle;
 
-	if (waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 }
 
 /**
@@ -1426,7 +1426,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
 	/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
 
 	release_sock(sk);
-	res = wait_event_interruptible_timeout(*sk->sk_sleep,
+	res = wait_event_interruptible_timeout(*sk_sleep(sk),
 			(!skb_queue_empty(&sk->sk_receive_queue) ||
 			 (sock->state != SS_CONNECTING)),
 			sk->sk_rcvtimeo);
@@ -1521,7 +1521,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
 		goto exit;
 	}
 	release_sock(sk);
-	res = wait_event_interruptible(*sk->sk_sleep,
+	res = wait_event_interruptible(*sk_sleep(sk),
 			(!skb_queue_empty(&sk->sk_receive_queue)));
 	lock_sock(sk);
 	if (res)
@@ -1632,8 +1632,8 @@ restart:
 	/* Discard any unreceived messages; wake up sleeping tasks */
 
 	discard_rx_queue(sk);
-	if (waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 	res = 0;
 	break;
 
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ff123e56114a..ab6eab4c45e2 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -274,7 +274,7 @@ static void subscr_cancel(struct tipc_subscr *s,
 {
 	struct subscription *sub;
 	struct subscription *sub_temp;
-	__u32 type, lower, upper;
+	__u32 type, lower, upper, timeout, filter;
 	int found = 0;
 
 	/* Find first matching subscription, exit if not found */
@@ -282,12 +282,18 @@ static void subscr_cancel(struct tipc_subscr *s,
 	type = ntohl(s->seq.type);
 	lower = ntohl(s->seq.lower);
 	upper = ntohl(s->seq.upper);
+	timeout = ntohl(s->timeout);
+	filter = ntohl(s->filter) & ~TIPC_SUB_CANCEL;
 
 	list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
 				 subscription_list) {
 		if ((type == sub->seq.type) &&
 		    (lower == sub->seq.lower) &&
-		    (upper == sub->seq.upper)) {
+		    (upper == sub->seq.upper) &&
+		    (timeout == sub->timeout) &&
+		    (filter == sub->filter) &&
+		    !memcmp(s->usr_handle, sub->evt.s.usr_handle,
+			    sizeof(s->usr_handle))) {
 			found = 1;
 			break;
 		}
@@ -304,7 +310,7 @@ static void subscr_cancel(struct tipc_subscr *s,
 		k_term_timer(&sub->timer);
 		spin_lock_bh(subscriber->lock);
 	}
-	dbg("Cancel: removing sub %u,%u,%u from subscriber %x list\n",
+	dbg("Cancel: removing sub %u,%u,%u from subscriber %p list\n",
 	    sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber);
 	subscr_del(sub);
 }
@@ -352,8 +358,7 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
 	sub->seq.upper = ntohl(s->seq.upper);
 	sub->timeout = ntohl(s->timeout);
 	sub->filter = ntohl(s->filter);
-	if ((!(sub->filter & TIPC_SUB_PORTS) ==
-	     !(sub->filter & TIPC_SUB_SERVICE)) ||
+	if ((sub->filter && (sub->filter != TIPC_SUB_PORTS)) ||
 	    (sub->seq.lower > sub->seq.upper)) {
 		warn("Subscription rejected, illegal request\n");
 		kfree(sub);
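The stricter matching matters because a cancel request is just the original
subscription resent with TIPC_SUB_CANCEL OR'ed into the filter; subscr_cancel()
masks that bit off and now compares every remaining field, including the opaque
usr_handle, so only the exact subscription is torn down. Roughly, from the
userspace side of the TIPC topology server (a sketch from the normal
struct tipc_subscr API, not taken from this patch):

    struct tipc_subscr sub = {
        .seq     = { htonl(type), htonl(lower), htonl(upper) },
        .timeout = htonl(TIPC_WAIT_FOREVER),
        .filter  = htonl(TIPC_SUB_PORTS),
    };
    send(topsrv_fd, &sub, sizeof(sub), 0);           /* subscribe */

    sub.filter = htonl(TIPC_SUB_PORTS | TIPC_SUB_CANCEL);
    send(topsrv_fd, &sub, sizeof(sub), 0);           /* cancel that one */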
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3d9122e78f41..4414a18c63b4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -282,7 +282,7 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
 	return s;
 }
 
-static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
+static struct sock *unix_find_socket_byinode(struct inode *i)
 {
 	struct sock *s;
 	struct hlist_node *node;
@@ -292,9 +292,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 		struct dentry *dentry = unix_sk(s)->dentry;
 
-		if (!net_eq(sock_net(s), net))
-			continue;
-
 		if (dentry && dentry->d_inode == i) {
 			sock_hold(s);
 			goto found;
@@ -313,13 +310,16 @@ static inline int unix_writable(struct sock *sk)
 
 static void unix_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 	if (unix_writable(sk)) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible_sync(sk->sk_sleep);
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_sync(&wq->wait);
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 /* When dgram socket disconnects (or changes its peer), we clear its receive
@@ -406,9 +406,7 @@ static int unix_release_sock(struct sock *sk, int embrion)
 			skpair->sk_err = ECONNRESET;
 			unix_state_unlock(skpair);
 			skpair->sk_state_change(skpair);
-			read_lock(&skpair->sk_callback_lock);
 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
-			read_unlock(&skpair->sk_callback_lock);
 		}
 		sock_put(skpair); /* It may now die */
 		unix_peer(sk) = NULL;
@@ -449,11 +447,31 @@ static int unix_release_sock(struct sock *sk, int embrion)
 	return 0;
 }
 
+static void init_peercred(struct sock *sk)
+{
+	put_pid(sk->sk_peer_pid);
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	sk->sk_peer_pid = get_pid(task_tgid(current));
+	sk->sk_peer_cred = get_current_cred();
+}
+
+static void copy_peercred(struct sock *sk, struct sock *peersk)
+{
+	put_pid(sk->sk_peer_pid);
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
+	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+}
+
 static int unix_listen(struct socket *sock, int backlog)
 {
 	int err;
 	struct sock *sk = sock->sk;
 	struct unix_sock *u = unix_sk(sk);
+	struct pid *old_pid = NULL;
+	const struct cred *old_cred = NULL;
 
 	err = -EOPNOTSUPP;
 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -469,12 +487,14 @@ static int unix_listen(struct socket *sock, int backlog)
 	sk->sk_max_ack_backlog = backlog;
 	sk->sk_state = TCP_LISTEN;
 	/* set credentials so connect can copy them */
-	sk->sk_peercred.pid = task_tgid_vnr(current);
-	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
+	init_peercred(sk);
 	err = 0;
 
 out_unlock:
 	unix_state_unlock(sk);
+	put_pid(old_pid);
+	if (old_cred)
+		put_cred(old_cred);
 out:
 	return err;
 }
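init_peercred() and copy_peercred() replace the old open-coded sk_peercred
assignments with properly reference-counted struct pid / struct cred pointers,
dropping any previously held references first. What userspace observes through
SO_PEERCRED is meant to be unchanged: the peer's pid/uid/gid as captured at
listen(), connect() or socketpair() time. For reference, the standard consumer
side (ordinary sockets API, not part of this patch):

    struct ucred peer;
    socklen_t len = sizeof(peer);

    if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &peer, &len) == 0)
        printf("peer pid=%d uid=%d gid=%d\n", peer.pid, peer.uid, peer.gid);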
@@ -735,7 +755,7 @@ static struct sock *unix_find_other(struct net *net,
 		err = -ECONNREFUSED;
 		if (!S_ISSOCK(inode->i_mode))
 			goto put_fail;
-		u = unix_find_socket_byinode(net, inode);
+		u = unix_find_socket_byinode(inode);
 		if (!u)
 			goto put_fail;
 
@@ -1139,10 +1159,9 @@ restart:
 	unix_peer(newsk) = sk;
 	newsk->sk_state = TCP_ESTABLISHED;
 	newsk->sk_type = sk->sk_type;
-	newsk->sk_peercred.pid = task_tgid_vnr(current);
-	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
+	init_peercred(newsk);
 	newu = unix_sk(newsk);
-	newsk->sk_sleep = &newu->peer_wait;
+	newsk->sk_wq = &newu->peer_wq;
 	otheru = unix_sk(other);
 
 	/* copy address information from listening to new sock*/
@@ -1156,7 +1175,7 @@ restart:
 	}
 
 	/* Set credentials */
-	sk->sk_peercred = other->sk_peercred;
+	copy_peercred(sk, other);
 
 	sock->state = SS_CONNECTED;
 	sk->sk_state = TCP_ESTABLISHED;
@@ -1198,10 +1217,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
 	sock_hold(skb);
 	unix_peer(ska) = skb;
 	unix_peer(skb) = ska;
-	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
-	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
-	ska->sk_peercred.uid = skb->sk_peercred.uid;
-	ska->sk_peercred.gid = skb->sk_peercred.gid;
+	init_peercred(ska);
+	init_peercred(skb);
 
 	if (ska->sk_type != SOCK_DGRAM) {
 		ska->sk_state = TCP_ESTABLISHED;
@@ -1296,18 +1313,20 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	int i;
 
 	scm->fp = UNIXCB(skb).fp;
-	skb->destructor = sock_wfree;
 	UNIXCB(skb).fp = NULL;
 
 	for (i = scm->fp->count-1; i >= 0; i--)
 		unix_notinflight(scm->fp->fp[i]);
 }
 
-static void unix_destruct_fds(struct sk_buff *skb)
+static void unix_destruct_scm(struct sk_buff *skb)
 {
 	struct scm_cookie scm;
 	memset(&scm, 0, sizeof(scm));
-	unix_detach_fds(&scm, skb);
+	scm.pid = UNIXCB(skb).pid;
+	scm.cred = UNIXCB(skb).cred;
+	if (UNIXCB(skb).fp)
+		unix_detach_fds(&scm, skb);
 
 	/* Alas, it calls VFS */
 	/* So fscking what? fput() had been SMP-safe since the last Summer */
@@ -1330,10 +1349,22 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 
 	for (i = scm->fp->count-1; i >= 0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	skb->destructor = unix_destruct_fds;
 	return 0;
 }
 
+static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
+{
+	int err = 0;
+	UNIXCB(skb).pid = get_pid(scm->pid);
+	UNIXCB(skb).cred = get_cred(scm->cred);
+	UNIXCB(skb).fp = NULL;
+	if (scm->fp && send_fds)
+		err = unix_attach_fds(scm, skb);
+
+	skb->destructor = unix_destruct_scm;
+	return err;
+}
+
 /*
  * Send AF_UNIX data.
  */
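Taken together, unix_scm_to_skb() and unix_destruct_scm() make every queued
buffer carry its own pinned references to the sender's pid and cred for as long
as it sits in a receive queue. A condensed view of the lifecycle, using only
functions from this diff (allocation and error paths elided):

    /* send: pin identity (and fds, on the first buffer only) to the skb */
    err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);

    /* receive: republish the pinned identity into the reader's scm */
    scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);

    /* free: skb->destructor == unix_destruct_scm drops both references */
    consume_skb(skb);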
@@ -1390,12 +1421,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (skb == NULL)
 		goto out;
 
-	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp) {
-		err = unix_attach_fds(siocb->scm, skb);
-		if (err)
-			goto out_free;
-	}
+	err = unix_scm_to_skb(siocb->scm, skb, true);
+	if (err)
+		goto out_free;
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1565,16 +1593,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		 */
 		size = min_t(int, size, skb_tailroom(skb));
 
-		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+
 		/* Only send the fds in the first buffer */
-		if (siocb->scm->fp && !fds_sent) {
-			err = unix_attach_fds(siocb->scm, skb);
-			if (err) {
-				kfree_skb(skb);
-				goto out_err;
-			}
-			fds_sent = true;
+		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
+		if (err) {
+			kfree_skb(skb);
+			goto out_err;
 		}
+		fds_sent = true;
 
 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 		if (err) {
@@ -1691,7 +1717,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 		siocb->scm = &tmp_scm;
 		memset(&tmp_scm, 0, sizeof(tmp_scm));
 	}
-	siocb->scm->creds = *UNIXCREDS(skb);
+	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
 	unix_set_secdata(siocb->scm, skb);
 
 	if (!(flags & MSG_PEEK)) {
@@ -1736,7 +1762,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
 	unix_state_lock(sk);
 
 	for (;;) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
 		    sk->sk_err ||
@@ -1752,7 +1778,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	}
 
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	unix_state_unlock(sk);
 	return timeo;
 }
@@ -1840,14 +1866,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		if (check_creds) {
 			/* Never glue messages from different writers */
-			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
-				   sizeof(siocb->scm->creds)) != 0) {
+			if ((UNIXCB(skb).pid != siocb->scm->pid) ||
+			    (UNIXCB(skb).cred != siocb->scm->cred)) {
 				skb_queue_head(&sk->sk_receive_queue, skb);
 				break;
 			}
 		} else {
 			/* Copy credentials */
-			siocb->scm->creds = *UNIXCREDS(skb);
+			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
 			check_creds = 1;
 		}
 
@@ -1880,7 +1906,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 			break;
 		}
 
-		kfree_skb(skb);
+		consume_skb(skb);
 
 		if (siocb->scm->fp)
 			break;
@@ -1931,12 +1957,10 @@ static int unix_shutdown(struct socket *sock, int mode)
 		other->sk_shutdown |= peer_mode;
 		unix_state_unlock(other);
 		other->sk_state_change(other);
-		read_lock(&other->sk_callback_lock);
 		if (peer_mode == SHUTDOWN_MASK)
 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
 		else if (peer_mode & RCV_SHUTDOWN)
 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
-		read_unlock(&other->sk_callback_lock);
 	}
 	if (other)
 		sock_put(other);
@@ -1991,7 +2015,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
 	struct sock *sk = sock->sk;
 	unsigned int mask;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* exceptional events? */
@@ -2028,7 +2052,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk, *other;
 	unsigned int mask, writable;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* exceptional events? */
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 14c22c3768da..c8df6fda0b1f 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -153,15 +153,6 @@ void unix_notinflight(struct file *fp)
 	}
 }
 
-static inline struct sk_buff *sock_queue_head(struct sock *sk)
-{
-	return (struct sk_buff *)&sk->sk_receive_queue;
-}
-
-#define receive_queue_for_each_skb(sk, next, skb) \
-	for (skb = sock_queue_head(sk)->next, next = skb->next; \
-	     skb != sock_queue_head(sk); skb = next, next = skb->next)
-
 static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 			  struct sk_buff_head *hitlist)
 {
@@ -169,7 +160,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 	struct sk_buff *next;
 
 	spin_lock(&x->sk_receive_queue.lock);
-	receive_queue_for_each_skb(x, next, skb) {
+	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
 		/*
 		 * Do we have file descriptors ?
 		 */
@@ -225,7 +216,7 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
 	 * and perform a scan on them as well.
 	 */
 	spin_lock(&x->sk_receive_queue.lock);
-	receive_queue_for_each_skb(x, next, skb) {
+	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
 		u = unix_sk(skb->sk);
 
 		/*
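skb_queue_walk_safe() is the stock helper for exactly what the removed private
macro did: iterate a queue while the current entry may be unlinked. A minimal
sketch of the idiom (the queue lock is held by the caller, as in scan_inflight();
should_drop() is a hypothetical predicate):

    struct sk_buff *skb, *next;

    skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
        if (should_drop(skb)) {            /* hypothetical */
            __skb_unlink(skb, &x->sk_receive_queue);
            __skb_queue_tail(hitlist, skb);
        }
    }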
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 258daa80ad92..2bf23406637a 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -48,7 +48,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>	/* support for loadable modules */
 #include <linux/slab.h>	/* kmalloc(), kfree() */
-#include <linux/smp_lock.h>
+#include <linux/mutex.h>
 #include <linux/mm.h>
 #include <linux/string.h>	/* inline mem*, str* functions */
 
@@ -71,6 +71,7 @@
  *	WAN device IOCTL handlers
  */
 
+static DEFINE_MUTEX(wanrouter_mutex);
 static int wanrouter_device_setup(struct wan_device *wandev,
 				  wandev_conf_t __user *u_conf);
 static int wanrouter_device_stat(struct wan_device *wandev,
@@ -376,7 +377,7 @@ long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	if (wandev->magic != ROUTER_MAGIC)
 		return -EINVAL;
 
-	lock_kernel();
+	mutex_lock(&wanrouter_mutex);
 	switch (cmd) {
 	case ROUTER_SETUP:
 		err = wanrouter_device_setup(wandev, data);
@@ -408,7 +409,7 @@ long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			err = wandev->ioctl(wandev, cmd, arg);
 		else err = -EINVAL;
 	}
-	unlock_kernel();
+	mutex_unlock(&wanrouter_mutex);
 	return err;
 }
 
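Both wanrouter files follow the standard BKL-removal recipe: lock_kernel() and
unlock_kernel() become a file-local mutex guarding the same ioctl and /proc
paths. The shape of the conversion in miniature (a sketch; the real hunks are
above and below):

    static DEFINE_MUTEX(wanrouter_mutex);

    mutex_lock(&wanrouter_mutex);      /* was: lock_kernel() */
    /* ... wandev configuration state ... */
    mutex_unlock(&wanrouter_mutex);    /* was: unlock_kernel() */

Unlike the BKL, a mutex is not silently dropped when the holder sleeps and must
never be taken recursively, so the critical sections have to be self-contained;
that is why each file gets its own narrow lock rather than one shared one. (The
now-stale __releases(kernel_lock) annotation on r_stop() survives below.)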
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index c44d96b3a437..11f25c7a7a05 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -27,7 +27,7 @@
 #include <linux/module.h>
 #include <linux/wanrouter.h>	/* WAN router API definitions */
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
+#include <linux/mutex.h>
 
 #include <net/net_namespace.h>
 #include <asm/io.h>
@@ -66,6 +66,7 @@
  *	/proc/net/router
 */
 
+static DEFINE_MUTEX(config_mutex);
 static struct proc_dir_entry *proc_router;
 
 /* Strings */
@@ -85,7 +86,7 @@ static void *r_start(struct seq_file *m, loff_t *pos)
 	struct wan_device *wandev;
 	loff_t l = *pos;
 
-	lock_kernel();
+	mutex_lock(&config_mutex);
 	if (!l--)
 		return SEQ_START_TOKEN;
 	for (wandev = wanrouter_router_devlist; l-- && wandev;
@@ -104,7 +105,7 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
 static void r_stop(struct seq_file *m, void *v)
 	__releases(kernel_lock)
 {
-	unlock_kernel();
+	mutex_unlock(&config_mutex);
 }
 
 static int config_show(struct seq_file *m, void *v)
diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c
index 4dc82a54ba30..68bedf3e5443 100644
--- a/net/wimax/op-reset.c
+++ b/net/wimax/op-reset.c
@@ -110,7 +110,6 @@ int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info)
 {
 	int result, ifindex;
 	struct wimax_dev *wimax_dev;
-	struct device *dev;
 
 	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
 	result = -ENODEV;
@@ -123,7 +122,6 @@ int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info)
 	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
 	if (wimax_dev == NULL)
 		goto error_no_wimax_dev;
-	dev = wimax_dev_to_dev(wimax_dev);
 	/* Execute the operation and send the result back to user space */
 	result = wimax_reset(wimax_dev);
 	dev_put(wimax_dev->net_dev);
diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c
index e978c7136c97..2609e445fe7d 100644
--- a/net/wimax/op-rfkill.c
+++ b/net/wimax/op-rfkill.c
@@ -43,7 +43,7 @@
  * wimax_rfkill()	Kernel calling wimax_rfkill()
  *   __wimax_rf_toggle_radio()
  *
- * wimax_rfkill_set_radio_block()	RF-Kill subsytem calling
+ * wimax_rfkill_set_radio_block()	RF-Kill subsystem calling
  *   __wimax_rf_toggle_radio()
  *
  * __wimax_rf_toggle_radio()
diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c
index 11ad3356eb56..aff8776e2d41 100644
--- a/net/wimax/op-state-get.c
+++ b/net/wimax/op-state-get.c
@@ -53,7 +53,6 @@ int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info)
 {
 	int result, ifindex;
 	struct wimax_dev *wimax_dev;
-	struct device *dev;
 
 	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
 	result = -ENODEV;
@@ -66,7 +65,6 @@ int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info)
 	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
 	if (wimax_dev == NULL)
 		goto error_no_wimax_dev;
-	dev = wimax_dev_to_dev(wimax_dev);
 	/* Execute the operation and send the result back to user space */
 	result = wimax_state_get(wimax_dev);
 	dev_put(wimax_dev->net_dev);
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 1ed65dbdab03..ee99e7dfcdba 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -315,12 +315,11 @@ void __wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state)
 		BUG();
 	}
 	__wimax_state_set(wimax_dev, new_state);
-	if (stch_skb)
+	if (!IS_ERR(stch_skb))
 		wimax_gnl_re_state_change_send(wimax_dev, stch_skb, header);
 out:
 	d_fnend(3, dev, "(wimax_dev %p new_state %u [old %u]) = void\n",
 		wimax_dev, new_state, old_state);
-	return;
 }
 
 
@@ -362,7 +361,6 @@ void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state)
 	if (wimax_dev->state > __WIMAX_ST_NULL)
 		__wimax_state_change(wimax_dev, new_state);
 	mutex_unlock(&wimax_dev->mutex);
-	return;
 }
 EXPORT_SYMBOL_GPL(wimax_state_change);
 
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index bf1737fc9a7e..d0c92dddb26b 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -10,38 +10,6 @@
 #include "core.h"
 
 struct ieee80211_channel *
-rdev_fixed_channel(struct cfg80211_registered_device *rdev,
-		   struct wireless_dev *for_wdev)
-{
-	struct wireless_dev *wdev;
-	struct ieee80211_channel *result = NULL;
-
-	WARN_ON(!mutex_is_locked(&rdev->devlist_mtx));
-
-	list_for_each_entry(wdev, &rdev->netdev_list, list) {
-		if (wdev == for_wdev)
-			continue;
-
-		/*
-		 * Lock manually to tell lockdep about allowed
-		 * nesting here if for_wdev->mtx is held already.
-		 * This is ok as it's all under the rdev devlist
-		 * mutex and as such can only be done once at any
-		 * given time.
-		 */
-		mutex_lock_nested(&wdev->mtx, SINGLE_DEPTH_NESTING);
-		if (wdev->current_bss)
-			result = wdev->current_bss->pub.channel;
-		wdev_unlock(wdev);
-
-		if (result)
-			break;
-	}
-
-	return result;
-}
-
-struct ieee80211_channel *
 rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 		  int freq, enum nl80211_channel_type channel_type)
 {
@@ -67,23 +35,31 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 		if (!ht_cap->ht_supported)
 			return NULL;
 
-		if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) ||
-		    ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT)
+		if (channel_type != NL80211_CHAN_HT20 &&
+		    (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) ||
+		     ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT))
 			return NULL;
 	}
 
 	return chan;
 }
 
-int rdev_set_freq(struct cfg80211_registered_device *rdev,
-		  struct wireless_dev *for_wdev,
-		  int freq, enum nl80211_channel_type channel_type)
+int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
+		      struct wireless_dev *wdev, int freq,
+		      enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_channel *chan;
 	int result;
 
-	if (rdev_fixed_channel(rdev, for_wdev))
-		return -EBUSY;
+	if (wdev && wdev->iftype == NL80211_IFTYPE_MONITOR)
+		wdev = NULL;
+
+	if (wdev) {
+		ASSERT_WDEV_LOCK(wdev);
+
+		if (!netif_running(wdev->netdev))
+			return -ENETDOWN;
+	}
 
 	if (!rdev->ops->set_channel)
 		return -EOPNOTSUPP;
@@ -92,11 +68,14 @@ int rdev_set_freq(struct cfg80211_registered_device *rdev,
 	if (!chan)
 		return -EINVAL;
 
-	result = rdev->ops->set_channel(&rdev->wiphy, chan, channel_type);
+	result = rdev->ops->set_channel(&rdev->wiphy,
+					wdev ? wdev->netdev : NULL,
+					chan, channel_type);
 	if (result)
 		return result;
 
-	rdev->channel = chan;
+	if (wdev)
+		wdev->channel = chan;
 
 	return 0;
 }
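With rdev_fixed_channel() gone, the operating channel stops being a property of
the whole registered device: cfg80211_set_freq() takes the wireless_dev,
insists it is locked and running, and records the result per-interface in
wdev->channel (monitor interfaces pass wdev == NULL and only programme the
hardware). A hedged sketch of a caller, assuming the wdev_lock()/wdev_unlock()
helpers from net/wireless/core.h of this era:

    wdev_lock(wdev);
    err = cfg80211_set_freq(rdev, wdev, 2412 /* MHz, channel 1 */,
                            NL80211_CHAN_NO_HT);
    wdev_unlock(wdev);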
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 6ac70c101523..541e2fff5e9c 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -472,24 +472,22 @@ int wiphy_register(struct wiphy *wiphy)
 	/* check and set up bitrates */
 	ieee80211_set_bitrate_flags(wiphy);
 
+	mutex_lock(&cfg80211_mutex);
+
 	res = device_add(&rdev->wiphy.dev);
 	if (res)
-		return res;
+		goto out_unlock;
 
 	res = rfkill_register(rdev->rfkill);
 	if (res)
 		goto out_rm_dev;
 
-	mutex_lock(&cfg80211_mutex);
-
 	/* set up regulatory info */
 	wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE);
 
 	list_add_rcu(&rdev->list, &cfg80211_rdev_list);
 	cfg80211_rdev_list_generation++;
 
-	mutex_unlock(&cfg80211_mutex);
-
 	/* add to debugfs */
 	rdev->wiphy.debugfsdir =
 		debugfs_create_dir(wiphy_name(&rdev->wiphy),
@@ -509,11 +507,15 @@ int wiphy_register(struct wiphy *wiphy)
 	}
 
 	cfg80211_debugfs_rdev_add(rdev);
+	mutex_unlock(&cfg80211_mutex);
 
 	return 0;
 
- out_rm_dev:
+out_rm_dev:
 	device_del(&rdev->wiphy.dev);
+
+out_unlock:
+	mutex_unlock(&cfg80211_mutex);
 	return res;
 }
 EXPORT_SYMBOL(wiphy_register);
@@ -705,7 +707,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 			wdev->ps = true;
 		else
 			wdev->ps = false;
-		wdev->ps_timeout = 100;
+		/* allow mac80211 to determine the timeout */
+		wdev->ps_timeout = -1;
 		if (rdev->ops->set_power_mgmt)
 			if (rdev->ops->set_power_mgmt(wdev->wiphy, dev,
 						      wdev->ps,
@@ -893,7 +896,7 @@ out_fail_pernet:
 }
 subsys_initcall(cfg80211_init);
 
-static void cfg80211_exit(void)
+static void __exit cfg80211_exit(void)
 {
 	debugfs_remove(ieee80211_debugfs_dir);
 	nl80211_exit();
@@ -904,3 +907,52 @@ static void cfg80211_exit(void)
 	destroy_workqueue(cfg80211_wq);
 }
 module_exit(cfg80211_exit);
910
911static int ___wiphy_printk(const char *level, const struct wiphy *wiphy,
912 struct va_format *vaf)
913{
914 if (!wiphy)
915 return printk("%s(NULL wiphy *): %pV", level, vaf);
916
917 return printk("%s%s: %pV", level, wiphy_name(wiphy), vaf);
918}
919
920int __wiphy_printk(const char *level, const struct wiphy *wiphy,
921 const char *fmt, ...)
922{
923 struct va_format vaf;
924 va_list args;
925 int r;
926
927 va_start(args, fmt);
928
929 vaf.fmt = fmt;
930 vaf.va = &args;
931
932 r = ___wiphy_printk(level, wiphy, &vaf);
933 va_end(args);
934
935 return r;
936}
937EXPORT_SYMBOL(__wiphy_printk);
938
939#define define_wiphy_printk_level(func, kern_level) \
940int func(const struct wiphy *wiphy, const char *fmt, ...) \
941{ \
942 struct va_format vaf; \
943 va_list args; \
944 int r; \
945 \
946 va_start(args, fmt); \
947 \
948 vaf.fmt = fmt; \
949 vaf.va = &args; \
950 \
951 r = ___wiphy_printk(kern_level, wiphy, &vaf); \
952 va_end(args); \
953 \
954 return r; \
955} \
956EXPORT_SYMBOL(func);
957
958define_wiphy_printk_level(wiphy_debug, KERN_DEBUG);
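
The helpers above lean on printk's %pV extension, which expands a nested struct va_format (format string plus va_list) exactly once, so the wiphy name can be prefixed without re-interpreting the caller's arguments. An illustrative call site (not part of this patch):

	/* With a wiphy named "phy0" this logs "phy0: scanning 2412 MHz"
	 * at KERN_DEBUG; a NULL wiphy is tolerated and tagged as such. */
	wiphy_debug(wiphy, "scanning %d MHz\n", chan->center_freq);
	wiphy_debug(NULL, "no wiphy yet\n"); /* "(NULL wiphy *): no wiphy yet" */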
diff --git a/net/wireless/core.h b/net/wireless/core.h
index d52da913145a..63d57ae399c3 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -70,9 +70,6 @@ struct cfg80211_registered_device {
70 struct work_struct conn_work; 70 struct work_struct conn_work;
71 struct work_struct event_work; 71 struct work_struct event_work;
72 72
73 /* current channel */
74 struct ieee80211_channel *channel;
75
76 /* must be last because of the way we do wiphy_priv(), 73 /* must be last because of the way we do wiphy_priv(),
77 * and it should at least be aligned to NETDEV_ALIGN */ 74 * and it should at least be aligned to NETDEV_ALIGN */
78 struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); 75 struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
@@ -293,13 +290,15 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
293 const u8 *bssid, 290 const u8 *bssid,
294 const u8 *ssid, int ssid_len, 291 const u8 *ssid, int ssid_len,
295 const u8 *ie, int ie_len, 292 const u8 *ie, int ie_len,
296 const u8 *key, int key_len, int key_idx); 293 const u8 *key, int key_len, int key_idx,
294 bool local_state_change);
297int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, 295int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
298 struct net_device *dev, struct ieee80211_channel *chan, 296 struct net_device *dev, struct ieee80211_channel *chan,
299 enum nl80211_auth_type auth_type, const u8 *bssid, 297 enum nl80211_auth_type auth_type, const u8 *bssid,
300 const u8 *ssid, int ssid_len, 298 const u8 *ssid, int ssid_len,
301 const u8 *ie, int ie_len, 299 const u8 *ie, int ie_len,
302 const u8 *key, int key_len, int key_idx); 300 const u8 *key, int key_len, int key_idx,
301 bool local_state_change);
303int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, 302int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
304 struct net_device *dev, 303 struct net_device *dev,
305 struct ieee80211_channel *chan, 304 struct ieee80211_channel *chan,
@@ -315,13 +314,16 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
315 struct cfg80211_crypto_settings *crypt); 314 struct cfg80211_crypto_settings *crypt);
316int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, 315int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
317 struct net_device *dev, const u8 *bssid, 316 struct net_device *dev, const u8 *bssid,
318 const u8 *ie, int ie_len, u16 reason); 317 const u8 *ie, int ie_len, u16 reason,
318 bool local_state_change);
319int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, 319int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
320 struct net_device *dev, const u8 *bssid, 320 struct net_device *dev, const u8 *bssid,
321 const u8 *ie, int ie_len, u16 reason); 321 const u8 *ie, int ie_len, u16 reason,
322 bool local_state_change);
322int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, 323int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
323 struct net_device *dev, const u8 *bssid, 324 struct net_device *dev, const u8 *bssid,
324 const u8 *ie, int ie_len, u16 reason); 325 const u8 *ie, int ie_len, u16 reason,
326 bool local_state_change);
325void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, 327void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
326 struct net_device *dev); 328 struct net_device *dev);
327void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, 329void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
@@ -337,6 +339,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
337 struct net_device *dev, 339 struct net_device *dev,
338 struct ieee80211_channel *chan, 340 struct ieee80211_channel *chan,
339 enum nl80211_channel_type channel_type, 341 enum nl80211_channel_type channel_type,
342 bool channel_type_valid,
340 const u8 *buf, size_t len, u64 *cookie); 343 const u8 *buf, size_t len, u64 *cookie);
341 344
342/* SME */ 345/* SME */
@@ -383,14 +386,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
383void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev); 386void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
384 387
385struct ieee80211_channel * 388struct ieee80211_channel *
386rdev_fixed_channel(struct cfg80211_registered_device *rdev,
387 struct wireless_dev *for_wdev);
388struct ieee80211_channel *
389rdev_freq_to_chan(struct cfg80211_registered_device *rdev, 389rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
390 int freq, enum nl80211_channel_type channel_type); 390 int freq, enum nl80211_channel_type channel_type);
391int rdev_set_freq(struct cfg80211_registered_device *rdev, 391int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
392 struct wireless_dev *for_wdev, 392 struct wireless_dev *wdev, int freq,
393 int freq, enum nl80211_channel_type channel_type); 393 enum nl80211_channel_type channel_type);
394 394
395u16 cfg80211_calculate_bitrate(struct rate_info *rate); 395u16 cfg80211_calculate_bitrate(struct rate_info *rate);
396 396
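
The new local_state_change flag threaded through these prototypes tells the MLME layer to update its own station state without transmitting the corresponding management frame. A sketch of a caller dropping local association state silently -- the reason code here is illustrative:

	/* local_state_change = true: no Deauthentication frame goes out */
	err = cfg80211_mlme_deauth(rdev, dev, bssid,
				   NULL, 0,			/* no extra IEs */
				   WLAN_REASON_DEAUTH_LEAVING,
				   true);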
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
index 3cc9e69880a8..53c143f5e770 100644
--- a/net/wireless/genregdb.awk
+++ b/net/wireless/genregdb.awk
@@ -21,6 +21,7 @@ BEGIN {
21 print "" 21 print ""
22 print "#include <linux/nl80211.h>" 22 print "#include <linux/nl80211.h>"
23 print "#include <net/cfg80211.h>" 23 print "#include <net/cfg80211.h>"
24 print "#include \"regdb.h\""
24 print "" 25 print ""
25 regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n" 26 regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n"
26} 27}
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 6a5acf750174..27a8ce9343c3 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -81,15 +81,10 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
81 struct cfg80211_cached_keys *connkeys) 81 struct cfg80211_cached_keys *connkeys)
82{ 82{
83 struct wireless_dev *wdev = dev->ieee80211_ptr; 83 struct wireless_dev *wdev = dev->ieee80211_ptr;
84 struct ieee80211_channel *chan;
85 int err; 84 int err;
86 85
87 ASSERT_WDEV_LOCK(wdev); 86 ASSERT_WDEV_LOCK(wdev);
88 87
89 chan = rdev_fixed_channel(rdev, wdev);
90 if (chan && chan != params->channel)
91 return -EBUSY;
92
93 if (wdev->ssid_len) 88 if (wdev->ssid_len)
94 return -EALREADY; 89 return -EALREADY;
95 90
@@ -252,8 +247,10 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
252 if (!netif_running(wdev->netdev)) 247 if (!netif_running(wdev->netdev))
253 return 0; 248 return 0;
254 249
255 if (wdev->wext.keys) 250 if (wdev->wext.keys) {
256 wdev->wext.keys->def = wdev->wext.default_key; 251 wdev->wext.keys->def = wdev->wext.default_key;
252 wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key;
253 }
257 254
258 wdev->wext.ibss.privacy = wdev->wext.default_key != -1; 255 wdev->wext.ibss.privacy = wdev->wext.default_key != -1;
259 256
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index b7fa31d5fd13..dacb3b4b1bdb 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -467,7 +467,6 @@ static struct lib80211_crypto_ops lib80211_crypt_ccmp = {
467 .name = "CCMP", 467 .name = "CCMP",
468 .init = lib80211_ccmp_init, 468 .init = lib80211_ccmp_init,
469 .deinit = lib80211_ccmp_deinit, 469 .deinit = lib80211_ccmp_deinit,
470 .build_iv = lib80211_ccmp_hdr,
471 .encrypt_mpdu = lib80211_ccmp_encrypt, 470 .encrypt_mpdu = lib80211_ccmp_encrypt,
472 .decrypt_mpdu = lib80211_ccmp_decrypt, 471 .decrypt_mpdu = lib80211_ccmp_decrypt,
473 .encrypt_msdu = NULL, 472 .encrypt_msdu = NULL,
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 8cbdb32ff316..0fe40510e2cb 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -578,7 +578,7 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr)
578 } 578 }
579 579
580 if (ieee80211_is_data_qos(hdr11->frame_control)) { 580 if (ieee80211_is_data_qos(hdr11->frame_control)) {
581 hdr[12] = le16_to_cpu(*ieee80211_get_qos_ctl(hdr11)) 581 hdr[12] = le16_to_cpu(*((__le16 *)ieee80211_get_qos_ctl(hdr11)))
582 & IEEE80211_QOS_CTL_TID_MASK; 582 & IEEE80211_QOS_CTL_TID_MASK;
583 } else 583 } else
584 hdr[12] = 0; /* priority */ 584 hdr[12] = 0; /* priority */
@@ -757,7 +757,6 @@ static struct lib80211_crypto_ops lib80211_crypt_tkip = {
757 .name = "TKIP", 757 .name = "TKIP",
758 .init = lib80211_tkip_init, 758 .init = lib80211_tkip_init,
759 .deinit = lib80211_tkip_deinit, 759 .deinit = lib80211_tkip_deinit,
760 .build_iv = lib80211_tkip_hdr,
761 .encrypt_mpdu = lib80211_tkip_encrypt, 760 .encrypt_mpdu = lib80211_tkip_encrypt,
762 .decrypt_mpdu = lib80211_tkip_decrypt, 761 .decrypt_mpdu = lib80211_tkip_decrypt,
763 .encrypt_msdu = lib80211_michael_mic_add, 762 .encrypt_msdu = lib80211_michael_mic_add,
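
The TKIP hunk above fixes the QoS Control access: ieee80211_get_qos_ctl() returns a u8 * into the frame, so reading the 2-byte little-endian field needs an explicit __le16 cast before le16_to_cpu(). In isolation:

	u8 *qc = ieee80211_get_qos_ctl(hdr11);	/* u8 *, not __le16 * */
	u8 tid = le16_to_cpu(*(__le16 *)qc) & IEEE80211_QOS_CTL_TID_MASK;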
diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c
index 6d41e05ca33b..e2e88878ba35 100644
--- a/net/wireless/lib80211_crypt_wep.c
+++ b/net/wireless/lib80211_crypt_wep.c
@@ -269,7 +269,6 @@ static struct lib80211_crypto_ops lib80211_crypt_wep = {
269 .name = "WEP", 269 .name = "WEP",
270 .init = lib80211_wep_init, 270 .init = lib80211_wep_init,
271 .deinit = lib80211_wep_deinit, 271 .deinit = lib80211_wep_deinit,
272 .build_iv = lib80211_wep_build_iv,
273 .encrypt_mpdu = lib80211_wep_encrypt, 272 .encrypt_mpdu = lib80211_wep_encrypt,
274 .decrypt_mpdu = lib80211_wep_decrypt, 273 .decrypt_mpdu = lib80211_wep_decrypt,
275 .encrypt_msdu = NULL, 274 .encrypt_msdu = NULL,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 22139fa46115..e74a1a2119d3 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -44,10 +44,10 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len)
44 } 44 }
45 } 45 }
46 46
47 WARN_ON(!done); 47 if (done) {
48 48 nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL);
49 nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); 49 cfg80211_sme_rx_auth(dev, buf, len);
50 cfg80211_sme_rx_auth(dev, buf, len); 50 }
51 51
52 wdev_unlock(wdev); 52 wdev_unlock(wdev);
53} 53}
@@ -378,7 +378,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
378 const u8 *bssid, 378 const u8 *bssid,
379 const u8 *ssid, int ssid_len, 379 const u8 *ssid, int ssid_len,
380 const u8 *ie, int ie_len, 380 const u8 *ie, int ie_len,
381 const u8 *key, int key_len, int key_idx) 381 const u8 *key, int key_len, int key_idx,
382 bool local_state_change)
382{ 383{
383 struct wireless_dev *wdev = dev->ieee80211_ptr; 384 struct wireless_dev *wdev = dev->ieee80211_ptr;
384 struct cfg80211_auth_request req; 385 struct cfg80211_auth_request req;
@@ -408,6 +409,7 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
408 409
409 memset(&req, 0, sizeof(req)); 410 memset(&req, 0, sizeof(req));
410 411
412 req.local_state_change = local_state_change;
411 req.ie = ie; 413 req.ie = ie;
412 req.ie_len = ie_len; 414 req.ie_len = ie_len;
413 req.auth_type = auth_type; 415 req.auth_type = auth_type;
@@ -434,12 +436,18 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
434 goto out; 436 goto out;
435 } 437 }
436 438
437 wdev->authtry_bsses[slot] = bss; 439 if (local_state_change)
440 wdev->auth_bsses[slot] = bss;
441 else
442 wdev->authtry_bsses[slot] = bss;
438 cfg80211_hold_bss(bss); 443 cfg80211_hold_bss(bss);
439 444
440 err = rdev->ops->auth(&rdev->wiphy, dev, &req); 445 err = rdev->ops->auth(&rdev->wiphy, dev, &req);
441 if (err) { 446 if (err) {
442 wdev->authtry_bsses[slot] = NULL; 447 if (local_state_change)
448 wdev->auth_bsses[slot] = NULL;
449 else
450 wdev->authtry_bsses[slot] = NULL;
443 cfg80211_unhold_bss(bss); 451 cfg80211_unhold_bss(bss);
444 } 452 }
445 453
@@ -454,14 +462,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
454 enum nl80211_auth_type auth_type, const u8 *bssid, 462 enum nl80211_auth_type auth_type, const u8 *bssid,
455 const u8 *ssid, int ssid_len, 463 const u8 *ssid, int ssid_len,
456 const u8 *ie, int ie_len, 464 const u8 *ie, int ie_len,
457 const u8 *key, int key_len, int key_idx) 465 const u8 *key, int key_len, int key_idx,
466 bool local_state_change)
458{ 467{
459 int err; 468 int err;
460 469
461 wdev_lock(dev->ieee80211_ptr); 470 wdev_lock(dev->ieee80211_ptr);
462 err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, 471 err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
463 ssid, ssid_len, ie, ie_len, 472 ssid, ssid_len, ie, ie_len,
464 key, key_len, key_idx); 473 key, key_len, key_idx, local_state_change);
465 wdev_unlock(dev->ieee80211_ptr); 474 wdev_unlock(dev->ieee80211_ptr);
466 475
467 return err; 476 return err;
@@ -555,7 +564,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
555 564
556int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, 565int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
557 struct net_device *dev, const u8 *bssid, 566 struct net_device *dev, const u8 *bssid,
558 const u8 *ie, int ie_len, u16 reason) 567 const u8 *ie, int ie_len, u16 reason,
568 bool local_state_change)
559{ 569{
560 struct wireless_dev *wdev = dev->ieee80211_ptr; 570 struct wireless_dev *wdev = dev->ieee80211_ptr;
561 struct cfg80211_deauth_request req; 571 struct cfg80211_deauth_request req;
@@ -565,6 +575,7 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
565 575
566 memset(&req, 0, sizeof(req)); 576 memset(&req, 0, sizeof(req));
567 req.reason_code = reason; 577 req.reason_code = reason;
578 req.local_state_change = local_state_change;
568 req.ie = ie; 579 req.ie = ie;
569 req.ie_len = ie_len; 580 req.ie_len = ie_len;
570 if (wdev->current_bss && 581 if (wdev->current_bss &&
@@ -591,13 +602,15 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
591 602
592int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, 603int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
593 struct net_device *dev, const u8 *bssid, 604 struct net_device *dev, const u8 *bssid,
594 const u8 *ie, int ie_len, u16 reason) 605 const u8 *ie, int ie_len, u16 reason,
606 bool local_state_change)
595{ 607{
596 struct wireless_dev *wdev = dev->ieee80211_ptr; 608 struct wireless_dev *wdev = dev->ieee80211_ptr;
597 int err; 609 int err;
598 610
599 wdev_lock(wdev); 611 wdev_lock(wdev);
600 err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason); 612 err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason,
613 local_state_change);
601 wdev_unlock(wdev); 614 wdev_unlock(wdev);
602 615
603 return err; 616 return err;
@@ -605,7 +618,8 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
605 618
606static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, 619static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
607 struct net_device *dev, const u8 *bssid, 620 struct net_device *dev, const u8 *bssid,
608 const u8 *ie, int ie_len, u16 reason) 621 const u8 *ie, int ie_len, u16 reason,
622 bool local_state_change)
609{ 623{
610 struct wireless_dev *wdev = dev->ieee80211_ptr; 624 struct wireless_dev *wdev = dev->ieee80211_ptr;
611 struct cfg80211_disassoc_request req; 625 struct cfg80211_disassoc_request req;
@@ -620,6 +634,7 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
620 634
621 memset(&req, 0, sizeof(req)); 635 memset(&req, 0, sizeof(req));
622 req.reason_code = reason; 636 req.reason_code = reason;
637 req.local_state_change = local_state_change;
623 req.ie = ie; 638 req.ie = ie;
624 req.ie_len = ie_len; 639 req.ie_len = ie_len;
625 if (memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0) 640 if (memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0)
@@ -632,13 +647,15 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
632 647
633int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, 648int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
634 struct net_device *dev, const u8 *bssid, 649 struct net_device *dev, const u8 *bssid,
635 const u8 *ie, int ie_len, u16 reason) 650 const u8 *ie, int ie_len, u16 reason,
651 bool local_state_change)
636{ 652{
637 struct wireless_dev *wdev = dev->ieee80211_ptr; 653 struct wireless_dev *wdev = dev->ieee80211_ptr;
638 int err; 654 int err;
639 655
640 wdev_lock(wdev); 656 wdev_lock(wdev);
641 err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason); 657 err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason,
658 local_state_change);
642 wdev_unlock(wdev); 659 wdev_unlock(wdev);
643 660
644 return err; 661 return err;
@@ -810,6 +827,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
810 struct net_device *dev, 827 struct net_device *dev,
811 struct ieee80211_channel *chan, 828 struct ieee80211_channel *chan,
812 enum nl80211_channel_type channel_type, 829 enum nl80211_channel_type channel_type,
830 bool channel_type_valid,
813 const u8 *buf, size_t len, u64 *cookie) 831 const u8 *buf, size_t len, u64 *cookie)
814{ 832{
815 struct wireless_dev *wdev = dev->ieee80211_ptr; 833 struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -828,8 +846,9 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
828 if (!wdev->current_bss || 846 if (!wdev->current_bss ||
829 memcmp(wdev->current_bss->pub.bssid, mgmt->bssid, 847 memcmp(wdev->current_bss->pub.bssid, mgmt->bssid,
830 ETH_ALEN) != 0 || 848 ETH_ALEN) != 0 ||
831 memcmp(wdev->current_bss->pub.bssid, mgmt->da, 849 (wdev->iftype == NL80211_IFTYPE_STATION &&
832 ETH_ALEN) != 0) 850 memcmp(wdev->current_bss->pub.bssid, mgmt->da,
851 ETH_ALEN) != 0))
833 return -ENOTCONN; 852 return -ENOTCONN;
834 } 853 }
835 854
@@ -838,7 +857,7 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
838 857
839 /* Transmit the Action frame as requested by user space */ 858 /* Transmit the Action frame as requested by user space */
840 return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, 859 return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type,
841 buf, len, cookie); 860 channel_type_valid, buf, len, cookie);
842} 861}
843 862
844bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, 863bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf,
@@ -895,3 +914,16 @@ void cfg80211_action_tx_status(struct net_device *dev, u64 cookie,
895 nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); 914 nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp);
896} 915}
897EXPORT_SYMBOL(cfg80211_action_tx_status); 916EXPORT_SYMBOL(cfg80211_action_tx_status);
917
918void cfg80211_cqm_rssi_notify(struct net_device *dev,
919 enum nl80211_cqm_rssi_threshold_event rssi_event,
920 gfp_t gfp)
921{
922 struct wireless_dev *wdev = dev->ieee80211_ptr;
923 struct wiphy *wiphy = wdev->wiphy;
924 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
925
926 /* Indicate roaming trigger event to user space */
927 nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp);
928}
929EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
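
cfg80211_cqm_rssi_notify() is the driver-facing half of the new connection-quality-monitor support: a driver calls it when the configured RSSI threshold is crossed and cfg80211 relays the event to user space. A sketch of a driver call site (the GFP flag depends on the calling context):

	cfg80211_cqm_rssi_notify(netdev,
				 NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
				 GFP_ATOMIC);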
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 030cf153bea2..37902a54e9c1 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -150,6 +150,12 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
150 .len = IEEE80211_MAX_DATA_LEN }, 150 .len = IEEE80211_MAX_DATA_LEN },
151 [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, }, 151 [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, },
152 [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 }, 152 [NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
153 [NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
154 [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG },
155 [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 },
156
157 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
158 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
153}; 159};
154 160
155/* policy for the attributes */ 161/* policy for the attributes */
@@ -586,6 +592,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
586 i++; 592 i++;
587 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); 593 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
588 } 594 }
595 CMD(set_channel, SET_CHANNEL);
589 596
590#undef CMD 597#undef CMD
591 598
@@ -686,10 +693,90 @@ static int parse_txq_params(struct nlattr *tb[],
686 return 0; 693 return 0;
687} 694}
688 695
696static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
697{
698 /*
699 * You can only set the channel explicitly for AP, mesh
700 * and WDS type interfaces; all others have their channel
701 * managed via their respective "establish a connection"
702 * command (connect, join, ...)
703 *
704 * Monitors are special as they are normally slaved to
705 * whatever else is going on, so they behave as though
706 * you tried setting the wiphy channel itself.
707 */
708 return !wdev ||
709 wdev->iftype == NL80211_IFTYPE_AP ||
710 wdev->iftype == NL80211_IFTYPE_WDS ||
711 wdev->iftype == NL80211_IFTYPE_MESH_POINT ||
712 wdev->iftype == NL80211_IFTYPE_MONITOR;
713}
714
715static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
716 struct wireless_dev *wdev,
717 struct genl_info *info)
718{
719 enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
720 u32 freq;
721 int result;
722
723 if (!info->attrs[NL80211_ATTR_WIPHY_FREQ])
724 return -EINVAL;
725
726 if (!nl80211_can_set_dev_channel(wdev))
727 return -EOPNOTSUPP;
728
729 if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
730 channel_type = nla_get_u32(info->attrs[
731 NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
732 if (channel_type != NL80211_CHAN_NO_HT &&
733 channel_type != NL80211_CHAN_HT20 &&
734 channel_type != NL80211_CHAN_HT40PLUS &&
735 channel_type != NL80211_CHAN_HT40MINUS)
736 return -EINVAL;
737 }
738
739 freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
740
741 mutex_lock(&rdev->devlist_mtx);
742 if (wdev) {
743 wdev_lock(wdev);
744 result = cfg80211_set_freq(rdev, wdev, freq, channel_type);
745 wdev_unlock(wdev);
746 } else {
747 result = cfg80211_set_freq(rdev, NULL, freq, channel_type);
748 }
749 mutex_unlock(&rdev->devlist_mtx);
750
751 return result;
752}
753
754static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
755{
756 struct cfg80211_registered_device *rdev;
757 struct net_device *netdev;
758 int result;
759
760 rtnl_lock();
761
762 result = get_rdev_dev_by_info_ifindex(info, &rdev, &netdev);
763 if (result)
764 goto unlock;
765
766 result = __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info);
767
768 unlock:
769 rtnl_unlock();
770
771 return result;
772}
773
689static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) 774static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
690{ 775{
691 struct cfg80211_registered_device *rdev; 776 struct cfg80211_registered_device *rdev;
692 int result = 0, rem_txq_params = 0; 777 struct net_device *netdev = NULL;
778 struct wireless_dev *wdev;
779 int result, rem_txq_params = 0;
693 struct nlattr *nl_txq_params; 780 struct nlattr *nl_txq_params;
694 u32 changed; 781 u32 changed;
695 u8 retry_short = 0, retry_long = 0; 782 u8 retry_short = 0, retry_long = 0;
@@ -698,16 +785,50 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
698 785
699 rtnl_lock(); 786 rtnl_lock();
700 787
788 /*
789 * Try to find the wiphy and netdev. Normally this
790 * function shouldn't need the netdev, but this is
791 * done for backward compatibility -- previously
792 * setting the channel was done per wiphy, but now
793 * it is per netdev. Previous userland like hostapd
794 * also passed a netdev to set_wiphy, so that it is
795 * possible to let that go to the right netdev!
796 */
701 mutex_lock(&cfg80211_mutex); 797 mutex_lock(&cfg80211_mutex);
702 798
703 rdev = __cfg80211_rdev_from_info(info); 799 if (info->attrs[NL80211_ATTR_IFINDEX]) {
704 if (IS_ERR(rdev)) { 800 int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]);
705 mutex_unlock(&cfg80211_mutex); 801
706 result = PTR_ERR(rdev); 802 netdev = dev_get_by_index(genl_info_net(info), ifindex);
707 goto unlock; 803 if (netdev && netdev->ieee80211_ptr) {
804 rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy);
805 mutex_lock(&rdev->mtx);
806 } else
807 netdev = NULL;
708 } 808 }
709 809
710 mutex_lock(&rdev->mtx); 810 if (!netdev) {
811 rdev = __cfg80211_rdev_from_info(info);
812 if (IS_ERR(rdev)) {
813 mutex_unlock(&cfg80211_mutex);
814 result = PTR_ERR(rdev);
815 goto unlock;
816 }
817 wdev = NULL;
818 netdev = NULL;
819 result = 0;
820
821 mutex_lock(&rdev->mtx);
822 } else if (netif_running(netdev) &&
823 nl80211_can_set_dev_channel(netdev->ieee80211_ptr))
824 wdev = netdev->ieee80211_ptr;
825 else
826 wdev = NULL;
827
828 /*
829 * end workaround code, by now the rdev is available
830 * and locked, and wdev may or may not be NULL.
831 */
711 832
712 if (info->attrs[NL80211_ATTR_WIPHY_NAME]) 833 if (info->attrs[NL80211_ATTR_WIPHY_NAME])
713 result = cfg80211_dev_rename( 834 result = cfg80211_dev_rename(
@@ -746,26 +867,35 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
746 } 867 }
747 868
748 if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { 869 if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
749 enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; 870 result = __nl80211_set_channel(rdev, wdev, info);
750 u32 freq; 871 if (result)
751 872 goto bad_res;
752 result = -EINVAL; 873 }
753 874
754 if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { 875 if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) {
755 channel_type = nla_get_u32(info->attrs[ 876 enum nl80211_tx_power_setting type;
756 NL80211_ATTR_WIPHY_CHANNEL_TYPE]); 877 int idx, mbm = 0;
757 if (channel_type != NL80211_CHAN_NO_HT && 878
758 channel_type != NL80211_CHAN_HT20 && 879 if (!rdev->ops->set_tx_power) {
759 channel_type != NL80211_CHAN_HT40PLUS && 880 result = -EOPNOTSUPP;
760 channel_type != NL80211_CHAN_HT40MINUS) 881 goto bad_res;
761 goto bad_res;
762 } 882 }
763 883
764 freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); 884 idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING;
885 type = nla_get_u32(info->attrs[idx]);
765 886
766 mutex_lock(&rdev->devlist_mtx); 887 if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] &&
767 result = rdev_set_freq(rdev, NULL, freq, channel_type); 888 (type != NL80211_TX_POWER_AUTOMATIC)) {
768 mutex_unlock(&rdev->devlist_mtx); 889 result = -EINVAL;
890 goto bad_res;
891 }
892
893 if (type != NL80211_TX_POWER_AUTOMATIC) {
894 idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL;
895 mbm = nla_get_u32(info->attrs[idx]);
896 }
897
898 result = rdev->ops->set_tx_power(&rdev->wiphy, type, mbm);
769 if (result) 899 if (result)
770 goto bad_res; 900 goto bad_res;
771 } 901 }
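
The TX-power branch above reads the setting type and, unless it is NL80211_TX_POWER_AUTOMATIC, a level in mBm (hundredths of a dBm). A user-space sketch using libnl attribute helpers, assuming msg is an nl80211 message already set up for SET_WIPHY -- requesting a fixed 20 dBm:

	nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_SETTING,
		    NL80211_TX_POWER_FIXED);
	nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, 2000); /* 20 dBm */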
@@ -862,6 +992,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
862 992
863 bad_res: 993 bad_res:
864 mutex_unlock(&rdev->mtx); 994 mutex_unlock(&rdev->mtx);
995 if (netdev)
996 dev_put(netdev);
865 unlock: 997 unlock:
866 rtnl_unlock(); 998 rtnl_unlock();
867 return result; 999 return result;
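
With the compatibility path above, the channel can be set per netdev either through SET_WIPHY or through the dedicated NL80211_CMD_SET_CHANNEL added later in this patch. A user-space sketch with libnl's genl API -- sock and the family id (via genl_ctrl_resolve(sock, "nl80211")) are assumed to be set up already:

	struct nl_msg *msg = nlmsg_alloc();

	genlmsg_put(msg, 0, 0, family, 0, 0, NL80211_CMD_SET_CHANNEL, 0);
	nla_put_u32(msg, NL80211_ATTR_IFINDEX, if_nametoindex("wlan0"));
	nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, 2437);	/* channel 6 */
	nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, NL80211_CHAN_HT20);
	nl_send_auto_complete(sock, msg);
	nlmsg_free(msg);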
@@ -1006,7 +1138,7 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
1006 enum nl80211_iftype iftype) 1138 enum nl80211_iftype iftype)
1007{ 1139{
1008 if (!use_4addr) { 1140 if (!use_4addr) {
1009 if (netdev && netdev->br_port) 1141 if (netdev && (netdev->priv_flags & IFF_BRIDGE_PORT))
1010 return -EBUSY; 1142 return -EBUSY;
1011 return 0; 1143 return 0;
1012 } 1144 }
@@ -2096,7 +2228,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
2096 goto out_rtnl; 2228 goto out_rtnl;
2097 2229
2098 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && 2230 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2099 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { 2231 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
2232 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
2100 err = -EINVAL; 2233 err = -EINVAL;
2101 goto out; 2234 goto out;
2102 } 2235 }
@@ -2439,6 +2572,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
2439 params.use_cts_prot = -1; 2572 params.use_cts_prot = -1;
2440 params.use_short_preamble = -1; 2573 params.use_short_preamble = -1;
2441 params.use_short_slot_time = -1; 2574 params.use_short_slot_time = -1;
2575 params.ap_isolate = -1;
2442 2576
2443 if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) 2577 if (info->attrs[NL80211_ATTR_BSS_CTS_PROT])
2444 params.use_cts_prot = 2578 params.use_cts_prot =
@@ -2455,6 +2589,8 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
2455 params.basic_rates_len = 2589 params.basic_rates_len =
2456 nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); 2590 nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
2457 } 2591 }
2592 if (info->attrs[NL80211_ATTR_AP_ISOLATE])
2593 params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]);
2458 2594
2459 rtnl_lock(); 2595 rtnl_lock();
2460 2596
@@ -2633,6 +2769,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
2633 2769
2634 nla_put_failure: 2770 nla_put_failure:
2635 genlmsg_cancel(msg, hdr); 2771 genlmsg_cancel(msg, hdr);
2772 nlmsg_free(msg);
2636 err = -EMSGSIZE; 2773 err = -EMSGSIZE;
2637 out: 2774 out:
2638 /* Cleanup */ 2775 /* Cleanup */
@@ -2824,6 +2961,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
2824 2961
2825nla_put_failure: 2962nla_put_failure:
2826 genlmsg_cancel(msg, hdr); 2963 genlmsg_cancel(msg, hdr);
2964 nlmsg_free(msg);
2827 err = -EMSGSIZE; 2965 err = -EMSGSIZE;
2828out: 2966out:
2829 mutex_unlock(&cfg80211_mutex); 2967 mutex_unlock(&cfg80211_mutex);
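
Both leak fixes above follow the same generic-netlink rule: genlmsg_cancel() only rewinds the partially built message inside the skb, it does not release the skb itself. The resulting error path, schematically:

	nla_put_failure:
		genlmsg_cancel(msg, hdr);	/* undo the message payload */
		nlmsg_free(msg);		/* ...and free the skb itself */
		err = -EMSGSIZE;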
@@ -3392,6 +3530,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3392 int err, ssid_len, ie_len = 0; 3530 int err, ssid_len, ie_len = 0;
3393 enum nl80211_auth_type auth_type; 3531 enum nl80211_auth_type auth_type;
3394 struct key_parse key; 3532 struct key_parse key;
3533 bool local_state_change;
3395 3534
3396 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) 3535 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
3397 return -EINVAL; 3536 return -EINVAL;
@@ -3470,9 +3609,12 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3470 goto out; 3609 goto out;
3471 } 3610 }
3472 3611
3612 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
3613
3473 err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, 3614 err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
3474 ssid, ssid_len, ie, ie_len, 3615 ssid, ssid_len, ie, ie_len,
3475 key.p.key, key.p.key_len, key.idx); 3616 key.p.key, key.p.key_len, key.idx,
3617 local_state_change);
3476 3618
3477out: 3619out:
3478 cfg80211_unlock_rdev(rdev); 3620 cfg80211_unlock_rdev(rdev);
@@ -3551,9 +3693,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3551{ 3693{
3552 struct cfg80211_registered_device *rdev; 3694 struct cfg80211_registered_device *rdev;
3553 struct net_device *dev; 3695 struct net_device *dev;
3554 struct wireless_dev *wdev;
3555 struct cfg80211_crypto_settings crypto; 3696 struct cfg80211_crypto_settings crypto;
3556 struct ieee80211_channel *chan, *fixedchan; 3697 struct ieee80211_channel *chan;
3557 const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; 3698 const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL;
3558 int err, ssid_len, ie_len = 0; 3699 int err, ssid_len, ie_len = 0;
3559 bool use_mfp = false; 3700 bool use_mfp = false;
@@ -3596,16 +3737,6 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3596 goto out; 3737 goto out;
3597 } 3738 }
3598 3739
3599 mutex_lock(&rdev->devlist_mtx);
3600 wdev = dev->ieee80211_ptr;
3601 fixedchan = rdev_fixed_channel(rdev, wdev);
3602 if (fixedchan && chan != fixedchan) {
3603 err = -EBUSY;
3604 mutex_unlock(&rdev->devlist_mtx);
3605 goto out;
3606 }
3607 mutex_unlock(&rdev->devlist_mtx);
3608
3609 ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); 3740 ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
3610 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); 3741 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
3611 3742
@@ -3649,6 +3780,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
3649 const u8 *ie = NULL, *bssid; 3780 const u8 *ie = NULL, *bssid;
3650 int err, ie_len = 0; 3781 int err, ie_len = 0;
3651 u16 reason_code; 3782 u16 reason_code;
3783 bool local_state_change;
3652 3784
3653 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) 3785 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
3654 return -EINVAL; 3786 return -EINVAL;
@@ -3694,7 +3826,10 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
3694 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); 3826 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
3695 } 3827 }
3696 3828
3697 err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code); 3829 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
3830
3831 err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
3832 local_state_change);
3698 3833
3699out: 3834out:
3700 cfg80211_unlock_rdev(rdev); 3835 cfg80211_unlock_rdev(rdev);
@@ -3711,6 +3846,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3711 const u8 *ie = NULL, *bssid; 3846 const u8 *ie = NULL, *bssid;
3712 int err, ie_len = 0; 3847 int err, ie_len = 0;
3713 u16 reason_code; 3848 u16 reason_code;
3849 bool local_state_change;
3714 3850
3715 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) 3851 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
3716 return -EINVAL; 3852 return -EINVAL;
@@ -3756,7 +3892,10 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3756 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); 3892 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
3757 } 3893 }
3758 3894
3759 err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code); 3895 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
3896
3897 err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code,
3898 local_state_change);
3760 3899
3761out: 3900out:
3762 cfg80211_unlock_rdev(rdev); 3901 cfg80211_unlock_rdev(rdev);
@@ -3849,6 +3988,55 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3849 } 3988 }
3850 } 3989 }
3851 3990
3991 if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) {
3992 u8 *rates =
3993 nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
3994 int n_rates =
3995 nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
3996 struct ieee80211_supported_band *sband =
3997 wiphy->bands[ibss.channel->band];
3998 int i, j;
3999
4000 if (n_rates == 0) {
4001 err = -EINVAL;
4002 goto out;
4003 }
4004
4005 for (i = 0; i < n_rates; i++) {
4006 int rate = (rates[i] & 0x7f) * 5;
4007 bool found = false;
4008
4009 for (j = 0; j < sband->n_bitrates; j++) {
4010 if (sband->bitrates[j].bitrate == rate) {
4011 found = true;
4012 ibss.basic_rates |= BIT(j);
4013 break;
4014 }
4015 }
4016 if (!found) {
4017 err = -EINVAL;
4018 goto out;
4019 }
4020 }
4021 } else {
4022 /*
4023 * If no rates were explicitly configured,
4024 * use the mandatory rate set for 11b or
4025 * 11a for maximum compatibility.
4026 */
4027 struct ieee80211_supported_band *sband =
4028 wiphy->bands[ibss.channel->band];
4029 int j;
4030 u32 flag = ibss.channel->band == IEEE80211_BAND_5GHZ ?
4031 IEEE80211_RATE_MANDATORY_A :
4032 IEEE80211_RATE_MANDATORY_B;
4033
4034 for (j = 0; j < sband->n_bitrates; j++) {
4035 if (sband->bitrates[j].flags & flag)
4036 ibss.basic_rates |= BIT(j);
4037 }
4038 }
4039
3852 err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys); 4040 err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
3853 4041
3854out: 4042out:
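
The (rates[i] & 0x7f) * 5 conversion in the hunk above decodes the 802.11 Supported Rates encoding: each octet carries a rate in units of 500 kbps, with the top bit marking it as a basic rate, while cfg80211 bitrates are kept in units of 100 kbps. A worked example:

	/* 0x96 = basic-rate flag (0x80) | 22 units of 500 kbps */
	int rate = (0x96 & 0x7f) * 5;	/* 22 * 5 = 110 -> 11.0 Mbps */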
@@ -4337,9 +4525,10 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
4337 if (channel_type != NL80211_CHAN_NO_HT && 4525 if (channel_type != NL80211_CHAN_NO_HT &&
4338 channel_type != NL80211_CHAN_HT20 && 4526 channel_type != NL80211_CHAN_HT20 &&
4339 channel_type != NL80211_CHAN_HT40PLUS && 4527 channel_type != NL80211_CHAN_HT40PLUS &&
4340 channel_type != NL80211_CHAN_HT40MINUS) 4528 channel_type != NL80211_CHAN_HT40MINUS) {
4341 err = -EINVAL; 4529 err = -EINVAL;
4342 goto out; 4530 goto out;
4531 }
4343 } 4532 }
4344 4533
4345 freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); 4534 freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
@@ -4546,7 +4735,8 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info)
4546 if (err) 4735 if (err)
4547 goto unlock_rtnl; 4736 goto unlock_rtnl;
4548 4737
4549 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 4738 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4739 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) {
4550 err = -EOPNOTSUPP; 4740 err = -EOPNOTSUPP;
4551 goto out; 4741 goto out;
4552 } 4742 }
@@ -4574,6 +4764,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4574 struct net_device *dev; 4764 struct net_device *dev;
4575 struct ieee80211_channel *chan; 4765 struct ieee80211_channel *chan;
4576 enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; 4766 enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
4767 bool channel_type_valid = false;
4577 u32 freq; 4768 u32 freq;
4578 int err; 4769 int err;
4579 void *hdr; 4770 void *hdr;
@@ -4595,7 +4786,8 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4595 goto out; 4786 goto out;
4596 } 4787 }
4597 4788
4598 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 4789 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
4790 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) {
4599 err = -EOPNOTSUPP; 4791 err = -EOPNOTSUPP;
4600 goto out; 4792 goto out;
4601 } 4793 }
@@ -4611,9 +4803,11 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4611 if (channel_type != NL80211_CHAN_NO_HT && 4803 if (channel_type != NL80211_CHAN_NO_HT &&
4612 channel_type != NL80211_CHAN_HT20 && 4804 channel_type != NL80211_CHAN_HT20 &&
4613 channel_type != NL80211_CHAN_HT40PLUS && 4805 channel_type != NL80211_CHAN_HT40PLUS &&
4614 channel_type != NL80211_CHAN_HT40MINUS) 4806 channel_type != NL80211_CHAN_HT40MINUS) {
4615 err = -EINVAL; 4807 err = -EINVAL;
4616 goto out; 4808 goto out;
4809 }
4810 channel_type_valid = true;
4617 } 4811 }
4618 4812
4619 freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); 4813 freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
@@ -4637,6 +4831,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
4637 goto free_msg; 4831 goto free_msg;
4638 } 4832 }
4639 err = cfg80211_mlme_action(rdev, dev, chan, channel_type, 4833 err = cfg80211_mlme_action(rdev, dev, chan, channel_type,
4834 channel_type_valid,
4640 nla_data(info->attrs[NL80211_ATTR_FRAME]), 4835 nla_data(info->attrs[NL80211_ATTR_FRAME]),
4641 nla_len(info->attrs[NL80211_ATTR_FRAME]), 4836 nla_len(info->attrs[NL80211_ATTR_FRAME]),
4642 &cookie); 4837 &cookie);
@@ -4779,6 +4974,84 @@ unlock_rtnl:
4779 return err; 4974 return err;
4780} 4975}
4781 4976
4977static struct nla_policy
4978nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] __read_mostly = {
4979 [NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 },
4980 [NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 },
4981 [NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 },
4982};
4983
4984static int nl80211_set_cqm_rssi(struct genl_info *info,
4985 s32 threshold, u32 hysteresis)
4986{
4987 struct cfg80211_registered_device *rdev;
4988 struct wireless_dev *wdev;
4989 struct net_device *dev;
4990 int err;
4991
4992 if (threshold > 0)
4993 return -EINVAL;
4994
4995 rtnl_lock();
4996
4997 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4998 if (err)
4999 goto unlock_rdev;
5000
5001 wdev = dev->ieee80211_ptr;
5002
5003 if (!rdev->ops->set_cqm_rssi_config) {
5004 err = -EOPNOTSUPP;
5005 goto unlock_rdev;
5006 }
5007
5008 if (wdev->iftype != NL80211_IFTYPE_STATION) {
5009 err = -EOPNOTSUPP;
5010 goto unlock_rdev;
5011 }
5012
5013 err = rdev->ops->set_cqm_rssi_config(wdev->wiphy, dev,
5014 threshold, hysteresis);
5015
5016unlock_rdev:
5017 cfg80211_unlock_rdev(rdev);
5018 dev_put(dev);
5019 rtnl_unlock();
5020
5021 return err;
5022}
5023
5024static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
5025{
5026 struct nlattr *attrs[NL80211_ATTR_CQM_MAX + 1];
5027 struct nlattr *cqm;
5028 int err;
5029
5030 cqm = info->attrs[NL80211_ATTR_CQM];
5031 if (!cqm) {
5032 err = -EINVAL;
5033 goto out;
5034 }
5035
5036 err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm,
5037 nl80211_attr_cqm_policy);
5038 if (err)
5039 goto out;
5040
5041 if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
5042 attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
5043 s32 threshold;
5044 u32 hysteresis;
5045 threshold = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
5046 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
5047 err = nl80211_set_cqm_rssi(info, threshold, hysteresis);
5048 } else
5049 err = -EINVAL;
5050
5051out:
5052 return err;
5053}
5054
4782static struct genl_ops nl80211_ops[] = { 5055static struct genl_ops nl80211_ops[] = {
4783 { 5056 {
4784 .cmd = NL80211_CMD_GET_WIPHY, 5057 .cmd = NL80211_CMD_GET_WIPHY,
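
On the wire, NL80211_CMD_SET_CQM carries the RSSI parameters inside the nested NL80211_ATTR_CQM attribute parsed above; the threshold is a signed dBm value and must be <= 0. A user-space sketch with libnl nesting helpers, message setup assumed -- arming a trigger at -70 dBm with 4 dB hysteresis:

	struct nlattr *cqm = nla_nest_start(msg, NL80211_ATTR_CQM);

	nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_THOLD, (uint32_t)-70);
	nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_HYST, 4);
	nla_nest_end(msg, cqm);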
@@ -5083,6 +5356,18 @@ static struct genl_ops nl80211_ops[] = {
5083 .policy = nl80211_policy, 5356 .policy = nl80211_policy,
5084 /* can be retrieved by unprivileged users */ 5357 /* can be retrieved by unprivileged users */
5085 }, 5358 },
5359 {
5360 .cmd = NL80211_CMD_SET_CQM,
5361 .doit = nl80211_set_cqm,
5362 .policy = nl80211_policy,
5363 .flags = GENL_ADMIN_PERM,
5364 },
5365 {
5366 .cmd = NL80211_CMD_SET_CHANNEL,
5367 .doit = nl80211_set_channel,
5368 .policy = nl80211_policy,
5369 .flags = GENL_ADMIN_PERM,
5370 },
5086}; 5371};
5087 5372
5088static struct genl_multicast_group nl80211_mlme_mcgrp = { 5373static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -5833,6 +6118,52 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
5833 nlmsg_free(msg); 6118 nlmsg_free(msg);
5834} 6119}
5835 6120
6121void
6122nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
6123 struct net_device *netdev,
6124 enum nl80211_cqm_rssi_threshold_event rssi_event,
6125 gfp_t gfp)
6126{
6127 struct sk_buff *msg;
6128 struct nlattr *pinfoattr;
6129 void *hdr;
6130
6131 msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
6132 if (!msg)
6133 return;
6134
6135 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM);
6136 if (!hdr) {
6137 nlmsg_free(msg);
6138 return;
6139 }
6140
6141 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
6142 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
6143
6144 pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM);
6145 if (!pinfoattr)
6146 goto nla_put_failure;
6147
6148 NLA_PUT_U32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
6149 rssi_event);
6150
6151 nla_nest_end(msg, pinfoattr);
6152
6153 if (genlmsg_end(msg, hdr) < 0) {
6154 nlmsg_free(msg);
6155 return;
6156 }
6157
6158 genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
6159 nl80211_mlme_mcgrp.id, gfp);
6160 return;
6161
6162 nla_put_failure:
6163 genlmsg_cancel(msg, hdr);
6164 nlmsg_free(msg);
6165}
6166
5836static int nl80211_netlink_notify(struct notifier_block * nb, 6167static int nl80211_netlink_notify(struct notifier_block * nb,
5837 unsigned long state, 6168 unsigned long state,
5838 void *_notify) 6169 void *_notify)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 4ca511102c6c..2ad7fbc7d9f1 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -82,4 +82,10 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
82 const u8 *buf, size_t len, bool ack, 82 const u8 *buf, size_t len, bool ack,
83 gfp_t gfp); 83 gfp_t gfp);
84 84
85void
86nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
87 struct net_device *netdev,
88 enum nl80211_cqm_rssi_threshold_event rssi_event,
89 gfp_t gfp);
90
85#endif /* __NET_WIRELESS_NL80211_H */ 91#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 422da20d1e5b..f180db0de66c 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -67,20 +67,12 @@ static struct platform_device *reg_pdev;
67const struct ieee80211_regdomain *cfg80211_regdomain; 67const struct ieee80211_regdomain *cfg80211_regdomain;
68 68
69/* 69/*
70 * We use this as a place for the rd structure built from the
71 * last parsed country IE to rest until CRDA gets back to us with
72 * what it thinks should apply for the same country
73 */
74static const struct ieee80211_regdomain *country_ie_regdomain;
75
76/*
77 * Protects static reg.c components: 70 * Protects static reg.c components:
78 * - cfg80211_world_regdom 71 * - cfg80211_world_regdom
79 * - cfg80211_regdom 72 * - cfg80211_regdom
80 * - country_ie_regdomain
81 * - last_request 73 * - last_request
82 */ 74 */
83DEFINE_MUTEX(reg_mutex); 75static DEFINE_MUTEX(reg_mutex);
84#define assert_reg_lock() WARN_ON(!mutex_is_locked(&reg_mutex)) 76#define assert_reg_lock() WARN_ON(!mutex_is_locked(&reg_mutex))
85 77
86/* Used to queue up regulatory hints */ 78/* Used to queue up regulatory hints */
@@ -275,25 +267,6 @@ static bool is_user_regdom_saved(void)
275 return true; 267 return true;
276} 268}
277 269
278/**
279 * country_ie_integrity_changes - tells us if the country IE has changed
280 * @checksum: checksum of country IE of fields we are interested in
281 *
282 * If the country IE has not changed you can ignore it safely. This is
283 * useful to determine if two devices are seeing two different country IEs
284 * even on the same alpha2. Note that this will return false if no IE has
285 * been set on the wireless core yet.
286 */
287static bool country_ie_integrity_changes(u32 checksum)
288{
289 /* If no IE has been set then the checksum doesn't change */
290 if (unlikely(!last_request->country_ie_checksum))
291 return false;
292 if (unlikely(last_request->country_ie_checksum != checksum))
293 return true;
294 return false;
295}
296
297static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, 270static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd,
298 const struct ieee80211_regdomain *src_regd) 271 const struct ieee80211_regdomain *src_regd)
299{ 272{
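
The large hunk below removes the in-kernel country-IE parser, including chan_in_band(), which existed only because ieee80211_channel_to_frequency() took no band argument at the time. A simplified approximation of the mapping involved (not the exact helper):

	/* band-less mapping the removed code had to work around:
	 *   2.4 GHz: freq = 2407 + chan * 5   (channel 14 -> 2484)
	 *   5 GHz:   freq = 5000 + chan * 5
	 * so channel 8 from a 5 GHz triplet (5040 MHz) would come back
	 * as 2447 MHz unless the band is checked separately. */
	static int chan_to_freq(int chan, enum ieee80211_band band)
	{
		if (band == IEEE80211_BAND_2GHZ)
			return chan == 14 ? 2484 : 2407 + chan * 5;
		return 5000 + chan * 5;
	}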
@@ -506,471 +479,6 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range,
506} 479}
507 480
508/* 481/*
 509 * This is a workaround for sanity-checking ieee80211_channel_to_frequency()'s
510 * work. ieee80211_channel_to_frequency() can for example currently provide a
511 * 2 GHz channel when in fact a 5 GHz channel was desired. An example would be
512 * an AP providing channel 8 on a country IE triplet when it sent this on the
513 * 5 GHz band, that channel is designed to be channel 8 on 5 GHz, not a 2 GHz
514 * channel.
515 *
516 * This can be removed once ieee80211_channel_to_frequency() takes in a band.
517 */
518static bool chan_in_band(int chan, enum ieee80211_band band)
519{
520 int center_freq = ieee80211_channel_to_frequency(chan);
521
522 switch (band) {
523 case IEEE80211_BAND_2GHZ:
524 if (center_freq <= 2484)
525 return true;
526 return false;
527 case IEEE80211_BAND_5GHZ:
528 if (center_freq >= 5005)
529 return true;
530 return false;
531 default:
532 return false;
533 }
534}
535
536/*
537 * Some APs may send a country IE triplet for each channel they
538 * support and while this is completely overkill and silly we still
539 * need to support it. We avoid making a single rule for each channel
540 * though and to help us with this we use this helper to find the
 541 * actual subband end channel. These types of country IE triplet
 542 * scenarios are handled here, all yielding two regulatory rules from
543 * parsing a country IE:
544 *
545 * [1]
546 * [2]
547 * [36]
548 * [40]
549 *
550 * [1]
551 * [2-4]
552 * [5-12]
553 * [36]
554 * [40-44]
555 *
556 * [1-4]
557 * [5-7]
558 * [36-44]
559 * [48-64]
560 *
561 * [36-36]
562 * [40-40]
563 * [44-44]
564 * [48-48]
565 * [52-52]
566 * [56-56]
567 * [60-60]
568 * [64-64]
569 * [100-100]
570 * [104-104]
571 * [108-108]
572 * [112-112]
573 * [116-116]
574 * [120-120]
575 * [124-124]
576 * [128-128]
577 * [132-132]
578 * [136-136]
579 * [140-140]
580 *
581 * Returns 0 if the IE has been found to be invalid in the middle
582 * somewhere.
583 */
584static int max_subband_chan(enum ieee80211_band band,
585 int orig_cur_chan,
586 int orig_end_channel,
587 s8 orig_max_power,
588 u8 **country_ie,
589 u8 *country_ie_len)
590{
591 u8 *triplets_start = *country_ie;
592 u8 len_at_triplet = *country_ie_len;
593 int end_subband_chan = orig_end_channel;
594
595 /*
596 * We'll deal with padding for the caller unless
 597 * it's not immediate and we don't process any channels
598 */
599 if (*country_ie_len == 1) {
600 *country_ie += 1;
601 *country_ie_len -= 1;
602 return orig_end_channel;
603 }
604
605 /* Move to the next triplet and then start search */
606 *country_ie += 3;
607 *country_ie_len -= 3;
608
609 if (!chan_in_band(orig_cur_chan, band))
610 return 0;
611
612 while (*country_ie_len >= 3) {
613 int end_channel = 0;
614 struct ieee80211_country_ie_triplet *triplet =
615 (struct ieee80211_country_ie_triplet *) *country_ie;
616 int cur_channel = 0, next_expected_chan;
617
618 /* means last triplet is completely unrelated to this one */
619 if (triplet->ext.reg_extension_id >=
620 IEEE80211_COUNTRY_EXTENSION_ID) {
621 *country_ie -= 3;
622 *country_ie_len += 3;
623 break;
624 }
625
626 if (triplet->chans.first_channel == 0) {
627 *country_ie += 1;
628 *country_ie_len -= 1;
629 if (*country_ie_len != 0)
630 return 0;
631 break;
632 }
633
634 if (triplet->chans.num_channels == 0)
635 return 0;
636
 637 /* Monotonically increasing channel order */
638 if (triplet->chans.first_channel <= end_subband_chan)
639 return 0;
640
641 if (!chan_in_band(triplet->chans.first_channel, band))
642 return 0;
643
644 /* 2 GHz */
645 if (triplet->chans.first_channel <= 14) {
646 end_channel = triplet->chans.first_channel +
647 triplet->chans.num_channels - 1;
648 }
649 else {
650 end_channel = triplet->chans.first_channel +
651 (4 * (triplet->chans.num_channels - 1));
652 }
653
654 if (!chan_in_band(end_channel, band))
655 return 0;
656
657 if (orig_max_power != triplet->chans.max_power) {
658 *country_ie -= 3;
659 *country_ie_len += 3;
660 break;
661 }
662
663 cur_channel = triplet->chans.first_channel;
664
665 /* The key is finding the right next expected channel */
666 if (band == IEEE80211_BAND_2GHZ)
667 next_expected_chan = end_subband_chan + 1;
668 else
669 next_expected_chan = end_subband_chan + 4;
670
671 if (cur_channel != next_expected_chan) {
672 *country_ie -= 3;
673 *country_ie_len += 3;
674 break;
675 }
676
677 end_subband_chan = end_channel;
678
679 /* Move to the next one */
680 *country_ie += 3;
681 *country_ie_len -= 3;
682
683 /*
684 * Padding needs to be dealt with if we processed
685 * some channels.
686 */
687 if (*country_ie_len == 1) {
688 *country_ie += 1;
689 *country_ie_len -= 1;
690 break;
691 }
692
693 /* If seen, the IE is invalid */
694 if (*country_ie_len == 2)
695 return 0;
696 }
697
698 if (end_subband_chan == orig_end_channel) {
699 *country_ie = triplets_start;
700 *country_ie_len = len_at_triplet;
701 return orig_end_channel;
702 }
703
704 return end_subband_chan;
705}
706
707/*
708 * Converts a country IE to a regulatory domain. A regulatory domain
709 * structure has a lot of information which the IE doesn't yet have,
710 * so for the other values we use upper max values as we will intersect
711 * with our userspace regulatory agent to get lower bounds.
712 */
713static struct ieee80211_regdomain *country_ie_2_rd(
714 enum ieee80211_band band,
715 u8 *country_ie,
716 u8 country_ie_len,
717 u32 *checksum)
718{
719 struct ieee80211_regdomain *rd = NULL;
720 unsigned int i = 0;
721 char alpha2[2];
722 u32 flags = 0;
723 u32 num_rules = 0, size_of_regd = 0;
724 u8 *triplets_start = NULL;
725 u8 len_at_triplet = 0;
726 /* the last channel we have registered in a subband (triplet) */
727 int last_sub_max_channel = 0;
728
729 *checksum = 0xDEADBEEF;
730
731 /* Country IE requirements */
732 BUG_ON(country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN ||
733 country_ie_len & 0x01);
734
735 alpha2[0] = country_ie[0];
736 alpha2[1] = country_ie[1];
737
738 /*
739 * Third octet can be:
740 * 'I' - Indoor
741 * 'O' - Outdoor
742 *
743 * anything else we assume is no restrictions
744 */
745 if (country_ie[2] == 'I')
746 flags = NL80211_RRF_NO_OUTDOOR;
747 else if (country_ie[2] == 'O')
748 flags = NL80211_RRF_NO_INDOOR;
749
750 country_ie += 3;
751 country_ie_len -= 3;
752
753 triplets_start = country_ie;
754 len_at_triplet = country_ie_len;
755
756 *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8);
757
758 /*
759 * We need to build a reg rule for each triplet, but first we must
760 * calculate the number of reg rules we will need. We will need one
761 * for each channel subband
762 */
763 while (country_ie_len >= 3) {
764 int end_channel = 0;
765 struct ieee80211_country_ie_triplet *triplet =
766 (struct ieee80211_country_ie_triplet *) country_ie;
767 int cur_sub_max_channel = 0, cur_channel = 0;
768
769 if (triplet->ext.reg_extension_id >=
770 IEEE80211_COUNTRY_EXTENSION_ID) {
771 country_ie += 3;
772 country_ie_len -= 3;
773 continue;
774 }
775
776 /*
777 * APs can add padding to make length divisible
778 * by two, required by the spec.
779 */
780 if (triplet->chans.first_channel == 0) {
781 country_ie++;
782 country_ie_len--;
783 /* This is expected to be at the very end only */
784 if (country_ie_len != 0)
785 return NULL;
786 break;
787 }
788
789 if (triplet->chans.num_channels == 0)
790 return NULL;
791
792 if (!chan_in_band(triplet->chans.first_channel, band))
793 return NULL;
794
795 /* 2 GHz */
796 if (band == IEEE80211_BAND_2GHZ)
797 end_channel = triplet->chans.first_channel +
798 triplet->chans.num_channels - 1;
799 else
800 /*
801 * 5 GHz -- For example in country IEs if the first
802 * channel given is 36 and the number of channels is 4
803 * then the individual channel numbers defined for the
804 * 5 GHz PHY by these parameters are: 36, 40, 44, and 48
805 * and not 36, 37, 38, 39.
806 *
807 * See: http://tinyurl.com/11d-clarification
808 */
809 end_channel = triplet->chans.first_channel +
810 (4 * (triplet->chans.num_channels - 1));
811
812 cur_channel = triplet->chans.first_channel;
813
814 /*
815 * Enhancement for APs that send a triplet for every channel
816 * or for whatever reason sends triplets with multiple channels
817 * separated when in fact they should be together.
818 */
-		end_channel = max_subband_chan(band,
-					       cur_channel,
-					       end_channel,
-					       triplet->chans.max_power,
-					       &country_ie,
-					       &country_ie_len);
-		if (!end_channel)
-			return NULL;
-
-		if (!chan_in_band(end_channel, band))
-			return NULL;
-
-		cur_sub_max_channel = end_channel;
-
-		/* Basic sanity check */
-		if (cur_sub_max_channel < cur_channel)
-			return NULL;
-
-		/*
-		 * Do not allow overlapping channels. Also, channels
-		 * passed in each subband must be monotonically
-		 * increasing.
-		 */
-		if (last_sub_max_channel) {
-			if (cur_channel <= last_sub_max_channel)
-				return NULL;
-			if (cur_sub_max_channel <= last_sub_max_channel)
-				return NULL;
-		}
-
-		/*
-		 * When dot11RegulatoryClassesRequired is supported
-		 * we can throw ext triplets as part of this soup;
-		 * for now we don't care when those change, as we
-		 * don't support them.
-		 */
-		*checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) |
-		  ((last_sub_max_channel ^ cur_sub_max_channel) << 16) |
-		  ((triplet->chans.max_power ^ cur_sub_max_channel) << 24);
-
-		last_sub_max_channel = cur_sub_max_channel;
-
-		num_rules++;
-
-		if (country_ie_len >= 3) {
-			country_ie += 3;
-			country_ie_len -= 3;
-		}
-
-		/*
-		 * Note: this is not an IEEE requirement but
-		 * simply a memory requirement.
-		 */
-		if (num_rules > NL80211_MAX_SUPP_REG_RULES)
-			return NULL;
-	}
-
-	country_ie = triplets_start;
-	country_ie_len = len_at_triplet;
-
-	size_of_regd = sizeof(struct ieee80211_regdomain) +
-		(num_rules * sizeof(struct ieee80211_reg_rule));
-
-	rd = kzalloc(size_of_regd, GFP_KERNEL);
-	if (!rd)
-		return NULL;
-
-	rd->n_reg_rules = num_rules;
-	rd->alpha2[0] = alpha2[0];
-	rd->alpha2[1] = alpha2[1];
-
-	/* This time around we fill in the rd */
-	while (country_ie_len >= 3) {
-		int end_channel = 0;
-		struct ieee80211_country_ie_triplet *triplet =
-			(struct ieee80211_country_ie_triplet *) country_ie;
-		struct ieee80211_reg_rule *reg_rule = NULL;
-		struct ieee80211_freq_range *freq_range = NULL;
-		struct ieee80211_power_rule *power_rule = NULL;
-
-		/*
-		 * Must parse if dot11RegulatoryClassesRequired is true;
-		 * we don't support this yet.
-		 */
-		if (triplet->ext.reg_extension_id >=
-		    IEEE80211_COUNTRY_EXTENSION_ID) {
-			country_ie += 3;
-			country_ie_len -= 3;
-			continue;
-		}
-
-		if (triplet->chans.first_channel == 0) {
-			country_ie++;
-			country_ie_len--;
-			break;
-		}
-
-		reg_rule = &rd->reg_rules[i];
-		freq_range = &reg_rule->freq_range;
-		power_rule = &reg_rule->power_rule;
-
-		reg_rule->flags = flags;
-
-		/* 2 GHz */
-		if (band == IEEE80211_BAND_2GHZ)
-			end_channel = triplet->chans.first_channel +
-				triplet->chans.num_channels - 1;
-		else
-			end_channel = triplet->chans.first_channel +
-				(4 * (triplet->chans.num_channels - 1));
-
-		end_channel = max_subband_chan(band,
-					       triplet->chans.first_channel,
-					       end_channel,
-					       triplet->chans.max_power,
-					       &country_ie,
-					       &country_ie_len);
-
-		/*
-		 * The +10 is since the regulatory domain expects
-		 * the actual band edge, not the center frequency, for
-		 * its start and end freqs, assuming 20 MHz bandwidth on
-		 * the channels passed.
-		 */
-		freq_range->start_freq_khz =
-			MHZ_TO_KHZ(ieee80211_channel_to_frequency(
-				triplet->chans.first_channel) - 10);
-		freq_range->end_freq_khz =
-			MHZ_TO_KHZ(ieee80211_channel_to_frequency(
-				end_channel) + 10);
-
-		/*
-		 * These are large arbitrary values we use to intersect later.
-		 * Increment this if we ever support >= 40 MHz channels
-		 * in IEEE 802.11.
-		 */
-		freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40);
-		power_rule->max_antenna_gain = DBI_TO_MBI(100);
-		power_rule->max_eirp = DBM_TO_MBM(triplet->chans.max_power);
-
-		i++;
-
-		if (country_ie_len >= 3) {
-			country_ie += 3;
-			country_ie_len -= 3;
-		}
-
-		BUG_ON(i > NL80211_MAX_SUPP_REG_RULES);
-	}
-
-	return rd;
-}
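The band-specific arithmetic above is worth spelling out: on 2.4 GHz a triplet's channels are numbered consecutively, while on 5 GHz consecutive channels are spaced 4 apart, so a triplet starting at channel 36 with 4 channels covers 36, 40, 44 and 48. A minimal standalone sketch of that calculation (illustration only, not part of the patch; the helper name is made up):

static int triplet_end_channel(int first_channel, int num_channels,
			       bool is_5ghz)
{
	if (!is_5ghz)
		return first_channel + num_channels - 1;
	/* 5 GHz channel numbers step by 4: 36, 40, 44, 48, ... */
	return first_channel + 4 * (num_channels - 1);
}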
-

 /*
  * Helper for regdom_intersect(), this does the real
  * mathematical intersection fun
  */
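For a single pair of rules, the intersection this helper performs amounts to taking the overlap of the two frequency ranges and the minimum of the power limits. A hedged sketch of that core step, using the struct fields seen elsewhere in this diff (the real helper also walks every rule pair and rejects pairs whose ranges do not overlap at all):

static int reg_rule_intersect_sketch(const struct ieee80211_reg_rule *r1,
				     const struct ieee80211_reg_rule *r2,
				     struct ieee80211_reg_rule *out)
{
	/* overlap of the two frequency ranges */
	out->freq_range.start_freq_khz = max(r1->freq_range.start_freq_khz,
					     r2->freq_range.start_freq_khz);
	out->freq_range.end_freq_khz = min(r1->freq_range.end_freq_khz,
					   r2->freq_range.end_freq_khz);
	if (out->freq_range.start_freq_khz >= out->freq_range.end_freq_khz)
		return -EINVAL;	/* ranges are disjoint */
	out->freq_range.max_bandwidth_khz =
		min(r1->freq_range.max_bandwidth_khz,
		    r2->freq_range.max_bandwidth_khz);
	/* the stricter of the two power limits wins */
	out->power_rule.max_eirp = min(r1->power_rule.max_eirp,
				       r2->power_rule.max_eirp);
	out->power_rule.max_antenna_gain =
		min(r1->power_rule.max_antenna_gain,
		    r2->power_rule.max_antenna_gain);
	out->flags = r1->flags | r2->flags;
	return 0;
}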
@@ -1191,7 +699,6 @@ static int freq_reg_info_regd(struct wiphy *wiphy,
 
 	return -EINVAL;
 }
-EXPORT_SYMBOL(freq_reg_info);
 
 int freq_reg_info(struct wiphy *wiphy,
 		  u32 center_freq,
@@ -1205,6 +712,7 @@ int freq_reg_info(struct wiphy *wiphy,
 				  reg_rule,
 				  NULL);
 }
+EXPORT_SYMBOL(freq_reg_info);
 
 /*
  * Note that right now we assume the desired channel bandwidth
@@ -1243,41 +751,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
 			  desired_bw_khz,
 			  &reg_rule);
 
-	if (r) {
-		/*
-		 * This means no regulatory rule was found in the country IE
-		 * with a frequency range on the center_freq's band. Since
-		 * IEEE-802.11 allows for a country IE to have a subset of the
-		 * regulatory information provided in a country, we ignore
-		 * disabling the channel unless at least one reg rule was
-		 * found on the center_freq's band. For details see this
-		 * clarification:
-		 *
-		 * http://tinyurl.com/11d-clarification
-		 */
-		if (r == -ERANGE &&
-		    last_request->initiator ==
-		    NL80211_REGDOM_SET_BY_COUNTRY_IE) {
-			REG_DBG_PRINT("cfg80211: Leaving channel %d MHz "
-				"intact on %s - no rule found in band on "
-				"Country IE\n",
-				chan->center_freq, wiphy_name(wiphy));
-		} else {
-			/*
-			 * In this case we know the country IE has at least
-			 * one reg rule for the band, so we respect its band
-			 * definitions.
-			 */
-			if (last_request->initiator ==
-			    NL80211_REGDOM_SET_BY_COUNTRY_IE)
-				REG_DBG_PRINT("cfg80211: Disabling "
-					"channel %d MHz on %s due to "
-					"Country IE\n",
-					chan->center_freq, wiphy_name(wiphy));
-			flags |= IEEE80211_CHAN_DISABLED;
-			chan->flags = flags;
-		}
+	if (r)
 		return;
-	}
 
 	power_rule = &reg_rule->power_rule;
 	freq_range = &reg_rule->freq_range;
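Once a rule is found, handle_channel() folds it into the channel: the regulatory flags are merged in, and the power limits are applied as minima against the hardware's original values. Roughly — a sketch only, exact fields vary by kernel version; MBM_TO_DBM/MBI_TO_DBI convert the regulatory milli-units back to dBm/dBi:

	chan->flags = flags | map_regdom_flags(reg_rule->flags);
	chan->max_antenna_gain = min(chan->orig_mag,
		(int) MBI_TO_DBI(power_rule->max_antenna_gain));
	chan->max_power = min(chan->orig_mpwr,
		(int) MBM_TO_DBM(power_rule->max_eirp));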
@@ -1831,6 +1306,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
 {
 	int r = 0;
 	struct wiphy *wiphy = NULL;
+	enum nl80211_reg_initiator initiator = reg_request->initiator;
 
 	BUG_ON(!reg_request->alpha2);
 
@@ -1850,7 +1326,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
 	/* This is required so that the orig_* parameters are saved */
 	if (r == -EALREADY && wiphy &&
 	    wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY)
-		wiphy_update_regulatory(wiphy, reg_request->initiator);
+		wiphy_update_regulatory(wiphy, initiator);
 out:
 	mutex_unlock(&reg_mutex);
 	mutex_unlock(&cfg80211_mutex);
@@ -2008,35 +1484,6 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
 }
 EXPORT_SYMBOL(regulatory_hint);
 
-/* Caller must hold reg_mutex */
-static bool reg_same_country_ie_hint(struct wiphy *wiphy,
-				     u32 country_ie_checksum)
-{
-	struct wiphy *request_wiphy;
-
-	assert_reg_lock();
-
-	if (unlikely(last_request->initiator !=
-		     NL80211_REGDOM_SET_BY_COUNTRY_IE))
-		return false;
-
-	request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
-
-	if (!request_wiphy)
-		return false;
-
-	if (likely(request_wiphy != wiphy))
-		return !country_ie_integrity_changes(country_ie_checksum);
-	/*
-	 * We should not have let these through at this point; they
-	 * should have been picked up earlier by the first alpha2 check
-	 * on the device.
-	 */
-	if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum)))
-		return true;
-	return false;
-}
-
 /*
  * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and
  * therefore cannot iterate over the rdev list here.
@@ -2046,9 +1493,7 @@ void regulatory_hint_11d(struct wiphy *wiphy,
 			 u8 *country_ie,
 			 u8 country_ie_len)
 {
-	struct ieee80211_regdomain *rd = NULL;
 	char alpha2[2];
-	u32 checksum = 0;
 	enum environment_cap env = ENVIRON_ANY;
 	struct regulatory_request *request;
 
@@ -2064,14 +1509,6 @@ void regulatory_hint_11d(struct wiphy *wiphy,
 	if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN)
 		goto out;
 
-	/*
-	 * Pending country IE processing, this can happen after we
-	 * call CRDA and wait for a response if a beacon was received before
-	 * we were able to process the last regulatory_hint_11d() call
-	 */
-	if (country_ie_regdomain)
-		goto out;
-
 	alpha2[0] = country_ie[0];
 	alpha2[1] = country_ie[1];
 
@@ -2090,39 +1527,14 @@ void regulatory_hint_11d(struct wiphy *wiphy,
 	      wiphy_idx_valid(last_request->wiphy_idx)))
 		goto out;
 
-	rd = country_ie_2_rd(band, country_ie, country_ie_len, &checksum);
-	if (!rd) {
-		REG_DBG_PRINT("cfg80211: Ignoring bogus country IE\n");
-		goto out;
-	}
-
-	/*
-	 * This will not happen right now but we leave it here for the
-	 * future when we want to add suspend/resume support and having
-	 * the user move to another country after doing so, or having the user
-	 * move to another AP. Right now we just trust the first AP.
-	 *
-	 * If we hit this before we add this support we want to be informed of
-	 * it as it would indicate a mistake in the current design
-	 */
-	if (WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))
-		goto free_rd_out;
-
 	request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
 	if (!request)
-		goto free_rd_out;
-
-	/*
-	 * We keep this around for when CRDA comes back with a response so
-	 * we can intersect with that
-	 */
-	country_ie_regdomain = rd;
+		goto out;
 
 	request->wiphy_idx = get_wiphy_idx(wiphy);
-	request->alpha2[0] = rd->alpha2[0];
-	request->alpha2[1] = rd->alpha2[1];
+	request->alpha2[0] = alpha2[0];
+	request->alpha2[1] = alpha2[1];
 	request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE;
-	request->country_ie_checksum = checksum;
 	request->country_ie_env = env;
 
 	mutex_unlock(&reg_mutex);
@@ -2131,8 +1543,6 @@ void regulatory_hint_11d(struct wiphy *wiphy,
 
 	return;
 
-free_rd_out:
-	kfree(rd);
 out:
 	mutex_unlock(&reg_mutex);
 }
@@ -2356,10 +1766,10 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
 					rdev->country_ie_alpha2[1]);
 			} else
 				printk(KERN_INFO "cfg80211: Current regulatory "
-					"domain intersected: \n");
+					"domain intersected:\n");
 		} else
 			printk(KERN_INFO "cfg80211: Current regulatory "
-				"domain intersected: \n");
+				"domain intersected:\n");
 	} else if (is_world_regdom(rd->alpha2))
 		printk(KERN_INFO "cfg80211: World regulatory "
 			"domain updated:\n");
@@ -2383,33 +1793,6 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd)
 	print_rd_rules(rd);
 }
 
-#ifdef CONFIG_CFG80211_REG_DEBUG
-static void reg_country_ie_process_debug(
-	const struct ieee80211_regdomain *rd,
-	const struct ieee80211_regdomain *country_ie_regdomain,
-	const struct ieee80211_regdomain *intersected_rd)
-{
-	printk(KERN_DEBUG "cfg80211: Received country IE:\n");
-	print_regdomain_info(country_ie_regdomain);
-	printk(KERN_DEBUG "cfg80211: CRDA thinks this should be applied:\n");
-	print_regdomain_info(rd);
-	if (intersected_rd) {
-		printk(KERN_DEBUG "cfg80211: We intersect both of these "
-			"and get:\n");
-		print_regdomain_info(intersected_rd);
-		return;
-	}
-	printk(KERN_DEBUG "cfg80211: Intersection between both failed\n");
-}
-#else
-static inline void reg_country_ie_process_debug(
-	const struct ieee80211_regdomain *rd,
-	const struct ieee80211_regdomain *country_ie_regdomain,
-	const struct ieee80211_regdomain *intersected_rd)
-{
-}
-#endif
-
 /* Takes ownership of rd only if it doesn't fail */
 static int __set_regdom(const struct ieee80211_regdomain *rd)
 {
@@ -2521,34 +1904,6 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
 		return 0;
 	}
 
-	/*
-	 * Country IE requests are handled a bit differently, we intersect
-	 * the country IE rd with what CRDA believes that country should have
-	 */
-
-	/*
-	 * Userspace could have sent two replies with only
-	 * one kernel request. By the second reply we would have
-	 * already processed and consumed the country_ie_regdomain.
-	 */
-	if (!country_ie_regdomain)
-		return -EALREADY;
-	BUG_ON(rd == country_ie_regdomain);
-
-	/*
-	 * Intersect what CRDA returned and what we had built
-	 * from the Country IE received
-	 */
-
-	intersected_rd = regdom_intersect(rd, country_ie_regdomain);
-
-	reg_country_ie_process_debug(rd,
-				     country_ie_regdomain,
-				     intersected_rd);
-
-	kfree(country_ie_regdomain);
-	country_ie_regdomain = NULL;
-
 	if (!intersected_rd)
 		return -EINVAL;
 
@@ -2630,7 +1985,7 @@ out:
 	mutex_unlock(&reg_mutex);
 }
 
-int regulatory_init(void)
+int __init regulatory_init(void)
 {
 	int err = 0;
 
@@ -2676,7 +2031,7 @@ int regulatory_init(void)
 	return 0;
 }
 
-void regulatory_exit(void)
+void /* __init_or_exit */ regulatory_exit(void)
 {
 	struct regulatory_request *reg_request, *tmp;
 	struct reg_beacon *reg_beacon, *btmp;
@@ -2688,9 +2043,6 @@ void regulatory_exit(void)
 
 	reset_regdomains();
 
-	kfree(country_ie_regdomain);
-	country_ie_regdomain = NULL;
-
 	kfree(last_request);
 
 	platform_device_unregister(reg_pdev);
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index b26224a9f3bc..c4695d07af23 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -10,7 +10,7 @@ int regulatory_hint_user(const char *alpha2);
 
 void reg_device_remove(struct wiphy *wiphy);
 
-int regulatory_init(void);
+int __init regulatory_init(void);
 void regulatory_exit(void);
 
 int set_regdom(const struct ieee80211_regdomain *rd);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index a026c6d56bd3..5ca8c7180141 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -275,6 +275,7 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
 {
 	struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
 	struct cfg80211_internal_bss *bss, *res = NULL;
+	unsigned long now = jiffies;
 
 	spin_lock_bh(&dev->bss_lock);
 
@@ -283,6 +284,10 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
 			continue;
 		if (channel && bss->pub.channel != channel)
 			continue;
+		/* Don't get expired BSS structs */
+		if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) &&
+		    !atomic_read(&bss->hold))
+			continue;
 		if (is_bss(&bss->pub, bssid, ssid, ssid_len)) {
 			res = bss;
 			kref_get(&res->ref);
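The added expiry test uses time_after() from <linux/jiffies.h> rather than a plain comparison so it stays correct when the jiffies counter wraps. A minimal illustration:

	unsigned long deadline = bss_ts + IEEE80211_SCAN_RESULT_EXPIRE;
	bool expired;

	/* wrong: a direct comparison misfires when jiffies wraps around */
	expired = jiffies > deadline;

	/* right: time_after() compares via a signed difference, so it is wrap-safe */
	expired = time_after(jiffies, deadline);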
@@ -515,7 +520,7 @@ cfg80211_inform_bss(struct wiphy *wiphy,
 
 	privsz = wiphy->bss_priv_size;
 
-	if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC &&
+	if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
 		    (signal < 0 || signal > 100)))
 		return NULL;
 
@@ -571,7 +576,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
 			u.probe_resp.variable);
 	size_t privsz = wiphy->bss_priv_size;
 
-	if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC &&
+	if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC &&
 		    (signal < 0 || signal > 100)))
 		return NULL;
 
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index f4dfd5f5f2ea..a8c2d6b877ae 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -35,7 +35,7 @@ struct cfg80211_conn {
 	bool auto_auth, prev_bssid_valid;
 };
 
-bool cfg80211_is_all_idle(void)
+static bool cfg80211_is_all_idle(void)
 {
 	struct cfg80211_registered_device *rdev;
 	struct wireless_dev *wdev;
@@ -171,7 +171,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 					    params->ssid, params->ssid_len,
 					    NULL, 0,
 					    params->key, params->key_len,
-					    params->key_idx);
+					    params->key_idx, false);
 	case CFG80211_CONN_ASSOCIATE_NEXT:
 		BUG_ON(!rdev->ops->assoc);
 		wdev->conn->state = CFG80211_CONN_ASSOCIATING;
@@ -186,12 +186,13 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 		if (err)
 			__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 					       NULL, 0,
-					       WLAN_REASON_DEAUTH_LEAVING);
+					       WLAN_REASON_DEAUTH_LEAVING,
+					       false);
 		return err;
 	case CFG80211_CONN_DEAUTH_ASSOC_FAIL:
 		__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 				       NULL, 0,
-				       WLAN_REASON_DEAUTH_LEAVING);
+				       WLAN_REASON_DEAUTH_LEAVING, false);
 		/* return an error so that we call __cfg80211_connect_result() */
 		return -EINVAL;
 	default:
@@ -517,12 +518,16 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
 	ev->type = EVENT_CONNECT_RESULT;
 	if (bssid)
 		memcpy(ev->cr.bssid, bssid, ETH_ALEN);
-	ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev);
-	ev->cr.req_ie_len = req_ie_len;
-	memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len);
-	ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len;
-	ev->cr.resp_ie_len = resp_ie_len;
-	memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len);
+	if (req_ie_len) {
+		ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev);
+		ev->cr.req_ie_len = req_ie_len;
+		memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len);
+	}
+	if (resp_ie_len) {
+		ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len;
+		ev->cr.resp_ie_len = resp_ie_len;
+		memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len);
+	}
 	ev->cr.status = status;
 
 	spin_lock_irqsave(&wdev->event_lock, flags);
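The request/response IEs live in the same allocation as the event itself; the kzalloc for this path (outside the hunk shown) is, in essence:

	/* one block: | struct cfg80211_event | req IEs | resp IEs | */
	ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp);
	if (!ev)
		return;

so req_ie/resp_ie point just past the header. The new length guards leave those pointers NULL when an IE is absent, instead of aiming them at a zero-byte region.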
@@ -676,7 +681,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 			continue;
 		bssid = wdev->auth_bsses[i]->pub.bssid;
 		ret = __cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0,
-					     WLAN_REASON_DEAUTH_LEAVING);
+					     WLAN_REASON_DEAUTH_LEAVING,
+					     false);
 		WARN(ret, "deauth failed: %d\n", ret);
 	}
 }
@@ -735,7 +741,6 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
 			   const u8 *prev_bssid)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct ieee80211_channel *chan;
 	struct cfg80211_bss *bss = NULL;
 	int err;
 
@@ -744,10 +749,6 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
 	if (wdev->sme_state != CFG80211_SME_IDLE)
 		return -EALREADY;
 
-	chan = rdev_fixed_channel(rdev, wdev);
-	if (chan && chan != connect->channel)
-		return -EBUSY;
-
 	if (WARN_ON(wdev->connect_keys)) {
 		kfree(wdev->connect_keys);
 		wdev->connect_keys = NULL;
@@ -935,7 +936,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev,
 		/* wdev->conn->params.bssid must be set if > SCANNING */
 		err = __cfg80211_mlme_deauth(rdev, dev,
 					     wdev->conn->params.bssid,
-					     NULL, 0, reason);
+					     NULL, 0, reason, false);
 		if (err)
 			return err;
 	} else {
@@ -991,7 +992,8 @@ void cfg80211_sme_disassoc(struct net_device *dev, int idx)
 
 	memcpy(bssid, wdev->auth_bsses[idx]->pub.bssid, ETH_ALEN);
 	if (__cfg80211_mlme_deauth(rdev, dev, bssid,
-				   NULL, 0, WLAN_REASON_DEAUTH_LEAVING)) {
+				   NULL, 0, WLAN_REASON_DEAUTH_LEAVING,
+				   false)) {
 		/* whatever -- assume gone anyway */
 		cfg80211_unhold_bss(wdev->auth_bsses[idx]);
 		cfg80211_put_bss(&wdev->auth_bsses[idx]->pub);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index d3574a4eb3ba..0c8a1e8b7690 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -331,11 +331,18 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 		if (iftype == NL80211_IFTYPE_MESH_POINT) {
 			struct ieee80211s_hdr *meshdr =
 				(struct ieee80211s_hdr *) (skb->data + hdrlen);
-			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
+			/* make sure meshdr->flags is on the linear part */
+			if (!pskb_may_pull(skb, hdrlen + 1))
+				return -1;
 			if (meshdr->flags & MESH_FLAGS_AE_A5_A6) {
-				memcpy(dst, meshdr->eaddr1, ETH_ALEN);
-				memcpy(src, meshdr->eaddr2, ETH_ALEN);
+				skb_copy_bits(skb, hdrlen +
+					offsetof(struct ieee80211s_hdr, eaddr1),
+					dst, ETH_ALEN);
+				skb_copy_bits(skb, hdrlen +
+					offsetof(struct ieee80211s_hdr, eaddr2),
+					src, ETH_ALEN);
 			}
+			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
 		}
 		break;
 	case cpu_to_le16(IEEE80211_FCTL_FROMDS):
@@ -347,9 +354,14 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 		if (iftype == NL80211_IFTYPE_MESH_POINT) {
 			struct ieee80211s_hdr *meshdr =
 				(struct ieee80211s_hdr *) (skb->data + hdrlen);
-			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
+			/* make sure meshdr->flags is on the linear part */
+			if (!pskb_may_pull(skb, hdrlen + 1))
+				return -1;
 			if (meshdr->flags & MESH_FLAGS_AE_A4)
-				memcpy(src, meshdr->eaddr1, ETH_ALEN);
+				skb_copy_bits(skb, hdrlen +
+					offsetof(struct ieee80211s_hdr, eaddr1),
+					src, ETH_ALEN);
+			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
 		}
 		break;
 	case cpu_to_le16(0):
@@ -358,7 +370,7 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 		break;
 	}
 
-	if (unlikely(skb->len - hdrlen < 8))
+	if (!pskb_may_pull(skb, hdrlen + 8))
 		return -1;
 
 	payload = skb->data + hdrlen;
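These pskb_may_pull() conversions matter because skb payload may sit in unmapped page fragments: pskb_may_pull(skb, n) guarantees the first n bytes are in the linear, directly addressable area (pulling data in if necessary, failing on truncated frames), while skb_copy_bits() reads across fragments without linearizing. The resulting pattern:

	/* need the first hdrlen + 8 bytes directly addressable */
	if (!pskb_may_pull(skb, hdrlen + 8))
		return -1;		/* frame too short, or pull failed */
	payload = skb->data + hdrlen;	/* now safe to dereference */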
@@ -758,8 +770,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 		return -EOPNOTSUPP;
 
 	/* if it's part of a bridge, reject changing type to station/ibss */
-	if (dev->br_port && (ntype == NL80211_IFTYPE_ADHOC ||
-			     ntype == NL80211_IFTYPE_STATION))
+	if ((dev->priv_flags & IFF_BRIDGE_PORT) &&
+	    (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION))
 		return -EBUSY;
 
 	if (ntype != otype) {
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index a60a2773b497..bb5e0a5ecfa1 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -782,16 +782,22 @@ int cfg80211_wext_siwfreq(struct net_device *dev,
 		return cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra);
 	case NL80211_IFTYPE_ADHOC:
 		return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra);
-	default:
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_WDS:
+	case NL80211_IFTYPE_MESH_POINT:
 		freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
 		if (freq < 0)
 			return freq;
 		if (freq == 0)
 			return -EINVAL;
+		wdev_lock(wdev);
 		mutex_lock(&rdev->devlist_mtx);
-		err = rdev_set_freq(rdev, NULL, freq, NL80211_CHAN_NO_HT);
+		err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
 		mutex_unlock(&rdev->devlist_mtx);
+		wdev_unlock(wdev);
 		return err;
+	default:
+		return -EOPNOTSUPP;
 	}
 }
 EXPORT_SYMBOL_GPL(cfg80211_wext_siwfreq);
@@ -801,7 +807,6 @@ int cfg80211_wext_giwfreq(struct net_device *dev,
 			  struct iw_freq *freq, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
 
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_STATION:
@@ -809,9 +814,9 @@ int cfg80211_wext_giwfreq(struct net_device *dev,
 	case NL80211_IFTYPE_ADHOC:
 		return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra);
 	default:
-		if (!rdev->channel)
+		if (!wdev->channel)
 			return -EINVAL;
-		freq->m = rdev->channel->center_freq;
+		freq->m = wdev->channel->center_freq;
 		freq->e = 6;
 		return 0;
 	}
@@ -824,7 +829,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
-	enum tx_power_setting type;
+	enum nl80211_tx_power_setting type;
 	int dbm = 0;
 
 	if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM)
@@ -847,7 +852,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 		if (data->txpower.value < 0)
 			return -EINVAL;
 		dbm = data->txpower.value;
-		type = TX_POWER_FIXED;
+		type = NL80211_TX_POWER_FIXED;
 		/* TODO: do regulatory check! */
 	} else {
 		/*
@@ -855,10 +860,10 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 		 * passed in from userland.
 		 */
 		if (data->txpower.value < 0) {
-			type = TX_POWER_AUTOMATIC;
+			type = NL80211_TX_POWER_AUTOMATIC;
 		} else {
 			dbm = data->txpower.value;
-			type = TX_POWER_LIMITED;
+			type = NL80211_TX_POWER_LIMITED;
 		}
 		}
 	} else {
@@ -867,7 +872,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev,
 		return 0;
 	}
 
-	return rdev->ops->set_tx_power(wdev->wiphy, type, dbm);
+	return rdev->ops->set_tx_power(wdev->wiphy, type, DBM_TO_MBM(dbm));
 }
 EXPORT_SYMBOL_GPL(cfg80211_wext_siwtxpower);
 
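The set_tx_power op now takes mBm (hundredths of a dBm) rather than dBm; DBM_TO_MBM, as defined in the cfg80211 headers, is just a scale by 100:

	#define DBM_TO_MBM(gain) ((gain) * 100)

	int mbm = DBM_TO_MBM(20);	/* 20 dBm -> 2000 mBm */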
@@ -1466,6 +1471,7 @@ int cfg80211_wext_siwpmksa(struct net_device *dev,
 		return -EOPNOTSUPP;
 	}
 }
+EXPORT_SYMBOL_GPL(cfg80211_wext_siwpmksa);
 
 static const iw_handler cfg80211_handlers[] = {
 	[IW_IOCTL_IDX(SIOCGIWNAME)]	= (iw_handler) cfg80211_wext_giwname,
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 4f5a47091fde..0ef17bc42bac 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -29,226 +29,226 @@ typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *,
  * know about.
  */
 static const struct iw_ioctl_description standard_ioctl[] = {
-	[SIOCSIWCOMMIT - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWCOMMIT)] = {
 		.header_type	= IW_HEADER_TYPE_NULL,
 	},
-	[SIOCGIWNAME - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWNAME)] = {
 		.header_type	= IW_HEADER_TYPE_CHAR,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWNWID - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWNWID)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 		.flags		= IW_DESCR_FLAG_EVENT,
 	},
-	[SIOCGIWNWID - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWNWID)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWFREQ - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWFREQ)] = {
 		.header_type	= IW_HEADER_TYPE_FREQ,
 		.flags		= IW_DESCR_FLAG_EVENT,
 	},
-	[SIOCGIWFREQ - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWFREQ)] = {
 		.header_type	= IW_HEADER_TYPE_FREQ,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWMODE - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWMODE)] = {
 		.header_type	= IW_HEADER_TYPE_UINT,
 		.flags		= IW_DESCR_FLAG_EVENT,
 	},
-	[SIOCGIWMODE - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWMODE)] = {
 		.header_type	= IW_HEADER_TYPE_UINT,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWSENS - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWSENS)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWSENS - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWSENS)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWRANGE - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWRANGE)] = {
 		.header_type	= IW_HEADER_TYPE_NULL,
 	},
-	[SIOCGIWRANGE - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWRANGE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= sizeof(struct iw_range),
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWPRIV - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWPRIV)] = {
 		.header_type	= IW_HEADER_TYPE_NULL,
 	},
-	[SIOCGIWPRIV - SIOCIWFIRST] = { /* (handled directly by us) */
+	[IW_IOCTL_IDX(SIOCGIWPRIV)] = { /* (handled directly by us) */
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct iw_priv_args),
 		.max_tokens	= 16,
 		.flags		= IW_DESCR_FLAG_NOMAX,
 	},
-	[SIOCSIWSTATS - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWSTATS)] = {
 		.header_type	= IW_HEADER_TYPE_NULL,
 	},
-	[SIOCGIWSTATS - SIOCIWFIRST] = { /* (handled directly by us) */
+	[IW_IOCTL_IDX(SIOCGIWSTATS)] = { /* (handled directly by us) */
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= sizeof(struct iw_statistics),
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWSPY - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWSPY)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct sockaddr),
 		.max_tokens	= IW_MAX_SPY,
 	},
-	[SIOCGIWSPY - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWSPY)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct sockaddr) +
 				  sizeof(struct iw_quality),
 		.max_tokens	= IW_MAX_SPY,
 	},
-	[SIOCSIWTHRSPY - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWTHRSPY)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct iw_thrspy),
 		.min_tokens	= 1,
 		.max_tokens	= 1,
 	},
-	[SIOCGIWTHRSPY - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWTHRSPY)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct iw_thrspy),
 		.min_tokens	= 1,
 		.max_tokens	= 1,
 	},
-	[SIOCSIWAP - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWAP)] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
-	[SIOCGIWAP - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWAP)] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWMLME - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWMLME)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.min_tokens	= sizeof(struct iw_mlme),
 		.max_tokens	= sizeof(struct iw_mlme),
 	},
-	[SIOCGIWAPLIST - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWAPLIST)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct sockaddr) +
 				  sizeof(struct iw_quality),
 		.max_tokens	= IW_MAX_AP,
 		.flags		= IW_DESCR_FLAG_NOMAX,
 	},
-	[SIOCSIWSCAN - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWSCAN)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.min_tokens	= 0,
 		.max_tokens	= sizeof(struct iw_scan_req),
 	},
-	[SIOCGIWSCAN - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWSCAN)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_SCAN_MAX_DATA,
 		.flags		= IW_DESCR_FLAG_NOMAX,
 	},
-	[SIOCSIWESSID - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWESSID)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ESSID_MAX_SIZE,
 		.flags		= IW_DESCR_FLAG_EVENT,
 	},
-	[SIOCGIWESSID - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWESSID)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ESSID_MAX_SIZE,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWNICKN - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWNICKN)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ESSID_MAX_SIZE,
 	},
-	[SIOCGIWNICKN - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWNICKN)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ESSID_MAX_SIZE,
 	},
-	[SIOCSIWRATE - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWRATE)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWRATE - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWRATE)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWRTS - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWRTS)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWRTS - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWRTS)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWFRAG - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWFRAG)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWFRAG - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWFRAG)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWTXPOW - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWTXPOW)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWTXPOW - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWTXPOW)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWRETRY - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWRETRY)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWRETRY - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWRETRY)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWENCODE - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWENCODE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ENCODING_TOKEN_MAX,
 		.flags		= IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT,
 	},
-	[SIOCGIWENCODE - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWENCODE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ENCODING_TOKEN_MAX,
 		.flags		= IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT,
 	},
-	[SIOCSIWPOWER - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWPOWER)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWPOWER - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWPOWER)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWGENIE - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWGENIE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_GENERIC_IE_MAX,
 	},
-	[SIOCGIWGENIE - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWGENIE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_GENERIC_IE_MAX,
 	},
-	[SIOCSIWAUTH - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWAUTH)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWAUTH - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWAUTH)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWENCODEEXT - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWENCODEEXT)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.min_tokens	= sizeof(struct iw_encode_ext),
 		.max_tokens	= sizeof(struct iw_encode_ext) +
 				  IW_ENCODING_TOKEN_MAX,
 	},
-	[SIOCGIWENCODEEXT - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWENCODEEXT)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.min_tokens	= sizeof(struct iw_encode_ext),
 		.max_tokens	= sizeof(struct iw_encode_ext) +
 				  IW_ENCODING_TOKEN_MAX,
 	},
-	[SIOCSIWPMKSA - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWPMKSA)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.min_tokens	= sizeof(struct iw_pmksa),
@@ -262,44 +262,44 @@ static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl);
  * we know about.
  */
 static const struct iw_ioctl_description standard_event[] = {
-	[IWEVTXDROP - IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVTXDROP)] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
-	[IWEVQUAL - IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVQUAL)] = {
 		.header_type	= IW_HEADER_TYPE_QUAL,
 	},
-	[IWEVCUSTOM - IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVCUSTOM)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_CUSTOM_MAX,
 	},
-	[IWEVREGISTERED - IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVREGISTERED)] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
-	[IWEVEXPIRED - IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVEXPIRED)] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
-	[IWEVGENIE - IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVGENIE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_GENERIC_IE_MAX,
 	},
-	[IWEVMICHAELMICFAILURE - IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVMICHAELMICFAILURE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= sizeof(struct iw_michaelmicfailure),
 	},
-	[IWEVASSOCREQIE - IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVASSOCREQIE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_GENERIC_IE_MAX,
 	},
-	[IWEVASSOCRESPIE - IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVASSOCRESPIE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_GENERIC_IE_MAX,
 	},
-	[IWEVPMKIDCAND - IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVPMKIDCAND)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= sizeof(struct iw_pmkid_cand),
@@ -450,11 +450,11 @@ void wireless_send_event(struct net_device * dev,
 
 	/* Get the description of the Event */
 	if (cmd <= SIOCIWLAST) {
-		cmd_index = cmd - SIOCIWFIRST;
+		cmd_index = IW_IOCTL_IDX(cmd);
 		if (cmd_index < standard_ioctl_num)
 			descr = &(standard_ioctl[cmd_index]);
 	} else {
-		cmd_index = cmd - IWEVFIRST;
+		cmd_index = IW_EVENT_IDX(cmd);
 		if (cmd_index < standard_event_num)
 			descr = &(standard_event[cmd_index]);
 	}
@@ -663,7 +663,7 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
 		return NULL;
 
 	/* Try as a standard command */
-	index = cmd - SIOCIWFIRST;
+	index = IW_IOCTL_IDX(cmd);
 	if (index < handlers->num_standard)
 		return handlers->standard[index];
 
@@ -955,9 +955,9 @@ static int ioctl_standard_call(struct net_device * dev,
 	int ret = -EINVAL;
 
 	/* Get the description of the IOCTL */
-	if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+	if (IW_IOCTL_IDX(cmd) >= standard_ioctl_num)
 		return -EOPNOTSUPP;
-	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
+	descr = &(standard_ioctl[IW_IOCTL_IDX(cmd)]);
 
 	/* Check if we have a pointer to user space data or not */
 	if (descr->header_type != IW_HEADER_TYPE_POINT) {
@@ -1013,7 +1013,7 @@ static int compat_standard_call(struct net_device *dev,
 	struct iw_point iwp;
 	int err;
 
-	descr = standard_ioctl + (cmd - SIOCIWFIRST);
+	descr = standard_ioctl + IW_IOCTL_IDX(cmd);
 
 	if (descr->header_type != IW_HEADER_TYPE_POINT)
 		return ioctl_standard_call(dev, iwr, cmd, info, handler);
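All of the wext-core changes replace open-coded cmd - SIOCIWFIRST / cmd - IWEVFIRST offsets with the IW_IOCTL_IDX()/IW_EVENT_IDX() macros, so the index arithmetic lives in one place. Their shape in include/net/iw_handler.h is simply:

	#define IW_IOCTL_IDX(cmd)	((cmd) - SIOCIWFIRST)
	#define IW_EVENT_IDX(cmd)	((cmd) - IWEVFIRST)

and the usual bounds-checked table lookup then reads:

	idx = IW_IOCTL_IDX(cmd);
	if (idx < standard_ioctl_num)
		descr = &standard_ioctl[idx];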
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index d5c6140f4cb8..9818198add8a 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -108,7 +108,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
 
 	/* SSID is not set, we just want to switch channel */
 	if (chan && !wdev->wext.connect.ssid_len) {
-		err = rdev_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
+		err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
 		goto out;
 	}
 
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 36e84e13c6aa..5e86d4e97dce 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -453,7 +453,6 @@ static int x25_setsockopt(struct socket *sock, int level, int optname,
 	struct sock *sk = sock->sk;
 	int rc = -ENOPROTOOPT;
 
-	lock_kernel();
 	if (level != SOL_X25 || optname != X25_QBITINCL)
 		goto out;
 
@@ -465,10 +464,12 @@ static int x25_setsockopt(struct socket *sock, int level, int optname,
 	if (get_user(opt, (int __user *)optval))
 		goto out;
 
-	x25_sk(sk)->qbitincl = !!opt;
+	if (opt)
+		set_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
+	else
+		clear_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
 	rc = 0;
 out:
-	unlock_kernel();
 	return rc;
 }
 
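The X.25 conversion folds the old per-option char fields (qbitincl, accptapprv) into one flags word driven by set_bit()/clear_bit()/test_bit(), which operate atomically on an unsigned long — this is what lets the lock_kernel()/unlock_kernel() pairs go. The flag names are presumably small bit indices in <net/x25.h>, along the lines of:

	#define X25_Q_BIT_FLAG		0	/* bit numbers, not masks */
	#define X25_INTERRUPT_FLAG	1
	#define X25_ACCPT_APPRV_FLAG	2

Note that set_bit() and friends take a bit number rather than a mask, so the old X25_DENY_ACCPT_APPRV-style mask constants disappear along with the fields.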
@@ -478,7 +479,6 @@ static int x25_getsockopt(struct socket *sock, int level, int optname,
 	struct sock *sk = sock->sk;
 	int val, len, rc = -ENOPROTOOPT;
 
-	lock_kernel();
 	if (level != SOL_X25 || optname != X25_QBITINCL)
 		goto out;
 
@@ -496,10 +496,9 @@ static int x25_getsockopt(struct socket *sock, int level, int optname,
 	if (put_user(len, optlen))
 		goto out;
 
-	val = x25_sk(sk)->qbitincl;
+	val = test_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
 	rc = copy_to_user(optval, &val, len) ? -EFAULT : 0;
 out:
-	unlock_kernel();
 	return rc;
 }
 
@@ -583,7 +582,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol,
 	x25->t2 = sysctl_x25_ack_holdback_timeout;
 	x25->state = X25_STATE_0;
 	x25->cudmatchlength = 0;
-	x25->accptapprv = X25_DENY_ACCPT_APPRV;	/* normally no cud */
+	set_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);	/* normally no cud */
 						/* on call accept */
 
 	x25->facilities.winsize_in = X25_DEFAULT_WINDOW_SIZE;
@@ -632,12 +631,12 @@ static struct sock *x25_make_new(struct sock *osk)
 	x25->t22 = ox25->t22;
 	x25->t23 = ox25->t23;
 	x25->t2 = ox25->t2;
+	x25->flags = ox25->flags;
 	x25->facilities = ox25->facilities;
-	x25->qbitincl = ox25->qbitincl;
 	x25->dte_facilities = ox25->dte_facilities;
 	x25->cudmatchlength = ox25->cudmatchlength;
-	x25->accptapprv = ox25->accptapprv;
 
+	clear_bit(X25_INTERRUPT_FLAG, &x25->flags);
 	x25_init_timers(sk);
 out:
 	return sk;
@@ -719,7 +718,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk)
 	DECLARE_WAITQUEUE(wait, current);
 	int rc;
 
-	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
 	for (;;) {
 		__set_current_state(TASK_INTERRUPTIBLE);
 		rc = -ERESTARTSYS;
@@ -739,7 +738,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk)
 		break;
 	}
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 	return rc;
 }
 
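sk->sk_sleep was removed from struct sock in this kernel cycle; sk_sleep(sk) is the accessor that hands back the socket's wait-queue head, so callers no longer reach into the struct directly. The classic wait-loop pattern is unchanged apart from that call:

	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(sk_sleep(sk), &wait);	/* was sk->sk_sleep */
	for (;;) {
		__set_current_state(TASK_INTERRUPTIBLE);
		if (connection_established)	/* whatever condition we wait for */
			break;
		schedule();
	}
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);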
@@ -839,7 +838,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
 	DECLARE_WAITQUEUE(wait, current);
 	int rc = 0;
 
-	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
 	for (;;) {
 		__set_current_state(TASK_INTERRUPTIBLE);
 		if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -859,7 +858,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
 		break;
 	}
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 	return rc;
 }
 
@@ -1053,8 +1052,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 	makex25->vc_facil_mask &= ~X25_MASK_CALLING_AE;
 	makex25->cudmatchlength = x25_sk(sk)->cudmatchlength;
 
-	/* Normally all calls are accepted immediatly */
-	if(makex25->accptapprv & X25_DENY_ACCPT_APPRV) {
+	/* Normally all calls are accepted immediately */
+	if (test_bit(X25_ACCPT_APPRV_FLAG, &makex25->flags)) {
 		x25_write_internal(make, X25_CALL_ACCEPTED);
 		makex25->state = X25_STATE_3;
 	}
@@ -1186,7 +1185,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1186 * If the Q BIT Include socket option is in force, the first 1185 * If the Q BIT Include socket option is in force, the first
1187 * byte of the user data is the logical value of the Q Bit. 1186 * byte of the user data is the logical value of the Q Bit.
1188 */ 1187 */
1189 if (x25->qbitincl) { 1188 if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) {
1190 qbit = skb->data[0]; 1189 qbit = skb->data[0];
1191 skb_pull(skb, 1); 1190 skb_pull(skb, 1);
1192 } 1191 }
@@ -1242,7 +1241,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1242 len = rc; 1241 len = rc;
1243 if (rc < 0) 1242 if (rc < 0)
1244 kfree_skb(skb); 1243 kfree_skb(skb);
1245 else if (x25->qbitincl) 1244 else if (test_bit(X25_Q_BIT_FLAG, &x25->flags))
1246 len++; 1245 len++;
1247 } 1246 }
1248 1247
@@ -1307,7 +1306,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
1307 /* 1306 /*
1308 * No Q bit information on Interrupt data. 1307 * No Q bit information on Interrupt data.
1309 */ 1308 */
1310 if (x25->qbitincl) { 1309 if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) {
1311 asmptr = skb_push(skb, 1); 1310 asmptr = skb_push(skb, 1);
1312 *asmptr = 0x00; 1311 *asmptr = 0x00;
1313 } 1312 }
@@ -1325,7 +1324,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
1325 skb_pull(skb, x25->neighbour->extended ? 1324 skb_pull(skb, x25->neighbour->extended ?
1326 X25_EXT_MIN_LEN : X25_STD_MIN_LEN); 1325 X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
1327 1326
1328 if (x25->qbitincl) { 1327 if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) {
1329 asmptr = skb_push(skb, 1); 1328 asmptr = skb_push(skb, 1);
1330 *asmptr = qbit; 1329 *asmptr = qbit;
1331 } 1330 }
@@ -1576,7 +1575,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1576 rc = -EINVAL; 1575 rc = -EINVAL;
1577 if (sk->sk_state != TCP_CLOSE) 1576 if (sk->sk_state != TCP_CLOSE)
1578 break; 1577 break;
1579 x25->accptapprv = X25_ALLOW_ACCPT_APPRV; 1578 clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);
1580 rc = 0; 1579 rc = 0;
1581 break; 1580 break;
1582 } 1581 }
@@ -1585,7 +1584,8 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1585 rc = -EINVAL; 1584 rc = -EINVAL;
1586 if (sk->sk_state != TCP_ESTABLISHED) 1585 if (sk->sk_state != TCP_ESTABLISHED)
1587 break; 1586 break;
1588 if (x25->accptapprv) /* must call accptapprv above */ 1587 /* must call accptapprv above */
1588 if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags))
1589 break; 1589 break;
1590 x25_write_internal(sk, X25_CALL_ACCEPTED); 1590 x25_write_internal(sk, X25_CALL_ACCEPTED);
1591 x25->state = X25_STATE_3; 1591 x25->state = X25_STATE_3;
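The af_x25.c hunks above fold three separate per-socket ints (qbitincl, accptapprv, intflag) into a single flags word driven by the kernel's atomic bit helpers. A minimal userspace sketch of that pattern, assuming only C11 atomics (the names below are illustrative, not the kernel API):

#include <stdio.h>
#include <stdatomic.h>

enum { Q_BIT_FLAG, INTERRUPT_FLAG, ACCPT_APPRV_FLAG };

static atomic_ulong flags;

static void flag_set(int bit)    { atomic_fetch_or(&flags, 1UL << bit); }
static void flag_clear(int bit)  { atomic_fetch_and(&flags, ~(1UL << bit)); }
static int  flag_test(int bit)   { return (atomic_load(&flags) >> bit) & 1; }

int main(void)
{
    flag_set(ACCPT_APPRV_FLAG);        /* default at socket creation */
    if (flag_test(ACCPT_APPRV_FLAG))
        puts("call will be accepted automatically");
    flag_clear(ACCPT_APPRV_FLAG);      /* what the ioctl path does */
    printf("flags now: %#lx\n", atomic_load(&flags));
    return 0;
}

One word also copies in one assignment (x25->flags = ox25->flags in x25_make_new()), instead of three separate field copies.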
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index b9ef682230a0..9005f6daeab5 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -24,6 +24,7 @@
 #include <net/sock.h>
 #include <linux/if_arp.h>
 #include <net/x25.h>
+#include <net/x25device.h>
 
 static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 {
@@ -115,19 +116,22 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	switch (skb->data[0]) {
-		case 0x00:
-			skb_pull(skb, 1);
-			if (x25_receive_data(skb, nb)) {
-				x25_neigh_put(nb);
-				goto out;
-			}
-			break;
-		case 0x01:
-			x25_link_established(nb);
-			break;
-		case 0x02:
-			x25_link_terminated(nb);
-			break;
+
+	case X25_IFACE_DATA:
+		skb_pull(skb, 1);
+		if (x25_receive_data(skb, nb)) {
+			x25_neigh_put(nb);
+			goto out;
+		}
+		break;
+
+	case X25_IFACE_CONNECT:
+		x25_link_established(nb);
+		break;
+
+	case X25_IFACE_DISCONNECT:
+		x25_link_terminated(nb);
+		break;
 	}
 	x25_neigh_put(nb);
 drop:
@@ -148,7 +152,7 @@ void x25_establish_link(struct x25_neigh *nb)
 			return;
 		}
 		ptr  = skb_put(skb, 1);
-		*ptr = 0x01;
+		*ptr = X25_IFACE_CONNECT;
 		break;
 
 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
@@ -184,7 +188,7 @@ void x25_terminate_link(struct x25_neigh *nb)
 	}
 
 	ptr  = skb_put(skb, 1);
-	*ptr = 0x02;
+	*ptr = X25_IFACE_DISCONNECT;
 
 	skb->protocol = htons(ETH_P_X25);
 	skb->dev      = nb->dev;
@@ -200,7 +204,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
 	switch (nb->dev->type) {
 	case ARPHRD_X25:
 		dptr  = skb_push(skb, 1);
-		*dptr = 0x00;
+		*dptr = X25_IFACE_DATA;
 		break;
 
 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
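The x25_dev.c hunks replace the bare 0x00/0x01/0x02 first-byte values with the named X25_IFACE_* constants from the newly included <net/x25device.h>, so sender and receiver can no longer drift apart silently. A small self-contained sketch of the same cleanup (constant values mirror the literals above; the enum and function names are ours):

#include <stdio.h>

enum x25_iface {
    IFACE_DATA       = 0x00,   /* payload frame */
    IFACE_CONNECT    = 0x01,   /* link established */
    IFACE_DISCONNECT = 0x02,   /* link terminated */
};

static void handle_first_byte(unsigned char b)
{
    switch (b) {
    case IFACE_DATA:       puts("payload follows");  break;
    case IFACE_CONNECT:    puts("link established"); break;
    case IFACE_DISCONNECT: puts("link terminated");  break;
    default:               puts("unknown, dropped"); break;
    }
}

int main(void)
{
    unsigned char frame[] = { IFACE_CONNECT };
    handle_first_byte(frame[0]);
    return 0;
}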
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 372ac226e648..63178961efac 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -273,7 +273,7 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 			break;
 
 		case X25_INTERRUPT_CONFIRMATION:
-			x25->intflag = 0;
+			clear_bit(X25_INTERRUPT_FLAG, &x25->flags);
 			break;
 
 		case X25_INTERRUPT:
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index 52351a26b6fc..d00649fb251d 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -148,8 +148,9 @@ void x25_kick(struct sock *sk)
 	/*
 	 *	Transmit interrupt data.
 	 */
-	if (!x25->intflag && skb_peek(&x25->interrupt_out_queue) != NULL) {
-		x25->intflag = 1;
+	if (skb_peek(&x25->interrupt_out_queue) != NULL &&
+	    !test_and_set_bit(X25_INTERRUPT_FLAG, &x25->flags)) {
+
 		skb = skb_dequeue(&x25->interrupt_out_queue);
 		x25_transmit_link(skb, x25->neighbour);
 	}
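The x25_kick() hunk is more than a rename: it checks the queue before claiming the flag, and test_and_set_bit() makes the claim atomic, so two contexts cannot both believe they own the in-flight interrupt. A userspace sketch of the test-and-set semantics, assuming C11 atomics (helper names are ours):

#include <stdio.h>
#include <stdatomic.h>

static atomic_ulong flags;
#define INTERRUPT_FLAG 0

/* returns the previous value of the bit, like the kernel helper */
static int test_and_set(int bit)
{
    return (atomic_fetch_or(&flags, 1UL << bit) >> bit) & 1;
}

int main(void)
{
    int queued = 1;    /* stand-in for skb_peek(&interrupt_out_queue) */

    if (queued && !test_and_set(INTERRUPT_FLAG))
        puts("claimed the flag; transmit one interrupt frame");
    if (queued && !test_and_set(INTERRUPT_FLAG))
        puts("never printed: flag already set");
    return 0;
}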
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index e5195c99f71e..8e69533d2313 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -16,7 +16,8 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
 
 static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
 {
-	return ntohl(daddr->a4 + saddr->a4);
+	u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4;
+	return ntohl((__force __be32)sum);
 }
 
 static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
@@ -54,7 +55,7 @@ static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,
 	case AF_INET6:
 		h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
 		break;
-	};
+	}
 	return (h ^ (h >> 16)) & hmask;
 }
 
@@ -101,7 +102,7 @@ static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short
 
 		h = __xfrm6_daddr_saddr_hash(daddr, saddr);
 		break;
-	};
+	}
 	h ^= (h >> 16);
 	return h & hmask;
 }
@@ -118,7 +119,7 @@ static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *sa
 	case AF_INET6:
 		h = __xfrm6_daddr_saddr_hash(daddr, saddr);
 		break;
-	};
+	}
 	h ^= (h >> 16);
 	return h & hmask;
 }
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 6a329158bdfa..a3cca0a94346 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -95,13 +95,13 @@ resume:
 			goto error_nolock;
 		}
 
-		dst = dst_pop(dst);
+		dst = skb_dst_pop(skb);
 		if (!dst) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
 			err = -EHOSTUNREACH;
 			goto error_nolock;
 		}
-		skb_dst_set(skb, dst);
+		skb_dst_set_noref(skb, dst);
 		x = dst->xfrm;
 	} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
 
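The xfrm_output.c hunk switches the skb to a borrowed ("noref") dst: the owner keeps the reference and the skb merely points at it for the duration of the call chain, avoiding an atomic refcount operation per packet. A rough userspace analogue of owned vs. borrowed references (all names and structure here are ours, for illustration only):

#include <stdio.h>

struct dst { int refcnt; };

static void dst_hold(struct dst *d)    { d->refcnt++; }

struct pkt { struct dst *dst; int dst_is_borrowed; };

static void pkt_dst_set(struct pkt *p, struct dst *d)
{
    dst_hold(d);                        /* owned: costs a refcount op */
    p->dst = d;
    p->dst_is_borrowed = 0;
}

static void pkt_dst_set_noref(struct pkt *p, struct dst *d)
{
    p->dst = d;                         /* borrowed: no refcount op */
    p->dst_is_borrowed = 1;             /* valid only while owner holds d */
}

int main(void)
{
    struct dst d = { .refcnt = 1 };
    struct pkt p = { 0 };

    pkt_dst_set_noref(&p, &d);
    printf("refcnt still %d, borrowed=%d\n", d.refcnt, p.dst_is_borrowed);
    return 0;
}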
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 843e066649cb..2b3ed7ad4933 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -37,6 +37,8 @@
 DEFINE_MUTEX(xfrm_cfg_mutex);
 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
+static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
+static struct dst_entry *xfrm_policy_sk_bundles;
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
@@ -44,12 +46,10 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
 
 static struct kmem_cache *xfrm_dst_cache __read_mostly;
 
-static HLIST_HEAD(xfrm_policy_gc_list);
-static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
-
 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
 static void xfrm_init_pmtu(struct dst_entry *dst);
+static int stale_bundle(struct dst_entry *dst);
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir);
@@ -156,7 +156,7 @@ static void xfrm_policy_timer(unsigned long data)
 
 	read_lock(&xp->lock);
 
-	if (xp->walk.dead)
+	if (unlikely(xp->walk.dead))
 		goto out;
 
 	dir = xfrm_policy_id2dir(xp->index);
@@ -216,6 +216,35 @@ expired:
 	xfrm_pol_put(xp);
 }
 
+static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
+{
+	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
+
+	if (unlikely(pol->walk.dead))
+		flo = NULL;
+	else
+		xfrm_pol_hold(pol);
+
+	return flo;
+}
+
+static int xfrm_policy_flo_check(struct flow_cache_object *flo)
+{
+	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
+
+	return !pol->walk.dead;
+}
+
+static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
+{
+	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
+}
+
+static const struct flow_cache_ops xfrm_policy_fc_ops = {
+	.get = xfrm_policy_flo_get,
+	.check = xfrm_policy_flo_check,
+	.delete = xfrm_policy_flo_delete,
+};
 
 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
  * SPD calls.
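The new flo_get/flo_check/flo_delete hooks are a classic embedded-vtable pattern: the flow cache stores only a small flow_cache_object, and container_of() recovers the enclosing xfrm_policy. A self-contained sketch of the same idea (container_of defined locally; everything else is illustrative):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct flow_cache_object;

struct flow_cache_ops {
    struct flow_cache_object *(*get)(struct flow_cache_object *);
    int (*check)(struct flow_cache_object *);
    void (*delete)(struct flow_cache_object *);
};

struct flow_cache_object { const struct flow_cache_ops *ops; };

struct policy {
    int refcnt;
    int dead;
    struct flow_cache_object flo;   /* embedded cache handle */
};

static struct flow_cache_object *policy_get(struct flow_cache_object *flo)
{
    struct policy *p = container_of(flo, struct policy, flo);
    if (p->dead)
        return NULL;                /* entry unusable: force re-resolve */
    p->refcnt++;
    return flo;
}

static int policy_check(struct flow_cache_object *flo)
{
    return !container_of(flo, struct policy, flo)->dead;
}

static void policy_delete(struct flow_cache_object *flo)
{
    container_of(flo, struct policy, flo)->refcnt--;
}

static const struct flow_cache_ops policy_ops = {
    .get = policy_get, .check = policy_check, .delete = policy_delete,
};

int main(void)
{
    struct policy p = { .refcnt = 1, .flo.ops = &policy_ops };
    if (p.flo.ops->get(&p.flo))
        printf("cache hit, refcnt=%d\n", p.refcnt);
    p.flo.ops->delete(&p.flo);
    return 0;
}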
@@ -236,6 +265,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 		atomic_set(&policy->refcnt, 1);
 		setup_timer(&policy->timer, xfrm_policy_timer,
 				(unsigned long)policy);
+		policy->flo.ops = &xfrm_policy_fc_ops;
 	}
 	return policy;
 }
@@ -247,8 +277,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
 {
 	BUG_ON(!policy->walk.dead);
 
-	BUG_ON(policy->bundles);
-
 	if (del_timer(&policy->timer))
 		BUG();
 
@@ -257,63 +285,20 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
 }
 EXPORT_SYMBOL(xfrm_policy_destroy);
 
-static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
-{
-	struct dst_entry *dst;
-
-	while ((dst = policy->bundles) != NULL) {
-		policy->bundles = dst->next;
-		dst_free(dst);
-	}
-
-	if (del_timer(&policy->timer))
-		atomic_dec(&policy->refcnt);
-
-	if (atomic_read(&policy->refcnt) > 1)
-		flow_cache_flush();
-
-	xfrm_pol_put(policy);
-}
-
-static void xfrm_policy_gc_task(struct work_struct *work)
-{
-	struct xfrm_policy *policy;
-	struct hlist_node *entry, *tmp;
-	struct hlist_head gc_list;
-
-	spin_lock_bh(&xfrm_policy_gc_lock);
-	gc_list.first = xfrm_policy_gc_list.first;
-	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
-	spin_unlock_bh(&xfrm_policy_gc_lock);
-
-	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
-		xfrm_policy_gc_kill(policy);
-}
-static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
-
 /* Rule must be locked. Release descentant resources, announce
  * entry dead. The rule must be unlinked from lists to the moment.
  */
 
 static void xfrm_policy_kill(struct xfrm_policy *policy)
 {
-	int dead;
-
-	write_lock_bh(&policy->lock);
-	dead = policy->walk.dead;
 	policy->walk.dead = 1;
-	write_unlock_bh(&policy->lock);
 
-	if (unlikely(dead)) {
-		WARN_ON(1);
-		return;
-	}
-
-	spin_lock_bh(&xfrm_policy_gc_lock);
-	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
-	spin_unlock_bh(&xfrm_policy_gc_lock);
+	atomic_inc(&policy->genid);
 
-	schedule_work(&xfrm_policy_gc_work);
+	if (del_timer(&policy->timer))
+		xfrm_pol_put(policy);
+
+	xfrm_pol_put(policy);
 }
 
 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
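xfrm_policy_kill() no longer queues the policy for a garbage-collection worker; it just marks it dead and bumps a generation counter, leaving cached bundles to fail their validity check lazily. A sketch of generation-based invalidation (illustrative, not the kernel implementation):

#include <stdio.h>

struct policy { int genid; };
struct bundle { struct policy *pol; int policy_genid; };

static void policy_kill(struct policy *p)
{
    p->genid++;                  /* O(1): no list walk, no worker */
}

static int bundle_ok(const struct bundle *b)
{
    return b->policy_genid == b->pol->genid;
}

int main(void)
{
    struct policy pol = { .genid = 0 };
    struct bundle b = { .pol = &pol, .policy_genid = pol.genid };

    printf("before kill: ok=%d\n", bundle_ok(&b));
    policy_kill(&pol);
    printf("after kill:  ok=%d\n", bundle_ok(&b));   /* 0: re-resolve */
    return 0;
}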
@@ -555,7 +540,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	struct xfrm_policy *delpol;
 	struct hlist_head *chain;
 	struct hlist_node *entry, *newpos;
-	struct dst_entry *gc_list;
 	u32 mark = policy->mark.v & policy->mark.m;
 
 	write_lock_bh(&xfrm_policy_lock);
@@ -605,34 +589,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	else if (xfrm_bydst_should_resize(net, dir, NULL))
 		schedule_work(&net->xfrm.policy_hash_work);
 
-	read_lock_bh(&xfrm_policy_lock);
-	gc_list = NULL;
-	entry = &policy->bydst;
-	hlist_for_each_entry_continue(policy, entry, bydst) {
-		struct dst_entry *dst;
-
-		write_lock(&policy->lock);
-		dst = policy->bundles;
-		if (dst) {
-			struct dst_entry *tail = dst;
-			while (tail->next)
-				tail = tail->next;
-			tail->next = gc_list;
-			gc_list = dst;
-
-			policy->bundles = NULL;
-		}
-		write_unlock(&policy->lock);
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-
-	while (gc_list) {
-		struct dst_entry *dst = gc_list;
-
-		gc_list = dst->next;
-		dst_free(dst);
-	}
-
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
@@ -671,10 +627,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 	}
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (ret && delete) {
-		atomic_inc(&flow_cache_genid);
+	if (ret && delete)
 		xfrm_policy_kill(ret);
-	}
 	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
@@ -713,10 +667,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 	}
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (ret && delete) {
-		atomic_inc(&flow_cache_genid);
+	if (ret && delete)
 		xfrm_policy_kill(ret);
-	}
 	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_byid);
@@ -776,7 +728,6 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
 int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 {
 	int dir, err = 0, cnt = 0;
-	struct xfrm_policy *dp;
 
 	write_lock_bh(&xfrm_policy_lock);
 
@@ -794,10 +745,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 				     &net->xfrm.policy_inexact[dir], bydst) {
 			if (pol->type != type)
 				continue;
-			dp = __xfrm_policy_unlink(pol, dir);
+			__xfrm_policy_unlink(pol, dir);
 			write_unlock_bh(&xfrm_policy_lock);
-			if (dp)
-				cnt++;
+			cnt++;
 
 			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
 						 audit_info->sessionid,
@@ -816,10 +766,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 					     bydst) {
 				if (pol->type != type)
 					continue;
-				dp = __xfrm_policy_unlink(pol, dir);
+				__xfrm_policy_unlink(pol, dir);
 				write_unlock_bh(&xfrm_policy_lock);
-				if (dp)
-					cnt++;
+				cnt++;
 
 				xfrm_audit_policy_delete(pol, 1,
 							 audit_info->loginuid,
@@ -835,7 +784,6 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 	}
 	if (!cnt)
 		err = -ESRCH;
-	atomic_inc(&flow_cache_genid);
 out:
 	write_unlock_bh(&xfrm_policy_lock);
 	return err;
@@ -989,32 +937,37 @@ fail:
 	return ret;
 }
 
-static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
-			      u8 dir, void **objp, atomic_t **obj_refp)
+static struct xfrm_policy *
+__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
 {
+#ifdef CONFIG_XFRM_SUB_POLICY
 	struct xfrm_policy *pol;
-	int err = 0;
 
-#ifdef CONFIG_XFRM_SUB_POLICY
 	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
-	if (IS_ERR(pol)) {
-		err = PTR_ERR(pol);
-		pol = NULL;
-	}
-	if (pol || err)
-		goto end;
-#endif
-	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
-	if (IS_ERR(pol)) {
-		err = PTR_ERR(pol);
-		pol = NULL;
-	}
-#ifdef CONFIG_XFRM_SUB_POLICY
-end:
+	if (pol != NULL)
+		return pol;
 #endif
-	if ((*objp = (void *) pol) != NULL)
-		*obj_refp = &pol->refcnt;
-	return err;
+	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+}
+
+static struct flow_cache_object *
+xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
+{
+	struct xfrm_policy *pol;
+
+	if (old_obj)
+		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
+
+	pol = __xfrm_policy_lookup(net, fl, family, dir);
+	if (IS_ERR_OR_NULL(pol))
+		return ERR_CAST(pol);
+
+	/* Resolver returns two references:
+	 * one for cache and one for caller of flow_cache_lookup() */
+	xfrm_pol_hold(pol);
+
+	return &pol->flo;
 }
 
 static inline int policy_to_flow_dir(int dir)
@@ -1104,8 +1057,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
 	pol = __xfrm_policy_unlink(pol, dir);
 	write_unlock_bh(&xfrm_policy_lock);
 	if (pol) {
-		if (dir < XFRM_POLICY_MAX)
-			atomic_inc(&flow_cache_genid);
 		xfrm_policy_kill(pol);
 		return 0;
 	}
@@ -1132,6 +1083,9 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
 	}
 	if (old_pol)
+		/* Unlinking succeeds always. This is the only function
+		 * allowed to delete or replace socket policy.
+		 */
 		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
 	write_unlock_bh(&xfrm_policy_lock);
 
@@ -1300,18 +1254,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
  * still valid.
  */
 
-static struct dst_entry *
-xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
-{
-	struct dst_entry *x;
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return ERR_PTR(-EINVAL);
-	x = afinfo->find_bundle(fl, policy);
-	xfrm_policy_put_afinfo(afinfo);
-	return x;
-}
-
 static inline int xfrm_get_tos(struct flowi *fl, int family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1327,6 +1269,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family)
 	return tos;
 }
 
+static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
+{
+	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+	struct dst_entry *dst = &xdst->u.dst;
+
+	if (xdst->route == NULL) {
+		/* Dummy bundle - if it has xfrms we were not
+		 * able to build bundle as template resolution failed.
+		 * It means we need to try again resolving. */
+		if (xdst->num_xfrms > 0)
+			return NULL;
+	} else {
+		/* Real bundle */
+		if (stale_bundle(dst))
+			return NULL;
+	}
+
+	dst_hold(dst);
+	return flo;
+}
+
+static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
+{
+	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+	struct dst_entry *dst = &xdst->u.dst;
+
+	if (!xdst->route)
+		return 0;
+	if (stale_bundle(dst))
+		return 0;
+
+	return 1;
+}
+
+static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
+{
+	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+	struct dst_entry *dst = &xdst->u.dst;
+
+	dst_free(dst);
+}
+
+static const struct flow_cache_ops xfrm_bundle_fc_ops = {
+	.get = xfrm_bundle_flo_get,
+	.check = xfrm_bundle_flo_check,
+	.delete = xfrm_bundle_flo_delete,
+};
+
 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1349,9 +1339,10 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 		BUG();
 	}
 	xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS);
-
 	xfrm_policy_put_afinfo(afinfo);
 
+	xdst->flo.ops = &xfrm_bundle_fc_ops;
+
 	return xdst;
 }
 
@@ -1389,6 +1380,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	return err;
 }
 
+
 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
  * all the metrics... Shortly, bundle a bundle.
  */
@@ -1452,7 +1444,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 			dst_hold(dst);
 
 			dst1->xfrm = xfrm[i];
-			xdst->genid = xfrm[i]->genid;
+			xdst->xfrm_genid = xfrm[i]->genid;
 
 			dst1->obsolete = -1;
 			dst1->flags |= DST_HOST;
@@ -1545,7 +1537,193 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
 #endif
 }
 
-static int stale_bundle(struct dst_entry *dst);
+static int xfrm_expand_policies(struct flowi *fl, u16 family,
+				struct xfrm_policy **pols,
+				int *num_pols, int *num_xfrms)
+{
+	int i;
+
+	if (*num_pols == 0 || !pols[0]) {
+		*num_pols = 0;
+		*num_xfrms = 0;
+		return 0;
+	}
+	if (IS_ERR(pols[0]))
+		return PTR_ERR(pols[0]);
+
+	*num_xfrms = pols[0]->xfrm_nr;
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
+	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
+						    XFRM_POLICY_TYPE_MAIN,
+						    fl, family,
+						    XFRM_POLICY_OUT);
+		if (pols[1]) {
+			if (IS_ERR(pols[1])) {
+				xfrm_pols_put(pols, *num_pols);
+				return PTR_ERR(pols[1]);
+			}
+			(*num_pols) ++;
+			(*num_xfrms) += pols[1]->xfrm_nr;
+		}
+	}
+#endif
+	for (i = 0; i < *num_pols; i++) {
+		if (pols[i]->action != XFRM_POLICY_ALLOW) {
+			*num_xfrms = -1;
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
+static struct xfrm_dst *
+xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
+			       struct flowi *fl, u16 family,
+			       struct dst_entry *dst_orig)
+{
+	struct net *net = xp_net(pols[0]);
+	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+	struct dst_entry *dst;
+	struct xfrm_dst *xdst;
+	int err;
+
+	/* Try to instantiate a bundle */
+	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
+	if (err <= 0) {
+		if (err != 0 && err != -EAGAIN)
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+		return ERR_PTR(err);
+	}
+
+	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
+	if (IS_ERR(dst)) {
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
+		return ERR_CAST(dst);
+	}
+
+	xdst = (struct xfrm_dst *)dst;
+	xdst->num_xfrms = err;
+	if (num_pols > 1)
+		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
+	else
+		err = xfrm_dst_update_origin(dst, fl);
+	if (unlikely(err)) {
+		dst_free(dst);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+		return ERR_PTR(err);
+	}
+
+	xdst->num_pols = num_pols;
+	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
+	xdst->policy_genid = atomic_read(&pols[0]->genid);
+
+	return xdst;
+}
+
+static struct flow_cache_object *
+xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
+		   struct flow_cache_object *oldflo, void *ctx)
+{
+	struct dst_entry *dst_orig = (struct dst_entry *)ctx;
+	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+	struct xfrm_dst *xdst, *new_xdst;
+	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
+
+	/* Check if the policies from old bundle are usable */
+	xdst = NULL;
+	if (oldflo) {
+		xdst = container_of(oldflo, struct xfrm_dst, flo);
+		num_pols = xdst->num_pols;
+		num_xfrms = xdst->num_xfrms;
+		pol_dead = 0;
+		for (i = 0; i < num_pols; i++) {
+			pols[i] = xdst->pols[i];
+			pol_dead |= pols[i]->walk.dead;
+		}
+		if (pol_dead) {
+			dst_free(&xdst->u.dst);
+			xdst = NULL;
+			num_pols = 0;
+			num_xfrms = 0;
+			oldflo = NULL;
+		}
+	}
+
+	/* Resolve policies to use if we couldn't get them from
+	 * previous cache entry */
+	if (xdst == NULL) {
+		num_pols = 1;
+		pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
+		err = xfrm_expand_policies(fl, family, pols,
+					   &num_pols, &num_xfrms);
+		if (err < 0)
+			goto inc_error;
+		if (num_pols == 0)
+			return NULL;
+		if (num_xfrms <= 0)
+			goto make_dummy_bundle;
+	}
+
+	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
+	if (IS_ERR(new_xdst)) {
+		err = PTR_ERR(new_xdst);
+		if (err != -EAGAIN)
+			goto error;
+		if (oldflo == NULL)
+			goto make_dummy_bundle;
+		dst_hold(&xdst->u.dst);
+		return oldflo;
+	} else if (new_xdst == NULL) {
+		num_xfrms = 0;
+		if (oldflo == NULL)
+			goto make_dummy_bundle;
+		xdst->num_xfrms = 0;
+		dst_hold(&xdst->u.dst);
+		return oldflo;
+	}
+
+	/* Kill the previous bundle */
+	if (xdst) {
+		/* The policies were stolen for newly generated bundle */
+		xdst->num_pols = 0;
+		dst_free(&xdst->u.dst);
+	}
+
+	/* Flow cache does not have reference, it dst_free()'s,
+	 * but we do need to return one reference for original caller */
+	dst_hold(&new_xdst->u.dst);
+	return &new_xdst->flo;
+
+make_dummy_bundle:
+	/* We found policies, but there's no bundles to instantiate:
+	 * either because the policy blocks, has no transformations or
+	 * we could not build template (no xfrm_states).*/
+	xdst = xfrm_alloc_dst(net, family);
+	if (IS_ERR(xdst)) {
+		xfrm_pols_put(pols, num_pols);
+		return ERR_CAST(xdst);
+	}
+	xdst->num_pols = num_pols;
+	xdst->num_xfrms = num_xfrms;
+	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
+
+	dst_hold(&xdst->u.dst);
+	return &xdst->flo;
+
+inc_error:
+	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+error:
+	if (xdst != NULL)
+		dst_free(&xdst->u.dst);
+	else
+		xfrm_pols_put(pols, num_pols);
+	return ERR_PTR(err);
+}
 
 /* Main function: finds/creates a bundle for given flow.
  *
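xfrm_bundle_lookup() introduces the idea of caching a "dummy" bundle: policies were found but no route could be built yet (no xfrm_states), and the dummy's null route tells the next lookup to retry resolution instead of serving a broken entry. A toy model of that retry contract (all names are ours):

#include <stdio.h>

struct entry { int num_xfrms; int has_route; };

static struct entry *resolve(int states_ready, struct entry *slot)
{
    slot->num_xfrms = 1;
    slot->has_route = states_ready;   /* no route == dummy bundle */
    return slot;
}

static struct entry *lookup(struct entry *cached, int states_ready,
                            struct entry *slot)
{
    /* 'get' step: a dummy entry with pending xfrms forces re-resolve */
    if (cached && !(cached->num_xfrms > 0 && !cached->has_route))
        return cached;
    return resolve(states_ready, slot);
}

int main(void)
{
    struct entry slot, *e;

    e = lookup(NULL, 0, &slot);       /* SAs not negotiated yet */
    printf("dummy: has_route=%d\n", e->has_route);
    e = lookup(e, 1, &slot);          /* retried once SAs exist */
    printf("real:  has_route=%d\n", e->has_route);
    return 0;
}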
@@ -1555,245 +1733,156 @@ static int stale_bundle(struct dst_entry *dst);
 int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
 		  struct sock *sk, int flags)
 {
-	struct xfrm_policy *policy;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-	int npols;
-	int pol_dead;
-	int xfrm_nr;
-	int pi;
-	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
-	struct dst_entry *dst, *dst_orig = *dst_p;
-	int nx = 0;
-	int err;
-	u32 genid;
-	u16 family;
+	struct flow_cache_object *flo;
+	struct xfrm_dst *xdst;
+	struct dst_entry *dst, *dst_orig = *dst_p, *route;
+	u16 family = dst_orig->ops->family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
 
 restart:
-	genid = atomic_read(&flow_cache_genid);
-	policy = NULL;
-	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
-		pols[pi] = NULL;
-	npols = 0;
-	pol_dead = 0;
-	xfrm_nr = 0;
+	dst = NULL;
+	xdst = NULL;
+	route = NULL;
 
 	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
-		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
-		err = PTR_ERR(policy);
-		if (IS_ERR(policy)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+		num_pols = 1;
+		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+		err = xfrm_expand_policies(fl, family, pols,
+					   &num_pols, &num_xfrms);
+		if (err < 0)
 			goto dropdst;
+
+		if (num_pols) {
+			if (num_xfrms <= 0) {
+				drop_pols = num_pols;
+				goto no_transform;
+			}
+
+			xdst = xfrm_resolve_and_create_bundle(
+					pols, num_pols, fl,
+					family, dst_orig);
+			if (IS_ERR(xdst)) {
+				xfrm_pols_put(pols, num_pols);
+				err = PTR_ERR(xdst);
+				goto dropdst;
+			} else if (xdst == NULL) {
+				num_xfrms = 0;
+				drop_pols = num_pols;
+				goto no_transform;
+			}
+
+			spin_lock_bh(&xfrm_policy_sk_bundle_lock);
+			xdst->u.dst.next = xfrm_policy_sk_bundles;
+			xfrm_policy_sk_bundles = &xdst->u.dst;
+			spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
+
+			route = xdst->route;
 		}
 	}
 
-	if (!policy) {
+	if (xdst == NULL) {
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) ||
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
-					   dir, xfrm_policy_lookup);
-		err = PTR_ERR(policy);
-		if (IS_ERR(policy)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+		flo = flow_cache_lookup(net, fl, family, dir,
+					xfrm_bundle_lookup, dst_orig);
+		if (flo == NULL)
+			goto nopol;
+		if (IS_ERR(flo)) {
+			err = PTR_ERR(flo);
 			goto dropdst;
 		}
+		xdst = container_of(flo, struct xfrm_dst, flo);
+
+		num_pols = xdst->num_pols;
+		num_xfrms = xdst->num_xfrms;
+		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols);
+		route = xdst->route;
+	}
+
+	dst = &xdst->u.dst;
+	if (route == NULL && num_xfrms > 0) {
+		/* The only case when xfrm_bundle_lookup() returns a
+		 * bundle with null route, is when the template could
+		 * not be resolved. It means policies are there, but
+		 * bundle could not be created, since we don't yet
+		 * have the xfrm_state's. We need to wait for KM to
+		 * negotiate new SA's or bail out with error.*/
+		if (net->xfrm.sysctl_larval_drop) {
+			/* EREMOTE tells the caller to generate
+			 * a one-shot blackhole route. */
+			dst_release(dst);
+			xfrm_pols_put(pols, drop_pols);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+			return -EREMOTE;
+		}
+		if (flags & XFRM_LOOKUP_WAIT) {
+			DECLARE_WAITQUEUE(wait, current);
+
+			add_wait_queue(&net->xfrm.km_waitq, &wait);
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&net->xfrm.km_waitq, &wait);
+
+			if (!signal_pending(current)) {
+				dst_release(dst);
+				goto restart;
+			}
+
+			err = -ERESTART;
+		} else
+			err = -EAGAIN;
+
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+		goto error;
 	}
 
-	if (!policy)
+no_transform:
+	if (num_pols == 0)
 		goto nopol;
 
-	family = dst_orig->ops->family;
-	pols[0] = policy;
-	npols ++;
-	xfrm_nr += pols[0]->xfrm_nr;
-
-	err = -ENOENT;
-	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
+	if ((flags & XFRM_LOOKUP_ICMP) &&
+	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
+		err = -ENOENT;
 		goto error;
+	}
 
-	policy->curlft.use_time = get_seconds();
+	for (i = 0; i < num_pols; i++)
+		pols[i]->curlft.use_time = get_seconds();
 
-	switch (policy->action) {
-	default:
-	case XFRM_POLICY_BLOCK:
+	if (num_xfrms < 0) {
 		/* Prohibit the flow */
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
 		err = -EPERM;
 		goto error;
-
-	case XFRM_POLICY_ALLOW:
-#ifndef CONFIG_XFRM_SUB_POLICY
-		if (policy->xfrm_nr == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pol_put(policy);
-			return 0;
-		}
-#endif
-
-		/* Try to find matching bundle.
-		 *
-		 * LATER: help from flow cache. It is optional, this
-		 * is required only for output policy.
-		 */
-		dst = xfrm_find_bundle(fl, policy, family);
-		if (IS_ERR(dst)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			err = PTR_ERR(dst);
-			goto error;
-		}
-
-		if (dst)
-			break;
-
-#ifdef CONFIG_XFRM_SUB_POLICY
-		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
-			pols[1] = xfrm_policy_lookup_bytype(net,
-							    XFRM_POLICY_TYPE_MAIN,
-							    fl, family,
-							    XFRM_POLICY_OUT);
-			if (pols[1]) {
-				if (IS_ERR(pols[1])) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
-					err = PTR_ERR(pols[1]);
-					goto error;
-				}
-				if (pols[1]->action == XFRM_POLICY_BLOCK) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
-					err = -EPERM;
-					goto error;
-				}
-				npols ++;
-				xfrm_nr += pols[1]->xfrm_nr;
-			}
-		}
-
-		/*
-		 * Because neither flowi nor bundle information knows about
-		 * transformation template size. On more than one policy usage
-		 * we can realize whether all of them is bypass or not after
-		 * they are searched. See above not-transformed bypass
-		 * is surrounded by non-sub policy configuration, too.
-		 */
-		if (xfrm_nr == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pols_put(pols, npols);
-			return 0;
-		}
-
-#endif
-		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
-
-		if (unlikely(nx<0)) {
-			err = nx;
-			if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
-				/* EREMOTE tells the caller to generate
-				 * a one-shot blackhole route.
-				 */
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-				xfrm_pol_put(policy);
-				return -EREMOTE;
-			}
-			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
-				DECLARE_WAITQUEUE(wait, current);
-
-				add_wait_queue(&net->xfrm.km_waitq, &wait);
-				set_current_state(TASK_INTERRUPTIBLE);
-				schedule();
-				set_current_state(TASK_RUNNING);
-				remove_wait_queue(&net->xfrm.km_waitq, &wait);
-
-				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
-
-				if (nx == -EAGAIN && signal_pending(current)) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-					err = -ERESTART;
-					goto error;
-				}
-				if (nx == -EAGAIN ||
-				    genid != atomic_read(&flow_cache_genid)) {
-					xfrm_pols_put(pols, npols);
-					goto restart;
-				}
-				err = nx;
-			}
-			if (err < 0) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-				goto error;
-			}
-		}
-		if (nx == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pols_put(pols, npols);
-			return 0;
-		}
-
-		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
-		err = PTR_ERR(dst);
-		if (IS_ERR(dst)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
-			goto error;
-		}
-
-		for (pi = 0; pi < npols; pi++) {
-			read_lock_bh(&pols[pi]->lock);
-			pol_dead |= pols[pi]->walk.dead;
-			read_unlock_bh(&pols[pi]->lock);
-		}
-
-		write_lock_bh(&policy->lock);
-		if (unlikely(pol_dead || stale_bundle(dst))) {
-			/* Wow! While we worked on resolving, this
-			 * policy has gone. Retry. It is not paranoia,
-			 * we just cannot enlist new bundle to dead object.
-			 * We can't enlist stable bundles either.
-			 */
-			write_unlock_bh(&policy->lock);
-			dst_free(dst);
-
-			if (pol_dead)
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
-			else
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			err = -EHOSTUNREACH;
-			goto error;
-		}
-
-		if (npols > 1)
-			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
-		else
-			err = xfrm_dst_update_origin(dst, fl);
-		if (unlikely(err)) {
-			write_unlock_bh(&policy->lock);
-			dst_free(dst);
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			goto error;
-		}
-
-		dst->next = policy->bundles;
-		policy->bundles = dst;
-		dst_hold(dst);
-		write_unlock_bh(&policy->lock);
+	} else if (num_xfrms > 0) {
+		/* Flow transformed */
+		*dst_p = dst;
+		dst_release(dst_orig);
+	} else {
+		/* Flow passes untransformed */
+		dst_release(dst);
 	}
-	*dst_p = dst;
-	dst_release(dst_orig);
-	xfrm_pols_put(pols, npols);
+ok:
+	xfrm_pols_put(pols, drop_pols);
 	return 0;
 
+nopol:
+	if (!(flags & XFRM_LOOKUP_ICMP))
+		goto ok;
+	err = -ENOENT;
 error:
-	xfrm_pols_put(pols, npols);
+	dst_release(dst);
 dropdst:
 	dst_release(dst_orig);
 	*dst_p = NULL;
+	xfrm_pols_put(pols, drop_pols);
 	return err;
-
-nopol:
-	err = -ENOENT;
-	if (flags & XFRM_LOOKUP_ICMP)
-		goto dropdst;
-	return 0;
 }
 EXPORT_SYMBOL(__xfrm_lookup);
 
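When the bundle has no route and larval drop is off, the rewritten __xfrm_lookup() parks on km_waitq and restarts the whole lookup after the key manager makes progress, bailing out on signals. A userspace analogue of that sleep-and-restart loop using a condition variable (illustrative only; compile with -pthread):

#include <stdio.h>
#include <unistd.h>
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  km_waitq = PTHREAD_COND_INITIALIZER;
static int sa_ready;

static void *key_manager(void *arg)
{
    (void)arg;
    sleep(1);                       /* pretend to negotiate an SA */
    pthread_mutex_lock(&lock);
    sa_ready = 1;
    pthread_cond_broadcast(&km_waitq);
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t km;
    pthread_create(&km, NULL, key_manager, NULL);

    pthread_mutex_lock(&lock);
    while (!sa_ready)               /* each wakeup restarts the lookup */
        pthread_cond_wait(&km_waitq, &lock);
    pthread_mutex_unlock(&lock);

    puts("bundle resolved after KM negotiation");
    pthread_join(km, NULL);
    return 0;
}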
@@ -1952,9 +2041,16 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		}
 	}
 
-	if (!pol)
-		pol = flow_cache_lookup(net, &fl, family, fl_dir,
-					xfrm_policy_lookup);
+	if (!pol) {
+		struct flow_cache_object *flo;
+
+		flo = flow_cache_lookup(net, &fl, family, fl_dir,
+					xfrm_policy_lookup, NULL);
+		if (IS_ERR_OR_NULL(flo))
+			pol = ERR_CAST(flo);
+		else
+			pol = container_of(flo, struct xfrm_policy, flo);
+	}
 
 	if (IS_ERR(pol)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
@@ -2068,6 +2164,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		return 0;
 	}
 
+	skb_dst_force(skb);
 	dst = skb_dst(skb);
 
 	res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0;
@@ -2124,7 +2221,6 @@ EXPORT_SYMBOL(xfrm_dst_ifdown);
 static void xfrm_link_failure(struct sk_buff *skb)
 {
 	/* Impossible. Such dst must be popped before reaches point of failure. */
-	return;
 }
 
 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
@@ -2138,71 +2234,24 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
-static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
-{
-	struct dst_entry *dst, **dstp;
-
-	write_lock(&pol->lock);
-	dstp = &pol->bundles;
-	while ((dst=*dstp) != NULL) {
-		if (func(dst)) {
-			*dstp = dst->next;
-			dst->next = *gc_list_p;
-			*gc_list_p = dst;
-		} else {
-			dstp = &dst->next;
-		}
-	}
-	write_unlock(&pol->lock);
-}
-
-static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
+static void __xfrm_garbage_collect(struct net *net)
 {
-	struct dst_entry *gc_list = NULL;
-	int dir;
+	struct dst_entry *head, *next;
 
-	read_lock_bh(&xfrm_policy_lock);
-	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
-		struct xfrm_policy *pol;
-		struct hlist_node *entry;
-		struct hlist_head *table;
-		int i;
+	flow_cache_flush();
 
-		hlist_for_each_entry(pol, entry,
-				     &net->xfrm.policy_inexact[dir], bydst)
-			prune_one_bundle(pol, func, &gc_list);
+	spin_lock_bh(&xfrm_policy_sk_bundle_lock);
+	head = xfrm_policy_sk_bundles;
+	xfrm_policy_sk_bundles = NULL;
+	spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
 
-		table = net->xfrm.policy_bydst[dir].table;
-		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
-			hlist_for_each_entry(pol, entry, table + i, bydst)
-				prune_one_bundle(pol, func, &gc_list);
-		}
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-
-	while (gc_list) {
-		struct dst_entry *dst = gc_list;
-		gc_list = dst->next;
-		dst_free(dst);
+	while (head) {
+		next = head->next;
+		dst_free(head);
+		head = next;
 	}
 }
 
-static int unused_bundle(struct dst_entry *dst)
-{
-	return !atomic_read(&dst->__refcnt);
-}
-
-static void __xfrm_garbage_collect(struct net *net)
-{
-	xfrm_prune_bundles(net, unused_bundle);
-}
-
-static int xfrm_flush_bundles(struct net *net)
-{
-	xfrm_prune_bundles(net, stale_bundle);
-	return 0;
-}
-
 static void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -2260,7 +2309,10 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 			return 0;
 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
 			return 0;
-		if (xdst->genid != dst->xfrm->genid)
+		if (xdst->xfrm_genid != dst->xfrm->genid)
+			return 0;
+		if (xdst->num_pols > 0 &&
+		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
 			return 0;
 
 		if (strict && fl &&
@@ -2425,7 +2477,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
 
 	switch (event) {
 	case NETDEV_DOWN:
-		xfrm_flush_bundles(dev_net(dev));
+		__xfrm_garbage_collect(dev_net(dev));
 	}
 	return NOTIFY_DONE;
 }
@@ -2440,7 +2492,8 @@ static int __net_init xfrm_statistics_init(struct net *net)
 	int rv;
 
 	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
-			  sizeof(struct linux_xfrm_mib)) < 0)
+			  sizeof(struct linux_xfrm_mib),
+			  __alignof__(struct linux_xfrm_mib)) < 0)
 		return -ENOMEM;
 	rv = xfrm_proc_init(net);
 	if (rv < 0)
@@ -2531,7 +2584,6 @@ static void xfrm_policy_fini(struct net *net)
 	audit_info.sessionid = -1;
 	audit_info.secid = 0;
 	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
-	flush_work(&xfrm_policy_gc_work);
 
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
 
@@ -2757,7 +2809,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 			       struct xfrm_migrate *m, int num_migrate)
 {
 	struct xfrm_migrate *mp;
-	struct dst_entry *dst;
 	int i, j, n = 0;
 
 	write_lock_bh(&pol->lock);
@@ -2782,10 +2833,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 			       sizeof(pol->xfrm_vec[i].saddr));
 			pol->xfrm_vec[i].encap_family = mp->new_family;
 			/* flush bundles */
-			while ((dst = pol->bundles) != NULL) {
-				pol->bundles = dst->next;
-				dst_free(dst);
-			}
+			atomic_inc(&pol->genid);
 		}
 	}
 
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index add77ecb8ac4..5208b12fbfb4 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -38,7 +38,6 @@
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
-static unsigned int xfrm_state_genid;
 
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -924,8 +923,6 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 	struct net *net = xs_net(x);
 	unsigned int h;
 
-	x->genid = ++xfrm_state_genid;
-
 	list_add(&x->km.all, &net->xfrm.state_all);
 
 	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
@@ -971,7 +968,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 		    (mark & x->mark.m) == x->mark.v &&
 		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
 		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
-			x->genid = xfrm_state_genid;
+			x->genid++;
 	}
 }
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6106b72826d3..ba59983aaffe 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1741,6 +1741,10 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		return err;
 
+	err = verify_policy_dir(p->dir);
+	if (err)
+		return err;
+
 	if (p->index)
 		xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err);
 	else {
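The added verify_policy_dir() call range-checks a directly attacker-controllable netlink field before it is used to select a policy table. A minimal sketch of the same validate-before-use rule (constants and names here are illustrative):

#include <stdio.h>
#include <errno.h>

enum { POLICY_IN, POLICY_OUT, POLICY_FWD, POLICY_MAX };

static int verify_dir(unsigned char dir)
{
    return dir < POLICY_MAX ? 0 : -EINVAL;   /* reject out-of-range */
}

int main(void)
{
    unsigned char from_msg = 7;    /* attacker-controlled field */

    if (verify_dir(from_msg))
        puts("rejected before any table lookup");
    return 0;
}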
@@ -1766,13 +1770,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (xp == NULL)
 		return -ENOENT;
 
-	read_lock(&xp->lock);
-	if (xp->walk.dead) {
-		read_unlock(&xp->lock);
+	if (unlikely(xp->walk.dead))
 		goto out;
-	}
 
-	read_unlock(&xp->lock);
 	err = 0;
 	if (up->hard) {
 		uid_t loginuid = NETLINK_CB(skb).loginuid;
@@ -1783,7 +1783,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	} else {
 		// reset the timers here?
-		printk("Dont know what to do with soft policy expire\n");
+		WARN(1, "Dont know what to do with soft policy expire\n");
 	}
 	km_policy_expired(xp, p->dir, up->hard, current->pid);
 
@@ -1883,7 +1883,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 
 bad_policy:
-	printk("BAD policy passed\n");
+	WARN(1, "BAD policy passed\n");
 free_state:
 	kfree(x);
 nomem:
@@ -2385,8 +2385,9 @@ static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c)
 	case XFRM_MSG_FLUSHSA:
 		return xfrm_notify_sa_flush(c);
 	default:
-		printk("xfrm_user: Unknown SA event %d\n", c->event);
-		break;
+		printk(KERN_NOTICE "xfrm_user: Unknown SA event %d\n",
+		       c->event);
+		break;
 	}
 
 	return 0;
@@ -2676,7 +2677,8 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev
 	case XFRM_MSG_POLEXPIRE:
 		return xfrm_exp_policy_notify(xp, dir, c);
 	default:
-		printk("xfrm_user: Unknown Policy event %d\n", c->event);
+		printk(KERN_NOTICE "xfrm_user: Unknown Policy event %d\n",
+		       c->event);
 	}
 
 	return 0;
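The logging hunks give previously unclassified printk()s an explicit severity and promote two "should never happen" paths to WARN(), which also records where it fired. A userspace sketch of that distinction (the macro below is ours, mimicking only the shape of the kernel's WARN; the ##__VA_ARGS__ form is a common GNU extension):

#include <stdio.h>

#define KERN_NOTICE "<5>"
#define WARN(cond, fmt, ...)                                    \
    do {                                                        \
        if (cond)                                               \
            fprintf(stderr, "WARNING: %s:%d: " fmt,             \
                    __FILE__, __LINE__, ##__VA_ARGS__);         \
    } while (0)

int main(void)
{
    int unknown_event = 42;

    /* routine noise: tagged with a severity, not escalated */
    printf(KERN_NOTICE "xfrm_user sketch: unknown event %d\n",
           unknown_event);

    /* impossible path: loud, with file and line */
    WARN(1, "soft policy expire not handled\n");
    return 0;
}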